rtesseract 0.0.13 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e3171bd39fe1fc4519084f2ee0783000f4b3165
4
+ data.tar.gz: afa7e1efa0522d498d3586c345803a183fb266b7
5
+ SHA512:
6
+ metadata.gz: aa1e4371da76ca048dfa84ede35ee4432361b39f40ef61ad896441f76b310c4c2771797dce86cb34e5c49d3094d514e9a03bea23371df8d6e8d308623ff01ebb
7
+ data.tar.gz: a22a40faf3866b3d446999b30417c5fe3e481715dcd724c51b0cfa3dca5c01d919049f406b181493b6a60c232cf0c9dfda2c866e1895d286424264af60efa581
data/.document CHANGED
@@ -1,5 +1,5 @@
1
- README.rdoc
2
1
  lib/**/*.rb
3
2
  bin/*
3
+ -
4
4
  features/**/*.feature
5
- LICENSE
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+ gem "rmagick"
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "rspec", "~> 2.8.0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.0"
12
+ gem "jeweler", "~> 1.8.7"
13
+ gem "simplecov", ">= 0"
14
+ gem 'shoulda-matchers'
15
+ end
@@ -0,0 +1,89 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (4.0.1)
5
+ i18n (~> 0.6, >= 0.6.4)
6
+ minitest (~> 4.2)
7
+ multi_json (~> 1.3)
8
+ thread_safe (~> 0.1)
9
+ tzinfo (~> 0.3.37)
10
+ addressable (2.3.5)
11
+ atomic (1.1.14)
12
+ builder (3.2.2)
13
+ diff-lcs (1.1.3)
14
+ docile (1.1.0)
15
+ faraday (0.8.8)
16
+ multipart-post (~> 1.2.0)
17
+ git (1.2.6)
18
+ github_api (0.10.1)
19
+ addressable
20
+ faraday (~> 0.8.1)
21
+ hashie (>= 1.2)
22
+ multi_json (~> 1.4)
23
+ nokogiri (~> 1.5.2)
24
+ oauth2
25
+ hashie (2.0.5)
26
+ highline (1.6.20)
27
+ httpauth (0.2.0)
28
+ i18n (0.6.5)
29
+ jeweler (1.8.8)
30
+ builder
31
+ bundler (~> 1.0)
32
+ git (>= 1.2.5)
33
+ github_api (= 0.10.1)
34
+ highline (>= 1.6.15)
35
+ nokogiri (= 1.5.10)
36
+ rake
37
+ rdoc
38
+ json (1.8.1)
39
+ jwt (0.1.8)
40
+ multi_json (>= 1.5)
41
+ lockfile (2.1.0)
42
+ minitest (4.7.5)
43
+ multi_json (1.8.2)
44
+ multi_xml (0.5.5)
45
+ multipart-post (1.2.0)
46
+ nokogiri (1.5.10)
47
+ oauth2 (0.9.2)
48
+ faraday (~> 0.8)
49
+ httpauth (~> 0.2)
50
+ jwt (~> 0.1.4)
51
+ multi_json (~> 1.0)
52
+ multi_xml (~> 0.5)
53
+ rack (~> 1.2)
54
+ rack (1.5.2)
55
+ rake (10.1.0)
56
+ rdoc (3.12.2)
57
+ json (~> 1.4)
58
+ rmagick (2.13.2)
59
+ rspec (2.8.0)
60
+ rspec-core (~> 2.8.0)
61
+ rspec-expectations (~> 2.8.0)
62
+ rspec-mocks (~> 2.8.0)
63
+ rspec-core (2.8.0)
64
+ rspec-expectations (2.8.0)
65
+ diff-lcs (~> 1.1.2)
66
+ rspec-mocks (2.8.0)
67
+ shoulda-matchers (2.4.0)
68
+ activesupport (>= 3.0.0)
69
+ simplecov (0.8.1)
70
+ docile (~> 1.1.0)
71
+ lockfile (>= 2.1.0)
72
+ multi_json
73
+ simplecov-html (~> 0.8.0)
74
+ simplecov-html (0.8.0)
75
+ thread_safe (0.1.3)
76
+ atomic
77
+ tzinfo (0.3.38)
78
+
79
+ PLATFORMS
80
+ ruby
81
+
82
+ DEPENDENCIES
83
+ bundler (~> 1.0)
84
+ jeweler (~> 1.8.7)
85
+ rdoc (~> 3.12)
86
+ rmagick
87
+ rspec (~> 2.8.0)
88
+ shoulda-matchers
89
+ simplecov
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010 Danilo Jeremias da Silva
1
+ Copyright (c) 2013 Danilo Jeremias da Silva
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -7,7 +7,9 @@ Ruby library for working with the Tesseract OCR.
7
7
  For rtesseract to work properly, the following are needed:
8
8
  * Tesseract - Program
9
9
  * ImageMagick - Program
10
- * RMagick - Gem
10
+ * RMagick or mini_magick - Gem
11
+
12
+ Attention: Version 1.0.0 works with Ruby 2.0 and Tesseract 3.0; lower versions of rtesseract work with Ruby 1.8 and Tesseract 2.0.4.
11
13
 
12
14
  == EXAMPLE USAGE
13
15
 
@@ -65,4 +67,4 @@ It's very simple to use rtesseract:
65
67
 
66
68
  == Copyright
67
69
 
68
- Copyright (c) 2012 Danilo Jeremias da Silva. See LICENSE for details.
70
+ Copyright (c) 2013 Danilo Jeremias da Silva. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,51 +1,45 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ # encoding: utf-8
3
2
 
3
+ require 'rubygems'
4
+ require 'bundler'
4
5
  begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "rtesseract"
8
- gem.version = '0.0.13'
9
- gem.summary = "Ruby library for working with the Tesseract OCR."
10
- gem.description = "Ruby library for working with the Tesseract OCR."
11
- gem.email = "dannnylo@gmail.com"
12
- gem.homepage = "http://github.com/dannnylo/rtesseract"
13
- gem.authors = ["Danilo Jeremias da Silva"]
14
- gem.add_development_dependency "jeweler", ">=1.4.0"
15
- gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
16
-
17
- gem.add_runtime_dependency "rmagick", '>= 2.10.1'
18
- end
19
- Jeweler::GemcutterTasks.new
20
- rescue LoadError
21
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
22
11
  end
12
+ require 'rake'
23
13
 
24
- require 'rake/testtask'
25
- Rake::TestTask.new(:test) do |test|
26
- test.libs << 'lib' << 'test'
27
- test.pattern = 'test/**/test_*.rb'
28
- test.verbose = true
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "rtesseract"
18
+ gem.homepage = "http://github.com/dannnylo/rtesseract"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Ruby library for working with the Tesseract OCR.}
21
+ gem.description = %Q{Ruby library for working with the Tesseract OCR.}
22
+ gem.email = "dannnylo@gmail.com"
23
+ gem.authors = ["Danilo Jeremias da Silva"]
24
+ # dependencies defined in Gemfile
29
25
  end
26
+ Jeweler::RubygemsDotOrgTasks.new
30
27
 
31
- begin
32
- require 'rcov/rcovtask'
33
- Rcov::RcovTask.new do |test|
34
- test.libs << 'test'
35
- test.pattern = 'test/**/test_*.rb'
36
- test.verbose = true
37
- end
38
- rescue LoadError
39
- task :rcov do
40
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
41
- end
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
42
32
  end
43
33
 
44
- task :test => :check_dependencies
34
+ desc "Code coverage detail"
35
+ task :simplecov do
36
+ ENV['COVERAGE'] = "true"
37
+ Rake::Task['spec'].execute
38
+ end
45
39
 
46
- task :default => :test
40
+ task :default => :spec
47
41
 
48
- require 'rake/rdoctask'
42
+ require 'rdoc/task'
49
43
  Rake::RDocTask.new do |rdoc|
50
44
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
51
45
 
@@ -54,4 +48,3 @@ Rake::RDocTask.new do |rdoc|
54
48
  rdoc.rdoc_files.include('README*')
55
49
  rdoc.rdoc_files.include('lib/**/*.rb')
56
50
  end
57
-
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -1,23 +1,17 @@
1
1
  require 'mini_magick'
2
2
  module MiniMagickProcessor
3
+ extend self
3
4
  def image_to_tiff
4
- generate_uid
5
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
6
- cat = @instance || MiniMagick::Image.open(@source.to_s)
5
+ tmp_file = Tempfile.new(["",".tif"])
6
+ cat = @instance || read_with_processor(@source.to_s)
7
7
  cat.format("tif")
8
8
  cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
9
9
  cat.write tmp_file.to_s
10
10
  return tmp_file
11
11
  end
12
12
 
13
- def image_from_blob(blob)
14
- generate_uid
15
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
16
- cat = @instance || MiniMagick::Image.read(blob)
17
- cat.format("tif")
18
- cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
19
- cat.write tmp_file.to_s
20
- return tmp_file
13
+ def read_with_processor(path)
14
+ MiniMagick::Image.open(path.to_s)
21
15
  end
22
16
 
23
17
  def is_a_instance?(object)
@@ -1,21 +1,16 @@
1
1
  require "RMagick"
2
2
  module RMagickProcessor
3
+ extend self
3
4
  def image_to_tiff
4
- generate_uid
5
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
5
+ tmp_file = Tempfile.new(["",".tif"])
6
6
  cat = @instance || Magick::Image.read(@source.to_s).first
7
7
  cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
8
- cat.write tmp_file.to_s
8
+ cat.write tmp_file.path.to_s
9
9
  return tmp_file
10
10
  end
11
11
 
12
- def image_from_blob(blob)
13
- generate_uid
14
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
15
- cat = @instance || Magick::Image.from_blob(blob).first
16
- cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
17
- cat.write tmp_file.to_s
18
- return tmp_file
12
+ def read_with_processor(path)
13
+ Magick::Image.read(path.to_s).first
19
14
  end
20
15
 
21
16
  def is_a_instance?(object)
@@ -1,19 +1,16 @@
1
1
  require "pathname"
2
2
  require "tempfile"
3
- require 'RMagick'
4
3
 
5
4
  require "rtesseract/errors"
6
5
  require "rtesseract/mixed"
7
6
 
8
7
  class RTesseract
9
- VERSION = '0.0.13'
10
8
  attr_accessor :options
11
9
  attr_writer :lang
12
10
  attr_writer :psm
13
11
  attr_reader :processor
14
12
 
15
13
  def initialize(src = "", options = {})
16
- @uid = options.delete(:uid) || nil
17
14
  @command = options.delete(:command) || default_command
18
15
  @lang = options.delete(:lang) || options.delete("lang") || ""
19
16
  @psm = options.delete(:psm) || options.delete("psm") || nil
@@ -41,7 +38,12 @@ class RTesseract
41
38
 
42
39
  def self.read(src = nil, options = {}, &block)
43
40
  raise RTesseract::ImageNotSelectedError if src == nil
44
- image = Magick::Image.read(src.to_s).first
41
+ processor = options.delete(:processor) || options.delete("processor")
42
+ if processor == "mini_magick"
43
+ image = MiniMagickProcessor.read_with_processor(src.to_s)
44
+ else
45
+ image = RMagickProcessor.read_with_processor(src.to_s)
46
+ end
45
47
  yield image
46
48
  object = RTesseract.new("", options)
47
49
  object.from_blob(image.to_blob)
@@ -67,22 +69,14 @@ class RTesseract
67
69
  #Remove files
68
70
  def remove_file(files=[])
69
71
  files.each do |file|
70
- begin
71
- File.unlink(file) if File.exist?(file)
72
- rescue
73
- system "rm -f #{file}"
74
- end
72
+ file.close
73
+ file.unlink
75
74
  end
76
75
  true
77
76
  rescue
78
77
  raise RTesseract::TempFilesNotRemovedError
79
78
  end
80
79
 
81
- def generate_uid
82
- @uid = rand.to_s[2,10] if @uid.nil?
83
- @uid
84
- end
85
-
86
80
  # Select the language
87
81
  #===Languages
88
82
  ## * eng - English
@@ -139,26 +133,24 @@ class RTesseract
139
133
 
140
134
  #Convert image to string
141
135
  def convert
142
- generate_uid
143
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
136
+ path = Tempfile.new(["",".txt"]).path.to_s
144
137
  tmp_image = image_to_tiff
145
- `#{@command} "#{tmp_image}" "#{tmp_file.to_s}" #{lang} #{psm} #{config_file} #{clear_console_output}`
146
- @value = File.read("#{tmp_file.to_s}.txt").to_s
147
- @uid = nil
148
- remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
138
+ `#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}`
139
+ @value = File.read("#{path}").to_s
140
+ remove_file([tmp_image])
149
141
  rescue
150
142
  raise RTesseract::ConversionError
151
143
  end
152
144
 
153
145
  #Read image from memory blob
154
146
  def from_blob(blob)
155
- generate_uid
156
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
157
- tmp_image = image_from_blob(blob)
158
- `#{@command} "#{tmp_image}" "#{tmp_file.to_s}" #{lang} #{psm} #{config_file} #{clear_console_output}`
159
- @value = File.read("#{tmp_file.to_s}.txt").to_s
160
- @uid = nil
161
- remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
147
+ blob_file = Tempfile.new("blob")
148
+ blob_file.write(blob)
149
+ blob_file.rewind
150
+ blob_file.flush
151
+ self.source = blob_file.path
152
+ convert
153
+ remove_file([blob_file])
162
154
  rescue
163
155
  raise RTesseract::ConversionError
164
156
  end
@@ -25,7 +25,7 @@ class RTesseract
25
25
  def convert
26
26
  @value = ""
27
27
  @areas.each do |area|
28
- image = RTesseract.new(@source.to_s,@options)
28
+ image = RTesseract.new(@source.to_s,@options.dup)
29
29
  image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
30
30
  @value << image.to_s
31
31
  end
@@ -5,60 +5,77 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "rtesseract"
8
- s.version = "0.0.13"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = "2012-05-03"
12
+ s.date = "2013-11-21"
13
13
  s.description = "Ruby library for working with the Tesseract OCR."
14
14
  s.email = "dannnylo@gmail.com"
15
15
  s.extra_rdoc_files = [
16
- "LICENSE",
16
+ "LICENSE.txt",
17
17
  "README.rdoc"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
- "LICENSE",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
22
25
  "README.rdoc",
23
26
  "Rakefile",
27
+ "VERSION",
24
28
  "lib/processors/mini_magick.rb",
25
29
  "lib/processors/rmagick.rb",
26
30
  "lib/rtesseract.rb",
27
31
  "lib/rtesseract/errors.rb",
28
32
  "lib/rtesseract/mixed.rb",
29
33
  "rtesseract.gemspec",
30
- "test/helper.rb",
31
- "test/images/mixed.tif",
32
- "test/images/test with spaces.tif",
33
- "test/images/test.bmp",
34
- "test/images/test.jpg",
35
- "test/images/test.png",
36
- "test/images/test.tif",
37
- "test/images/test1.tif",
38
- "test/test_mixed.rb",
39
- "test/test_rtesseract.rb"
34
+ "spec/images/mixed.tif",
35
+ "spec/images/test with spaces.tif",
36
+ "spec/images/test.bmp",
37
+ "spec/images/test.jpg",
38
+ "spec/images/test.png",
39
+ "spec/images/test.tif",
40
+ "spec/images/test1.tif",
41
+ "spec/rtesseract_mixed_spec.rb",
42
+ "spec/rtesseract_spec.rb",
43
+ "spec/spec_helper.rb"
40
44
  ]
41
45
  s.homepage = "http://github.com/dannnylo/rtesseract"
46
+ s.licenses = ["MIT"]
42
47
  s.require_paths = ["lib"]
43
- s.rubygems_version = "1.8.11"
48
+ s.rubygems_version = "2.0.3"
44
49
  s.summary = "Ruby library for working with the Tesseract OCR."
45
50
 
46
51
  if s.respond_to? :specification_version then
47
- s.specification_version = 3
52
+ s.specification_version = 4
48
53
 
49
54
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
- s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
51
- s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
52
- s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
55
+ s.add_runtime_dependency(%q<rmagick>, [">= 0"])
56
+ s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
57
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
58
+ s.add_development_dependency(%q<bundler>, ["~> 1.0"])
59
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.7"])
60
+ s.add_development_dependency(%q<simplecov>, [">= 0"])
61
+ s.add_development_dependency(%q<shoulda-matchers>, [">= 0"])
53
62
  else
54
- s.add_dependency(%q<jeweler>, [">= 1.4.0"])
55
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
56
- s.add_dependency(%q<rmagick>, [">= 2.10.1"])
63
+ s.add_dependency(%q<rmagick>, [">= 0"])
64
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
65
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
66
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
67
+ s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
68
+ s.add_dependency(%q<simplecov>, [">= 0"])
69
+ s.add_dependency(%q<shoulda-matchers>, [">= 0"])
57
70
  end
58
71
  else
59
- s.add_dependency(%q<jeweler>, [">= 1.4.0"])
60
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
61
- s.add_dependency(%q<rmagick>, [">= 2.10.1"])
72
+ s.add_dependency(%q<rmagick>, [">= 0"])
73
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
74
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
76
+ s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
77
+ s.add_dependency(%q<simplecov>, [">= 0"])
78
+ s.add_dependency(%q<shoulda-matchers>, [">= 0"])
62
79
  end
63
80
  end
64
81
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,32 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Rtesseract::Mixed" do
4
+ before do
5
+ @path = Pathname.new(__FILE__.gsub("rtesseract_mixed_spec.rb","")).expand_path
6
+ @image_tiff = @path.join("images","mixed.tif").to_s
7
+ @image2_tiff = @path.join("images","mixed2.tif").to_s
8
+ end
9
+
10
+ it "should be instantiable" do
11
+ RTesseract::Mixed.new.class.should eql(RTesseract::Mixed)
12
+ RTesseract::Mixed.new(@image_tiff).class.should eql(RTesseract::Mixed)
13
+ end
14
+
15
+ it "should translate parts of the image to text" do
16
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:psm=>7}) do |image|
17
+ image.area(28, 19, 25, 25) #position of 4
18
+ image.area(180, 22, 20, 28) # position of 3
19
+ image.area(218, 22, 24, 28) # position of z
20
+ image.area(248, 24, 22, 22) # position of z
21
+ end
22
+ mix_block.to_s_without_spaces.should eql("43ZZ")
23
+
24
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
25
+ {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
26
+ {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
27
+ {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
28
+ {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
29
+ ],:psm=>7})
30
+ mix_block.to_s_without_spaces.should eql("43ZZ")
31
+ end
32
+ end
@@ -0,0 +1,92 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'pathname'
3
+
4
+ describe "Rtesseract" do
5
+ before do
6
+ @path = Pathname.new(__FILE__.gsub("rtesseract_spec.rb","")).expand_path
7
+ @image_tiff = @path.join("images","test.tif").to_s
8
+ end
9
+
10
+ it " be instantiable" do
11
+ RTesseract.new.class.should eql(RTesseract)
12
+ RTesseract.new("").class.should eql(RTesseract)
13
+ RTesseract.new(@image_tiff).class.should eql(RTesseract)
14
+ end
15
+
16
+ it " translate image to text" do
17
+ RTesseract.new(@image_tiff).to_s_without_spaces.should eql("43ZZ")
18
+ RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.should eql("V2V4")
19
+ RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.should eql("V2V4")
20
+ end
21
+
22
+ it " translate images .png, .jpg, .bmp" do
23
+ RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.should eql("HW9W")
24
+ RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.should eql("3R8Z")
25
+ RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces.should eql("ZLA6")
26
+ end
27
+
28
+ it " change the image" do
29
+ image = RTesseract.new(@image_tiff)
30
+ image.to_s_without_spaces.should eql("43ZZ")
31
+ image.source = @path.join("images","test1.tif").to_s
32
+ image.to_s_without_spaces.should eql("V2V4")
33
+ end
34
+
35
+ it " select the language" do
36
+ #English
37
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
38
+ RTesseract.new(@image_tiff,{:lang=>"en"}).lang.should eql(" -l eng ")
39
+ RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang.should eql(" -l eng ")
40
+ RTesseract.new(@image_tiff,{:lang=>"english"}).lang.should eql(" -l eng ")
41
+
42
+ #Portuguese
43
+ RTesseract.new(@image_tiff,{:lang=>"por"}).lang.should eql(" -l por ")
44
+ RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang.should eql(" -l por ")
45
+ RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang.should eql(" -l por ")
46
+ RTesseract.new(@image_tiff,{:lang=>"pt"}).lang.should eql(" -l por ")
47
+ RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang.should eql(" -l por ")
48
+
49
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces.should eql("43ZZ")
50
+ RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces.should eql("43ZZ")
51
+
52
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
53
+ end
54
+
55
+ it " be configurable" do
56
+ RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config.should eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
57
+ RTesseract.new(@image_tiff,{:chop_enable=>0}).config.should eql("chop_enable 0")
58
+ RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config.should eql("chop_enable 0\nenable_assoc 0")
59
+ RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces.should eql("43ZZ")
60
+ end
61
+
62
+ it " crop image" do
63
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(140,10,36,40).to_s_without_spaces.should eql("4")
64
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(180,10,36,40).to_s_without_spaces.should eql("3")
65
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(200,10,36,40).to_s_without_spaces.should eql("Z")
66
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(220,10,30,40).to_s_without_spaces.should eql("Z")
67
+ end
68
+
69
+
70
+ it " read image from blob" do
71
+ image = Magick::Image.read(@path.join("images","test.png").to_s).first
72
+ blob = image.quantize(256,Magick::GRAYColorspace).to_blob
73
+
74
+ test = RTesseract.new("", {:psm => 7})
75
+ test.from_blob(blob)
76
+ test.to_s_without_spaces.should eql("HW9W")
77
+ end
78
+
79
+ it " change image in a block" do
80
+ test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
81
+ image = image.white_threshold(245)
82
+ image = image.quantize(256,Magick::GRAYColorspace)
83
+ end
84
+ test.to_s_without_spaces.should eql("HW9W")
85
+
86
+ test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
87
+ image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
88
+ end
89
+ test.to_s_without_spaces.should eql("3R8Z")
90
+ end
91
+
92
+ end
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require "simplecov"
5
+ SimpleCov.start do
6
+ add_filter "/spec/"
7
+ end
8
+ require 'rtesseract'
9
+ # Requires supporting files with custom matchers and macros, etc,
10
+ # in ./support/ and its subdirectories.
11
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
12
+
13
+ RSpec.configure do |config|
14
+
15
+ end
metadata CHANGED
@@ -1,130 +1,167 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
- version: !ruby/object:Gem::Version
4
- hash: 5
5
- prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 13
10
- version: 0.0.13
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Danilo Jeremias da Silva
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2012-05-03 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-11-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rmagick
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 2.8.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 2.8.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rdoc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '3.12'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '3.12'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ - !ruby/object:Gem::Dependency
21
70
  name: jeweler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 1.8.7
76
+ type: :development
22
77
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 7
29
- segments:
30
- - 1
31
- - 4
32
- - 0
33
- version: 1.4.0
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 1.8.7
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
34
90
  type: :development
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: thoughtbot-shoulda
38
91
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
40
- none: false
41
- requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- hash: 3
45
- segments:
46
- - 0
47
- version: "0"
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: shoulda-matchers
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
48
104
  type: :development
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: rmagick
52
105
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- hash: 37
59
- segments:
60
- - 2
61
- - 10
62
- - 1
63
- version: 2.10.1
64
- type: :runtime
65
- version_requirements: *id003
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
66
111
  description: Ruby library for working with the Tesseract OCR.
67
112
  email: dannnylo@gmail.com
68
113
  executables: []
69
-
70
114
  extensions: []
71
-
72
- extra_rdoc_files:
73
- - LICENSE
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
74
117
  - README.rdoc
75
- files:
118
+ files:
76
119
  - .document
77
- - LICENSE
120
+ - .rspec
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - LICENSE.txt
78
124
  - README.rdoc
79
125
  - Rakefile
126
+ - VERSION
80
127
  - lib/processors/mini_magick.rb
81
128
  - lib/processors/rmagick.rb
82
129
  - lib/rtesseract.rb
83
130
  - lib/rtesseract/errors.rb
84
131
  - lib/rtesseract/mixed.rb
85
132
  - rtesseract.gemspec
86
- - test/helper.rb
87
- - test/images/mixed.tif
88
- - test/images/test with spaces.tif
89
- - test/images/test.bmp
90
- - test/images/test.jpg
91
- - test/images/test.png
92
- - test/images/test.tif
93
- - test/images/test1.tif
94
- - test/test_mixed.rb
95
- - test/test_rtesseract.rb
133
+ - spec/images/mixed.tif
134
+ - spec/images/test with spaces.tif
135
+ - spec/images/test.bmp
136
+ - spec/images/test.jpg
137
+ - spec/images/test.png
138
+ - spec/images/test.tif
139
+ - spec/images/test1.tif
140
+ - spec/rtesseract_mixed_spec.rb
141
+ - spec/rtesseract_spec.rb
142
+ - spec/spec_helper.rb
96
143
  homepage: http://github.com/dannnylo/rtesseract
97
- licenses: []
98
-
144
+ licenses:
145
+ - MIT
146
+ metadata: {}
99
147
  post_install_message:
100
148
  rdoc_options: []
101
-
102
- require_paths:
149
+ require_paths:
103
150
  - lib
104
- required_ruby_version: !ruby/object:Gem::Requirement
105
- none: false
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- hash: 3
110
- segments:
111
- - 0
112
- version: "0"
113
- required_rubygems_version: !ruby/object:Gem::Requirement
114
- none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- hash: 3
119
- segments:
120
- - 0
121
- version: "0"
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - '>='
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '>='
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
122
161
  requirements: []
123
-
124
162
  rubyforge_project:
125
- rubygems_version: 1.8.11
163
+ rubygems_version: 2.0.3
126
164
  signing_key:
127
- specification_version: 3
165
+ specification_version: 4
128
166
  summary: Ruby library for working with the Tesseract OCR.
129
167
  test_files: []
130
-
@@ -1,11 +0,0 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require 'shoulda'
4
-
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
- $LOAD_PATH.unshift(File.dirname(__FILE__))
7
- require 'rtesseract'
8
-
9
- class Test::Unit::TestCase
10
- end
11
-
@@ -1,36 +0,0 @@
1
- require 'helper'
2
- require 'pathname'
3
-
4
- class TestMixed < Test::Unit::TestCase
5
- context "Path" do
6
- setup do
7
- @path = Pathname.new(__FILE__.gsub("test_mixed.rb","")).expand_path
8
- @image_tiff = @path.join("images","mixed.tif").to_s
9
- @image2_tiff = @path.join("images","mixed2.tif").to_s
10
- end
11
-
12
- should "be instantiable" do
13
- assert_equal RTesseract::Mixed.new.class , RTesseract::Mixed
14
- assert_equal RTesseract::Mixed.new(@image_tiff).class , RTesseract::Mixed
15
- end
16
-
17
- should "translate parts of the image to text" do
18
- mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
19
- image.area(28, 19, 25, 25) #position of 4
20
- image.area(180, 22, 20, 28) # position of 3
21
- image.area(218, 22, 24, 28) # position of z
22
- image.area(248, 24, 22, 22) # position of z
23
- end
24
- assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
-
26
- mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
27
- {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
28
- {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
29
- {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
30
- {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
31
- ]})
32
- assert_equal mix_block.to_s_without_spaces , "43ZZ"
33
- end
34
- end
35
- end
36
-
@@ -1,104 +0,0 @@
1
- require 'helper'
2
- require 'pathname'
3
-
4
- class TestRtesseract < Test::Unit::TestCase
5
- context "Path" do
6
- setup do
7
- @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
8
- @image_tiff = @path.join("images","test.tif").to_s
9
- end
10
-
11
- should "be instantiable" do
12
- assert_equal RTesseract.new.class , RTesseract
13
- assert_equal RTesseract.new("").class , RTesseract
14
- assert_equal RTesseract.new(@image_tiff).class , RTesseract
15
- end
16
-
17
- should "translate image to text" do
18
- assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
19
- assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
20
- assert_equal RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces , "V2V4"
21
- end
22
-
23
- should "translate images .png, .jpg, .bmp" do
24
- assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
25
- assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
26
- assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
27
- end
28
-
29
- should "change the image" do
30
- image = RTesseract.new(@image_tiff)
31
- assert_equal image.to_s_without_spaces,"43ZZ"
32
- image.source = @path.join("images","test1.tif").to_s
33
- assert_equal image.to_s_without_spaces,"V2V4"
34
- end
35
-
36
- should "select the language" do
37
- #English
38
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
39
- assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
40
- assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
41
- assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
42
-
43
- #Portuguese
44
- assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
45
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
46
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
47
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
48
- assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
49
-
50
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
51
- assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
52
-
53
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
54
- end
55
-
56
- should "be configurable" do
57
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
58
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
59
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config , "chop_enable 0\nenable_assoc 0"
60
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces , "43ZZ"
61
- end
62
-
63
- should "crop image" do
64
- assert_equal RTesseract.new(@image_tiff).crop!(140,10,36,40).to_s_without_spaces, "4"
65
- assert_equal RTesseract.new(@image_tiff).crop!(180,10,36,40).to_s_without_spaces, "3"
66
- assert_equal RTesseract.new(@image_tiff).crop!(200,10,36,40).to_s_without_spaces, "Z"
67
- assert_equal RTesseract.new(@image_tiff).crop!(220,10,30,40).to_s_without_spaces, "Z"
68
- end
69
-
70
- should "unique uid" do
71
- assert_not_equal RTesseract.new(@image_tiff).generate_uid , RTesseract.new(@image_tiff).generate_uid
72
- end
73
-
74
- should "generate a unique id" do
75
- reg = RTesseract.new(@image_tiff)
76
- assert_equal reg.generate_uid , reg.generate_uid
77
- value = reg.generate_uid
78
- reg.convert
79
- assert_not_equal value , reg.generate_uid
80
- end
81
-
82
- should "read image from blob" do
83
- image = Magick::Image.read(@path.join("images","test.png").to_s).first
84
- blob = image.white_threshold(245).quantize(256,Magick::GRAYColorspace).to_blob
85
-
86
- test = RTesseract.new
87
- test.from_blob(blob)
88
- assert_equal test.to_s_without_spaces , "HW9W"
89
- end
90
-
91
- should "change image in a block" do
92
- test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
93
- image = image.white_threshold(245)
94
- image = image.quantize(256,Magick::GRAYColorspace)
95
- end
96
- assert_equal test.to_s_without_spaces , "HW9W"
97
-
98
- test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
99
- image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
100
- end
101
- assert_equal test.to_s_without_spaces , "3R8Z"
102
- end
103
- end
104
- end