rtesseract 0.0.13 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e3171bd39fe1fc4519084f2ee0783000f4b3165
4
+ data.tar.gz: afa7e1efa0522d498d3586c345803a183fb266b7
5
+ SHA512:
6
+ metadata.gz: aa1e4371da76ca048dfa84ede35ee4432361b39f40ef61ad896441f76b310c4c2771797dce86cb34e5c49d3094d514e9a03bea23371df8d6e8d308623ff01ebb
7
+ data.tar.gz: a22a40faf3866b3d446999b30417c5fe3e481715dcd724c51b0cfa3dca5c01d919049f406b181493b6a60c232cf0c9dfda2c866e1895d286424264af60efa581
data/.document CHANGED
@@ -1,5 +1,5 @@
1
- README.rdoc
2
1
  lib/**/*.rb
3
2
  bin/*
3
+ -
4
4
  features/**/*.feature
5
- LICENSE
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+ gem "rmagick"
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "rspec", "~> 2.8.0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.0"
12
+ gem "jeweler", "~> 1.8.7"
13
+ gem "simplecov", ">= 0"
14
+ gem 'shoulda-matchers'
15
+ end
@@ -0,0 +1,89 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (4.0.1)
5
+ i18n (~> 0.6, >= 0.6.4)
6
+ minitest (~> 4.2)
7
+ multi_json (~> 1.3)
8
+ thread_safe (~> 0.1)
9
+ tzinfo (~> 0.3.37)
10
+ addressable (2.3.5)
11
+ atomic (1.1.14)
12
+ builder (3.2.2)
13
+ diff-lcs (1.1.3)
14
+ docile (1.1.0)
15
+ faraday (0.8.8)
16
+ multipart-post (~> 1.2.0)
17
+ git (1.2.6)
18
+ github_api (0.10.1)
19
+ addressable
20
+ faraday (~> 0.8.1)
21
+ hashie (>= 1.2)
22
+ multi_json (~> 1.4)
23
+ nokogiri (~> 1.5.2)
24
+ oauth2
25
+ hashie (2.0.5)
26
+ highline (1.6.20)
27
+ httpauth (0.2.0)
28
+ i18n (0.6.5)
29
+ jeweler (1.8.8)
30
+ builder
31
+ bundler (~> 1.0)
32
+ git (>= 1.2.5)
33
+ github_api (= 0.10.1)
34
+ highline (>= 1.6.15)
35
+ nokogiri (= 1.5.10)
36
+ rake
37
+ rdoc
38
+ json (1.8.1)
39
+ jwt (0.1.8)
40
+ multi_json (>= 1.5)
41
+ lockfile (2.1.0)
42
+ minitest (4.7.5)
43
+ multi_json (1.8.2)
44
+ multi_xml (0.5.5)
45
+ multipart-post (1.2.0)
46
+ nokogiri (1.5.10)
47
+ oauth2 (0.9.2)
48
+ faraday (~> 0.8)
49
+ httpauth (~> 0.2)
50
+ jwt (~> 0.1.4)
51
+ multi_json (~> 1.0)
52
+ multi_xml (~> 0.5)
53
+ rack (~> 1.2)
54
+ rack (1.5.2)
55
+ rake (10.1.0)
56
+ rdoc (3.12.2)
57
+ json (~> 1.4)
58
+ rmagick (2.13.2)
59
+ rspec (2.8.0)
60
+ rspec-core (~> 2.8.0)
61
+ rspec-expectations (~> 2.8.0)
62
+ rspec-mocks (~> 2.8.0)
63
+ rspec-core (2.8.0)
64
+ rspec-expectations (2.8.0)
65
+ diff-lcs (~> 1.1.2)
66
+ rspec-mocks (2.8.0)
67
+ shoulda-matchers (2.4.0)
68
+ activesupport (>= 3.0.0)
69
+ simplecov (0.8.1)
70
+ docile (~> 1.1.0)
71
+ lockfile (>= 2.1.0)
72
+ multi_json
73
+ simplecov-html (~> 0.8.0)
74
+ simplecov-html (0.8.0)
75
+ thread_safe (0.1.3)
76
+ atomic
77
+ tzinfo (0.3.38)
78
+
79
+ PLATFORMS
80
+ ruby
81
+
82
+ DEPENDENCIES
83
+ bundler (~> 1.0)
84
+ jeweler (~> 1.8.7)
85
+ rdoc (~> 3.12)
86
+ rmagick
87
+ rspec (~> 2.8.0)
88
+ shoulda-matchers
89
+ simplecov
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010 Danilo Jeremias da Silva
1
+ Copyright (c) 2013 Danilo Jeremias da Silva
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -7,7 +7,9 @@ Ruby library for working with the Tesseract OCR.
7
7
  To work properly, rtesseract requires:
8
8
  * Tesseract - Program
9
9
  * ImageMagick - Program
10
- * RMagick - Gem
10
+ * RMagick or mini_magick - Gem
11
+
12
+ Attention: Version 1.0.0 works with Ruby 2.0 and Tesseract 3.0; lower versions of rtesseract work with Ruby 1.8 and Tesseract 2.0.4.
11
13
 
12
14
  == EXAMPLE USAGE
13
15
 
@@ -65,4 +67,4 @@ It's very simple to use rtesseract:
65
67
 
66
68
  == Copyright
67
69
 
68
- Copyright (c) 2012 Danilo Jeremias da Silva. See LICENSE for details.
70
+ Copyright (c) 2013 Danilo Jeremias da Silva. See LICENSE.txt for details.
data/Rakefile CHANGED
@@ -1,51 +1,45 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ # encoding: utf-8
3
2
 
3
+ require 'rubygems'
4
+ require 'bundler'
4
5
  begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "rtesseract"
8
- gem.version = '0.0.13'
9
- gem.summary = "Ruby library for working with the Tesseract OCR."
10
- gem.description = "Ruby library for working with the Tesseract OCR."
11
- gem.email = "dannnylo@gmail.com"
12
- gem.homepage = "http://github.com/dannnylo/rtesseract"
13
- gem.authors = ["Danilo Jeremias da Silva"]
14
- gem.add_development_dependency "jeweler", ">=1.4.0"
15
- gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
16
-
17
- gem.add_runtime_dependency "rmagick", '>= 2.10.1'
18
- end
19
- Jeweler::GemcutterTasks.new
20
- rescue LoadError
21
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
22
11
  end
12
+ require 'rake'
23
13
 
24
- require 'rake/testtask'
25
- Rake::TestTask.new(:test) do |test|
26
- test.libs << 'lib' << 'test'
27
- test.pattern = 'test/**/test_*.rb'
28
- test.verbose = true
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "rtesseract"
18
+ gem.homepage = "http://github.com/dannnylo/rtesseract"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Ruby library for working with the Tesseract OCR.}
21
+ gem.description = %Q{Ruby library for working with the Tesseract OCR.}
22
+ gem.email = "dannnylo@gmail.com"
23
+ gem.authors = ["Danilo Jeremias da Silva"]
24
+ # dependencies defined in Gemfile
29
25
  end
26
+ Jeweler::RubygemsDotOrgTasks.new
30
27
 
31
- begin
32
- require 'rcov/rcovtask'
33
- Rcov::RcovTask.new do |test|
34
- test.libs << 'test'
35
- test.pattern = 'test/**/test_*.rb'
36
- test.verbose = true
37
- end
38
- rescue LoadError
39
- task :rcov do
40
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
41
- end
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
42
32
  end
43
33
 
44
- task :test => :check_dependencies
34
+ desc "Code coverage detail"
35
+ task :simplecov do
36
+ ENV['COVERAGE'] = "true"
37
+ Rake::Task['spec'].execute
38
+ end
45
39
 
46
- task :default => :test
40
+ task :default => :spec
47
41
 
48
- require 'rake/rdoctask'
42
+ require 'rdoc/task'
49
43
  Rake::RDocTask.new do |rdoc|
50
44
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
51
45
 
@@ -54,4 +48,3 @@ Rake::RDocTask.new do |rdoc|
54
48
  rdoc.rdoc_files.include('README*')
55
49
  rdoc.rdoc_files.include('lib/**/*.rb')
56
50
  end
57
-
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -1,23 +1,17 @@
1
1
  require 'mini_magick'
2
2
  module MiniMagickProcessor
3
+ extend self
3
4
  def image_to_tiff
4
- generate_uid
5
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
6
- cat = @instance || MiniMagick::Image.open(@source.to_s)
5
+ tmp_file = Tempfile.new(["",".tif"])
6
+ cat = @instance || read_with_processor(@source.to_s)
7
7
  cat.format("tif")
8
8
  cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
9
9
  cat.write tmp_file.to_s
10
10
  return tmp_file
11
11
  end
12
12
 
13
- def image_from_blob(blob)
14
- generate_uid
15
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
16
- cat = @instance || MiniMagick::Image.read(blob)
17
- cat.format("tif")
18
- cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
19
- cat.write tmp_file.to_s
20
- return tmp_file
13
+ def read_with_processor(path)
14
+ MiniMagick::Image.open(path.to_s)
21
15
  end
22
16
 
23
17
  def is_a_instance?(object)
@@ -1,21 +1,16 @@
1
1
  require "RMagick"
2
2
  module RMagickProcessor
3
+ extend self
3
4
  def image_to_tiff
4
- generate_uid
5
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
5
+ tmp_file = Tempfile.new(["",".tif"])
6
6
  cat = @instance || Magick::Image.read(@source.to_s).first
7
7
  cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
8
- cat.write tmp_file.to_s
8
+ cat.write tmp_file.path.to_s
9
9
  return tmp_file
10
10
  end
11
11
 
12
- def image_from_blob(blob)
13
- generate_uid
14
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
15
- cat = @instance || Magick::Image.from_blob(blob).first
16
- cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
17
- cat.write tmp_file.to_s
18
- return tmp_file
12
+ def read_with_processor(path)
13
+ Magick::Image.read(path.to_s).first
19
14
  end
20
15
 
21
16
  def is_a_instance?(object)
@@ -1,19 +1,16 @@
1
1
  require "pathname"
2
2
  require "tempfile"
3
- require 'RMagick'
4
3
 
5
4
  require "rtesseract/errors"
6
5
  require "rtesseract/mixed"
7
6
 
8
7
  class RTesseract
9
- VERSION = '0.0.13'
10
8
  attr_accessor :options
11
9
  attr_writer :lang
12
10
  attr_writer :psm
13
11
  attr_reader :processor
14
12
 
15
13
  def initialize(src = "", options = {})
16
- @uid = options.delete(:uid) || nil
17
14
  @command = options.delete(:command) || default_command
18
15
  @lang = options.delete(:lang) || options.delete("lang") || ""
19
16
  @psm = options.delete(:psm) || options.delete("psm") || nil
@@ -41,7 +38,12 @@ class RTesseract
41
38
 
42
39
  def self.read(src = nil, options = {}, &block)
43
40
  raise RTesseract::ImageNotSelectedError if src == nil
44
- image = Magick::Image.read(src.to_s).first
41
+ processor = options.delete(:processor) || options.delete("processor")
42
+ if processor == "mini_magick"
43
+ image = MiniMagickProcessor.read_with_processor(src.to_s)
44
+ else
45
+ image = RMagickProcessor.read_with_processor(src.to_s)
46
+ end
45
47
  yield image
46
48
  object = RTesseract.new("", options)
47
49
  object.from_blob(image.to_blob)
@@ -67,22 +69,14 @@ class RTesseract
67
69
  #Remove files
68
70
  def remove_file(files=[])
69
71
  files.each do |file|
70
- begin
71
- File.unlink(file) if File.exist?(file)
72
- rescue
73
- system "rm -f #{file}"
74
- end
72
+ file.close
73
+ file.unlink
75
74
  end
76
75
  true
77
76
  rescue
78
77
  raise RTesseract::TempFilesNotRemovedError
79
78
  end
80
79
 
81
- def generate_uid
82
- @uid = rand.to_s[2,10] if @uid.nil?
83
- @uid
84
- end
85
-
86
80
  # Select the language
87
81
  #===Languages
88
82
  ## * eng - English
@@ -139,26 +133,24 @@ class RTesseract
139
133
 
140
134
  #Convert image to string
141
135
  def convert
142
- generate_uid
143
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
136
+ path = Tempfile.new(["",".txt"]).path.to_s
144
137
  tmp_image = image_to_tiff
145
- `#{@command} "#{tmp_image}" "#{tmp_file.to_s}" #{lang} #{psm} #{config_file} #{clear_console_output}`
146
- @value = File.read("#{tmp_file.to_s}.txt").to_s
147
- @uid = nil
148
- remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
138
+ `#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}`
139
+ @value = File.read("#{path}").to_s
140
+ remove_file([tmp_image])
149
141
  rescue
150
142
  raise RTesseract::ConversionError
151
143
  end
152
144
 
153
145
  #Read image from memory blob
154
146
  def from_blob(blob)
155
- generate_uid
156
- tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
157
- tmp_image = image_from_blob(blob)
158
- `#{@command} "#{tmp_image}" "#{tmp_file.to_s}" #{lang} #{psm} #{config_file} #{clear_console_output}`
159
- @value = File.read("#{tmp_file.to_s}.txt").to_s
160
- @uid = nil
161
- remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
147
+ blob_file = Tempfile.new("blob")
148
+ blob_file.write(blob)
149
+ blob_file.rewind
150
+ blob_file.flush
151
+ self.source = blob_file.path
152
+ convert
153
+ remove_file([blob_file])
162
154
  rescue
163
155
  raise RTesseract::ConversionError
164
156
  end
@@ -25,7 +25,7 @@ class RTesseract
25
25
  def convert
26
26
  @value = ""
27
27
  @areas.each do |area|
28
- image = RTesseract.new(@source.to_s,@options)
28
+ image = RTesseract.new(@source.to_s,@options.dup)
29
29
  image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
30
30
  @value << image.to_s
31
31
  end
@@ -5,60 +5,77 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "rtesseract"
8
- s.version = "0.0.13"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = "2012-05-03"
12
+ s.date = "2013-11-21"
13
13
  s.description = "Ruby library for working with the Tesseract OCR."
14
14
  s.email = "dannnylo@gmail.com"
15
15
  s.extra_rdoc_files = [
16
- "LICENSE",
16
+ "LICENSE.txt",
17
17
  "README.rdoc"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
- "LICENSE",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
22
25
  "README.rdoc",
23
26
  "Rakefile",
27
+ "VERSION",
24
28
  "lib/processors/mini_magick.rb",
25
29
  "lib/processors/rmagick.rb",
26
30
  "lib/rtesseract.rb",
27
31
  "lib/rtesseract/errors.rb",
28
32
  "lib/rtesseract/mixed.rb",
29
33
  "rtesseract.gemspec",
30
- "test/helper.rb",
31
- "test/images/mixed.tif",
32
- "test/images/test with spaces.tif",
33
- "test/images/test.bmp",
34
- "test/images/test.jpg",
35
- "test/images/test.png",
36
- "test/images/test.tif",
37
- "test/images/test1.tif",
38
- "test/test_mixed.rb",
39
- "test/test_rtesseract.rb"
34
+ "spec/images/mixed.tif",
35
+ "spec/images/test with spaces.tif",
36
+ "spec/images/test.bmp",
37
+ "spec/images/test.jpg",
38
+ "spec/images/test.png",
39
+ "spec/images/test.tif",
40
+ "spec/images/test1.tif",
41
+ "spec/rtesseract_mixed_spec.rb",
42
+ "spec/rtesseract_spec.rb",
43
+ "spec/spec_helper.rb"
40
44
  ]
41
45
  s.homepage = "http://github.com/dannnylo/rtesseract"
46
+ s.licenses = ["MIT"]
42
47
  s.require_paths = ["lib"]
43
- s.rubygems_version = "1.8.11"
48
+ s.rubygems_version = "2.0.3"
44
49
  s.summary = "Ruby library for working with the Tesseract OCR."
45
50
 
46
51
  if s.respond_to? :specification_version then
47
- s.specification_version = 3
52
+ s.specification_version = 4
48
53
 
49
54
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
- s.add_development_dependency(%q<jeweler>, [">= 1.4.0"])
51
- s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
52
- s.add_runtime_dependency(%q<rmagick>, [">= 2.10.1"])
55
+ s.add_runtime_dependency(%q<rmagick>, [">= 0"])
56
+ s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
57
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
58
+ s.add_development_dependency(%q<bundler>, ["~> 1.0"])
59
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.7"])
60
+ s.add_development_dependency(%q<simplecov>, [">= 0"])
61
+ s.add_development_dependency(%q<shoulda-matchers>, [">= 0"])
53
62
  else
54
- s.add_dependency(%q<jeweler>, [">= 1.4.0"])
55
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
56
- s.add_dependency(%q<rmagick>, [">= 2.10.1"])
63
+ s.add_dependency(%q<rmagick>, [">= 0"])
64
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
65
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
66
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
67
+ s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
68
+ s.add_dependency(%q<simplecov>, [">= 0"])
69
+ s.add_dependency(%q<shoulda-matchers>, [">= 0"])
57
70
  end
58
71
  else
59
- s.add_dependency(%q<jeweler>, [">= 1.4.0"])
60
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
61
- s.add_dependency(%q<rmagick>, [">= 2.10.1"])
72
+ s.add_dependency(%q<rmagick>, [">= 0"])
73
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
74
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
+ s.add_dependency(%q<bundler>, ["~> 1.0"])
76
+ s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
77
+ s.add_dependency(%q<simplecov>, [">= 0"])
78
+ s.add_dependency(%q<shoulda-matchers>, [">= 0"])
62
79
  end
63
80
  end
64
81
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,32 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Rtesseract::Mixed" do
4
+ before do
5
+ @path = Pathname.new(__FILE__.gsub("rtesseract_mixed_spec.rb","")).expand_path
6
+ @image_tiff = @path.join("images","mixed.tif").to_s
7
+ @image2_tiff = @path.join("images","mixed2.tif").to_s
8
+ end
9
+
10
+ it "should be instantiable" do
11
+ RTesseract::Mixed.new.class.should eql(RTesseract::Mixed)
12
+ RTesseract::Mixed.new(@image_tiff).class.should eql(RTesseract::Mixed)
13
+ end
14
+
15
+ it "should translate parts of the image to text" do
16
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:psm=>7}) do |image|
17
+ image.area(28, 19, 25, 25) #position of 4
18
+ image.area(180, 22, 20, 28) # position of 3
19
+ image.area(218, 22, 24, 28) # position of z
20
+ image.area(248, 24, 22, 22) # position of z
21
+ end
22
+ mix_block.to_s_without_spaces.should eql("43ZZ")
23
+
24
+ mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
25
+ {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
26
+ {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
27
+ {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
28
+ {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
29
+ ],:psm=>7})
30
+ mix_block.to_s_without_spaces.should eql("43ZZ")
31
+ end
32
+ end
@@ -0,0 +1,92 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'pathname'
3
+
4
+ describe "Rtesseract" do
5
+ before do
6
+ @path = Pathname.new(__FILE__.gsub("rtesseract_spec.rb","")).expand_path
7
+ @image_tiff = @path.join("images","test.tif").to_s
8
+ end
9
+
10
+ it " be instantiable" do
11
+ RTesseract.new.class.should eql(RTesseract)
12
+ RTesseract.new("").class.should eql(RTesseract)
13
+ RTesseract.new(@image_tiff).class.should eql(RTesseract)
14
+ end
15
+
16
+ it " translate image to text" do
17
+ RTesseract.new(@image_tiff).to_s_without_spaces.should eql("43ZZ")
18
+ RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.should eql("V2V4")
19
+ RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.should eql("V2V4")
20
+ end
21
+
22
+ it " translate images .png, .jpg, .bmp" do
23
+ RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.should eql("HW9W")
24
+ RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.should eql("3R8Z")
25
+ RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces.should eql("ZLA6")
26
+ end
27
+
28
+ it " change the image" do
29
+ image = RTesseract.new(@image_tiff)
30
+ image.to_s_without_spaces.should eql("43ZZ")
31
+ image.source = @path.join("images","test1.tif").to_s
32
+ image.to_s_without_spaces.should eql("V2V4")
33
+ end
34
+
35
+ it " select the language" do
36
+ #English
37
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
38
+ RTesseract.new(@image_tiff,{:lang=>"en"}).lang.should eql(" -l eng ")
39
+ RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang.should eql(" -l eng ")
40
+ RTesseract.new(@image_tiff,{:lang=>"english"}).lang.should eql(" -l eng ")
41
+
42
+ #Portuguese
43
+ RTesseract.new(@image_tiff,{:lang=>"por"}).lang.should eql(" -l por ")
44
+ RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang.should eql(" -l por ")
45
+ RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang.should eql(" -l por ")
46
+ RTesseract.new(@image_tiff,{:lang=>"pt"}).lang.should eql(" -l por ")
47
+ RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang.should eql(" -l por ")
48
+
49
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces.should eql("43ZZ")
50
+ RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces.should eql("43ZZ")
51
+
52
+ RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
53
+ end
54
+
55
+ it " be configurable" do
56
+ RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config.should eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
57
+ RTesseract.new(@image_tiff,{:chop_enable=>0}).config.should eql("chop_enable 0")
58
+ RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config.should eql("chop_enable 0\nenable_assoc 0")
59
+ RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces.should eql("43ZZ")
60
+ end
61
+
62
+ it " crop image" do
63
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(140,10,36,40).to_s_without_spaces.should eql("4")
64
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(180,10,36,40).to_s_without_spaces.should eql("3")
65
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(200,10,36,40).to_s_without_spaces.should eql("Z")
66
+ RTesseract.new(@image_tiff,{:psm=>7}).crop!(220,10,30,40).to_s_without_spaces.should eql("Z")
67
+ end
68
+
69
+
70
+ it " read image from blob" do
71
+ image = Magick::Image.read(@path.join("images","test.png").to_s).first
72
+ blob = image.quantize(256,Magick::GRAYColorspace).to_blob
73
+
74
+ test = RTesseract.new("", {:psm => 7})
75
+ test.from_blob(blob)
76
+ test.to_s_without_spaces.should eql("HW9W")
77
+ end
78
+
79
+ it " change image in a block" do
80
+ test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
81
+ image = image.white_threshold(245)
82
+ image = image.quantize(256,Magick::GRAYColorspace)
83
+ end
84
+ test.to_s_without_spaces.should eql("HW9W")
85
+
86
+ test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
87
+ image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
88
+ end
89
+ test.to_s_without_spaces.should eql("3R8Z")
90
+ end
91
+
92
+ end
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require "simplecov"
5
+ SimpleCov.start do
6
+ add_filter "/spec/"
7
+ end
8
+ require 'rtesseract'
9
+ # Requires supporting files with custom matchers and macros, etc,
10
+ # in ./support/ and its subdirectories.
11
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
12
+
13
+ RSpec.configure do |config|
14
+
15
+ end
metadata CHANGED
@@ -1,130 +1,167 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
- version: !ruby/object:Gem::Version
4
- hash: 5
5
- prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 13
10
- version: 0.0.13
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Danilo Jeremias da Silva
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2012-05-03 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-11-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rmagick
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 2.8.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 2.8.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rdoc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '3.12'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '3.12'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ - !ruby/object:Gem::Dependency
21
70
  name: jeweler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 1.8.7
76
+ type: :development
22
77
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 7
29
- segments:
30
- - 1
31
- - 4
32
- - 0
33
- version: 1.4.0
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 1.8.7
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
34
90
  type: :development
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: thoughtbot-shoulda
38
91
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
40
- none: false
41
- requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- hash: 3
45
- segments:
46
- - 0
47
- version: "0"
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: shoulda-matchers
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
48
104
  type: :development
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: rmagick
52
105
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- hash: 37
59
- segments:
60
- - 2
61
- - 10
62
- - 1
63
- version: 2.10.1
64
- type: :runtime
65
- version_requirements: *id003
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
66
111
  description: Ruby library for working with the Tesseract OCR.
67
112
  email: dannnylo@gmail.com
68
113
  executables: []
69
-
70
114
  extensions: []
71
-
72
- extra_rdoc_files:
73
- - LICENSE
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
74
117
  - README.rdoc
75
- files:
118
+ files:
76
119
  - .document
77
- - LICENSE
120
+ - .rspec
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - LICENSE.txt
78
124
  - README.rdoc
79
125
  - Rakefile
126
+ - VERSION
80
127
  - lib/processors/mini_magick.rb
81
128
  - lib/processors/rmagick.rb
82
129
  - lib/rtesseract.rb
83
130
  - lib/rtesseract/errors.rb
84
131
  - lib/rtesseract/mixed.rb
85
132
  - rtesseract.gemspec
86
- - test/helper.rb
87
- - test/images/mixed.tif
88
- - test/images/test with spaces.tif
89
- - test/images/test.bmp
90
- - test/images/test.jpg
91
- - test/images/test.png
92
- - test/images/test.tif
93
- - test/images/test1.tif
94
- - test/test_mixed.rb
95
- - test/test_rtesseract.rb
133
+ - spec/images/mixed.tif
134
+ - spec/images/test with spaces.tif
135
+ - spec/images/test.bmp
136
+ - spec/images/test.jpg
137
+ - spec/images/test.png
138
+ - spec/images/test.tif
139
+ - spec/images/test1.tif
140
+ - spec/rtesseract_mixed_spec.rb
141
+ - spec/rtesseract_spec.rb
142
+ - spec/spec_helper.rb
96
143
  homepage: http://github.com/dannnylo/rtesseract
97
- licenses: []
98
-
144
+ licenses:
145
+ - MIT
146
+ metadata: {}
99
147
  post_install_message:
100
148
  rdoc_options: []
101
-
102
- require_paths:
149
+ require_paths:
103
150
  - lib
104
- required_ruby_version: !ruby/object:Gem::Requirement
105
- none: false
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- hash: 3
110
- segments:
111
- - 0
112
- version: "0"
113
- required_rubygems_version: !ruby/object:Gem::Requirement
114
- none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- hash: 3
119
- segments:
120
- - 0
121
- version: "0"
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - '>='
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '>='
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
122
161
  requirements: []
123
-
124
162
  rubyforge_project:
125
- rubygems_version: 1.8.11
163
+ rubygems_version: 2.0.3
126
164
  signing_key:
127
- specification_version: 3
165
+ specification_version: 4
128
166
  summary: Ruby library for working with the Tesseract OCR.
129
167
  test_files: []
130
-
@@ -1,11 +0,0 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require 'shoulda'
4
-
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
- $LOAD_PATH.unshift(File.dirname(__FILE__))
7
- require 'rtesseract'
8
-
9
- class Test::Unit::TestCase
10
- end
11
-
@@ -1,36 +0,0 @@
1
- require 'helper'
2
- require 'pathname'
3
-
4
- class TestMixed < Test::Unit::TestCase
5
- context "Path" do
6
- setup do
7
- @path = Pathname.new(__FILE__.gsub("test_mixed.rb","")).expand_path
8
- @image_tiff = @path.join("images","mixed.tif").to_s
9
- @image2_tiff = @path.join("images","mixed2.tif").to_s
10
- end
11
-
12
- should "be instantiable" do
13
- assert_equal RTesseract::Mixed.new.class , RTesseract::Mixed
14
- assert_equal RTesseract::Mixed.new(@image_tiff).class , RTesseract::Mixed
15
- end
16
-
17
- should "translate parts of the image to text" do
18
- mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
19
- image.area(28, 19, 25, 25) #position of 4
20
- image.area(180, 22, 20, 28) # position of 3
21
- image.area(218, 22, 24, 28) # position of z
22
- image.area(248, 24, 22, 22) # position of z
23
- end
24
- assert_equal mix_block.to_s_without_spaces , "43ZZ"
25
-
26
- mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
27
- {:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
28
- {:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
29
- {:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
30
- {:x => 248, :y=>24, :width=>22, :height=>22} # position of z
31
- ]})
32
- assert_equal mix_block.to_s_without_spaces , "43ZZ"
33
- end
34
- end
35
- end
36
-
@@ -1,104 +0,0 @@
1
- require 'helper'
2
- require 'pathname'
3
-
4
- class TestRtesseract < Test::Unit::TestCase
5
- context "Path" do
6
- setup do
7
- @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
8
- @image_tiff = @path.join("images","test.tif").to_s
9
- end
10
-
11
- should "be instantiable" do
12
- assert_equal RTesseract.new.class , RTesseract
13
- assert_equal RTesseract.new("").class , RTesseract
14
- assert_equal RTesseract.new(@image_tiff).class , RTesseract
15
- end
16
-
17
- should "translate image to text" do
18
- assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
19
- assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
20
- assert_equal RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces , "V2V4"
21
- end
22
-
23
- should "translate images .png, .jpg, .bmp" do
24
- assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
25
- assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
26
- assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
27
- end
28
-
29
- should "change the image" do
30
- image = RTesseract.new(@image_tiff)
31
- assert_equal image.to_s_without_spaces,"43ZZ"
32
- image.source = @path.join("images","test1.tif").to_s
33
- assert_equal image.to_s_without_spaces,"V2V4"
34
- end
35
-
36
- should "select the language" do
37
- #English
38
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
39
- assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
40
- assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
41
- assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
42
-
43
- #Portuguese
44
- assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
45
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
46
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
47
- assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
48
- assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
49
-
50
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
51
- assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
52
-
53
- assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
54
- end
55
-
56
- should "be configurable" do
57
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
58
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
59
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config , "chop_enable 0\nenable_assoc 0"
60
- assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces , "43ZZ"
61
- end
62
-
63
- should "crop image" do
64
- assert_equal RTesseract.new(@image_tiff).crop!(140,10,36,40).to_s_without_spaces, "4"
65
- assert_equal RTesseract.new(@image_tiff).crop!(180,10,36,40).to_s_without_spaces, "3"
66
- assert_equal RTesseract.new(@image_tiff).crop!(200,10,36,40).to_s_without_spaces, "Z"
67
- assert_equal RTesseract.new(@image_tiff).crop!(220,10,30,40).to_s_without_spaces, "Z"
68
- end
69
-
70
- should "unique uid" do
71
- assert_not_equal RTesseract.new(@image_tiff).generate_uid , RTesseract.new(@image_tiff).generate_uid
72
- end
73
-
74
- should "generate a unique id" do
75
- reg = RTesseract.new(@image_tiff)
76
- assert_equal reg.generate_uid , reg.generate_uid
77
- value = reg.generate_uid
78
- reg.convert
79
- assert_not_equal value , reg.generate_uid
80
- end
81
-
82
- should "read image from blob" do
83
- image = Magick::Image.read(@path.join("images","test.png").to_s).first
84
- blob = image.white_threshold(245).quantize(256,Magick::GRAYColorspace).to_blob
85
-
86
- test = RTesseract.new
87
- test.from_blob(blob)
88
- assert_equal test.to_s_without_spaces , "HW9W"
89
- end
90
-
91
- should "change image in a block" do
92
- test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
93
- image = image.white_threshold(245)
94
- image = image.quantize(256,Magick::GRAYColorspace)
95
- end
96
- assert_equal test.to_s_without_spaces , "HW9W"
97
-
98
- test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
99
- image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
100
- end
101
- assert_equal test.to_s_without_spaces , "3R8Z"
102
- end
103
- end
104
- end