rtesseract 0.0.13 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +2 -2
- data/.rspec +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +89 -0
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.rdoc +4 -2
- data/Rakefile +32 -39
- data/VERSION +1 -0
- data/lib/processors/mini_magick.rb +5 -11
- data/lib/processors/rmagick.rb +5 -10
- data/lib/rtesseract.rb +19 -27
- data/lib/rtesseract/mixed.rb +1 -1
- data/rtesseract.gemspec +42 -25
- data/{test → spec}/images/mixed.tif +0 -0
- data/{test → spec}/images/test with spaces.tif +0 -0
- data/{test → spec}/images/test.bmp +0 -0
- data/{test → spec}/images/test.jpg +0 -0
- data/{test → spec}/images/test.png +0 -0
- data/{test → spec}/images/test.tif +0 -0
- data/{test → spec}/images/test1.tif +0 -0
- data/spec/rtesseract_mixed_spec.rb +32 -0
- data/spec/rtesseract_spec.rb +92 -0
- data/spec/spec_helper.rb +15 -0
- metadata +132 -95
- data/test/helper.rb +0 -11
- data/test/test_mixed.rb +0 -36
- data/test/test_rtesseract.rb +0 -104
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 5e3171bd39fe1fc4519084f2ee0783000f4b3165
|
|
4
|
+
data.tar.gz: afa7e1efa0522d498d3586c345803a183fb266b7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: aa1e4371da76ca048dfa84ede35ee4432361b39f40ef61ad896441f76b310c4c2771797dce86cb34e5c49d3094d514e9a03bea23371df8d6e8d308623ff01ebb
|
|
7
|
+
data.tar.gz: a22a40faf3866b3d446999b30417c5fe3e481715dcd724c51b0cfa3dca5c01d919049f406b181493b6a60c232cf0c9dfda2c866e1895d286424264af60efa581
|
data/.document
CHANGED
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--color
|
data/Gemfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
source "http://rubygems.org"
|
|
2
|
+
# Add dependencies required to use your gem here.
|
|
3
|
+
# Example:
|
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
|
5
|
+
gem "rmagick"
|
|
6
|
+
# Add dependencies to develop your gem here.
|
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
|
8
|
+
group :development do
|
|
9
|
+
gem "rspec", "~> 2.8.0"
|
|
10
|
+
gem "rdoc", "~> 3.12"
|
|
11
|
+
gem "bundler", "~> 1.0"
|
|
12
|
+
gem "jeweler", "~> 1.8.7"
|
|
13
|
+
gem "simplecov", ">= 0"
|
|
14
|
+
gem 'shoulda-matchers'
|
|
15
|
+
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
GEM
|
|
2
|
+
remote: http://rubygems.org/
|
|
3
|
+
specs:
|
|
4
|
+
activesupport (4.0.1)
|
|
5
|
+
i18n (~> 0.6, >= 0.6.4)
|
|
6
|
+
minitest (~> 4.2)
|
|
7
|
+
multi_json (~> 1.3)
|
|
8
|
+
thread_safe (~> 0.1)
|
|
9
|
+
tzinfo (~> 0.3.37)
|
|
10
|
+
addressable (2.3.5)
|
|
11
|
+
atomic (1.1.14)
|
|
12
|
+
builder (3.2.2)
|
|
13
|
+
diff-lcs (1.1.3)
|
|
14
|
+
docile (1.1.0)
|
|
15
|
+
faraday (0.8.8)
|
|
16
|
+
multipart-post (~> 1.2.0)
|
|
17
|
+
git (1.2.6)
|
|
18
|
+
github_api (0.10.1)
|
|
19
|
+
addressable
|
|
20
|
+
faraday (~> 0.8.1)
|
|
21
|
+
hashie (>= 1.2)
|
|
22
|
+
multi_json (~> 1.4)
|
|
23
|
+
nokogiri (~> 1.5.2)
|
|
24
|
+
oauth2
|
|
25
|
+
hashie (2.0.5)
|
|
26
|
+
highline (1.6.20)
|
|
27
|
+
httpauth (0.2.0)
|
|
28
|
+
i18n (0.6.5)
|
|
29
|
+
jeweler (1.8.8)
|
|
30
|
+
builder
|
|
31
|
+
bundler (~> 1.0)
|
|
32
|
+
git (>= 1.2.5)
|
|
33
|
+
github_api (= 0.10.1)
|
|
34
|
+
highline (>= 1.6.15)
|
|
35
|
+
nokogiri (= 1.5.10)
|
|
36
|
+
rake
|
|
37
|
+
rdoc
|
|
38
|
+
json (1.8.1)
|
|
39
|
+
jwt (0.1.8)
|
|
40
|
+
multi_json (>= 1.5)
|
|
41
|
+
lockfile (2.1.0)
|
|
42
|
+
minitest (4.7.5)
|
|
43
|
+
multi_json (1.8.2)
|
|
44
|
+
multi_xml (0.5.5)
|
|
45
|
+
multipart-post (1.2.0)
|
|
46
|
+
nokogiri (1.5.10)
|
|
47
|
+
oauth2 (0.9.2)
|
|
48
|
+
faraday (~> 0.8)
|
|
49
|
+
httpauth (~> 0.2)
|
|
50
|
+
jwt (~> 0.1.4)
|
|
51
|
+
multi_json (~> 1.0)
|
|
52
|
+
multi_xml (~> 0.5)
|
|
53
|
+
rack (~> 1.2)
|
|
54
|
+
rack (1.5.2)
|
|
55
|
+
rake (10.1.0)
|
|
56
|
+
rdoc (3.12.2)
|
|
57
|
+
json (~> 1.4)
|
|
58
|
+
rmagick (2.13.2)
|
|
59
|
+
rspec (2.8.0)
|
|
60
|
+
rspec-core (~> 2.8.0)
|
|
61
|
+
rspec-expectations (~> 2.8.0)
|
|
62
|
+
rspec-mocks (~> 2.8.0)
|
|
63
|
+
rspec-core (2.8.0)
|
|
64
|
+
rspec-expectations (2.8.0)
|
|
65
|
+
diff-lcs (~> 1.1.2)
|
|
66
|
+
rspec-mocks (2.8.0)
|
|
67
|
+
shoulda-matchers (2.4.0)
|
|
68
|
+
activesupport (>= 3.0.0)
|
|
69
|
+
simplecov (0.8.1)
|
|
70
|
+
docile (~> 1.1.0)
|
|
71
|
+
lockfile (>= 2.1.0)
|
|
72
|
+
multi_json
|
|
73
|
+
simplecov-html (~> 0.8.0)
|
|
74
|
+
simplecov-html (0.8.0)
|
|
75
|
+
thread_safe (0.1.3)
|
|
76
|
+
atomic
|
|
77
|
+
tzinfo (0.3.38)
|
|
78
|
+
|
|
79
|
+
PLATFORMS
|
|
80
|
+
ruby
|
|
81
|
+
|
|
82
|
+
DEPENDENCIES
|
|
83
|
+
bundler (~> 1.0)
|
|
84
|
+
jeweler (~> 1.8.7)
|
|
85
|
+
rdoc (~> 3.12)
|
|
86
|
+
rmagick
|
|
87
|
+
rspec (~> 2.8.0)
|
|
88
|
+
shoulda-matchers
|
|
89
|
+
simplecov
|
data/{LICENSE → LICENSE.txt}
RENAMED
data/README.rdoc
CHANGED
|
@@ -7,7 +7,9 @@ Ruby library for working with the Tesseract OCR.
|
|
|
7
7
|
To work properly rtesseract are needed:
|
|
8
8
|
* Tesseract - Program
|
|
9
9
|
* ImageMagic - Program
|
|
10
|
-
* RMagick - Gem
|
|
10
|
+
* RMagick or mini_magick - Gem
|
|
11
|
+
|
|
12
|
+
Atention: Version 1.0.0 works fine with Ruby 2.0 and tesseract 3.0 and lower versions of rtesseract works fine with Ruby 1.8 and tesseract 2.0.4.
|
|
11
13
|
|
|
12
14
|
== EXAMPLE USAGE
|
|
13
15
|
|
|
@@ -65,4 +67,4 @@ It's very simple to use rtesseract:
|
|
|
65
67
|
|
|
66
68
|
== Copyright
|
|
67
69
|
|
|
68
|
-
Copyright (c)
|
|
70
|
+
Copyright (c) 2013 Danilo Jeremias da Silva. See LICENSE for details.
|
data/Rakefile
CHANGED
|
@@ -1,51 +1,45 @@
|
|
|
1
|
-
|
|
2
|
-
require 'rake'
|
|
1
|
+
# encoding: utf-8
|
|
3
2
|
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'bundler'
|
|
4
5
|
begin
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
gem.description = "Ruby library for working with the Tesseract OCR."
|
|
11
|
-
gem.email = "dannnylo@gmail.com"
|
|
12
|
-
gem.homepage = "http://github.com/dannnylo/rtesseract"
|
|
13
|
-
gem.authors = ["Danilo Jeremias da Silva"]
|
|
14
|
-
gem.add_development_dependency "jeweler", ">=1.4.0"
|
|
15
|
-
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
|
16
|
-
|
|
17
|
-
gem.add_runtime_dependency "rmagick", '>= 2.10.1'
|
|
18
|
-
end
|
|
19
|
-
Jeweler::GemcutterTasks.new
|
|
20
|
-
rescue LoadError
|
|
21
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
|
6
|
+
Bundler.setup(:default, :development)
|
|
7
|
+
rescue Bundler::BundlerError => e
|
|
8
|
+
$stderr.puts e.message
|
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
|
10
|
+
exit e.status_code
|
|
22
11
|
end
|
|
12
|
+
require 'rake'
|
|
23
13
|
|
|
24
|
-
require '
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
14
|
+
require 'jeweler'
|
|
15
|
+
Jeweler::Tasks.new do |gem|
|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
|
17
|
+
gem.name = "rtesseract"
|
|
18
|
+
gem.homepage = "http://github.com/dannnylo/rtesseract"
|
|
19
|
+
gem.license = "MIT"
|
|
20
|
+
gem.summary = %Q{Ruby library for working with the Tesseract OCR.}
|
|
21
|
+
gem.description = %Q{Ruby library for working with the Tesseract OCR.}
|
|
22
|
+
gem.email = "dannnylo@gmail.com"
|
|
23
|
+
gem.authors = ["Danilo Jeremias da Silva"]
|
|
24
|
+
# dependencies defined in Gemfile
|
|
29
25
|
end
|
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
|
30
27
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
test.pattern = 'test/**/test_*.rb'
|
|
36
|
-
test.verbose = true
|
|
37
|
-
end
|
|
38
|
-
rescue LoadError
|
|
39
|
-
task :rcov do
|
|
40
|
-
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
|
41
|
-
end
|
|
28
|
+
require 'rspec/core'
|
|
29
|
+
require 'rspec/core/rake_task'
|
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
|
42
32
|
end
|
|
43
33
|
|
|
44
|
-
|
|
34
|
+
desc "Code coverage detail"
|
|
35
|
+
task :simplecov do
|
|
36
|
+
ENV['COVERAGE'] = "true"
|
|
37
|
+
Rake::Task['spec'].execute
|
|
38
|
+
end
|
|
45
39
|
|
|
46
|
-
task :default => :
|
|
40
|
+
task :default => :spec
|
|
47
41
|
|
|
48
|
-
require '
|
|
42
|
+
require 'rdoc/task'
|
|
49
43
|
Rake::RDocTask.new do |rdoc|
|
|
50
44
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
51
45
|
|
|
@@ -54,4 +48,3 @@ Rake::RDocTask.new do |rdoc|
|
|
|
54
48
|
rdoc.rdoc_files.include('README*')
|
|
55
49
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
56
50
|
end
|
|
57
|
-
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.0.0
|
|
@@ -1,23 +1,17 @@
|
|
|
1
1
|
require 'mini_magick'
|
|
2
2
|
module MiniMagickProcessor
|
|
3
|
+
extend self
|
|
3
4
|
def image_to_tiff
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
cat = @instance || MiniMagick::Image.open(@source.to_s)
|
|
5
|
+
tmp_file = Tempfile.new(["",".tif"])
|
|
6
|
+
cat = @instance || read_with_processor(@source.to_s)
|
|
7
7
|
cat.format("tif")
|
|
8
8
|
cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
|
|
9
9
|
cat.write tmp_file.to_s
|
|
10
10
|
return tmp_file
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
|
16
|
-
cat = @instance || MiniMagick::Image.read(blob)
|
|
17
|
-
cat.format("tif")
|
|
18
|
-
cat.crop("#{@w}x#{@h}+#{@x}+#{@y}") unless [@x, @y, @w, @h].compact == []
|
|
19
|
-
cat.write tmp_file.to_s
|
|
20
|
-
return tmp_file
|
|
13
|
+
def read_with_processor(path)
|
|
14
|
+
MiniMagick::Image.open(path.to_s)
|
|
21
15
|
end
|
|
22
16
|
|
|
23
17
|
def is_a_instance?(object)
|
data/lib/processors/rmagick.rb
CHANGED
|
@@ -1,21 +1,16 @@
|
|
|
1
1
|
require "RMagick"
|
|
2
2
|
module RMagickProcessor
|
|
3
|
+
extend self
|
|
3
4
|
def image_to_tiff
|
|
4
|
-
|
|
5
|
-
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
|
5
|
+
tmp_file = Tempfile.new(["",".tif"])
|
|
6
6
|
cat = @instance || Magick::Image.read(@source.to_s).first
|
|
7
7
|
cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
|
|
8
|
-
cat.write tmp_file.to_s
|
|
8
|
+
cat.write tmp_file.path.to_s
|
|
9
9
|
return tmp_file
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
def
|
|
13
|
-
|
|
14
|
-
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}.tif").to_s
|
|
15
|
-
cat = @instance || Magick::Image.from_blob(blob).first
|
|
16
|
-
cat.crop!(@x, @y, @w, @h) unless [@x, @y, @w, @h].compact == []
|
|
17
|
-
cat.write tmp_file.to_s
|
|
18
|
-
return tmp_file
|
|
12
|
+
def read_with_processor(path)
|
|
13
|
+
Magick::Image.read(path.to_s).first
|
|
19
14
|
end
|
|
20
15
|
|
|
21
16
|
def is_a_instance?(object)
|
data/lib/rtesseract.rb
CHANGED
|
@@ -1,19 +1,16 @@
|
|
|
1
1
|
require "pathname"
|
|
2
2
|
require "tempfile"
|
|
3
|
-
require 'RMagick'
|
|
4
3
|
|
|
5
4
|
require "rtesseract/errors"
|
|
6
5
|
require "rtesseract/mixed"
|
|
7
6
|
|
|
8
7
|
class RTesseract
|
|
9
|
-
VERSION = '0.0.13'
|
|
10
8
|
attr_accessor :options
|
|
11
9
|
attr_writer :lang
|
|
12
10
|
attr_writer :psm
|
|
13
11
|
attr_reader :processor
|
|
14
12
|
|
|
15
13
|
def initialize(src = "", options = {})
|
|
16
|
-
@uid = options.delete(:uid) || nil
|
|
17
14
|
@command = options.delete(:command) || default_command
|
|
18
15
|
@lang = options.delete(:lang) || options.delete("lang") || ""
|
|
19
16
|
@psm = options.delete(:psm) || options.delete("psm") || nil
|
|
@@ -41,7 +38,12 @@ class RTesseract
|
|
|
41
38
|
|
|
42
39
|
def self.read(src = nil, options = {}, &block)
|
|
43
40
|
raise RTesseract::ImageNotSelectedError if src == nil
|
|
44
|
-
|
|
41
|
+
processor = options.delete(:processor) || options.delete("processor")
|
|
42
|
+
if processor == "mini_magick"
|
|
43
|
+
image = MiniMagickProcessor.read_with_processor(src.to_s)
|
|
44
|
+
else
|
|
45
|
+
image = RMagickProcessor.read_with_processor(src.to_s)
|
|
46
|
+
end
|
|
45
47
|
yield image
|
|
46
48
|
object = RTesseract.new("", options)
|
|
47
49
|
object.from_blob(image.to_blob)
|
|
@@ -67,22 +69,14 @@ class RTesseract
|
|
|
67
69
|
#Remove files
|
|
68
70
|
def remove_file(files=[])
|
|
69
71
|
files.each do |file|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
rescue
|
|
73
|
-
system "rm -f #{file}"
|
|
74
|
-
end
|
|
72
|
+
file.close
|
|
73
|
+
file.unlink
|
|
75
74
|
end
|
|
76
75
|
true
|
|
77
76
|
rescue
|
|
78
77
|
raise RTesseract::TempFilesNotRemovedError
|
|
79
78
|
end
|
|
80
79
|
|
|
81
|
-
def generate_uid
|
|
82
|
-
@uid = rand.to_s[2,10] if @uid.nil?
|
|
83
|
-
@uid
|
|
84
|
-
end
|
|
85
|
-
|
|
86
80
|
# Select the language
|
|
87
81
|
#===Languages
|
|
88
82
|
## * eng - English
|
|
@@ -139,26 +133,24 @@ class RTesseract
|
|
|
139
133
|
|
|
140
134
|
#Convert image to string
|
|
141
135
|
def convert
|
|
142
|
-
|
|
143
|
-
tmp_file = Pathname.new(Dir::tmpdir).join("#{@uid}_#{@source.basename}")
|
|
136
|
+
path = Tempfile.new(["",".txt"]).path.to_s
|
|
144
137
|
tmp_image = image_to_tiff
|
|
145
|
-
`#{@command} "#{tmp_image}" "#{
|
|
146
|
-
@value = File.read("#{
|
|
147
|
-
|
|
148
|
-
remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
|
|
138
|
+
`#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}`
|
|
139
|
+
@value = File.read("#{path}").to_s
|
|
140
|
+
remove_file([tmp_image])
|
|
149
141
|
rescue
|
|
150
142
|
raise RTesseract::ConversionError
|
|
151
143
|
end
|
|
152
144
|
|
|
153
145
|
#Read image from memory blob
|
|
154
146
|
def from_blob(blob)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
remove_file([
|
|
147
|
+
blob_file = Tempfile.new("blob")
|
|
148
|
+
blob_file.write(blob)
|
|
149
|
+
blob_file.rewind
|
|
150
|
+
blob_file.flush
|
|
151
|
+
self.source = blob_file.path
|
|
152
|
+
convert
|
|
153
|
+
remove_file([blob_file])
|
|
162
154
|
rescue
|
|
163
155
|
raise RTesseract::ConversionError
|
|
164
156
|
end
|
data/lib/rtesseract/mixed.rb
CHANGED
|
@@ -25,7 +25,7 @@ class RTesseract
|
|
|
25
25
|
def convert
|
|
26
26
|
@value = ""
|
|
27
27
|
@areas.each do |area|
|
|
28
|
-
image = RTesseract.new(@source.to_s,@options)
|
|
28
|
+
image = RTesseract.new(@source.to_s,@options.dup)
|
|
29
29
|
image.crop!(area[:x].to_i, area[:y].to_i, area[:width].to_i, area[:height].to_i)
|
|
30
30
|
@value << image.to_s
|
|
31
31
|
end
|
data/rtesseract.gemspec
CHANGED
|
@@ -5,60 +5,77 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = "rtesseract"
|
|
8
|
-
s.version = "0.0
|
|
8
|
+
s.version = "1.0.0"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Danilo Jeremias da Silva"]
|
|
12
|
-
s.date = "
|
|
12
|
+
s.date = "2013-11-21"
|
|
13
13
|
s.description = "Ruby library for working with the Tesseract OCR."
|
|
14
14
|
s.email = "dannnylo@gmail.com"
|
|
15
15
|
s.extra_rdoc_files = [
|
|
16
|
-
"LICENSE",
|
|
16
|
+
"LICENSE.txt",
|
|
17
17
|
"README.rdoc"
|
|
18
18
|
]
|
|
19
19
|
s.files = [
|
|
20
20
|
".document",
|
|
21
|
-
"
|
|
21
|
+
".rspec",
|
|
22
|
+
"Gemfile",
|
|
23
|
+
"Gemfile.lock",
|
|
24
|
+
"LICENSE.txt",
|
|
22
25
|
"README.rdoc",
|
|
23
26
|
"Rakefile",
|
|
27
|
+
"VERSION",
|
|
24
28
|
"lib/processors/mini_magick.rb",
|
|
25
29
|
"lib/processors/rmagick.rb",
|
|
26
30
|
"lib/rtesseract.rb",
|
|
27
31
|
"lib/rtesseract/errors.rb",
|
|
28
32
|
"lib/rtesseract/mixed.rb",
|
|
29
33
|
"rtesseract.gemspec",
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
34
|
+
"spec/images/mixed.tif",
|
|
35
|
+
"spec/images/test with spaces.tif",
|
|
36
|
+
"spec/images/test.bmp",
|
|
37
|
+
"spec/images/test.jpg",
|
|
38
|
+
"spec/images/test.png",
|
|
39
|
+
"spec/images/test.tif",
|
|
40
|
+
"spec/images/test1.tif",
|
|
41
|
+
"spec/rtesseract_mixed_spec.rb",
|
|
42
|
+
"spec/rtesseract_spec.rb",
|
|
43
|
+
"spec/spec_helper.rb"
|
|
40
44
|
]
|
|
41
45
|
s.homepage = "http://github.com/dannnylo/rtesseract"
|
|
46
|
+
s.licenses = ["MIT"]
|
|
42
47
|
s.require_paths = ["lib"]
|
|
43
|
-
s.rubygems_version = "
|
|
48
|
+
s.rubygems_version = "2.0.3"
|
|
44
49
|
s.summary = "Ruby library for working with the Tesseract OCR."
|
|
45
50
|
|
|
46
51
|
if s.respond_to? :specification_version then
|
|
47
|
-
s.specification_version =
|
|
52
|
+
s.specification_version = 4
|
|
48
53
|
|
|
49
54
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
50
|
-
s.
|
|
51
|
-
s.add_development_dependency(%q<
|
|
52
|
-
s.
|
|
55
|
+
s.add_runtime_dependency(%q<rmagick>, [">= 0"])
|
|
56
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
57
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
|
58
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0"])
|
|
59
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.7"])
|
|
60
|
+
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
|
61
|
+
s.add_development_dependency(%q<shoulda-matchers>, [">= 0"])
|
|
53
62
|
else
|
|
54
|
-
s.add_dependency(%q<
|
|
55
|
-
s.add_dependency(%q<
|
|
56
|
-
s.add_dependency(%q<
|
|
63
|
+
s.add_dependency(%q<rmagick>, [">= 0"])
|
|
64
|
+
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
65
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
66
|
+
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
|
67
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
|
|
68
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
|
69
|
+
s.add_dependency(%q<shoulda-matchers>, [">= 0"])
|
|
57
70
|
end
|
|
58
71
|
else
|
|
59
|
-
s.add_dependency(%q<
|
|
60
|
-
s.add_dependency(%q<
|
|
61
|
-
s.add_dependency(%q<
|
|
72
|
+
s.add_dependency(%q<rmagick>, [">= 0"])
|
|
73
|
+
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
|
74
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
|
75
|
+
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.7"])
|
|
77
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
|
78
|
+
s.add_dependency(%q<shoulda-matchers>, [">= 0"])
|
|
62
79
|
end
|
|
63
80
|
end
|
|
64
81
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
|
+
|
|
3
|
+
describe "Rtesseract::Mixed" do
|
|
4
|
+
before do
|
|
5
|
+
@path = Pathname.new(__FILE__.gsub("rtesseract_mixed_spec.rb","")).expand_path
|
|
6
|
+
@image_tiff = @path.join("images","mixed.tif").to_s
|
|
7
|
+
@image2_tiff = @path.join("images","mixed2.tif").to_s
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it "should be instantiable" do
|
|
11
|
+
RTesseract::Mixed.new.class.should eql(RTesseract::Mixed)
|
|
12
|
+
RTesseract::Mixed.new(@image_tiff).class.should eql(RTesseract::Mixed)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it "should translate parts of the image to text" do
|
|
16
|
+
mix_block = RTesseract::Mixed.new(@image_tiff,{:psm=>7}) do |image|
|
|
17
|
+
image.area(28, 19, 25, 25) #position of 4
|
|
18
|
+
image.area(180, 22, 20, 28) # position of 3
|
|
19
|
+
image.area(218, 22, 24, 28) # position of z
|
|
20
|
+
image.area(248, 24, 22, 22) # position of z
|
|
21
|
+
end
|
|
22
|
+
mix_block.to_s_without_spaces.should eql("43ZZ")
|
|
23
|
+
|
|
24
|
+
mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
|
|
25
|
+
{:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
|
|
26
|
+
{:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
|
|
27
|
+
{:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
|
|
28
|
+
{:x => 248, :y=>24, :width=>22, :height=>22} # position of z
|
|
29
|
+
],:psm=>7})
|
|
30
|
+
mix_block.to_s_without_spaces.should eql("43ZZ")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
|
+
require 'pathname'
|
|
3
|
+
|
|
4
|
+
describe "Rtesseract" do
|
|
5
|
+
before do
|
|
6
|
+
@path = Pathname.new(__FILE__.gsub("rtesseract_spec.rb","")).expand_path
|
|
7
|
+
@image_tiff = @path.join("images","test.tif").to_s
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it " be instantiable" do
|
|
11
|
+
RTesseract.new.class.should eql(RTesseract)
|
|
12
|
+
RTesseract.new("").class.should eql(RTesseract)
|
|
13
|
+
RTesseract.new(@image_tiff).class.should eql(RTesseract)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it " translate image to text" do
|
|
17
|
+
RTesseract.new(@image_tiff).to_s_without_spaces.should eql("43ZZ")
|
|
18
|
+
RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.should eql("V2V4")
|
|
19
|
+
RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.should eql("V2V4")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it " translate images .png, .jpg, .bmp" do
|
|
23
|
+
RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.should eql("HW9W")
|
|
24
|
+
RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.should eql("3R8Z")
|
|
25
|
+
RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces.should eql("ZLA6")
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it " change the image" do
|
|
29
|
+
image = RTesseract.new(@image_tiff)
|
|
30
|
+
image.to_s_without_spaces.should eql("43ZZ")
|
|
31
|
+
image.source = @path.join("images","test1.tif").to_s
|
|
32
|
+
image.to_s_without_spaces.should eql("V2V4")
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it " select the language" do
|
|
36
|
+
#English
|
|
37
|
+
RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
|
|
38
|
+
RTesseract.new(@image_tiff,{:lang=>"en"}).lang.should eql(" -l eng ")
|
|
39
|
+
RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang.should eql(" -l eng ")
|
|
40
|
+
RTesseract.new(@image_tiff,{:lang=>"english"}).lang.should eql(" -l eng ")
|
|
41
|
+
|
|
42
|
+
#Portuguese
|
|
43
|
+
RTesseract.new(@image_tiff,{:lang=>"por"}).lang.should eql(" -l por ")
|
|
44
|
+
RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang.should eql(" -l por ")
|
|
45
|
+
RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang.should eql(" -l por ")
|
|
46
|
+
RTesseract.new(@image_tiff,{:lang=>"pt"}).lang.should eql(" -l por ")
|
|
47
|
+
RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang.should eql(" -l por ")
|
|
48
|
+
|
|
49
|
+
RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces.should eql("43ZZ")
|
|
50
|
+
RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces.should eql("43ZZ")
|
|
51
|
+
|
|
52
|
+
RTesseract.new(@image_tiff,{:lang=>"eng"}).lang.should eql(" -l eng ")
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it " be configurable" do
|
|
56
|
+
RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config.should eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
|
|
57
|
+
RTesseract.new(@image_tiff,{:chop_enable=>0}).config.should eql("chop_enable 0")
|
|
58
|
+
RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config.should eql("chop_enable 0\nenable_assoc 0")
|
|
59
|
+
RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces.should eql("43ZZ")
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it " crop image" do
|
|
63
|
+
RTesseract.new(@image_tiff,{:psm=>7}).crop!(140,10,36,40).to_s_without_spaces.should eql("4")
|
|
64
|
+
RTesseract.new(@image_tiff,{:psm=>7}).crop!(180,10,36,40).to_s_without_spaces.should eql("3")
|
|
65
|
+
RTesseract.new(@image_tiff,{:psm=>7}).crop!(200,10,36,40).to_s_without_spaces.should eql("Z")
|
|
66
|
+
RTesseract.new(@image_tiff,{:psm=>7}).crop!(220,10,30,40).to_s_without_spaces.should eql("Z")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
it " read image from blob" do
|
|
71
|
+
image = Magick::Image.read(@path.join("images","test.png").to_s).first
|
|
72
|
+
blob = image.quantize(256,Magick::GRAYColorspace).to_blob
|
|
73
|
+
|
|
74
|
+
test = RTesseract.new("", {:psm => 7})
|
|
75
|
+
test.from_blob(blob)
|
|
76
|
+
test.to_s_without_spaces.should eql("HW9W")
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it " change image in a block" do
|
|
80
|
+
test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
|
|
81
|
+
image = image.white_threshold(245)
|
|
82
|
+
image = image.quantize(256,Magick::GRAYColorspace)
|
|
83
|
+
end
|
|
84
|
+
test.to_s_without_spaces.should eql("HW9W")
|
|
85
|
+
|
|
86
|
+
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
|
87
|
+
image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
|
|
88
|
+
end
|
|
89
|
+
test.to_s_without_spaces.should eql("3R8Z")
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
3
|
+
require 'rspec'
|
|
4
|
+
require "simplecov"
|
|
5
|
+
SimpleCov.start do
|
|
6
|
+
add_filter "/spec/"
|
|
7
|
+
end
|
|
8
|
+
require 'rtesseract'
|
|
9
|
+
# Requires supporting files with custom matchers and macros, etc,
|
|
10
|
+
# in ./support/ and its subdirectories.
|
|
11
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
|
12
|
+
|
|
13
|
+
RSpec.configure do |config|
|
|
14
|
+
|
|
15
|
+
end
|
metadata
CHANGED
|
@@ -1,130 +1,167 @@
|
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rtesseract
|
|
3
|
-
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
5
|
-
prerelease:
|
|
6
|
-
segments:
|
|
7
|
-
- 0
|
|
8
|
-
- 0
|
|
9
|
-
- 13
|
|
10
|
-
version: 0.0.13
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.0
|
|
11
5
|
platform: ruby
|
|
12
|
-
authors:
|
|
6
|
+
authors:
|
|
13
7
|
- Danilo Jeremias da Silva
|
|
14
8
|
autorequire:
|
|
15
9
|
bindir: bin
|
|
16
10
|
cert_chain: []
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
date: 2013-11-21 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rmagick
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - '>='
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - '>='
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rspec
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ~>
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 2.8.0
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ~>
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 2.8.0
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rdoc
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ~>
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.12'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ~>
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.12'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: bundler
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ~>
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ~>
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
21
70
|
name: jeweler
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ~>
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 1.8.7
|
|
76
|
+
type: :development
|
|
22
77
|
prerelease: false
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ~>
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 1.8.7
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: simplecov
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - '>='
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
34
90
|
type: :development
|
|
35
|
-
version_requirements: *id001
|
|
36
|
-
- !ruby/object:Gem::Dependency
|
|
37
|
-
name: thoughtbot-shoulda
|
|
38
91
|
prerelease: false
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - '>='
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: shoulda-matchers
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - '>='
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0'
|
|
48
104
|
type: :development
|
|
49
|
-
version_requirements: *id002
|
|
50
|
-
- !ruby/object:Gem::Dependency
|
|
51
|
-
name: rmagick
|
|
52
105
|
prerelease: false
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
hash: 37
|
|
59
|
-
segments:
|
|
60
|
-
- 2
|
|
61
|
-
- 10
|
|
62
|
-
- 1
|
|
63
|
-
version: 2.10.1
|
|
64
|
-
type: :runtime
|
|
65
|
-
version_requirements: *id003
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - '>='
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0'
|
|
66
111
|
description: Ruby library for working with the Tesseract OCR.
|
|
67
112
|
email: dannnylo@gmail.com
|
|
68
113
|
executables: []
|
|
69
|
-
|
|
70
114
|
extensions: []
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
- LICENSE
|
|
115
|
+
extra_rdoc_files:
|
|
116
|
+
- LICENSE.txt
|
|
74
117
|
- README.rdoc
|
|
75
|
-
files:
|
|
118
|
+
files:
|
|
76
119
|
- .document
|
|
77
|
-
-
|
|
120
|
+
- .rspec
|
|
121
|
+
- Gemfile
|
|
122
|
+
- Gemfile.lock
|
|
123
|
+
- LICENSE.txt
|
|
78
124
|
- README.rdoc
|
|
79
125
|
- Rakefile
|
|
126
|
+
- VERSION
|
|
80
127
|
- lib/processors/mini_magick.rb
|
|
81
128
|
- lib/processors/rmagick.rb
|
|
82
129
|
- lib/rtesseract.rb
|
|
83
130
|
- lib/rtesseract/errors.rb
|
|
84
131
|
- lib/rtesseract/mixed.rb
|
|
85
132
|
- rtesseract.gemspec
|
|
86
|
-
-
|
|
87
|
-
-
|
|
88
|
-
-
|
|
89
|
-
-
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
-
|
|
93
|
-
-
|
|
94
|
-
-
|
|
95
|
-
-
|
|
133
|
+
- spec/images/mixed.tif
|
|
134
|
+
- spec/images/test with spaces.tif
|
|
135
|
+
- spec/images/test.bmp
|
|
136
|
+
- spec/images/test.jpg
|
|
137
|
+
- spec/images/test.png
|
|
138
|
+
- spec/images/test.tif
|
|
139
|
+
- spec/images/test1.tif
|
|
140
|
+
- spec/rtesseract_mixed_spec.rb
|
|
141
|
+
- spec/rtesseract_spec.rb
|
|
142
|
+
- spec/spec_helper.rb
|
|
96
143
|
homepage: http://github.com/dannnylo/rtesseract
|
|
97
|
-
licenses:
|
|
98
|
-
|
|
144
|
+
licenses:
|
|
145
|
+
- MIT
|
|
146
|
+
metadata: {}
|
|
99
147
|
post_install_message:
|
|
100
148
|
rdoc_options: []
|
|
101
|
-
|
|
102
|
-
require_paths:
|
|
149
|
+
require_paths:
|
|
103
150
|
- lib
|
|
104
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
none: false
|
|
115
|
-
requirements:
|
|
116
|
-
- - ">="
|
|
117
|
-
- !ruby/object:Gem::Version
|
|
118
|
-
hash: 3
|
|
119
|
-
segments:
|
|
120
|
-
- 0
|
|
121
|
-
version: "0"
|
|
151
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
152
|
+
requirements:
|
|
153
|
+
- - '>='
|
|
154
|
+
- !ruby/object:Gem::Version
|
|
155
|
+
version: '0'
|
|
156
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
|
+
requirements:
|
|
158
|
+
- - '>='
|
|
159
|
+
- !ruby/object:Gem::Version
|
|
160
|
+
version: '0'
|
|
122
161
|
requirements: []
|
|
123
|
-
|
|
124
162
|
rubyforge_project:
|
|
125
|
-
rubygems_version:
|
|
163
|
+
rubygems_version: 2.0.3
|
|
126
164
|
signing_key:
|
|
127
|
-
specification_version:
|
|
165
|
+
specification_version: 4
|
|
128
166
|
summary: Ruby library for working with the Tesseract OCR.
|
|
129
167
|
test_files: []
|
|
130
|
-
|
data/test/helper.rb
DELETED
data/test/test_mixed.rb
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
require 'helper'
|
|
2
|
-
require 'pathname'
|
|
3
|
-
|
|
4
|
-
class TestMixed < Test::Unit::TestCase
|
|
5
|
-
context "Path" do
|
|
6
|
-
setup do
|
|
7
|
-
@path = Pathname.new(__FILE__.gsub("test_mixed.rb","")).expand_path
|
|
8
|
-
@image_tiff = @path.join("images","mixed.tif").to_s
|
|
9
|
-
@image2_tiff = @path.join("images","mixed2.tif").to_s
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
should "be instantiable" do
|
|
13
|
-
assert_equal RTesseract::Mixed.new.class , RTesseract::Mixed
|
|
14
|
-
assert_equal RTesseract::Mixed.new(@image_tiff).class , RTesseract::Mixed
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
should "translate parts of the image to text" do
|
|
18
|
-
mix_block = RTesseract::Mixed.new(@image_tiff) do |image|
|
|
19
|
-
image.area(28, 19, 25, 25) #position of 4
|
|
20
|
-
image.area(180, 22, 20, 28) # position of 3
|
|
21
|
-
image.area(218, 22, 24, 28) # position of z
|
|
22
|
-
image.area(248, 24, 22, 22) # position of z
|
|
23
|
-
end
|
|
24
|
-
assert_equal mix_block.to_s_without_spaces , "43ZZ"
|
|
25
|
-
|
|
26
|
-
mix_block = RTesseract::Mixed.new(@image_tiff,{:areas => [
|
|
27
|
-
{:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
|
|
28
|
-
{:x => 180, :y=>22, :width=>20, :height=>28}, # position of 3
|
|
29
|
-
{:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
|
|
30
|
-
{:x => 248, :y=>24, :width=>22, :height=>22} # position of z
|
|
31
|
-
]})
|
|
32
|
-
assert_equal mix_block.to_s_without_spaces , "43ZZ"
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
data/test/test_rtesseract.rb
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
require 'helper'
|
|
2
|
-
require 'pathname'
|
|
3
|
-
|
|
4
|
-
class TestRtesseract < Test::Unit::TestCase
|
|
5
|
-
context "Path" do
|
|
6
|
-
setup do
|
|
7
|
-
@path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
|
|
8
|
-
@image_tiff = @path.join("images","test.tif").to_s
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
should "be instantiable" do
|
|
12
|
-
assert_equal RTesseract.new.class , RTesseract
|
|
13
|
-
assert_equal RTesseract.new("").class , RTesseract
|
|
14
|
-
assert_equal RTesseract.new(@image_tiff).class , RTesseract
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
should "translate image to text" do
|
|
18
|
-
assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
|
|
19
|
-
assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
|
|
20
|
-
assert_equal RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces , "V2V4"
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
should "translate images .png, .jpg, .bmp" do
|
|
24
|
-
assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
|
|
25
|
-
assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
|
|
26
|
-
assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
should "change the image" do
|
|
30
|
-
image = RTesseract.new(@image_tiff)
|
|
31
|
-
assert_equal image.to_s_without_spaces,"43ZZ"
|
|
32
|
-
image.source = @path.join("images","test1.tif").to_s
|
|
33
|
-
assert_equal image.to_s_without_spaces,"V2V4"
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
should "select the language" do
|
|
37
|
-
#English
|
|
38
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
|
39
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
|
|
40
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
|
|
41
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
|
|
42
|
-
|
|
43
|
-
#Portuguese
|
|
44
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
|
|
45
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
|
|
46
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
|
|
47
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
|
|
48
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
|
|
49
|
-
|
|
50
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
|
|
51
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
|
|
52
|
-
|
|
53
|
-
assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
should "be configurable" do
|
|
57
|
-
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
|
|
58
|
-
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
|
|
59
|
-
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0}).config , "chop_enable 0\nenable_assoc 0"
|
|
60
|
-
assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).to_s_without_spaces , "43ZZ"
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
should "crop image" do
|
|
64
|
-
assert_equal RTesseract.new(@image_tiff).crop!(140,10,36,40).to_s_without_spaces, "4"
|
|
65
|
-
assert_equal RTesseract.new(@image_tiff).crop!(180,10,36,40).to_s_without_spaces, "3"
|
|
66
|
-
assert_equal RTesseract.new(@image_tiff).crop!(200,10,36,40).to_s_without_spaces, "Z"
|
|
67
|
-
assert_equal RTesseract.new(@image_tiff).crop!(220,10,30,40).to_s_without_spaces, "Z"
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
should "unique uid" do
|
|
71
|
-
assert_not_equal RTesseract.new(@image_tiff).generate_uid , RTesseract.new(@image_tiff).generate_uid
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
should "generate a unique id" do
|
|
75
|
-
reg = RTesseract.new(@image_tiff)
|
|
76
|
-
assert_equal reg.generate_uid , reg.generate_uid
|
|
77
|
-
value = reg.generate_uid
|
|
78
|
-
reg.convert
|
|
79
|
-
assert_not_equal value , reg.generate_uid
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
should "read image from blob" do
|
|
83
|
-
image = Magick::Image.read(@path.join("images","test.png").to_s).first
|
|
84
|
-
blob = image.white_threshold(245).quantize(256,Magick::GRAYColorspace).to_blob
|
|
85
|
-
|
|
86
|
-
test = RTesseract.new
|
|
87
|
-
test.from_blob(blob)
|
|
88
|
-
assert_equal test.to_s_without_spaces , "HW9W"
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
should "change image in a block" do
|
|
92
|
-
test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
|
|
93
|
-
image = image.white_threshold(245)
|
|
94
|
-
image = image.quantize(256,Magick::GRAYColorspace)
|
|
95
|
-
end
|
|
96
|
-
assert_equal test.to_s_without_spaces , "HW9W"
|
|
97
|
-
|
|
98
|
-
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
|
99
|
-
image = image.white_threshold(245).quantize(256,Magick::GRAYColorspace)
|
|
100
|
-
end
|
|
101
|
-
assert_equal test.to_s_without_spaces , "3R8Z"
|
|
102
|
-
end
|
|
103
|
-
end
|
|
104
|
-
end
|