rtesseract 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +4 -5
- data/Gemfile +2 -1
- data/Gemfile.lock +60 -55
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +3 -3
- data/lib/rtesseract.rb +7 -1
- data/lib/rtesseract/blob.rb +2 -2
- data/lib/rtesseract/configuration.rb +2 -2
- data/lib/rtesseract/utils.rb +3 -3
- data/lib/rtesseract/uzn.rb +47 -0
- data/rtesseract.gemspec +39 -34
- data/spec/rtesseract_spec.rb +8 -0
- data/spec/rtesseract_uzn_spec.rb +56 -0
- data/spec/spec_helper.rb +4 -1
- metadata +23 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb2f55f98e4b22827068fd2284a893788d72a751
|
4
|
+
data.tar.gz: f6b3ff2bfff6d4e250c75b239354485670b3d732
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba2dcb1878a1c98013a6c4c0ad4a583a8b830faf01fede4df8b39a1d6e492ef79cc3610dee60ab856937e7ac7a3c7ae8789ae74fd32f80dbc910cd488a5f0651
|
7
|
+
data.tar.gz: aacbcfe446dd8050a6d45b78dab5d38468cb6110c4c6216c1877bfa2bbee08bdc6763e9b0c5c0ec7fe40aac8fc43a553da533b33449c60f40d21f6e6c8034faa
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,83 +2,87 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
addressable (2.4.0)
|
5
|
-
builder (3.2.
|
6
|
-
coveralls (0.
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
5
|
+
builder (3.2.3)
|
6
|
+
coveralls (0.7.2)
|
7
|
+
multi_json (~> 1.3)
|
8
|
+
rest-client (= 1.6.7)
|
9
|
+
simplecov (>= 0.7)
|
10
|
+
term-ansicolor (= 1.2.2)
|
11
|
+
thor (= 0.18.1)
|
12
12
|
descendants_tracker (0.0.4)
|
13
13
|
thread_safe (~> 0.3, >= 0.3.1)
|
14
|
-
diff-lcs (1.
|
14
|
+
diff-lcs (1.3)
|
15
15
|
docile (1.1.5)
|
16
16
|
faraday (0.9.2)
|
17
17
|
multipart-post (>= 1.2, < 3)
|
18
18
|
git (1.3.0)
|
19
|
-
github_api (0.
|
19
|
+
github_api (0.16.0)
|
20
20
|
addressable (~> 2.4.0)
|
21
21
|
descendants_tracker (~> 0.0.4)
|
22
22
|
faraday (~> 0.8, < 0.10)
|
23
23
|
hashie (>= 3.4)
|
24
|
-
|
25
|
-
oauth2
|
26
|
-
hashie (3.
|
27
|
-
highline (1.7.
|
28
|
-
jeweler (2.
|
24
|
+
mime-types (>= 1.16, < 3.0)
|
25
|
+
oauth2 (~> 1.0)
|
26
|
+
hashie (3.5.7)
|
27
|
+
highline (1.7.10)
|
28
|
+
jeweler (2.3.7)
|
29
29
|
builder
|
30
|
-
bundler (>= 1
|
30
|
+
bundler (>= 1)
|
31
31
|
git (>= 1.2.5)
|
32
|
-
github_api
|
32
|
+
github_api (~> 0.16.0)
|
33
33
|
highline (>= 1.6.15)
|
34
34
|
nokogiri (>= 1.5.10)
|
35
|
+
psych (~> 2.2)
|
35
36
|
rake
|
36
37
|
rdoc
|
37
|
-
|
38
|
-
json (1.
|
39
|
-
jwt (1.5.
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
semver2
|
39
|
+
json (2.1.0)
|
40
|
+
jwt (1.5.6)
|
41
|
+
mime-types (2.99.3)
|
42
|
+
mini_magick (4.8.0)
|
43
|
+
mini_portile2 (2.3.0)
|
44
|
+
multi_json (1.12.2)
|
45
|
+
multi_xml (0.6.0)
|
44
46
|
multipart-post (2.0.0)
|
45
|
-
nokogiri (1.
|
46
|
-
mini_portile2 (~> 2.
|
47
|
-
oauth2 (1.
|
48
|
-
faraday (>= 0.8, < 0.
|
49
|
-
jwt (~> 1.0
|
47
|
+
nokogiri (1.8.1)
|
48
|
+
mini_portile2 (~> 2.3.0)
|
49
|
+
oauth2 (1.4.0)
|
50
|
+
faraday (>= 0.8, < 0.13)
|
51
|
+
jwt (~> 1.0)
|
50
52
|
multi_json (~> 1.3)
|
51
53
|
multi_xml (~> 0.5)
|
52
54
|
rack (>= 1.2, < 3)
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
rspec-
|
62
|
-
|
63
|
-
rspec-
|
64
|
-
rspec-
|
55
|
+
psych (2.2.4)
|
56
|
+
rack (2.0.3)
|
57
|
+
rake (12.3.0)
|
58
|
+
rdoc (6.0.1)
|
59
|
+
rest-client (1.6.7)
|
60
|
+
mime-types (>= 1.16)
|
61
|
+
rmagick (2.16.0)
|
62
|
+
rspec (3.7.0)
|
63
|
+
rspec-core (~> 3.7.0)
|
64
|
+
rspec-expectations (~> 3.7.0)
|
65
|
+
rspec-mocks (~> 3.7.0)
|
66
|
+
rspec-core (3.7.1)
|
67
|
+
rspec-support (~> 3.7.0)
|
68
|
+
rspec-expectations (3.7.0)
|
65
69
|
diff-lcs (>= 1.2.0, < 2.0)
|
66
|
-
rspec-support (~> 3.
|
67
|
-
rspec-mocks (3.
|
70
|
+
rspec-support (~> 3.7.0)
|
71
|
+
rspec-mocks (3.7.0)
|
68
72
|
diff-lcs (>= 1.2.0, < 2.0)
|
69
|
-
rspec-support (~> 3.
|
70
|
-
rspec-support (3.
|
71
|
-
|
72
|
-
simplecov (0.
|
73
|
+
rspec-support (~> 3.7.0)
|
74
|
+
rspec-support (3.7.0)
|
75
|
+
semver2 (3.4.2)
|
76
|
+
simplecov (0.15.1)
|
73
77
|
docile (~> 1.1.0)
|
74
|
-
json (
|
78
|
+
json (>= 1.8, < 3)
|
75
79
|
simplecov-html (~> 0.10.0)
|
76
|
-
simplecov-html (0.10.
|
77
|
-
term-ansicolor (1.
|
78
|
-
tins (~>
|
79
|
-
thor (0.
|
80
|
-
thread_safe (0.3.
|
81
|
-
tins (
|
80
|
+
simplecov-html (0.10.2)
|
81
|
+
term-ansicolor (1.2.2)
|
82
|
+
tins (~> 0.8)
|
83
|
+
thor (0.18.1)
|
84
|
+
thread_safe (0.3.6)
|
85
|
+
tins (0.13.2)
|
82
86
|
|
83
87
|
PLATFORMS
|
84
88
|
ruby
|
@@ -86,7 +90,8 @@ PLATFORMS
|
|
86
90
|
DEPENDENCIES
|
87
91
|
bundler
|
88
92
|
coveralls
|
89
|
-
jeweler
|
93
|
+
jeweler
|
94
|
+
json
|
90
95
|
mini_magick
|
91
96
|
nokogiri
|
92
97
|
rdoc
|
@@ -95,4 +100,4 @@ DEPENDENCIES
|
|
95
100
|
simplecov
|
96
101
|
|
97
102
|
BUNDLED WITH
|
98
|
-
1.
|
103
|
+
1.16.1
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.2.0
|
@@ -21,10 +21,10 @@ class RTesseract
|
|
21
21
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
22
22
|
cat.format('tif') do |c|
|
23
23
|
c.compress 'None'
|
24
|
-
c.alpha 'off'
|
24
|
+
c.alpha 'off' if MiniMagick.cli != :graphicsmagick
|
25
25
|
end
|
26
26
|
cat.crop("#{points[:w]}x#{points[:h]}+#{points[:x]}+#{points[:y]}") if points.is_a?(Hash) && points.values.compact != []
|
27
|
-
cat.alpha 'off'
|
27
|
+
cat.alpha 'off' if MiniMagick.cli != :graphicsmagick
|
28
28
|
cat.write tmp_file.path.to_s
|
29
29
|
tmp_file
|
30
30
|
end
|
@@ -40,4 +40,4 @@ class RTesseract
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
43
|
-
end
|
43
|
+
end
|
data/lib/rtesseract.rb
CHANGED
@@ -70,6 +70,11 @@ class RTesseract
|
|
70
70
|
option_to_string('-psm', configuration.psm)
|
71
71
|
end
|
72
72
|
|
73
|
+
# Engine Mode
|
74
|
+
def oem
|
75
|
+
option_to_string '--oem', configuration.oem
|
76
|
+
end
|
77
|
+
|
73
78
|
# Tessdata Dir
|
74
79
|
def tessdata_dir
|
75
80
|
option_to_string('--tessdata-dir', configuration.tessdata_dir)
|
@@ -145,7 +150,7 @@ class RTesseract
|
|
145
150
|
|
146
151
|
# Run command
|
147
152
|
def convert_command
|
148
|
-
`#{configuration.command} "#{image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
153
|
+
`#{configuration.command} "#{image}" "#{file_dest}" #{lang} #{oem} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
149
154
|
end
|
150
155
|
|
151
156
|
# Is pdf output?
|
@@ -227,6 +232,7 @@ class RTesseract
|
|
227
232
|
end
|
228
233
|
|
229
234
|
require 'rtesseract/mixed'
|
235
|
+
require 'rtesseract/uzn'
|
230
236
|
require 'rtesseract/box'
|
231
237
|
require 'rtesseract/box_char'
|
232
238
|
require 'rtesseract/blob'
|
data/lib/rtesseract/blob.rb
CHANGED
@@ -3,7 +3,7 @@ class RTesseract
|
|
3
3
|
# Read image from memory blob
|
4
4
|
def self.read(src = nil, options = {})
|
5
5
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
6
|
-
processor = RTesseract::Processor.choose_processor!(options
|
6
|
+
processor = RTesseract::Processor.choose_processor!(options[:processor])
|
7
7
|
image = processor.read_with_processor(src.to_s)
|
8
8
|
yield(image)
|
9
9
|
object = RTesseract.new('', options).from_blob(image.to_blob)
|
@@ -31,4 +31,4 @@ class RTesseract
|
|
31
31
|
rescue => error
|
32
32
|
raise RTesseract::ConversionError.new(error), error, caller
|
33
33
|
end
|
34
|
-
end
|
34
|
+
end
|
@@ -14,7 +14,7 @@ class RTesseract
|
|
14
14
|
|
15
15
|
# Configuration class
|
16
16
|
class Configuration
|
17
|
-
attr_accessor :processor, :lang, :psm, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
17
|
+
attr_accessor :processor, :lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
18
18
|
|
19
19
|
def initialize
|
20
20
|
@processor = 'rmagick'
|
@@ -66,7 +66,7 @@ class RTesseract
|
|
66
66
|
RTesseract::Configuration.new.tap do |config|
|
67
67
|
config.command = config.option(options, :command, RTesseract.default_command)
|
68
68
|
config.processor = config.option(options, :processor, 'rmagick')
|
69
|
-
config.load_options(options, [:lang, :psm, :tessdata_dir, :user_words, :user_patterns])
|
69
|
+
config.load_options(options, [:lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns])
|
70
70
|
config.debug = config.option(options, :debug, false)
|
71
71
|
pdf_opts = lambda { |o| o == 'pdf' || o == :pdf }
|
72
72
|
config.options_cmd = [options.option(:options, nil)].delete_if(&pdf_opts).flatten.compact
|
data/lib/rtesseract/utils.rb
CHANGED
@@ -27,9 +27,9 @@ class RTesseract
|
|
27
27
|
|
28
28
|
# Extract tesseract version number
|
29
29
|
def self.version_number
|
30
|
-
|
31
|
-
|
32
|
-
version =
|
30
|
+
output, st = Open3.capture2e(RTesseract.default_command, "--version")
|
31
|
+
|
32
|
+
version = output.split("\n")[0].split(" ")[1].split('.')[0, 2].join('.')
|
33
33
|
Float(version) rescue nil
|
34
34
|
end
|
35
35
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# RTesseract
|
3
|
+
class RTesseract
|
4
|
+
# Alternative approach to Mixed when you want to read from specific areas.
|
5
|
+
# Requires `-psm 4` which means the text must be "a single column of text of variable sizes".
|
6
|
+
class Uzn < RTesseract
|
7
|
+
attr_reader :areas
|
8
|
+
DEFAULT_ALPHABET = 'Text/Latin'
|
9
|
+
|
10
|
+
def initialize(src = '', options = {})
|
11
|
+
@areas = options.delete(:areas) || []
|
12
|
+
@alphabet = options.delete(:alphabet) || DEFAULT_ALPHABET
|
13
|
+
super(src, options.merge(psm: 4))
|
14
|
+
yield self if block_given?
|
15
|
+
end
|
16
|
+
|
17
|
+
# Add areas
|
18
|
+
def area(points)
|
19
|
+
areas << points
|
20
|
+
end
|
21
|
+
|
22
|
+
def convert_command
|
23
|
+
@image = image
|
24
|
+
write_uzn_file
|
25
|
+
`#{configuration.command} "#{@image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
26
|
+
end
|
27
|
+
|
28
|
+
def after_convert_hook
|
29
|
+
RTesseract::Utils.remove_files([@uzn_file])
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def write_uzn_file
|
35
|
+
folder = File.dirname(@image)
|
36
|
+
basename = File.basename(@image, '.tif')
|
37
|
+
@uzn_file = File.new("#{folder}/#{basename}.uzn", File::CREAT|File::TRUNC|File::RDWR)
|
38
|
+
|
39
|
+
areas.each do |points|
|
40
|
+
s = "#{points[:x]} #{points[:y]} #{points[:w]} #{points[:h]} #{@alphabet}\n"
|
41
|
+
@uzn_file.write(s)
|
42
|
+
@uzn_file.flush
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
data/rtesseract.gemspec
CHANGED
@@ -2,18 +2,18 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 2.
|
5
|
+
# stub: rtesseract 2.2.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
|
-
s.name = "rtesseract"
|
9
|
-
s.version = "2.
|
8
|
+
s.name = "rtesseract".freeze
|
9
|
+
s.version = "2.2.0"
|
10
10
|
|
11
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
-
s.require_paths = ["lib"]
|
13
|
-
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "
|
15
|
-
s.description = "Ruby library for working with the Tesseract OCR."
|
16
|
-
s.email = "dannnylo@gmail.com"
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["Danilo Jeremias da Silva".freeze]
|
14
|
+
s.date = "2018-01-05"
|
15
|
+
s.description = "Ruby library for working with the Tesseract OCR.".freeze
|
16
|
+
s.email = "dannnylo@gmail.com".freeze
|
17
17
|
s.extra_rdoc_files = [
|
18
18
|
"LICENSE.txt",
|
19
19
|
"README.rdoc"
|
@@ -41,6 +41,7 @@ Gem::Specification.new do |s|
|
|
41
41
|
"lib/rtesseract/mixed.rb",
|
42
42
|
"lib/rtesseract/processor.rb",
|
43
43
|
"lib/rtesseract/utils.rb",
|
44
|
+
"lib/rtesseract/uzn.rb",
|
44
45
|
"rtesseract.gemspec",
|
45
46
|
"spec/configs/eng.user-words.txt",
|
46
47
|
"spec/images/README.pdf",
|
@@ -59,41 +60,45 @@ Gem::Specification.new do |s|
|
|
59
60
|
"spec/rtesseract_box_spec.rb",
|
60
61
|
"spec/rtesseract_mixed_spec.rb",
|
61
62
|
"spec/rtesseract_spec.rb",
|
63
|
+
"spec/rtesseract_uzn_spec.rb",
|
62
64
|
"spec/spec_helper.rb"
|
63
65
|
]
|
64
|
-
s.homepage = "http://github.com/dannnylo/rtesseract"
|
65
|
-
s.licenses = ["MIT"]
|
66
|
-
s.rubygems_version = "2.
|
67
|
-
s.summary = "Ruby library for working with the Tesseract OCR."
|
66
|
+
s.homepage = "http://github.com/dannnylo/rtesseract".freeze
|
67
|
+
s.licenses = ["MIT".freeze]
|
68
|
+
s.rubygems_version = "2.6.14".freeze
|
69
|
+
s.summary = "Ruby library for working with the Tesseract OCR.".freeze
|
68
70
|
|
69
71
|
if s.respond_to? :specification_version then
|
70
72
|
s.specification_version = 4
|
71
73
|
|
72
74
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
73
|
-
s.add_runtime_dependency(%q<nokogiri
|
74
|
-
s.add_development_dependency(%q<rspec
|
75
|
-
s.add_development_dependency(%q<rdoc
|
76
|
-
s.add_development_dependency(%q<bundler
|
77
|
-
s.add_development_dependency(%q<jeweler
|
78
|
-
s.add_development_dependency(%q<simplecov
|
79
|
-
s.add_development_dependency(%q<
|
75
|
+
s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 0"])
|
76
|
+
s.add_development_dependency(%q<rspec>.freeze, [">= 0"])
|
77
|
+
s.add_development_dependency(%q<rdoc>.freeze, [">= 0"])
|
78
|
+
s.add_development_dependency(%q<bundler>.freeze, [">= 0"])
|
79
|
+
s.add_development_dependency(%q<jeweler>.freeze, [">= 0"])
|
80
|
+
s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
|
81
|
+
s.add_development_dependency(%q<json>.freeze, [">= 0"])
|
82
|
+
s.add_development_dependency(%q<coveralls>.freeze, [">= 0"])
|
80
83
|
else
|
81
|
-
s.add_dependency(%q<nokogiri
|
82
|
-
s.add_dependency(%q<rspec
|
83
|
-
s.add_dependency(%q<rdoc
|
84
|
-
s.add_dependency(%q<bundler
|
85
|
-
s.add_dependency(%q<jeweler
|
86
|
-
s.add_dependency(%q<simplecov
|
87
|
-
s.add_dependency(%q<
|
84
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
|
85
|
+
s.add_dependency(%q<rspec>.freeze, [">= 0"])
|
86
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 0"])
|
87
|
+
s.add_dependency(%q<bundler>.freeze, [">= 0"])
|
88
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 0"])
|
89
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
90
|
+
s.add_dependency(%q<json>.freeze, [">= 0"])
|
91
|
+
s.add_dependency(%q<coveralls>.freeze, [">= 0"])
|
88
92
|
end
|
89
93
|
else
|
90
|
-
s.add_dependency(%q<nokogiri
|
91
|
-
s.add_dependency(%q<rspec
|
92
|
-
s.add_dependency(%q<rdoc
|
93
|
-
s.add_dependency(%q<bundler
|
94
|
-
s.add_dependency(%q<jeweler
|
95
|
-
s.add_dependency(%q<simplecov
|
96
|
-
s.add_dependency(%q<
|
94
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
|
95
|
+
s.add_dependency(%q<rspec>.freeze, [">= 0"])
|
96
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 0"])
|
97
|
+
s.add_dependency(%q<bundler>.freeze, [">= 0"])
|
98
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 0"])
|
99
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
100
|
+
s.add_dependency(%q<json>.freeze, [">= 0"])
|
101
|
+
s.add_dependency(%q<coveralls>.freeze, [">= 0"])
|
97
102
|
end
|
98
103
|
end
|
99
104
|
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
# encoding: UTF-8
|
3
3
|
require 'pathname'
|
4
|
+
RTesseract::Processor::RMagickProcessor.setup
|
4
5
|
|
5
6
|
# Class to rise error
|
6
7
|
class MakeStringError
|
@@ -197,6 +198,13 @@ describe 'Rtesseract' do
|
|
197
198
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
198
199
|
end
|
199
200
|
|
201
|
+
it 'does not raise on read with image_magick processor' do
|
202
|
+
expect {
|
203
|
+
instance = RTesseract.read(@image_tif, processor: 'mini_magick') {}
|
204
|
+
expect(instance.processor.a_name?('mini_magick')).to be_truthy
|
205
|
+
}.not_to raise_error
|
206
|
+
end
|
207
|
+
|
200
208
|
it ' get a error' do
|
201
209
|
expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
202
210
|
expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe 'Rtesseract::Uzn' do
|
5
|
+
before do
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_uzn_spec.rb', '')).expand_path
|
7
|
+
@image_tif = @path.join('images', 'mixed.tif').to_s
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'should be instantiable' do
|
11
|
+
expect(RTesseract::Uzn.new.class).to eql(RTesseract::Uzn)
|
12
|
+
expect(RTesseract::Uzn.new(@image_tif).class).to eql(RTesseract::Uzn)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should translate parts of the image to text from a block' do
|
16
|
+
uzn_block = RTesseract::Uzn.new(@image_tif) do |image|
|
17
|
+
image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
|
18
|
+
image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
|
19
|
+
image.area(x: 218, y: 22, w: 24, h: 28) # position of F
|
20
|
+
image.area(x: 248, y: 24, w: 22, h: 22) # position of F
|
21
|
+
end
|
22
|
+
|
23
|
+
expect(uzn_block.to_s_without_spaces).to eql('43FF')
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should translate parts of the image to text from initializer options' do
|
27
|
+
@areas = []
|
28
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
29
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
30
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
31
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
32
|
+
|
33
|
+
uzn_block = RTesseract::Uzn.new(@image_tif, areas: @areas)
|
34
|
+
expect(uzn_block.to_s_without_spaces).to eql('43FF')
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'should handle a blank image' do
|
38
|
+
@areas = []
|
39
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
40
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
41
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
42
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
43
|
+
uzn_block = RTesseract::Uzn.new(@path.join('images', 'blank.tif').to_s, areas: @areas)
|
44
|
+
expect(uzn_block.to_s_without_spaces).to eql('')
|
45
|
+
end
|
46
|
+
|
47
|
+
it ' get a error' do
|
48
|
+
@areas = [{ x: 28, y: 19, w: 25, h: 25 }]
|
49
|
+
|
50
|
+
uzn_block = RTesseract::Uzn.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
51
|
+
expect { uzn_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
52
|
+
|
53
|
+
uzn_block = RTesseract::Uzn.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
|
54
|
+
expect { uzn_block.to_s }.to raise_error(RTesseract::ConversionError)
|
55
|
+
end
|
56
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -14,5 +14,8 @@ require 'rtesseract'
|
|
14
14
|
# in ./support/ and its subdirectories.
|
15
15
|
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
16
16
|
|
17
|
-
RSpec.configure do
|
17
|
+
RSpec.configure do |config|
|
18
|
+
config.after(:each) do
|
19
|
+
RTesseract.configuration = RTesseract::Configuration.new
|
20
|
+
end
|
18
21
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -70,16 +70,16 @@ dependencies:
|
|
70
70
|
name: jeweler
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: simplecov
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: json
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: coveralls
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -138,6 +152,7 @@ files:
|
|
138
152
|
- lib/rtesseract/mixed.rb
|
139
153
|
- lib/rtesseract/processor.rb
|
140
154
|
- lib/rtesseract/utils.rb
|
155
|
+
- lib/rtesseract/uzn.rb
|
141
156
|
- rtesseract.gemspec
|
142
157
|
- spec/configs/eng.user-words.txt
|
143
158
|
- spec/images/README.pdf
|
@@ -156,6 +171,7 @@ files:
|
|
156
171
|
- spec/rtesseract_box_spec.rb
|
157
172
|
- spec/rtesseract_mixed_spec.rb
|
158
173
|
- spec/rtesseract_spec.rb
|
174
|
+
- spec/rtesseract_uzn_spec.rb
|
159
175
|
- spec/spec_helper.rb
|
160
176
|
homepage: http://github.com/dannnylo/rtesseract
|
161
177
|
licenses:
|
@@ -177,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
193
|
version: '0'
|
178
194
|
requirements: []
|
179
195
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.
|
196
|
+
rubygems_version: 2.6.14
|
181
197
|
signing_key:
|
182
198
|
specification_version: 4
|
183
199
|
summary: Ruby library for working with the Tesseract OCR.
|