rtesseract 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.travis.yml +4 -5
- data/Gemfile +2 -1
- data/Gemfile.lock +60 -55
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +3 -3
- data/lib/rtesseract.rb +7 -1
- data/lib/rtesseract/blob.rb +2 -2
- data/lib/rtesseract/configuration.rb +2 -2
- data/lib/rtesseract/utils.rb +3 -3
- data/lib/rtesseract/uzn.rb +47 -0
- data/rtesseract.gemspec +39 -34
- data/spec/rtesseract_spec.rb +8 -0
- data/spec/rtesseract_uzn_spec.rb +56 -0
- data/spec/spec_helper.rb +4 -1
- metadata +23 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cb2f55f98e4b22827068fd2284a893788d72a751
|
|
4
|
+
data.tar.gz: f6b3ff2bfff6d4e250c75b239354485670b3d732
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ba2dcb1878a1c98013a6c4c0ad4a583a8b830faf01fede4df8b39a1d6e492ef79cc3610dee60ab856937e7ac7a3c7ae8789ae74fd32f80dbc910cd488a5f0651
|
|
7
|
+
data.tar.gz: aacbcfe446dd8050a6d45b78dab5d38468cb6110c4c6216c1877bfa2bbee08bdc6763e9b0c5c0ec7fe40aac8fc43a553da533b33449c60f40d21f6e6c8034faa
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -2,83 +2,87 @@ GEM
|
|
|
2
2
|
remote: http://rubygems.org/
|
|
3
3
|
specs:
|
|
4
4
|
addressable (2.4.0)
|
|
5
|
-
builder (3.2.
|
|
6
|
-
coveralls (0.
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
builder (3.2.3)
|
|
6
|
+
coveralls (0.7.2)
|
|
7
|
+
multi_json (~> 1.3)
|
|
8
|
+
rest-client (= 1.6.7)
|
|
9
|
+
simplecov (>= 0.7)
|
|
10
|
+
term-ansicolor (= 1.2.2)
|
|
11
|
+
thor (= 0.18.1)
|
|
12
12
|
descendants_tracker (0.0.4)
|
|
13
13
|
thread_safe (~> 0.3, >= 0.3.1)
|
|
14
|
-
diff-lcs (1.
|
|
14
|
+
diff-lcs (1.3)
|
|
15
15
|
docile (1.1.5)
|
|
16
16
|
faraday (0.9.2)
|
|
17
17
|
multipart-post (>= 1.2, < 3)
|
|
18
18
|
git (1.3.0)
|
|
19
|
-
github_api (0.
|
|
19
|
+
github_api (0.16.0)
|
|
20
20
|
addressable (~> 2.4.0)
|
|
21
21
|
descendants_tracker (~> 0.0.4)
|
|
22
22
|
faraday (~> 0.8, < 0.10)
|
|
23
23
|
hashie (>= 3.4)
|
|
24
|
-
|
|
25
|
-
oauth2
|
|
26
|
-
hashie (3.
|
|
27
|
-
highline (1.7.
|
|
28
|
-
jeweler (2.
|
|
24
|
+
mime-types (>= 1.16, < 3.0)
|
|
25
|
+
oauth2 (~> 1.0)
|
|
26
|
+
hashie (3.5.7)
|
|
27
|
+
highline (1.7.10)
|
|
28
|
+
jeweler (2.3.7)
|
|
29
29
|
builder
|
|
30
|
-
bundler (>= 1
|
|
30
|
+
bundler (>= 1)
|
|
31
31
|
git (>= 1.2.5)
|
|
32
|
-
github_api
|
|
32
|
+
github_api (~> 0.16.0)
|
|
33
33
|
highline (>= 1.6.15)
|
|
34
34
|
nokogiri (>= 1.5.10)
|
|
35
|
+
psych (~> 2.2)
|
|
35
36
|
rake
|
|
36
37
|
rdoc
|
|
37
|
-
|
|
38
|
-
json (1.
|
|
39
|
-
jwt (1.5.
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
semver2
|
|
39
|
+
json (2.1.0)
|
|
40
|
+
jwt (1.5.6)
|
|
41
|
+
mime-types (2.99.3)
|
|
42
|
+
mini_magick (4.8.0)
|
|
43
|
+
mini_portile2 (2.3.0)
|
|
44
|
+
multi_json (1.12.2)
|
|
45
|
+
multi_xml (0.6.0)
|
|
44
46
|
multipart-post (2.0.0)
|
|
45
|
-
nokogiri (1.
|
|
46
|
-
mini_portile2 (~> 2.
|
|
47
|
-
oauth2 (1.
|
|
48
|
-
faraday (>= 0.8, < 0.
|
|
49
|
-
jwt (~> 1.0
|
|
47
|
+
nokogiri (1.8.1)
|
|
48
|
+
mini_portile2 (~> 2.3.0)
|
|
49
|
+
oauth2 (1.4.0)
|
|
50
|
+
faraday (>= 0.8, < 0.13)
|
|
51
|
+
jwt (~> 1.0)
|
|
50
52
|
multi_json (~> 1.3)
|
|
51
53
|
multi_xml (~> 0.5)
|
|
52
54
|
rack (>= 1.2, < 3)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
rspec-
|
|
62
|
-
|
|
63
|
-
rspec-
|
|
64
|
-
rspec-
|
|
55
|
+
psych (2.2.4)
|
|
56
|
+
rack (2.0.3)
|
|
57
|
+
rake (12.3.0)
|
|
58
|
+
rdoc (6.0.1)
|
|
59
|
+
rest-client (1.6.7)
|
|
60
|
+
mime-types (>= 1.16)
|
|
61
|
+
rmagick (2.16.0)
|
|
62
|
+
rspec (3.7.0)
|
|
63
|
+
rspec-core (~> 3.7.0)
|
|
64
|
+
rspec-expectations (~> 3.7.0)
|
|
65
|
+
rspec-mocks (~> 3.7.0)
|
|
66
|
+
rspec-core (3.7.1)
|
|
67
|
+
rspec-support (~> 3.7.0)
|
|
68
|
+
rspec-expectations (3.7.0)
|
|
65
69
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
66
|
-
rspec-support (~> 3.
|
|
67
|
-
rspec-mocks (3.
|
|
70
|
+
rspec-support (~> 3.7.0)
|
|
71
|
+
rspec-mocks (3.7.0)
|
|
68
72
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
69
|
-
rspec-support (~> 3.
|
|
70
|
-
rspec-support (3.
|
|
71
|
-
|
|
72
|
-
simplecov (0.
|
|
73
|
+
rspec-support (~> 3.7.0)
|
|
74
|
+
rspec-support (3.7.0)
|
|
75
|
+
semver2 (3.4.2)
|
|
76
|
+
simplecov (0.15.1)
|
|
73
77
|
docile (~> 1.1.0)
|
|
74
|
-
json (
|
|
78
|
+
json (>= 1.8, < 3)
|
|
75
79
|
simplecov-html (~> 0.10.0)
|
|
76
|
-
simplecov-html (0.10.
|
|
77
|
-
term-ansicolor (1.
|
|
78
|
-
tins (~>
|
|
79
|
-
thor (0.
|
|
80
|
-
thread_safe (0.3.
|
|
81
|
-
tins (
|
|
80
|
+
simplecov-html (0.10.2)
|
|
81
|
+
term-ansicolor (1.2.2)
|
|
82
|
+
tins (~> 0.8)
|
|
83
|
+
thor (0.18.1)
|
|
84
|
+
thread_safe (0.3.6)
|
|
85
|
+
tins (0.13.2)
|
|
82
86
|
|
|
83
87
|
PLATFORMS
|
|
84
88
|
ruby
|
|
@@ -86,7 +90,8 @@ PLATFORMS
|
|
|
86
90
|
DEPENDENCIES
|
|
87
91
|
bundler
|
|
88
92
|
coveralls
|
|
89
|
-
jeweler
|
|
93
|
+
jeweler
|
|
94
|
+
json
|
|
90
95
|
mini_magick
|
|
91
96
|
nokogiri
|
|
92
97
|
rdoc
|
|
@@ -95,4 +100,4 @@ DEPENDENCIES
|
|
|
95
100
|
simplecov
|
|
96
101
|
|
|
97
102
|
BUNDLED WITH
|
|
98
|
-
1.
|
|
103
|
+
1.16.1
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.
|
|
1
|
+
2.2.0
|
data/lib/processors/mini_magick.rb
CHANGED
|
@@ -21,10 +21,10 @@ class RTesseract
|
|
|
21
21
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
|
22
22
|
cat.format('tif') do |c|
|
|
23
23
|
c.compress 'None'
|
|
24
|
-
c.alpha 'off'
|
|
24
|
+
c.alpha 'off' if MiniMagick.cli != :graphicsmagick
|
|
25
25
|
end
|
|
26
26
|
cat.crop("#{points[:w]}x#{points[:h]}+#{points[:x]}+#{points[:y]}") if points.is_a?(Hash) && points.values.compact != []
|
|
27
|
-
cat.alpha 'off'
|
|
27
|
+
cat.alpha 'off' if MiniMagick.cli != :graphicsmagick
|
|
28
28
|
cat.write tmp_file.path.to_s
|
|
29
29
|
tmp_file
|
|
30
30
|
end
|
|
@@ -40,4 +40,4 @@ class RTesseract
|
|
|
40
40
|
end
|
|
41
41
|
end
|
|
42
42
|
end
|
|
43
|
-
end
|
|
43
|
+
end
|
data/lib/rtesseract.rb
CHANGED
|
@@ -70,6 +70,11 @@ class RTesseract
|
|
|
70
70
|
option_to_string('-psm', configuration.psm)
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
+
# Engine Mode
|
|
74
|
+
def oem
|
|
75
|
+
option_to_string '--oem', configuration.oem
|
|
76
|
+
end
|
|
77
|
+
|
|
73
78
|
# Tessdata Dir
|
|
74
79
|
def tessdata_dir
|
|
75
80
|
option_to_string('--tessdata-dir', configuration.tessdata_dir)
|
|
@@ -145,7 +150,7 @@ class RTesseract
|
|
|
145
150
|
|
|
146
151
|
# Run command
|
|
147
152
|
def convert_command
|
|
148
|
-
`#{configuration.command} "#{image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
|
153
|
+
`#{configuration.command} "#{image}" "#{file_dest}" #{lang} #{oem} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
|
149
154
|
end
|
|
150
155
|
|
|
151
156
|
# Is pdf output?
|
|
@@ -227,6 +232,7 @@ class RTesseract
|
|
|
227
232
|
end
|
|
228
233
|
|
|
229
234
|
require 'rtesseract/mixed'
|
|
235
|
+
require 'rtesseract/uzn'
|
|
230
236
|
require 'rtesseract/box'
|
|
231
237
|
require 'rtesseract/box_char'
|
|
232
238
|
require 'rtesseract/blob'
|
data/lib/rtesseract/blob.rb
CHANGED
|
@@ -3,7 +3,7 @@ class RTesseract
|
|
|
3
3
|
# Read image from memory blob
|
|
4
4
|
def self.read(src = nil, options = {})
|
|
5
5
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
|
6
|
-
processor = RTesseract::Processor.choose_processor!(options
|
|
6
|
+
processor = RTesseract::Processor.choose_processor!(options[:processor])
|
|
7
7
|
image = processor.read_with_processor(src.to_s)
|
|
8
8
|
yield(image)
|
|
9
9
|
object = RTesseract.new('', options).from_blob(image.to_blob)
|
|
@@ -31,4 +31,4 @@ class RTesseract
|
|
|
31
31
|
rescue => error
|
|
32
32
|
raise RTesseract::ConversionError.new(error), error, caller
|
|
33
33
|
end
|
|
34
|
-
end
|
|
34
|
+
end
|
data/lib/rtesseract/configuration.rb
CHANGED
|
@@ -14,7 +14,7 @@ class RTesseract
|
|
|
14
14
|
|
|
15
15
|
# Configuration class
|
|
16
16
|
class Configuration
|
|
17
|
-
attr_accessor :processor, :lang, :psm, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
|
17
|
+
attr_accessor :processor, :lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
|
18
18
|
|
|
19
19
|
def initialize
|
|
20
20
|
@processor = 'rmagick'
|
|
@@ -66,7 +66,7 @@ class RTesseract
|
|
|
66
66
|
RTesseract::Configuration.new.tap do |config|
|
|
67
67
|
config.command = config.option(options, :command, RTesseract.default_command)
|
|
68
68
|
config.processor = config.option(options, :processor, 'rmagick')
|
|
69
|
-
config.load_options(options, [:lang, :psm, :tessdata_dir, :user_words, :user_patterns])
|
|
69
|
+
config.load_options(options, [:lang, :psm, :oem, :tessdata_dir, :user_words, :user_patterns])
|
|
70
70
|
config.debug = config.option(options, :debug, false)
|
|
71
71
|
pdf_opts = lambda { |o| o == 'pdf' || o == :pdf }
|
|
72
72
|
config.options_cmd = [options.option(:options, nil)].delete_if(&pdf_opts).flatten.compact
|
data/lib/rtesseract/utils.rb
CHANGED
|
@@ -27,9 +27,9 @@ class RTesseract
|
|
|
27
27
|
|
|
28
28
|
# Extract tesseract version number
|
|
29
29
|
def self.version_number
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
version =
|
|
30
|
+
output, st = Open3.capture2e(RTesseract.default_command, "--version")
|
|
31
|
+
|
|
32
|
+
version = output.split("\n")[0].split(" ")[1].split('.')[0, 2].join('.')
|
|
33
33
|
Float(version) rescue nil
|
|
34
34
|
end
|
|
35
35
|
end
|
data/lib/rtesseract/uzn.rb
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# RTesseract
|
|
3
|
+
class RTesseract
|
|
4
|
+
# Alternative approach to Mixed when you want to read from specific areas.
|
|
5
|
+
# Requires `-psm 4` which means the text must be "a single column of text of variable sizes".
|
|
6
|
+
class Uzn < RTesseract
|
|
7
|
+
attr_reader :areas
|
|
8
|
+
DEFAULT_ALPHABET = 'Text/Latin'
|
|
9
|
+
|
|
10
|
+
def initialize(src = '', options = {})
|
|
11
|
+
@areas = options.delete(:areas) || []
|
|
12
|
+
@alphabet = options.delete(:alphabet) || DEFAULT_ALPHABET
|
|
13
|
+
super(src, options.merge(psm: 4))
|
|
14
|
+
yield self if block_given?
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Add areas
|
|
18
|
+
def area(points)
|
|
19
|
+
areas << points
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def convert_command
|
|
23
|
+
@image = image
|
|
24
|
+
write_uzn_file
|
|
25
|
+
`#{configuration.command} "#{@image}" "#{file_dest}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{options_cmd.join(' ')}`
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def after_convert_hook
|
|
29
|
+
RTesseract::Utils.remove_files([@uzn_file])
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
def write_uzn_file
|
|
35
|
+
folder = File.dirname(@image)
|
|
36
|
+
basename = File.basename(@image, '.tif')
|
|
37
|
+
@uzn_file = File.new("#{folder}/#{basename}.uzn", File::CREAT|File::TRUNC|File::RDWR)
|
|
38
|
+
|
|
39
|
+
areas.each do |points|
|
|
40
|
+
s = "#{points[:x]} #{points[:y]} #{points[:w]} #{points[:h]} #{@alphabet}\n"
|
|
41
|
+
@uzn_file.write(s)
|
|
42
|
+
@uzn_file.flush
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
end
|
|
47
|
+
end
|
data/rtesseract.gemspec
CHANGED
|
@@ -2,18 +2,18 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: rtesseract 2.
|
|
5
|
+
# stub: rtesseract 2.2.0 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
|
-
s.name = "rtesseract"
|
|
9
|
-
s.version = "2.
|
|
8
|
+
s.name = "rtesseract".freeze
|
|
9
|
+
s.version = "2.2.0"
|
|
10
10
|
|
|
11
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
12
|
-
s.require_paths = ["lib"]
|
|
13
|
-
s.authors = ["Danilo Jeremias da Silva"]
|
|
14
|
-
s.date = "
|
|
15
|
-
s.description = "Ruby library for working with the Tesseract OCR."
|
|
16
|
-
s.email = "dannnylo@gmail.com"
|
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
12
|
+
s.require_paths = ["lib".freeze]
|
|
13
|
+
s.authors = ["Danilo Jeremias da Silva".freeze]
|
|
14
|
+
s.date = "2018-01-05"
|
|
15
|
+
s.description = "Ruby library for working with the Tesseract OCR.".freeze
|
|
16
|
+
s.email = "dannnylo@gmail.com".freeze
|
|
17
17
|
s.extra_rdoc_files = [
|
|
18
18
|
"LICENSE.txt",
|
|
19
19
|
"README.rdoc"
|
|
@@ -41,6 +41,7 @@ Gem::Specification.new do |s|
|
|
|
41
41
|
"lib/rtesseract/mixed.rb",
|
|
42
42
|
"lib/rtesseract/processor.rb",
|
|
43
43
|
"lib/rtesseract/utils.rb",
|
|
44
|
+
"lib/rtesseract/uzn.rb",
|
|
44
45
|
"rtesseract.gemspec",
|
|
45
46
|
"spec/configs/eng.user-words.txt",
|
|
46
47
|
"spec/images/README.pdf",
|
|
@@ -59,41 +60,45 @@ Gem::Specification.new do |s|
|
|
|
59
60
|
"spec/rtesseract_box_spec.rb",
|
|
60
61
|
"spec/rtesseract_mixed_spec.rb",
|
|
61
62
|
"spec/rtesseract_spec.rb",
|
|
63
|
+
"spec/rtesseract_uzn_spec.rb",
|
|
62
64
|
"spec/spec_helper.rb"
|
|
63
65
|
]
|
|
64
|
-
s.homepage = "http://github.com/dannnylo/rtesseract"
|
|
65
|
-
s.licenses = ["MIT"]
|
|
66
|
-
s.rubygems_version = "2.
|
|
67
|
-
s.summary = "Ruby library for working with the Tesseract OCR."
|
|
66
|
+
s.homepage = "http://github.com/dannnylo/rtesseract".freeze
|
|
67
|
+
s.licenses = ["MIT".freeze]
|
|
68
|
+
s.rubygems_version = "2.6.14".freeze
|
|
69
|
+
s.summary = "Ruby library for working with the Tesseract OCR.".freeze
|
|
68
70
|
|
|
69
71
|
if s.respond_to? :specification_version then
|
|
70
72
|
s.specification_version = 4
|
|
71
73
|
|
|
72
74
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
73
|
-
s.add_runtime_dependency(%q<nokogiri
|
|
74
|
-
s.add_development_dependency(%q<rspec
|
|
75
|
-
s.add_development_dependency(%q<rdoc
|
|
76
|
-
s.add_development_dependency(%q<bundler
|
|
77
|
-
s.add_development_dependency(%q<jeweler
|
|
78
|
-
s.add_development_dependency(%q<simplecov
|
|
79
|
-
s.add_development_dependency(%q<
|
|
75
|
+
s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 0"])
|
|
76
|
+
s.add_development_dependency(%q<rspec>.freeze, [">= 0"])
|
|
77
|
+
s.add_development_dependency(%q<rdoc>.freeze, [">= 0"])
|
|
78
|
+
s.add_development_dependency(%q<bundler>.freeze, [">= 0"])
|
|
79
|
+
s.add_development_dependency(%q<jeweler>.freeze, [">= 0"])
|
|
80
|
+
s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
|
|
81
|
+
s.add_development_dependency(%q<json>.freeze, [">= 0"])
|
|
82
|
+
s.add_development_dependency(%q<coveralls>.freeze, [">= 0"])
|
|
80
83
|
else
|
|
81
|
-
s.add_dependency(%q<nokogiri
|
|
82
|
-
s.add_dependency(%q<rspec
|
|
83
|
-
s.add_dependency(%q<rdoc
|
|
84
|
-
s.add_dependency(%q<bundler
|
|
85
|
-
s.add_dependency(%q<jeweler
|
|
86
|
-
s.add_dependency(%q<simplecov
|
|
87
|
-
s.add_dependency(%q<
|
|
84
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
|
|
85
|
+
s.add_dependency(%q<rspec>.freeze, [">= 0"])
|
|
86
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 0"])
|
|
87
|
+
s.add_dependency(%q<bundler>.freeze, [">= 0"])
|
|
88
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 0"])
|
|
89
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
|
90
|
+
s.add_dependency(%q<json>.freeze, [">= 0"])
|
|
91
|
+
s.add_dependency(%q<coveralls>.freeze, [">= 0"])
|
|
88
92
|
end
|
|
89
93
|
else
|
|
90
|
-
s.add_dependency(%q<nokogiri
|
|
91
|
-
s.add_dependency(%q<rspec
|
|
92
|
-
s.add_dependency(%q<rdoc
|
|
93
|
-
s.add_dependency(%q<bundler
|
|
94
|
-
s.add_dependency(%q<jeweler
|
|
95
|
-
s.add_dependency(%q<simplecov
|
|
96
|
-
s.add_dependency(%q<
|
|
94
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 0"])
|
|
95
|
+
s.add_dependency(%q<rspec>.freeze, [">= 0"])
|
|
96
|
+
s.add_dependency(%q<rdoc>.freeze, [">= 0"])
|
|
97
|
+
s.add_dependency(%q<bundler>.freeze, [">= 0"])
|
|
98
|
+
s.add_dependency(%q<jeweler>.freeze, [">= 0"])
|
|
99
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
|
100
|
+
s.add_dependency(%q<json>.freeze, [">= 0"])
|
|
101
|
+
s.add_dependency(%q<coveralls>.freeze, [">= 0"])
|
|
97
102
|
end
|
|
98
103
|
end
|
|
99
104
|
|
data/spec/rtesseract_spec.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
# encoding: UTF-8
|
|
3
3
|
require 'pathname'
|
|
4
|
+
RTesseract::Processor::RMagickProcessor.setup
|
|
4
5
|
|
|
5
6
|
# Class to rise error
|
|
6
7
|
class MakeStringError
|
|
@@ -197,6 +198,13 @@ describe 'Rtesseract' do
|
|
|
197
198
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
|
198
199
|
end
|
|
199
200
|
|
|
201
|
+
it 'does not raise on read with image_magick processor' do
|
|
202
|
+
expect {
|
|
203
|
+
instance = RTesseract.read(@image_tif, processor: 'mini_magick') {}
|
|
204
|
+
expect(instance.processor.a_name?('mini_magick')).to be_truthy
|
|
205
|
+
}.not_to raise_error
|
|
206
|
+
end
|
|
207
|
+
|
|
200
208
|
it ' get a error' do
|
|
201
209
|
expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
|
202
210
|
expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
data/spec/rtesseract_uzn_spec.rb
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
|
+
|
|
4
|
+
describe 'Rtesseract::Uzn' do
|
|
5
|
+
before do
|
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_uzn_spec.rb', '')).expand_path
|
|
7
|
+
@image_tif = @path.join('images', 'mixed.tif').to_s
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it 'should be instantiable' do
|
|
11
|
+
expect(RTesseract::Uzn.new.class).to eql(RTesseract::Uzn)
|
|
12
|
+
expect(RTesseract::Uzn.new(@image_tif).class).to eql(RTesseract::Uzn)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'should translate parts of the image to text from a block' do
|
|
16
|
+
uzn_block = RTesseract::Uzn.new(@image_tif) do |image|
|
|
17
|
+
image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
|
|
18
|
+
image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
|
|
19
|
+
image.area(x: 218, y: 22, w: 24, h: 28) # position of F
|
|
20
|
+
image.area(x: 248, y: 24, w: 22, h: 22) # position of F
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
expect(uzn_block.to_s_without_spaces).to eql('43FF')
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it 'should translate parts of the image to text from initializer options' do
|
|
27
|
+
@areas = []
|
|
28
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
|
29
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
|
30
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
|
31
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
|
32
|
+
|
|
33
|
+
uzn_block = RTesseract::Uzn.new(@image_tif, areas: @areas)
|
|
34
|
+
expect(uzn_block.to_s_without_spaces).to eql('43FF')
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
it 'should handle a blank image' do
|
|
38
|
+
@areas = []
|
|
39
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
|
40
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
|
41
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
|
42
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
|
43
|
+
uzn_block = RTesseract::Uzn.new(@path.join('images', 'blank.tif').to_s, areas: @areas)
|
|
44
|
+
expect(uzn_block.to_s_without_spaces).to eql('')
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it ' get a error' do
|
|
48
|
+
@areas = [{ x: 28, y: 19, w: 25, h: 25 }]
|
|
49
|
+
|
|
50
|
+
uzn_block = RTesseract::Uzn.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
|
51
|
+
expect { uzn_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
|
52
|
+
|
|
53
|
+
uzn_block = RTesseract::Uzn.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
|
|
54
|
+
expect { uzn_block.to_s }.to raise_error(RTesseract::ConversionError)
|
|
55
|
+
end
|
|
56
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -14,5 +14,8 @@ require 'rtesseract'
|
|
|
14
14
|
# in ./support/ and its subdirectories.
|
|
15
15
|
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
|
16
16
|
|
|
17
|
-
RSpec.configure do
|
|
17
|
+
RSpec.configure do |config|
|
|
18
|
+
config.after(:each) do
|
|
19
|
+
RTesseract.configuration = RTesseract::Configuration.new
|
|
20
|
+
end
|
|
18
21
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rtesseract
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Danilo Jeremias da Silva
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2018-01-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -70,16 +70,16 @@ dependencies:
|
|
|
70
70
|
name: jeweler
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
|
-
- - "
|
|
73
|
+
- - ">="
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version:
|
|
75
|
+
version: '0'
|
|
76
76
|
type: :development
|
|
77
77
|
prerelease: false
|
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
79
|
requirements:
|
|
80
|
-
- - "
|
|
80
|
+
- - ">="
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
|
-
version:
|
|
82
|
+
version: '0'
|
|
83
83
|
- !ruby/object:Gem::Dependency
|
|
84
84
|
name: simplecov
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,6 +94,20 @@ dependencies:
|
|
|
94
94
|
- - ">="
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
96
|
version: '0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: json
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - ">="
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0'
|
|
97
111
|
- !ruby/object:Gem::Dependency
|
|
98
112
|
name: coveralls
|
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -138,6 +152,7 @@ files:
|
|
|
138
152
|
- lib/rtesseract/mixed.rb
|
|
139
153
|
- lib/rtesseract/processor.rb
|
|
140
154
|
- lib/rtesseract/utils.rb
|
|
155
|
+
- lib/rtesseract/uzn.rb
|
|
141
156
|
- rtesseract.gemspec
|
|
142
157
|
- spec/configs/eng.user-words.txt
|
|
143
158
|
- spec/images/README.pdf
|
|
@@ -156,6 +171,7 @@ files:
|
|
|
156
171
|
- spec/rtesseract_box_spec.rb
|
|
157
172
|
- spec/rtesseract_mixed_spec.rb
|
|
158
173
|
- spec/rtesseract_spec.rb
|
|
174
|
+
- spec/rtesseract_uzn_spec.rb
|
|
159
175
|
- spec/spec_helper.rb
|
|
160
176
|
homepage: http://github.com/dannnylo/rtesseract
|
|
161
177
|
licenses:
|
|
@@ -177,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
177
193
|
version: '0'
|
|
178
194
|
requirements: []
|
|
179
195
|
rubyforge_project:
|
|
180
|
-
rubygems_version: 2.
|
|
196
|
+
rubygems_version: 2.6.14
|
|
181
197
|
signing_key:
|
|
182
198
|
specification_version: 4
|
|
183
199
|
summary: Ruby library for working with the Tesseract OCR.
|