rtesseract 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile.lock +6 -20
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +37 -26
- data/lib/processors/none.rb +27 -19
- data/lib/processors/rmagick.rb +39 -28
- data/lib/rtesseract.rb +46 -93
- data/lib/rtesseract/blob.rb +34 -0
- data/lib/rtesseract/box.rb +10 -1
- data/lib/rtesseract/box_char.rb +3 -0
- data/lib/rtesseract/configuration.rb +16 -8
- data/lib/rtesseract/errors.rb +1 -0
- data/lib/rtesseract/mixed.rb +7 -4
- data/lib/rtesseract/processor.rb +19 -0
- data/lib/rtesseract/utils.rb +34 -0
- data/rtesseract.gemspec +8 -4
- data/spec/configs/eng.user-words.txt +13 -0
- data/spec/rtesseract_box_char_spec.rb +13 -12
- data/spec/rtesseract_spec.rb +14 -12
- metadata +7 -3
- data/lib/utils.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6eae58279cf744227e79b7bbc9180f7aea852547
|
4
|
+
data.tar.gz: 3836aa96d24b7f1a0b957cf803553f547cc33544
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ef57359c7c7f43094a50838b6d29d28d7808c9cadd8f2b8514c613be030161f8d640c41ba3d403c00fb59fdf85ffcbc57795f6c65b8418ad348eb1a6c07e901
|
7
|
+
data.tar.gz: ff5f0f94c8039bd0b38b0c9ec2618b4c38b07b9707e28ff29a3bb943abc85d5afaa543dfba1ba2b9e565d056ea558eda9b7f6d222a6adb43614cd86c6e8fdcac
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
## v2.0.1
|
2
|
+
|
3
|
+
#### Changed
|
4
|
+
|
5
|
+
* Refactoring of some small classes
|
6
|
+
|
7
|
+
## v2.0.0
|
8
|
+
|
9
|
+
#### Added
|
10
|
+
|
11
|
+
* Support for the options --tessdata-dir, --user-words and --user-patterns
|
12
|
+
* Ruby 2.3.0 added to the Travis tests.
|
13
|
+
|
14
|
+
#### Changed
|
15
|
+
|
16
|
+
* Refactoring of some classes
|
17
|
+
* Crop options are now a hash with x, y, w, h keys.
|
18
|
+
* Areas of RTesseract::Mixed now use :w instead of :width and :h instead of :height.
|
19
|
+
|
20
|
+
#### Removed
|
21
|
+
|
22
|
+
* Support for the quick_magick gem.
|
data/Gemfile.lock
CHANGED
@@ -3,9 +3,8 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
addressable (2.4.0)
|
5
5
|
builder (3.2.2)
|
6
|
-
coveralls (0.8.
|
6
|
+
coveralls (0.8.13)
|
7
7
|
json (~> 1.8)
|
8
|
-
rest-client (>= 1.6.8, < 2)
|
9
8
|
simplecov (~> 0.11.0)
|
10
9
|
term-ansicolor (~> 1.3)
|
11
10
|
thor (~> 0.19.1)
|
@@ -14,8 +13,6 @@ GEM
|
|
14
13
|
thread_safe (~> 0.3, >= 0.3.1)
|
15
14
|
diff-lcs (1.2.5)
|
16
15
|
docile (1.1.5)
|
17
|
-
domain_name (0.5.25)
|
18
|
-
unf (>= 0.0.5, < 1.0.0)
|
19
16
|
faraday (0.9.2)
|
20
17
|
multipart-post (>= 1.2, < 3)
|
21
18
|
git (1.3.0)
|
@@ -28,8 +25,6 @@ GEM
|
|
28
25
|
oauth2
|
29
26
|
hashie (3.4.3)
|
30
27
|
highline (1.7.8)
|
31
|
-
http-cookie (1.0.2)
|
32
|
-
domain_name (~> 0.5)
|
33
28
|
jeweler (2.1.1)
|
34
29
|
builder
|
35
30
|
bundler (>= 1.0)
|
@@ -42,13 +37,11 @@ GEM
|
|
42
37
|
semver
|
43
38
|
json (1.8.3)
|
44
39
|
jwt (1.5.1)
|
45
|
-
|
46
|
-
mini_magick (4.3.6)
|
40
|
+
mini_magick (4.5.1)
|
47
41
|
mini_portile2 (2.0.0)
|
48
42
|
multi_json (1.11.2)
|
49
43
|
multi_xml (0.5.5)
|
50
44
|
multipart-post (2.0.0)
|
51
|
-
netrc (0.11.0)
|
52
45
|
nokogiri (1.6.7.2)
|
53
46
|
mini_portile2 (~> 2.0.0.rc2)
|
54
47
|
oauth2 (1.1.0)
|
@@ -61,26 +54,22 @@ GEM
|
|
61
54
|
rake (11.1.2)
|
62
55
|
rdoc (4.2.2)
|
63
56
|
json (~> 1.4)
|
64
|
-
rest-client (1.8.0)
|
65
|
-
http-cookie (>= 1.0.2, < 2.0)
|
66
|
-
mime-types (>= 1.16, < 3.0)
|
67
|
-
netrc (~> 0.7)
|
68
57
|
rmagick (2.15.4)
|
69
58
|
rspec (3.4.0)
|
70
59
|
rspec-core (~> 3.4.0)
|
71
60
|
rspec-expectations (~> 3.4.0)
|
72
61
|
rspec-mocks (~> 3.4.0)
|
73
|
-
rspec-core (3.4.
|
62
|
+
rspec-core (3.4.4)
|
74
63
|
rspec-support (~> 3.4.0)
|
75
64
|
rspec-expectations (3.4.0)
|
76
65
|
diff-lcs (>= 1.2.0, < 2.0)
|
77
66
|
rspec-support (~> 3.4.0)
|
78
|
-
rspec-mocks (3.4.
|
67
|
+
rspec-mocks (3.4.1)
|
79
68
|
diff-lcs (>= 1.2.0, < 2.0)
|
80
69
|
rspec-support (~> 3.4.0)
|
81
70
|
rspec-support (3.4.1)
|
82
71
|
semver (1.0.1)
|
83
|
-
simplecov (0.11.
|
72
|
+
simplecov (0.11.2)
|
84
73
|
docile (~> 1.1.0)
|
85
74
|
json (~> 1.8)
|
86
75
|
simplecov-html (~> 0.10.0)
|
@@ -90,9 +79,6 @@ GEM
|
|
90
79
|
thor (0.19.1)
|
91
80
|
thread_safe (0.3.5)
|
92
81
|
tins (1.6.0)
|
93
|
-
unf (0.1.4)
|
94
|
-
unf_ext
|
95
|
-
unf_ext (0.0.7.1)
|
96
82
|
|
97
83
|
PLATFORMS
|
98
84
|
ruby
|
@@ -109,4 +95,4 @@ DEPENDENCIES
|
|
109
95
|
simplecov
|
110
96
|
|
111
97
|
BUNDLED WITH
|
112
|
-
1.
|
98
|
+
1.11.2
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.
|
1
|
+
2.0.1
|
@@ -1,32 +1,43 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Adds image manipulation with MiniMagick to RTesseract
|
7
|
+
module MiniMagickProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
require 'mini_magick'
|
11
|
+
end
|
7
12
|
|
8
|
-
|
9
|
-
|
10
|
-
|
13
|
+
# Check whether the given name refers to this processor
|
14
|
+
def self.a_name?(name)
|
15
|
+
%w(mini_magick MiniMagickProcessor).include?(name.to_s)
|
16
|
+
end
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
# Convert Image to Tiff
|
19
|
+
def self.image_to_tif(source, points = {})
|
20
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
21
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
22
|
+
cat.format('tif') do |c|
|
23
|
+
c.compress 'None'
|
24
|
+
c.alpha 'off'
|
25
|
+
end
|
26
|
+
cat.crop("#{points[:w]}x#{points[:h]}+#{points[:x]}+#{points[:y]}") if points.is_a?(Hash) && points.values.compact != []
|
27
|
+
cat.alpha 'off'
|
28
|
+
cat.write tmp_file.path.to_s
|
29
|
+
tmp_file
|
30
|
+
end
|
24
31
|
|
25
|
-
|
26
|
-
|
27
|
-
|
32
|
+
# Cast instance of image
|
33
|
+
def self.read_with_processor(path)
|
34
|
+
MiniMagick::Image.open(path.to_s)
|
35
|
+
end
|
28
36
|
|
29
|
-
|
30
|
-
|
37
|
+
# Check if is a MiniMagick image
|
38
|
+
def self.image?(object)
|
39
|
+
object.class == MiniMagick::Image
|
40
|
+
end
|
41
|
+
end
|
31
42
|
end
|
32
|
-
end
|
43
|
+
end
|
data/lib/processors/none.rb
CHANGED
@@ -1,26 +1,34 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Lets RTesseract use an image file without any manipulation
|
7
|
+
module NoneProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
12
|
+
# Check if is this Processor
|
13
|
+
def self.a_name?(name)
|
14
|
+
%w(none NoneProcessor).include?(name.to_s)
|
15
|
+
end
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
# Convert Image to Tiff
|
18
|
+
def self.image_to_tif(source, _points = {})
|
19
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
20
|
+
tmp_file.write(read_with_processor(source))
|
21
|
+
tmp_file
|
22
|
+
end
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
# Cast instance of image
|
25
|
+
def self.read_with_processor(path)
|
26
|
+
File.read(path)
|
27
|
+
end
|
23
28
|
|
24
|
-
|
29
|
+
# Check whether the object is an image
|
30
|
+
def self.image?(*)
|
31
|
+
end
|
32
|
+
end
|
25
33
|
end
|
26
34
|
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -1,35 +1,46 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Add to rtesseract a image manipulation with RMagick
|
7
|
+
module RMagickProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
require 'rmagick'
|
11
|
+
rescue LoadError
|
12
|
+
# :nocov:
|
13
|
+
require 'RMagick'
|
14
|
+
# :nocov:
|
15
|
+
end
|
11
16
|
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
# Check if is this Processor
|
18
|
+
def self.a_name?(name)
|
19
|
+
%w(rmagick RMagickProcessor).include?(name.to_s)
|
20
|
+
end
|
15
21
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
# Convert Image to Tiff
|
23
|
+
def self.image_to_tif(source, points = {})
|
24
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
25
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
26
|
+
cat.crop!(points[:x], points[:y], points[:w], points[:h]) if points.is_a?(Hash) && points.values.compact != []
|
27
|
+
cat.alpha Magick::DeactivateAlphaChannel
|
28
|
+
cat.write(tmp_file.path.to_s) do
|
29
|
+
# self.depth = 16
|
30
|
+
self.compression = Magick::NoCompression
|
31
|
+
end
|
32
|
+
tmp_file
|
33
|
+
end
|
27
34
|
|
28
|
-
|
29
|
-
|
30
|
-
|
35
|
+
# Cast instance of image
|
36
|
+
def self.read_with_processor(path)
|
37
|
+
Magick::Image.read(path.to_s).first
|
38
|
+
end
|
31
39
|
|
32
|
-
|
33
|
-
|
40
|
+
# Check if is a RMagick image
|
41
|
+
def self.image?(object)
|
42
|
+
object.class == Magick::Image
|
43
|
+
end
|
44
|
+
end
|
34
45
|
end
|
35
46
|
end
|
data/lib/rtesseract.rb
CHANGED
@@ -1,18 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'pathname'
|
3
3
|
require 'tempfile'
|
4
|
-
require 'utils'
|
5
4
|
|
5
|
+
require 'rtesseract/utils'
|
6
6
|
require 'rtesseract/configuration'
|
7
7
|
require 'rtesseract/errors'
|
8
|
-
require 'rtesseract/mixed'
|
9
|
-
require 'rtesseract/box'
|
10
|
-
require 'rtesseract/box_char'
|
11
|
-
|
12
|
-
# Processors
|
13
|
-
require 'processors/rmagick.rb'
|
14
|
-
require 'processors/mini_magick.rb'
|
15
|
-
require 'processors/none.rb'
|
16
8
|
|
17
9
|
# Ruby wrapper for Tesseract OCR
|
18
10
|
class RTesseract
|
@@ -23,58 +15,30 @@ class RTesseract
|
|
23
15
|
def initialize(src = '', options = {})
|
24
16
|
self.configuration = RTesseract.local_config(options)
|
25
17
|
@options = options || {}
|
26
|
-
@value
|
27
|
-
@
|
18
|
+
@value = nil
|
19
|
+
@points = {}
|
20
|
+
@processor = RTesseract::Processor.choose_processor!(configuration.processor)
|
28
21
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
29
22
|
initialize_hook
|
30
23
|
end
|
31
24
|
|
25
|
+
# Hook to end of initialize method
|
32
26
|
def initialize_hook
|
33
27
|
end
|
34
28
|
|
35
|
-
|
36
|
-
fail RTesseract::ImageNotSelectedError if src.nil?
|
37
|
-
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
38
|
-
image = processor.read_with_processor(src.to_s)
|
39
|
-
yield(image)
|
40
|
-
object = RTesseract.new('', options).from_blob(image.to_blob)
|
41
|
-
object
|
42
|
-
end
|
43
|
-
|
44
|
-
def read
|
45
|
-
image = @processor.read_with_processor(@source.to_s)
|
46
|
-
new_image = yield(image)
|
47
|
-
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
48
|
-
self
|
49
|
-
end
|
50
|
-
|
29
|
+
# Define the source
|
51
30
|
def source=(src)
|
52
31
|
@value = nil
|
53
32
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
54
33
|
end
|
55
34
|
|
56
35
|
# Crop image to convert
|
57
|
-
def crop!(
|
36
|
+
def crop!(points = {})
|
58
37
|
@value = nil
|
59
|
-
@points =
|
38
|
+
@points = points
|
60
39
|
self
|
61
40
|
end
|
62
41
|
|
63
|
-
# Remove files
|
64
|
-
def remove_file(files = [])
|
65
|
-
files.each do |file|
|
66
|
-
if file.is_a?(Tempfile)
|
67
|
-
file.close
|
68
|
-
file.unlink
|
69
|
-
else
|
70
|
-
File.unlink(file)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
true
|
74
|
-
rescue => error
|
75
|
-
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
76
|
-
end
|
77
|
-
|
78
42
|
# Select the language
|
79
43
|
# ===Languages
|
80
44
|
## * eng - English
|
@@ -88,58 +52,56 @@ class RTesseract
|
|
88
52
|
## * vie - Vietnamese
|
89
53
|
## Note: Make sure you have installed the language to tesseract
|
90
54
|
def lang
|
91
|
-
language =
|
92
|
-
LANGUAGES
|
93
|
-
|
94
|
-
end
|
95
|
-
return " -l #{language} " if language.size > 0
|
55
|
+
language = (configuration.lang || 'eng').to_s.strip.downcase
|
56
|
+
" -l #{LANGUAGES[language] || language} "
|
57
|
+
rescue
|
96
58
|
''
|
59
|
+
end
|
60
|
+
|
61
|
+
# Convert option to command
|
62
|
+
def option_to_string(prefix, value = nil)
|
63
|
+
(value.nil? ? '' : " #{prefix} #{value} ")
|
97
64
|
rescue
|
98
65
|
''
|
99
66
|
end
|
100
67
|
|
101
68
|
# Page Segment Mode
|
102
69
|
def psm
|
103
|
-
(
|
104
|
-
rescue
|
105
|
-
''
|
70
|
+
option_to_string('-psm', configuration.psm)
|
106
71
|
end
|
107
72
|
|
108
73
|
# Tessdata Dir
|
109
74
|
def tessdata_dir
|
110
|
-
(
|
111
|
-
rescue
|
112
|
-
''
|
75
|
+
option_to_string('--tessdata-dir', configuration.tessdata_dir)
|
113
76
|
end
|
114
77
|
|
115
78
|
# User Words
|
116
79
|
def user_words
|
117
|
-
(
|
118
|
-
rescue
|
119
|
-
''
|
80
|
+
option_to_string('--user-words', configuration.user_words)
|
120
81
|
end
|
121
82
|
|
122
83
|
# User Patterns
|
123
84
|
def user_patterns
|
124
|
-
(
|
125
|
-
rescue
|
126
|
-
''
|
85
|
+
option_to_string('--user-patterns', configuration.user_patterns)
|
127
86
|
end
|
128
87
|
|
129
88
|
# Options on line
|
130
89
|
def options_cmd
|
131
|
-
|
90
|
+
configuration.options_cmd
|
132
91
|
end
|
133
92
|
|
93
|
+
# Hook to before config
|
134
94
|
def config_hook
|
135
95
|
end
|
136
96
|
|
97
|
+
# Convert configurations
|
137
98
|
def config
|
138
99
|
@options ||= {}
|
139
100
|
config_hook
|
140
101
|
@options.map { |k, v| "#{k} #{v}" }.join("\n")
|
141
102
|
end
|
142
103
|
|
104
|
+
# Write config to file
|
143
105
|
def config_file
|
144
106
|
config_hook
|
145
107
|
return '' if @options == {}
|
@@ -151,34 +113,41 @@ class RTesseract
|
|
151
113
|
|
152
114
|
# TODO: Clear console for MacOS or Windows
|
153
115
|
def clear_console_output
|
154
|
-
return '' if
|
116
|
+
return '' if configuration.debug
|
155
117
|
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
156
118
|
end
|
157
119
|
|
120
|
+
# Get image
|
158
121
|
def image
|
159
122
|
(@image = @processor.image_to_tif(@source, @points)).path
|
160
123
|
end
|
161
124
|
|
125
|
+
# Extension of file
|
162
126
|
def file_ext
|
163
127
|
'.txt'
|
164
128
|
end
|
165
129
|
|
130
|
+
# Random file path in the temporary directory (without extension)
|
166
131
|
def text_file
|
167
132
|
@text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}").to_s
|
168
133
|
end
|
169
134
|
|
135
|
+
# Full path of file with extension
|
170
136
|
def text_file_with_ext(ext = nil)
|
171
137
|
[@text_file, ext || file_ext].join('')
|
172
138
|
end
|
173
139
|
|
140
|
+
# Run command
|
174
141
|
def convert_command
|
175
|
-
`#{
|
142
|
+
`#{configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{configuration.options_cmd.join(' ')}`
|
176
143
|
end
|
177
144
|
|
145
|
+
# Read result file
|
178
146
|
def convert_text
|
179
147
|
@value = File.read(text_file_with_ext).to_s
|
180
148
|
end
|
181
149
|
|
150
|
+
# Hook to convert
|
182
151
|
def after_convert_hook
|
183
152
|
end
|
184
153
|
|
@@ -187,21 +156,7 @@ class RTesseract
|
|
187
156
|
convert_command
|
188
157
|
after_convert_hook
|
189
158
|
convert_text
|
190
|
-
|
191
|
-
rescue => error
|
192
|
-
raise RTesseract::ConversionError.new(error), error, caller
|
193
|
-
end
|
194
|
-
|
195
|
-
# Read image from memory blob
|
196
|
-
def from_blob(blob, ext = '')
|
197
|
-
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
198
|
-
blob_file.binmode.write(blob)
|
199
|
-
blob_file.rewind
|
200
|
-
blob_file.flush
|
201
|
-
self.source = blob_file.path
|
202
|
-
convert
|
203
|
-
remove_file([blob_file])
|
204
|
-
self
|
159
|
+
RTesseract::Utils.remove_files([@image, text_file_with_ext])
|
205
160
|
rescue => error
|
206
161
|
raise RTesseract::ConversionError.new(error), error, caller
|
207
162
|
end
|
@@ -220,19 +175,17 @@ class RTesseract
|
|
220
175
|
|
221
176
|
# Remove spaces and break-lines
|
222
177
|
def to_s_without_spaces
|
223
|
-
to_s.
|
224
|
-
end
|
225
|
-
|
226
|
-
def self.choose_processor!(processor)
|
227
|
-
processor =
|
228
|
-
if MiniMagickProcessor.a_name?(processor.to_s)
|
229
|
-
MiniMagickProcessor
|
230
|
-
elsif NoneProcessor.a_name?(processor.to_s)
|
231
|
-
NoneProcessor
|
232
|
-
else
|
233
|
-
RMagickProcessor
|
234
|
-
end
|
235
|
-
processor.setup
|
236
|
-
processor
|
178
|
+
to_s.delete(' ').delete("\n").delete("\r")
|
237
179
|
end
|
238
180
|
end
|
181
|
+
|
182
|
+
require 'rtesseract/mixed'
|
183
|
+
require 'rtesseract/box'
|
184
|
+
require 'rtesseract/box_char'
|
185
|
+
require 'rtesseract/blob'
|
186
|
+
require 'rtesseract/processor'
|
187
|
+
|
188
|
+
# Processors
|
189
|
+
require 'processors/rmagick.rb'
|
190
|
+
require 'processors/mini_magick.rb'
|
191
|
+
require 'processors/none.rb'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Blob methods
|
2
|
+
class RTesseract
|
3
|
+
# Read image from memory blob
|
4
|
+
def self.read(src = nil, options = {})
|
5
|
+
fail RTesseract::ImageNotSelectedError if src.nil?
|
6
|
+
processor = RTesseract::Processor.choose_processor!(options.option(:processor, nil))
|
7
|
+
image = processor.read_with_processor(src.to_s)
|
8
|
+
yield(image)
|
9
|
+
object = RTesseract.new('', options).from_blob(image.to_blob)
|
10
|
+
object
|
11
|
+
end
|
12
|
+
|
13
|
+
# Read image from memory blob
|
14
|
+
def read
|
15
|
+
image = @processor.read_with_processor(@source.to_s)
|
16
|
+
new_image = yield(image)
|
17
|
+
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
18
|
+
self
|
19
|
+
end
|
20
|
+
|
21
|
+
# Read image from memory blob
|
22
|
+
def from_blob(blob, ext = '')
|
23
|
+
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
24
|
+
blob_file.binmode.write(blob)
|
25
|
+
blob_file.rewind
|
26
|
+
blob_file.flush
|
27
|
+
self.source = blob_file.path
|
28
|
+
convert
|
29
|
+
RTesseract::Utils.remove_files([blob_file])
|
30
|
+
self
|
31
|
+
rescue => error
|
32
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
33
|
+
end
|
34
|
+
end
|
data/lib/rtesseract/box.rb
CHANGED
@@ -2,37 +2,45 @@
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'fileutils'
|
4
4
|
|
5
|
+
# RTesseract
|
5
6
|
class RTesseract
|
6
7
|
# Class to read char positions from an image
|
7
8
|
class Box < RTesseract
|
9
|
+
# Setting value as blank array
|
8
10
|
def initialize_hook
|
9
|
-
@value
|
11
|
+
@value = []
|
10
12
|
end
|
11
13
|
|
14
|
+
# Additional options for the config file
|
12
15
|
def config_hook
|
13
16
|
@options['tessedit_create_hocr'] = 1 # Split Words configuration
|
14
17
|
end
|
15
18
|
|
19
|
+
# Words converted
|
16
20
|
def words
|
17
21
|
convert if @value == []
|
18
22
|
@value
|
19
23
|
end
|
20
24
|
|
25
|
+
# Extension of file
|
21
26
|
def file_ext
|
22
27
|
'.hocr'
|
23
28
|
end
|
24
29
|
|
30
|
+
# Read the result file
|
25
31
|
def parse_file
|
26
32
|
html = Nokogiri::HTML(File.read(text_file_with_ext))
|
27
33
|
html.css('span.ocrx_word, span.ocr_word')
|
28
34
|
end
|
29
35
|
|
36
|
+
# Return words to value
|
30
37
|
def convert_text
|
31
38
|
text_objects = []
|
32
39
|
parse_file.each { |word| text_objects << BoxParser.new(word).to_h }
|
33
40
|
@value = text_objects
|
34
41
|
end
|
35
42
|
|
43
|
+
# Rename the .html output file to .hocr
|
36
44
|
def after_convert_hook
|
37
45
|
FileUtils.mv(text_file_with_ext('.html'), text_file_with_ext) rescue nil
|
38
46
|
end
|
@@ -56,6 +64,7 @@ class RTesseract
|
|
56
64
|
@attributes = title.gsub(';', '').split(' ')
|
57
65
|
end
|
58
66
|
|
67
|
+
# Hash of word and position
|
59
68
|
def to_h
|
60
69
|
{
|
61
70
|
word: @word.text,
|
data/lib/rtesseract/box_char.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
# RTesseract
|
2
3
|
class RTesseract
|
3
4
|
# Class to read char positions from an image
|
4
5
|
class BoxChar < Box
|
@@ -8,10 +9,12 @@ class RTesseract
|
|
8
9
|
|
9
10
|
alias_method :characters, :words
|
10
11
|
|
12
|
+
# Extension of file
|
11
13
|
def file_ext
|
12
14
|
'.box'
|
13
15
|
end
|
14
16
|
|
17
|
+
# Read the result file
|
15
18
|
def parse_file
|
16
19
|
File.read(text_file_with_ext).to_s
|
17
20
|
end
|
@@ -1,12 +1,16 @@
|
|
1
|
-
#
|
1
|
+
# RTesseract
|
2
2
|
class RTesseract
|
3
3
|
# Aliases to languages names
|
4
4
|
LANGUAGES = {
|
5
|
-
'
|
6
|
-
'
|
7
|
-
'
|
8
|
-
'
|
9
|
-
|
5
|
+
'en' => 'eng',
|
6
|
+
'en-us' => 'eng',
|
7
|
+
'english' => 'eng',
|
8
|
+
'pt' => 'por',
|
9
|
+
'pt-br' => 'por',
|
10
|
+
'portuguese' => 'por',
|
11
|
+
'it' => 'ita',
|
12
|
+
'sp' => 'spa'
|
13
|
+
}.freeze
|
10
14
|
|
11
15
|
# Configuration class
|
12
16
|
class Configuration
|
@@ -16,16 +20,19 @@ class RTesseract
|
|
16
20
|
@processor = 'rmagick'
|
17
21
|
end
|
18
22
|
|
23
|
+
# Global configuration
|
19
24
|
def parent
|
20
25
|
@parent ||= RTesseract.configuration || RTesseract::Configuration.new
|
21
26
|
end
|
22
27
|
|
28
|
+
# Set value of option
|
23
29
|
def option(options, name, default = nil)
|
24
30
|
self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
|
25
31
|
end
|
26
32
|
|
33
|
+
# Return the values of options
|
27
34
|
def load_options(options, names = [])
|
28
|
-
names.each{ |name| option(options, name, nil) }
|
35
|
+
names.each { |name| option(options, name, nil) }
|
29
36
|
end
|
30
37
|
end
|
31
38
|
|
@@ -38,6 +45,7 @@ class RTesseract
|
|
38
45
|
yield(configuration)
|
39
46
|
end
|
40
47
|
|
48
|
+
# Default command
|
41
49
|
def self.default_command
|
42
50
|
TesseractBin::Executables[:tesseract] || 'tesseract'
|
43
51
|
rescue
|
@@ -49,7 +57,7 @@ class RTesseract
|
|
49
57
|
RTesseract::Configuration.new.tap do |config|
|
50
58
|
config.command = config.option(options, :command, RTesseract.default_command)
|
51
59
|
config.processor = config.option(options, :processor, 'rmagick')
|
52
|
-
config.load_options(options, [
|
60
|
+
config.load_options(options, [:lang, :psm, :tessdata_dir, :user_words, :user_patterns])
|
53
61
|
config.debug = config.option(options, :debug, false)
|
54
62
|
config.options_cmd = [options.option(:options, nil)].flatten.compact
|
55
63
|
end
|
data/lib/rtesseract/errors.rb
CHANGED
data/lib/rtesseract/mixed.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
# RTesseract
|
2
3
|
class RTesseract
|
3
4
|
# Class to read an image from specified areas
|
4
5
|
class Mixed
|
@@ -12,11 +13,13 @@ class RTesseract
|
|
12
13
|
yield self if block_given?
|
13
14
|
end
|
14
15
|
|
15
|
-
|
16
|
+
# Add areas
|
17
|
+
def area(points)
|
16
18
|
@value = ''
|
17
|
-
@areas <<
|
19
|
+
@areas << points
|
18
20
|
end
|
19
21
|
|
22
|
+
# Clear areas
|
20
23
|
def clear_areas
|
21
24
|
@areas = []
|
22
25
|
end
|
@@ -25,7 +28,7 @@ class RTesseract
|
|
25
28
|
def convert
|
26
29
|
@value = []
|
27
30
|
@areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
|
28
|
-
image.crop!(area)
|
31
|
+
image.crop!(area)
|
29
32
|
@value << image.to_s
|
30
33
|
end
|
31
34
|
rescue => error
|
@@ -45,7 +48,7 @@ class RTesseract
|
|
45
48
|
|
46
49
|
# Remove spaces and break-lines
|
47
50
|
def to_s_without_spaces
|
48
|
-
to_s.
|
51
|
+
to_s.delete(' ').delete("\n").delete("\r")
|
49
52
|
end
|
50
53
|
end
|
51
54
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# RTesseract
|
2
|
+
class RTesseract
|
3
|
+
# Processor management
|
4
|
+
module Processor
|
5
|
+
# Return the processor
|
6
|
+
def self.choose_processor!(processor)
|
7
|
+
processor =
|
8
|
+
if RTesseract::Processor::MiniMagickProcessor.a_name?(processor.to_s)
|
9
|
+
MiniMagickProcessor
|
10
|
+
elsif RTesseract::Processor::NoneProcessor.a_name?(processor.to_s)
|
11
|
+
NoneProcessor
|
12
|
+
else
|
13
|
+
RMagickProcessor
|
14
|
+
end
|
15
|
+
processor.setup
|
16
|
+
processor
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# RTesseract
|
2
|
+
class RTesseract
|
3
|
+
# Some utils methods
|
4
|
+
module Utils
|
5
|
+
# Remove files or Tempfile
|
6
|
+
def self.remove_files(files = [])
|
7
|
+
files.each do |file|
|
8
|
+
self.remove_file(file)
|
9
|
+
end
|
10
|
+
true
|
11
|
+
rescue => error
|
12
|
+
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Remove file or Tempfile
|
16
|
+
def self.remove_file(file)
|
17
|
+
if file.is_a?(Tempfile)
|
18
|
+
file.close
|
19
|
+
file.unlink
|
20
|
+
else
|
21
|
+
File.unlink(file)
|
22
|
+
end
|
23
|
+
true
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Hash
|
29
|
+
class Hash
|
30
|
+
# return the value and remove from hash
|
31
|
+
def option(attr_name, default)
|
32
|
+
delete(attr_name.to_s) || delete(attr_name) || default
|
33
|
+
end
|
34
|
+
end
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 2.0.
|
5
|
+
# stub: rtesseract 2.0.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "2.0.
|
9
|
+
s.version = "2.0.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-05-17"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
".document",
|
23
23
|
".rspec",
|
24
24
|
".travis.yml",
|
25
|
+
"CHANGELOG.md",
|
25
26
|
"Gemfile",
|
26
27
|
"Gemfile.lock",
|
27
28
|
"LICENSE.txt",
|
@@ -32,13 +33,16 @@ Gem::Specification.new do |s|
|
|
32
33
|
"lib/processors/none.rb",
|
33
34
|
"lib/processors/rmagick.rb",
|
34
35
|
"lib/rtesseract.rb",
|
36
|
+
"lib/rtesseract/blob.rb",
|
35
37
|
"lib/rtesseract/box.rb",
|
36
38
|
"lib/rtesseract/box_char.rb",
|
37
39
|
"lib/rtesseract/configuration.rb",
|
38
40
|
"lib/rtesseract/errors.rb",
|
39
41
|
"lib/rtesseract/mixed.rb",
|
40
|
-
"lib/
|
42
|
+
"lib/rtesseract/processor.rb",
|
43
|
+
"lib/rtesseract/utils.rb",
|
41
44
|
"rtesseract.gemspec",
|
45
|
+
"spec/configs/eng.user-words.txt",
|
42
46
|
"spec/images/README.pdf",
|
43
47
|
"spec/images/blank.tif",
|
44
48
|
"spec/images/mixed.tif",
|
@@ -6,17 +6,7 @@ describe 'Rtesseract::BoxChar' do
|
|
6
6
|
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
|
7
7
|
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
8
|
@words_image = @path.join('images', 'test_words.png').to_s
|
9
|
-
|
10
|
-
|
11
|
-
it 'bounding box by char' do
|
12
|
-
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
13
|
-
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
14
|
-
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
15
|
-
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
16
|
-
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
17
|
-
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
18
|
-
|
19
|
-
expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
|
9
|
+
@values = [
|
20
10
|
{ char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
|
21
11
|
{ char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
|
22
12
|
{ char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
|
@@ -72,7 +62,18 @@ describe 'Rtesseract::BoxChar' do
|
|
72
62
|
{ char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
|
73
63
|
{ char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
|
74
64
|
{ char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
|
75
|
-
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
|
65
|
+
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'bounding box by char' do
|
69
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
70
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
71
|
+
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
72
|
+
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
73
|
+
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
74
|
+
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
75
|
+
|
76
|
+
expect(RTesseract::BoxChar.new(@words_image).characters).to eql(@values)
|
76
77
|
|
77
78
|
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
79
|
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -81,6 +81,7 @@ describe 'Rtesseract' do
|
|
81
81
|
expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
|
82
82
|
|
83
83
|
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
84
|
+
expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')
|
84
85
|
|
85
86
|
# Invalid lang object
|
86
87
|
expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
|
@@ -98,6 +99,7 @@ describe 'Rtesseract' do
|
|
98
99
|
expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
|
99
100
|
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
|
100
101
|
expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
|
102
|
+
expect(RTesseract.new(@image_tif, tessedit_char_whitelist: "ABCDEF12345").to_s_without_spaces).to eql('43F')
|
101
103
|
end
|
102
104
|
|
103
105
|
it ' crop image' do
|
@@ -121,14 +123,14 @@ describe 'Rtesseract' do
|
|
121
123
|
|
122
124
|
it ' use a instance' do
|
123
125
|
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
|
124
|
-
expect(RMagickProcessor.a_name?('teste')).to eql(false)
|
125
|
-
expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
|
126
|
-
expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
127
|
-
expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
|
128
|
-
expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
129
|
-
expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
130
|
-
expect(NoneProcessor.a_name?('none')).to eql(true)
|
131
|
-
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
126
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
|
127
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
|
128
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
129
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
|
130
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
131
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
132
|
+
expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
|
133
|
+
expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
132
134
|
end
|
133
135
|
|
134
136
|
it ' change image in a block' do
|
@@ -172,10 +174,9 @@ describe 'Rtesseract' do
|
|
172
174
|
end
|
173
175
|
|
174
176
|
it 'remove a file' do
|
175
|
-
|
176
|
-
rtesseract.remove_file(Tempfile.new('config'))
|
177
|
+
RTesseract::Utils.remove_files(Tempfile.new('config'))
|
177
178
|
|
178
|
-
expect {
|
179
|
+
expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
|
179
180
|
end
|
180
181
|
|
181
182
|
it ' support default config processors' do
|
@@ -201,7 +202,6 @@ describe 'Rtesseract' do
|
|
201
202
|
RTesseract.configure { |config| config.psm = 7 }
|
202
203
|
expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
|
203
204
|
|
204
|
-
|
205
205
|
RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
|
206
206
|
expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
207
207
|
|
@@ -220,5 +220,7 @@ describe 'Rtesseract' do
|
|
220
220
|
expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
|
221
221
|
expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
|
222
222
|
expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
|
223
|
+
|
224
|
+
# expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
|
223
225
|
end
|
224
226
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- ".document"
|
120
120
|
- ".rspec"
|
121
121
|
- ".travis.yml"
|
122
|
+
- CHANGELOG.md
|
122
123
|
- Gemfile
|
123
124
|
- Gemfile.lock
|
124
125
|
- LICENSE.txt
|
@@ -129,13 +130,16 @@ files:
|
|
129
130
|
- lib/processors/none.rb
|
130
131
|
- lib/processors/rmagick.rb
|
131
132
|
- lib/rtesseract.rb
|
133
|
+
- lib/rtesseract/blob.rb
|
132
134
|
- lib/rtesseract/box.rb
|
133
135
|
- lib/rtesseract/box_char.rb
|
134
136
|
- lib/rtesseract/configuration.rb
|
135
137
|
- lib/rtesseract/errors.rb
|
136
138
|
- lib/rtesseract/mixed.rb
|
137
|
-
- lib/
|
139
|
+
- lib/rtesseract/processor.rb
|
140
|
+
- lib/rtesseract/utils.rb
|
138
141
|
- rtesseract.gemspec
|
142
|
+
- spec/configs/eng.user-words.txt
|
139
143
|
- spec/images/README.pdf
|
140
144
|
- spec/images/blank.tif
|
141
145
|
- spec/images/mixed.tif
|