rtesseract 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile.lock +6 -20
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +37 -26
- data/lib/processors/none.rb +27 -19
- data/lib/processors/rmagick.rb +39 -28
- data/lib/rtesseract.rb +46 -93
- data/lib/rtesseract/blob.rb +34 -0
- data/lib/rtesseract/box.rb +10 -1
- data/lib/rtesseract/box_char.rb +3 -0
- data/lib/rtesseract/configuration.rb +16 -8
- data/lib/rtesseract/errors.rb +1 -0
- data/lib/rtesseract/mixed.rb +7 -4
- data/lib/rtesseract/processor.rb +19 -0
- data/lib/rtesseract/utils.rb +34 -0
- data/rtesseract.gemspec +8 -4
- data/spec/configs/eng.user-words.txt +13 -0
- data/spec/rtesseract_box_char_spec.rb +13 -12
- data/spec/rtesseract_spec.rb +14 -12
- metadata +7 -3
- data/lib/utils.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6eae58279cf744227e79b7bbc9180f7aea852547
|
4
|
+
data.tar.gz: 3836aa96d24b7f1a0b957cf803553f547cc33544
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ef57359c7c7f43094a50838b6d29d28d7808c9cadd8f2b8514c613be030161f8d640c41ba3d403c00fb59fdf85ffcbc57795f6c65b8418ad348eb1a6c07e901
|
7
|
+
data.tar.gz: ff5f0f94c8039bd0b38b0c9ec2618b4c38b07b9707e28ff29a3bb943abc85d5afaa543dfba1ba2b9e565d056ea558eda9b7f6d222a6adb43614cd86c6e8fdcac
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
## v2.0.1
|
2
|
+
|
3
|
+
#### Changed
|
4
|
+
|
5
|
+
* Refactoring of some small classes
|
6
|
+
|
7
|
+
## v2.0.0
|
8
|
+
|
9
|
+
#### Added
|
10
|
+
|
11
|
+
* Support to options --tessdata-dir, --user-words and --user-patterns
|
12
|
+
* Ruby 2.3.0 to travis tests.
|
13
|
+
|
14
|
+
#### Changed
|
15
|
+
|
16
|
+
* Refactoring of some classes
|
17
|
+
* Crop options is a hash with x,y,w,h keys.
|
18
|
+
* Areas of RTesseract::Mixed now changed :width to :w and :height to :h.
|
19
|
+
|
20
|
+
#### Removed
|
21
|
+
|
22
|
+
* Support to quick_magick gem.
|
data/Gemfile.lock
CHANGED
@@ -3,9 +3,8 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
addressable (2.4.0)
|
5
5
|
builder (3.2.2)
|
6
|
-
coveralls (0.8.
|
6
|
+
coveralls (0.8.13)
|
7
7
|
json (~> 1.8)
|
8
|
-
rest-client (>= 1.6.8, < 2)
|
9
8
|
simplecov (~> 0.11.0)
|
10
9
|
term-ansicolor (~> 1.3)
|
11
10
|
thor (~> 0.19.1)
|
@@ -14,8 +13,6 @@ GEM
|
|
14
13
|
thread_safe (~> 0.3, >= 0.3.1)
|
15
14
|
diff-lcs (1.2.5)
|
16
15
|
docile (1.1.5)
|
17
|
-
domain_name (0.5.25)
|
18
|
-
unf (>= 0.0.5, < 1.0.0)
|
19
16
|
faraday (0.9.2)
|
20
17
|
multipart-post (>= 1.2, < 3)
|
21
18
|
git (1.3.0)
|
@@ -28,8 +25,6 @@ GEM
|
|
28
25
|
oauth2
|
29
26
|
hashie (3.4.3)
|
30
27
|
highline (1.7.8)
|
31
|
-
http-cookie (1.0.2)
|
32
|
-
domain_name (~> 0.5)
|
33
28
|
jeweler (2.1.1)
|
34
29
|
builder
|
35
30
|
bundler (>= 1.0)
|
@@ -42,13 +37,11 @@ GEM
|
|
42
37
|
semver
|
43
38
|
json (1.8.3)
|
44
39
|
jwt (1.5.1)
|
45
|
-
|
46
|
-
mini_magick (4.3.6)
|
40
|
+
mini_magick (4.5.1)
|
47
41
|
mini_portile2 (2.0.0)
|
48
42
|
multi_json (1.11.2)
|
49
43
|
multi_xml (0.5.5)
|
50
44
|
multipart-post (2.0.0)
|
51
|
-
netrc (0.11.0)
|
52
45
|
nokogiri (1.6.7.2)
|
53
46
|
mini_portile2 (~> 2.0.0.rc2)
|
54
47
|
oauth2 (1.1.0)
|
@@ -61,26 +54,22 @@ GEM
|
|
61
54
|
rake (11.1.2)
|
62
55
|
rdoc (4.2.2)
|
63
56
|
json (~> 1.4)
|
64
|
-
rest-client (1.8.0)
|
65
|
-
http-cookie (>= 1.0.2, < 2.0)
|
66
|
-
mime-types (>= 1.16, < 3.0)
|
67
|
-
netrc (~> 0.7)
|
68
57
|
rmagick (2.15.4)
|
69
58
|
rspec (3.4.0)
|
70
59
|
rspec-core (~> 3.4.0)
|
71
60
|
rspec-expectations (~> 3.4.0)
|
72
61
|
rspec-mocks (~> 3.4.0)
|
73
|
-
rspec-core (3.4.
|
62
|
+
rspec-core (3.4.4)
|
74
63
|
rspec-support (~> 3.4.0)
|
75
64
|
rspec-expectations (3.4.0)
|
76
65
|
diff-lcs (>= 1.2.0, < 2.0)
|
77
66
|
rspec-support (~> 3.4.0)
|
78
|
-
rspec-mocks (3.4.
|
67
|
+
rspec-mocks (3.4.1)
|
79
68
|
diff-lcs (>= 1.2.0, < 2.0)
|
80
69
|
rspec-support (~> 3.4.0)
|
81
70
|
rspec-support (3.4.1)
|
82
71
|
semver (1.0.1)
|
83
|
-
simplecov (0.11.
|
72
|
+
simplecov (0.11.2)
|
84
73
|
docile (~> 1.1.0)
|
85
74
|
json (~> 1.8)
|
86
75
|
simplecov-html (~> 0.10.0)
|
@@ -90,9 +79,6 @@ GEM
|
|
90
79
|
thor (0.19.1)
|
91
80
|
thread_safe (0.3.5)
|
92
81
|
tins (1.6.0)
|
93
|
-
unf (0.1.4)
|
94
|
-
unf_ext
|
95
|
-
unf_ext (0.0.7.1)
|
96
82
|
|
97
83
|
PLATFORMS
|
98
84
|
ruby
|
@@ -109,4 +95,4 @@ DEPENDENCIES
|
|
109
95
|
simplecov
|
110
96
|
|
111
97
|
BUNDLED WITH
|
112
|
-
1.
|
98
|
+
1.11.2
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.
|
1
|
+
2.0.1
|
@@ -1,32 +1,43 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Add to rtesseract a image manipulation with MiniMagick
|
7
|
+
module MiniMagickProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
require 'mini_magick'
|
11
|
+
end
|
7
12
|
|
8
|
-
|
9
|
-
|
10
|
-
|
13
|
+
# Check if is this Processor
|
14
|
+
def self.a_name?(name)
|
15
|
+
%w(mini_magick MiniMagickProcessor).include?(name.to_s)
|
16
|
+
end
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
# Convert Image to Tiff
|
19
|
+
def self.image_to_tif(source, points = {})
|
20
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
21
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
22
|
+
cat.format('tif') do |c|
|
23
|
+
c.compress 'None'
|
24
|
+
c.alpha 'off'
|
25
|
+
end
|
26
|
+
cat.crop("#{points[:w]}x#{points[:h]}+#{points[:x]}+#{points[:y]}") if points.is_a?(Hash) && points.values.compact != []
|
27
|
+
cat.alpha 'off'
|
28
|
+
cat.write tmp_file.path.to_s
|
29
|
+
tmp_file
|
30
|
+
end
|
24
31
|
|
25
|
-
|
26
|
-
|
27
|
-
|
32
|
+
# Cast instance of image
|
33
|
+
def self.read_with_processor(path)
|
34
|
+
MiniMagick::Image.open(path.to_s)
|
35
|
+
end
|
28
36
|
|
29
|
-
|
30
|
-
|
37
|
+
# Check if is a MiniMagick image
|
38
|
+
def self.image?(object)
|
39
|
+
object.class == MiniMagick::Image
|
40
|
+
end
|
41
|
+
end
|
31
42
|
end
|
32
|
-
end
|
43
|
+
end
|
data/lib/processors/none.rb
CHANGED
@@ -1,26 +1,34 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Add to rtesseract a image without manipulation
|
7
|
+
module NoneProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
end
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
12
|
+
# Check if is this Processor
|
13
|
+
def self.a_name?(name)
|
14
|
+
%w(none NoneProcessor).include?(name.to_s)
|
15
|
+
end
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
# Convert Image to Tiff
|
18
|
+
def self.image_to_tif(source, _points = {})
|
19
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
20
|
+
tmp_file.write(read_with_processor(source))
|
21
|
+
tmp_file
|
22
|
+
end
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
# Cast instance of image
|
25
|
+
def self.read_with_processor(path)
|
26
|
+
File.read(path)
|
27
|
+
end
|
23
28
|
|
24
|
-
|
29
|
+
# Check if is a image
|
30
|
+
def self.image?(*)
|
31
|
+
end
|
32
|
+
end
|
25
33
|
end
|
26
34
|
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -1,35 +1,46 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
# RTesseract class
|
3
|
+
class RTesseract
|
4
|
+
# Processor Module
|
5
|
+
module Processor
|
6
|
+
# Add to rtesseract a image manipulation with RMagick
|
7
|
+
module RMagickProcessor
|
8
|
+
# Setup Processor
|
9
|
+
def self.setup
|
10
|
+
require 'rmagick'
|
11
|
+
rescue LoadError
|
12
|
+
# :nocov:
|
13
|
+
require 'RMagick'
|
14
|
+
# :nocov:
|
15
|
+
end
|
11
16
|
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
# Check if is this Processor
|
18
|
+
def self.a_name?(name)
|
19
|
+
%w(rmagick RMagickProcessor).include?(name.to_s)
|
20
|
+
end
|
15
21
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
# Convert Image to Tiff
|
23
|
+
def self.image_to_tif(source, points = {})
|
24
|
+
tmp_file = Tempfile.new(['', '.tif'])
|
25
|
+
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
26
|
+
cat.crop!(points[:x], points[:y], points[:w], points[:h]) if points.is_a?(Hash) && points.values.compact != []
|
27
|
+
cat.alpha Magick::DeactivateAlphaChannel
|
28
|
+
cat.write(tmp_file.path.to_s) do
|
29
|
+
# self.depth = 16
|
30
|
+
self.compression = Magick::NoCompression
|
31
|
+
end
|
32
|
+
tmp_file
|
33
|
+
end
|
27
34
|
|
28
|
-
|
29
|
-
|
30
|
-
|
35
|
+
# Cast instance of image
|
36
|
+
def self.read_with_processor(path)
|
37
|
+
Magick::Image.read(path.to_s).first
|
38
|
+
end
|
31
39
|
|
32
|
-
|
33
|
-
|
40
|
+
# Check if is a RMagick image
|
41
|
+
def self.image?(object)
|
42
|
+
object.class == Magick::Image
|
43
|
+
end
|
44
|
+
end
|
34
45
|
end
|
35
46
|
end
|
data/lib/rtesseract.rb
CHANGED
@@ -1,18 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'pathname'
|
3
3
|
require 'tempfile'
|
4
|
-
require 'utils'
|
5
4
|
|
5
|
+
require 'rtesseract/utils'
|
6
6
|
require 'rtesseract/configuration'
|
7
7
|
require 'rtesseract/errors'
|
8
|
-
require 'rtesseract/mixed'
|
9
|
-
require 'rtesseract/box'
|
10
|
-
require 'rtesseract/box_char'
|
11
|
-
|
12
|
-
# Processors
|
13
|
-
require 'processors/rmagick.rb'
|
14
|
-
require 'processors/mini_magick.rb'
|
15
|
-
require 'processors/none.rb'
|
16
8
|
|
17
9
|
# Ruby wrapper for Tesseract OCR
|
18
10
|
class RTesseract
|
@@ -23,58 +15,30 @@ class RTesseract
|
|
23
15
|
def initialize(src = '', options = {})
|
24
16
|
self.configuration = RTesseract.local_config(options)
|
25
17
|
@options = options || {}
|
26
|
-
@value
|
27
|
-
@
|
18
|
+
@value = nil
|
19
|
+
@points = {}
|
20
|
+
@processor = RTesseract::Processor.choose_processor!(configuration.processor)
|
28
21
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
29
22
|
initialize_hook
|
30
23
|
end
|
31
24
|
|
25
|
+
# Hook to end of initialize method
|
32
26
|
def initialize_hook
|
33
27
|
end
|
34
28
|
|
35
|
-
|
36
|
-
fail RTesseract::ImageNotSelectedError if src.nil?
|
37
|
-
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
38
|
-
image = processor.read_with_processor(src.to_s)
|
39
|
-
yield(image)
|
40
|
-
object = RTesseract.new('', options).from_blob(image.to_blob)
|
41
|
-
object
|
42
|
-
end
|
43
|
-
|
44
|
-
def read
|
45
|
-
image = @processor.read_with_processor(@source.to_s)
|
46
|
-
new_image = yield(image)
|
47
|
-
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
48
|
-
self
|
49
|
-
end
|
50
|
-
|
29
|
+
# Define the source
|
51
30
|
def source=(src)
|
52
31
|
@value = nil
|
53
32
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
54
33
|
end
|
55
34
|
|
56
35
|
# Crop image to convert
|
57
|
-
def crop!(
|
36
|
+
def crop!(points = {})
|
58
37
|
@value = nil
|
59
|
-
@points =
|
38
|
+
@points = points
|
60
39
|
self
|
61
40
|
end
|
62
41
|
|
63
|
-
# Remove files
|
64
|
-
def remove_file(files = [])
|
65
|
-
files.each do |file|
|
66
|
-
if file.is_a?(Tempfile)
|
67
|
-
file.close
|
68
|
-
file.unlink
|
69
|
-
else
|
70
|
-
File.unlink(file)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
true
|
74
|
-
rescue => error
|
75
|
-
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
76
|
-
end
|
77
|
-
|
78
42
|
# Select the language
|
79
43
|
# ===Languages
|
80
44
|
## * eng - English
|
@@ -88,58 +52,56 @@ class RTesseract
|
|
88
52
|
## * vie - Vietnamese
|
89
53
|
## Note: Make sure you have installed the language to tesseract
|
90
54
|
def lang
|
91
|
-
language =
|
92
|
-
LANGUAGES
|
93
|
-
|
94
|
-
end
|
95
|
-
return " -l #{language} " if language.size > 0
|
55
|
+
language = (configuration.lang || 'eng').to_s.strip.downcase
|
56
|
+
" -l #{LANGUAGES[language] || language} "
|
57
|
+
rescue
|
96
58
|
''
|
59
|
+
end
|
60
|
+
|
61
|
+
# Convert option to command
|
62
|
+
def option_to_string(prefix, value = nil)
|
63
|
+
(value.nil? ? '' : " #{prefix} #{value} ")
|
97
64
|
rescue
|
98
65
|
''
|
99
66
|
end
|
100
67
|
|
101
68
|
# Page Segment Mode
|
102
69
|
def psm
|
103
|
-
(
|
104
|
-
rescue
|
105
|
-
''
|
70
|
+
option_to_string('-psm', configuration.psm)
|
106
71
|
end
|
107
72
|
|
108
73
|
# Tessdata Dir
|
109
74
|
def tessdata_dir
|
110
|
-
(
|
111
|
-
rescue
|
112
|
-
''
|
75
|
+
option_to_string('--tessdata-dir', configuration.tessdata_dir)
|
113
76
|
end
|
114
77
|
|
115
78
|
# User Words
|
116
79
|
def user_words
|
117
|
-
(
|
118
|
-
rescue
|
119
|
-
''
|
80
|
+
option_to_string('--user-words', configuration.user_words)
|
120
81
|
end
|
121
82
|
|
122
83
|
# User Patterns
|
123
84
|
def user_patterns
|
124
|
-
(
|
125
|
-
rescue
|
126
|
-
''
|
85
|
+
option_to_string('--user-patterns', configuration.user_patterns)
|
127
86
|
end
|
128
87
|
|
129
88
|
# Options on line
|
130
89
|
def options_cmd
|
131
|
-
|
90
|
+
configuration.options_cmd
|
132
91
|
end
|
133
92
|
|
93
|
+
# Hook to before config
|
134
94
|
def config_hook
|
135
95
|
end
|
136
96
|
|
97
|
+
# Convert configurations
|
137
98
|
def config
|
138
99
|
@options ||= {}
|
139
100
|
config_hook
|
140
101
|
@options.map { |k, v| "#{k} #{v}" }.join("\n")
|
141
102
|
end
|
142
103
|
|
104
|
+
# Write config to file
|
143
105
|
def config_file
|
144
106
|
config_hook
|
145
107
|
return '' if @options == {}
|
@@ -151,34 +113,41 @@ class RTesseract
|
|
151
113
|
|
152
114
|
# TODO: Clear console for MacOS or Windows
|
153
115
|
def clear_console_output
|
154
|
-
return '' if
|
116
|
+
return '' if configuration.debug
|
155
117
|
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
156
118
|
end
|
157
119
|
|
120
|
+
# Get image
|
158
121
|
def image
|
159
122
|
(@image = @processor.image_to_tif(@source, @points)).path
|
160
123
|
end
|
161
124
|
|
125
|
+
# Extension of file
|
162
126
|
def file_ext
|
163
127
|
'.txt'
|
164
128
|
end
|
165
129
|
|
130
|
+
# Rand file path
|
166
131
|
def text_file
|
167
132
|
@text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}").to_s
|
168
133
|
end
|
169
134
|
|
135
|
+
# Full path of file with extension
|
170
136
|
def text_file_with_ext(ext = nil)
|
171
137
|
[@text_file, ext || file_ext].join('')
|
172
138
|
end
|
173
139
|
|
140
|
+
# Run command
|
174
141
|
def convert_command
|
175
|
-
`#{
|
142
|
+
`#{configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{configuration.options_cmd.join(' ')}`
|
176
143
|
end
|
177
144
|
|
145
|
+
# Read result file
|
178
146
|
def convert_text
|
179
147
|
@value = File.read(text_file_with_ext).to_s
|
180
148
|
end
|
181
149
|
|
150
|
+
# Hook to convert
|
182
151
|
def after_convert_hook
|
183
152
|
end
|
184
153
|
|
@@ -187,21 +156,7 @@ class RTesseract
|
|
187
156
|
convert_command
|
188
157
|
after_convert_hook
|
189
158
|
convert_text
|
190
|
-
|
191
|
-
rescue => error
|
192
|
-
raise RTesseract::ConversionError.new(error), error, caller
|
193
|
-
end
|
194
|
-
|
195
|
-
# Read image from memory blob
|
196
|
-
def from_blob(blob, ext = '')
|
197
|
-
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
198
|
-
blob_file.binmode.write(blob)
|
199
|
-
blob_file.rewind
|
200
|
-
blob_file.flush
|
201
|
-
self.source = blob_file.path
|
202
|
-
convert
|
203
|
-
remove_file([blob_file])
|
204
|
-
self
|
159
|
+
RTesseract::Utils.remove_files([@image, text_file_with_ext])
|
205
160
|
rescue => error
|
206
161
|
raise RTesseract::ConversionError.new(error), error, caller
|
207
162
|
end
|
@@ -220,19 +175,17 @@ class RTesseract
|
|
220
175
|
|
221
176
|
# Remove spaces and break-lines
|
222
177
|
def to_s_without_spaces
|
223
|
-
to_s.
|
224
|
-
end
|
225
|
-
|
226
|
-
def self.choose_processor!(processor)
|
227
|
-
processor =
|
228
|
-
if MiniMagickProcessor.a_name?(processor.to_s)
|
229
|
-
MiniMagickProcessor
|
230
|
-
elsif NoneProcessor.a_name?(processor.to_s)
|
231
|
-
NoneProcessor
|
232
|
-
else
|
233
|
-
RMagickProcessor
|
234
|
-
end
|
235
|
-
processor.setup
|
236
|
-
processor
|
178
|
+
to_s.delete(' ').delete("\n").delete("\r")
|
237
179
|
end
|
238
180
|
end
|
181
|
+
|
182
|
+
require 'rtesseract/mixed'
|
183
|
+
require 'rtesseract/box'
|
184
|
+
require 'rtesseract/box_char'
|
185
|
+
require 'rtesseract/blob'
|
186
|
+
require 'rtesseract/processor'
|
187
|
+
|
188
|
+
# Processors
|
189
|
+
require 'processors/rmagick.rb'
|
190
|
+
require 'processors/mini_magick.rb'
|
191
|
+
require 'processors/none.rb'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Blob methods
|
2
|
+
class RTesseract
|
3
|
+
# Read image from memory blob
|
4
|
+
def self.read(src = nil, options = {})
|
5
|
+
fail RTesseract::ImageNotSelectedError if src.nil?
|
6
|
+
processor = RTesseract::Processor.choose_processor!(options.option(:processor, nil))
|
7
|
+
image = processor.read_with_processor(src.to_s)
|
8
|
+
yield(image)
|
9
|
+
object = RTesseract.new('', options).from_blob(image.to_blob)
|
10
|
+
object
|
11
|
+
end
|
12
|
+
|
13
|
+
# Read image from memory blob
|
14
|
+
def read
|
15
|
+
image = @processor.read_with_processor(@source.to_s)
|
16
|
+
new_image = yield(image)
|
17
|
+
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
18
|
+
self
|
19
|
+
end
|
20
|
+
|
21
|
+
# Read image from memory blob
|
22
|
+
def from_blob(blob, ext = '')
|
23
|
+
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
24
|
+
blob_file.binmode.write(blob)
|
25
|
+
blob_file.rewind
|
26
|
+
blob_file.flush
|
27
|
+
self.source = blob_file.path
|
28
|
+
convert
|
29
|
+
RTesseract::Utils.remove_files([blob_file])
|
30
|
+
self
|
31
|
+
rescue => error
|
32
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
33
|
+
end
|
34
|
+
end
|
data/lib/rtesseract/box.rb
CHANGED
@@ -2,37 +2,45 @@
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'fileutils'
|
4
4
|
|
5
|
+
# RTesseract
|
5
6
|
class RTesseract
|
6
7
|
# Class to read char positions from an image
|
7
8
|
class Box < RTesseract
|
9
|
+
# Setting value as blank array
|
8
10
|
def initialize_hook
|
9
|
-
@value
|
11
|
+
@value = []
|
10
12
|
end
|
11
13
|
|
14
|
+
# Aditional options to config file
|
12
15
|
def config_hook
|
13
16
|
@options['tessedit_create_hocr'] = 1 # Split Words configuration
|
14
17
|
end
|
15
18
|
|
19
|
+
# Words converted
|
16
20
|
def words
|
17
21
|
convert if @value == []
|
18
22
|
@value
|
19
23
|
end
|
20
24
|
|
25
|
+
# Extension of file
|
21
26
|
def file_ext
|
22
27
|
'.hocr'
|
23
28
|
end
|
24
29
|
|
30
|
+
# Read the result file
|
25
31
|
def parse_file
|
26
32
|
html = Nokogiri::HTML(File.read(text_file_with_ext))
|
27
33
|
html.css('span.ocrx_word, span.ocr_word')
|
28
34
|
end
|
29
35
|
|
36
|
+
# Return words to value
|
30
37
|
def convert_text
|
31
38
|
text_objects = []
|
32
39
|
parse_file.each { |word| text_objects << BoxParser.new(word).to_h }
|
33
40
|
@value = text_objects
|
34
41
|
end
|
35
42
|
|
43
|
+
# Move file html to hocr
|
36
44
|
def after_convert_hook
|
37
45
|
FileUtils.mv(text_file_with_ext('.html'), text_file_with_ext) rescue nil
|
38
46
|
end
|
@@ -56,6 +64,7 @@ class RTesseract
|
|
56
64
|
@attributes = title.gsub(';', '').split(' ')
|
57
65
|
end
|
58
66
|
|
67
|
+
# Hash of word and position
|
59
68
|
def to_h
|
60
69
|
{
|
61
70
|
word: @word.text,
|
data/lib/rtesseract/box_char.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
# RTesseract
|
2
3
|
class RTesseract
|
3
4
|
# Class to read char positions from an image
|
4
5
|
class BoxChar < Box
|
@@ -8,10 +9,12 @@ class RTesseract
|
|
8
9
|
|
9
10
|
alias_method :characters, :words
|
10
11
|
|
12
|
+
# Extension of file
|
11
13
|
def file_ext
|
12
14
|
'.box'
|
13
15
|
end
|
14
16
|
|
17
|
+
# Read the result file
|
15
18
|
def parse_file
|
16
19
|
File.read(text_file_with_ext).to_s
|
17
20
|
end
|
@@ -1,12 +1,16 @@
|
|
1
|
-
#
|
1
|
+
# RTesseract
|
2
2
|
class RTesseract
|
3
3
|
# Aliases to languages names
|
4
4
|
LANGUAGES = {
|
5
|
-
'
|
6
|
-
'
|
7
|
-
'
|
8
|
-
'
|
9
|
-
|
5
|
+
'en' => 'eng',
|
6
|
+
'en-us' => 'eng',
|
7
|
+
'english' => 'eng',
|
8
|
+
'pt' => 'por',
|
9
|
+
'pt-br' => 'por',
|
10
|
+
'portuguese' => 'por',
|
11
|
+
'it' => 'ita',
|
12
|
+
'sp' => 'spa'
|
13
|
+
}.freeze
|
10
14
|
|
11
15
|
# Configuration class
|
12
16
|
class Configuration
|
@@ -16,16 +20,19 @@ class RTesseract
|
|
16
20
|
@processor = 'rmagick'
|
17
21
|
end
|
18
22
|
|
23
|
+
# Global configuration
|
19
24
|
def parent
|
20
25
|
@parent ||= RTesseract.configuration || RTesseract::Configuration.new
|
21
26
|
end
|
22
27
|
|
28
|
+
# Set value of option
|
23
29
|
def option(options, name, default = nil)
|
24
30
|
self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
|
25
31
|
end
|
26
32
|
|
33
|
+
# Return the values of options
|
27
34
|
def load_options(options, names = [])
|
28
|
-
names.each{ |name| option(options, name, nil) }
|
35
|
+
names.each { |name| option(options, name, nil) }
|
29
36
|
end
|
30
37
|
end
|
31
38
|
|
@@ -38,6 +45,7 @@ class RTesseract
|
|
38
45
|
yield(configuration)
|
39
46
|
end
|
40
47
|
|
48
|
+
# Default command
|
41
49
|
def self.default_command
|
42
50
|
TesseractBin::Executables[:tesseract] || 'tesseract'
|
43
51
|
rescue
|
@@ -49,7 +57,7 @@ class RTesseract
|
|
49
57
|
RTesseract::Configuration.new.tap do |config|
|
50
58
|
config.command = config.option(options, :command, RTesseract.default_command)
|
51
59
|
config.processor = config.option(options, :processor, 'rmagick')
|
52
|
-
config.load_options(options, [
|
60
|
+
config.load_options(options, [:lang, :psm, :tessdata_dir, :user_words, :user_patterns])
|
53
61
|
config.debug = config.option(options, :debug, false)
|
54
62
|
config.options_cmd = [options.option(:options, nil)].flatten.compact
|
55
63
|
end
|
data/lib/rtesseract/errors.rb
CHANGED
data/lib/rtesseract/mixed.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
# RTesseract
|
2
3
|
class RTesseract
|
3
4
|
# Class to read an image from specified areas
|
4
5
|
class Mixed
|
@@ -12,11 +13,13 @@ class RTesseract
|
|
12
13
|
yield self if block_given?
|
13
14
|
end
|
14
15
|
|
15
|
-
|
16
|
+
# Add areas
|
17
|
+
def area(points)
|
16
18
|
@value = ''
|
17
|
-
@areas <<
|
19
|
+
@areas << points
|
18
20
|
end
|
19
21
|
|
22
|
+
# Clear areas
|
20
23
|
def clear_areas
|
21
24
|
@areas = []
|
22
25
|
end
|
@@ -25,7 +28,7 @@ class RTesseract
|
|
25
28
|
def convert
|
26
29
|
@value = []
|
27
30
|
@areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
|
28
|
-
image.crop!(area)
|
31
|
+
image.crop!(area)
|
29
32
|
@value << image.to_s
|
30
33
|
end
|
31
34
|
rescue => error
|
@@ -45,7 +48,7 @@ class RTesseract
|
|
45
48
|
|
46
49
|
# Remove spaces and break-lines
|
47
50
|
def to_s_without_spaces
|
48
|
-
to_s.
|
51
|
+
to_s.delete(' ').delete("\n").delete("\r")
|
49
52
|
end
|
50
53
|
end
|
51
54
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# RTesseract
|
2
|
+
class RTesseract
|
3
|
+
# Processor managment
|
4
|
+
module Processor
|
5
|
+
# Return the processor
|
6
|
+
def self.choose_processor!(processor)
|
7
|
+
processor =
|
8
|
+
if RTesseract::Processor::MiniMagickProcessor.a_name?(processor.to_s)
|
9
|
+
MiniMagickProcessor
|
10
|
+
elsif RTesseract::Processor::NoneProcessor.a_name?(processor.to_s)
|
11
|
+
NoneProcessor
|
12
|
+
else
|
13
|
+
RMagickProcessor
|
14
|
+
end
|
15
|
+
processor.setup
|
16
|
+
processor
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# RTesseract
|
2
|
+
class RTesseract
|
3
|
+
# Some utils methods
|
4
|
+
module Utils
|
5
|
+
# Remove files or Tempfile
|
6
|
+
def self.remove_files(files = [])
|
7
|
+
files.each do |file|
|
8
|
+
self.remove_file(file)
|
9
|
+
end
|
10
|
+
true
|
11
|
+
rescue => error
|
12
|
+
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Remove file or Tempfile
|
16
|
+
def self.remove_file(file)
|
17
|
+
if file.is_a?(Tempfile)
|
18
|
+
file.close
|
19
|
+
file.unlink
|
20
|
+
else
|
21
|
+
File.unlink(file)
|
22
|
+
end
|
23
|
+
true
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Hash
|
29
|
+
class Hash
|
30
|
+
# return the value and remove from hash
|
31
|
+
def option(attr_name, default)
|
32
|
+
delete(attr_name.to_s) || delete(attr_name) || default
|
33
|
+
end
|
34
|
+
end
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 2.0.
|
5
|
+
# stub: rtesseract 2.0.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "2.0.
|
9
|
+
s.version = "2.0.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-05-17"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
".document",
|
23
23
|
".rspec",
|
24
24
|
".travis.yml",
|
25
|
+
"CHANGELOG.md",
|
25
26
|
"Gemfile",
|
26
27
|
"Gemfile.lock",
|
27
28
|
"LICENSE.txt",
|
@@ -32,13 +33,16 @@ Gem::Specification.new do |s|
|
|
32
33
|
"lib/processors/none.rb",
|
33
34
|
"lib/processors/rmagick.rb",
|
34
35
|
"lib/rtesseract.rb",
|
36
|
+
"lib/rtesseract/blob.rb",
|
35
37
|
"lib/rtesseract/box.rb",
|
36
38
|
"lib/rtesseract/box_char.rb",
|
37
39
|
"lib/rtesseract/configuration.rb",
|
38
40
|
"lib/rtesseract/errors.rb",
|
39
41
|
"lib/rtesseract/mixed.rb",
|
40
|
-
"lib/
|
42
|
+
"lib/rtesseract/processor.rb",
|
43
|
+
"lib/rtesseract/utils.rb",
|
41
44
|
"rtesseract.gemspec",
|
45
|
+
"spec/configs/eng.user-words.txt",
|
42
46
|
"spec/images/README.pdf",
|
43
47
|
"spec/images/blank.tif",
|
44
48
|
"spec/images/mixed.tif",
|
@@ -6,17 +6,7 @@ describe 'Rtesseract::BoxChar' do
|
|
6
6
|
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
|
7
7
|
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
8
|
@words_image = @path.join('images', 'test_words.png').to_s
|
9
|
-
|
10
|
-
|
11
|
-
it 'bounding box by char' do
|
12
|
-
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
13
|
-
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
14
|
-
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
15
|
-
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
16
|
-
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
17
|
-
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
18
|
-
|
19
|
-
expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
|
9
|
+
@values = [
|
20
10
|
{ char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
|
21
11
|
{ char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
|
22
12
|
{ char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
|
@@ -72,7 +62,18 @@ describe 'Rtesseract::BoxChar' do
|
|
72
62
|
{ char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
|
73
63
|
{ char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
|
74
64
|
{ char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
|
75
|
-
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
|
65
|
+
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }]
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'bounding box by char' do
|
69
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
70
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
71
|
+
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
72
|
+
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
73
|
+
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
74
|
+
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
75
|
+
|
76
|
+
expect(RTesseract::BoxChar.new(@words_image).characters).to eql(@values)
|
76
77
|
|
77
78
|
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
79
|
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -81,6 +81,7 @@ describe 'Rtesseract' do
|
|
81
81
|
expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
|
82
82
|
|
83
83
|
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
84
|
+
expect(RTesseract.new(@image_tif, lang: 'it').lang).to eql(' -l ita ')
|
84
85
|
|
85
86
|
# Invalid lang object
|
86
87
|
expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
|
@@ -98,6 +99,7 @@ describe 'Rtesseract' do
|
|
98
99
|
expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
|
99
100
|
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
|
100
101
|
expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
|
102
|
+
expect(RTesseract.new(@image_tif, tessedit_char_whitelist: "ABCDEF12345").to_s_without_spaces).to eql('43F')
|
101
103
|
end
|
102
104
|
|
103
105
|
it ' crop image' do
|
@@ -121,14 +123,14 @@ describe 'Rtesseract' do
|
|
121
123
|
|
122
124
|
it ' use a instance' do
|
123
125
|
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
|
124
|
-
expect(RMagickProcessor.a_name?('teste')).to eql(false)
|
125
|
-
expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
|
126
|
-
expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
127
|
-
expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
|
128
|
-
expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
129
|
-
expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
130
|
-
expect(NoneProcessor.a_name?('none')).to eql(true)
|
131
|
-
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
126
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('teste')).to eql(false)
|
127
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('rmagick')).to eql(true)
|
128
|
+
expect(RTesseract::Processor::RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
129
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('teste')).to eql(false)
|
130
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
131
|
+
expect(RTesseract::Processor::MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
132
|
+
expect(RTesseract::Processor::NoneProcessor.a_name?('none')).to eql(true)
|
133
|
+
expect(RTesseract::Processor::NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
132
134
|
end
|
133
135
|
|
134
136
|
it ' change image in a block' do
|
@@ -172,10 +174,9 @@ describe 'Rtesseract' do
|
|
172
174
|
end
|
173
175
|
|
174
176
|
it 'remove a file' do
|
175
|
-
|
176
|
-
rtesseract.remove_file(Tempfile.new('config'))
|
177
|
+
RTesseract::Utils.remove_files(Tempfile.new('config'))
|
177
178
|
|
178
|
-
expect {
|
179
|
+
expect { RTesseract::Utils.remove_files(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
|
179
180
|
end
|
180
181
|
|
181
182
|
it ' support default config processors' do
|
@@ -201,7 +202,6 @@ describe 'Rtesseract' do
|
|
201
202
|
RTesseract.configure { |config| config.psm = 7 }
|
202
203
|
expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
|
203
204
|
|
204
|
-
|
205
205
|
RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
|
206
206
|
expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
207
207
|
|
@@ -220,5 +220,7 @@ describe 'Rtesseract' do
|
|
220
220
|
expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
|
221
221
|
expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
|
222
222
|
expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
|
223
|
+
|
224
|
+
# expect(RTesseract.new(@path.join('images', 'test_words.png').to_s, psm: 3, user_words: @path.join('configs', 'eng.user-words.txt').to_s).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
|
223
225
|
end
|
224
226
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- ".document"
|
120
120
|
- ".rspec"
|
121
121
|
- ".travis.yml"
|
122
|
+
- CHANGELOG.md
|
122
123
|
- Gemfile
|
123
124
|
- Gemfile.lock
|
124
125
|
- LICENSE.txt
|
@@ -129,13 +130,16 @@ files:
|
|
129
130
|
- lib/processors/none.rb
|
130
131
|
- lib/processors/rmagick.rb
|
131
132
|
- lib/rtesseract.rb
|
133
|
+
- lib/rtesseract/blob.rb
|
132
134
|
- lib/rtesseract/box.rb
|
133
135
|
- lib/rtesseract/box_char.rb
|
134
136
|
- lib/rtesseract/configuration.rb
|
135
137
|
- lib/rtesseract/errors.rb
|
136
138
|
- lib/rtesseract/mixed.rb
|
137
|
-
- lib/
|
139
|
+
- lib/rtesseract/processor.rb
|
140
|
+
- lib/rtesseract/utils.rb
|
138
141
|
- rtesseract.gemspec
|
142
|
+
- spec/configs/eng.user-words.txt
|
139
143
|
- spec/images/README.pdf
|
140
144
|
- spec/images/blank.tif
|
141
145
|
- spec/images/mixed.tif
|