rtesseract 1.2.6 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +42 -33
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +3 -3
- data/lib/processors/none.rb +2 -2
- data/lib/processors/quick_magick.rb +2 -2
- data/lib/processors/rmagick.rb +5 -1
- data/lib/rtesseract.rb +67 -27
- data/lib/rtesseract/box.rb +53 -0
- data/lib/rtesseract/box_char.rb +28 -0
- data/rtesseract.gemspec +13 -4
- data/spec/images/orientation_reverse.png +0 -0
- data/spec/images/test_words.png +0 -0
- data/spec/rtesseract_box_char_spec.rb +82 -0
- data/spec/rtesseract_box_spec.rb +36 -0
- data/spec/rtesseract_mixed_spec.rb +5 -5
- data/spec/rtesseract_spec.rb +70 -64
- metadata +23 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68e5f82fed2132e93d4ba1f432d5beca598682e9
|
4
|
+
data.tar.gz: f51f2ba570d2b0fdbb6483a334843dfd6ce0f8be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6ff8c4f5bb5587f7bf8656876e9b83e0a252cf4a52d5e002c68867faddafdf5b46f2a7e2d65c70d5db583ea00321cddb126e542a5c6321e318b7083933c5bce
|
7
|
+
data.tar.gz: f5a9763a75e42c0d7fe40be660a6864af58463260153335be319ba1d1f5334579ff3a19edfc14c8c7476d202251fa7f172eb5b4bdae8ce79fd286a55f58617af
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
addressable (2.3.
|
4
|
+
addressable (2.3.8)
|
5
5
|
builder (3.2.2)
|
6
|
-
coveralls (0.
|
7
|
-
|
6
|
+
coveralls (0.8.1)
|
7
|
+
json (~> 1.8)
|
8
8
|
rest-client (>= 1.6.8, < 2)
|
9
|
-
simplecov (~> 0.
|
9
|
+
simplecov (~> 0.10.0)
|
10
10
|
term-ansicolor (~> 1.3)
|
11
11
|
thor (~> 0.19.1)
|
12
12
|
descendants_tracker (0.0.4)
|
13
13
|
thread_safe (~> 0.3, >= 0.3.1)
|
14
14
|
diff-lcs (1.2.5)
|
15
15
|
docile (1.1.5)
|
16
|
+
domain_name (0.5.24)
|
17
|
+
unf (>= 0.0.5, < 1.0.0)
|
16
18
|
faraday (0.9.1)
|
17
19
|
multipart-post (>= 1.2, < 3)
|
18
20
|
git (1.2.9.1)
|
@@ -24,8 +26,10 @@ GEM
|
|
24
26
|
multi_json (>= 1.7.5, < 2.0)
|
25
27
|
nokogiri (~> 1.6.3)
|
26
28
|
oauth2
|
27
|
-
hashie (3.4.
|
28
|
-
highline (1.7.
|
29
|
+
hashie (3.4.2)
|
30
|
+
highline (1.7.2)
|
31
|
+
http-cookie (1.0.2)
|
32
|
+
domain_name (~> 0.5)
|
29
33
|
jeweler (2.0.1)
|
30
34
|
builder
|
31
35
|
bundler (>= 1.0)
|
@@ -35,15 +39,15 @@ GEM
|
|
35
39
|
nokogiri (>= 1.5.10)
|
36
40
|
rake
|
37
41
|
rdoc
|
38
|
-
json (1.8.
|
39
|
-
jwt (1.
|
40
|
-
mime-types (2.
|
41
|
-
mini_magick (4.
|
42
|
+
json (1.8.3)
|
43
|
+
jwt (1.5.1)
|
44
|
+
mime-types (2.6.1)
|
45
|
+
mini_magick (4.2.7)
|
42
46
|
mini_portile (0.6.2)
|
43
|
-
multi_json (1.
|
47
|
+
multi_json (1.11.1)
|
44
48
|
multi_xml (0.5.5)
|
45
49
|
multipart-post (2.0.0)
|
46
|
-
netrc (0.10.
|
50
|
+
netrc (0.10.3)
|
47
51
|
nokogiri (1.6.6.2)
|
48
52
|
mini_portile (~> 0.6.0)
|
49
53
|
oauth2 (1.0.0)
|
@@ -53,37 +57,41 @@ GEM
|
|
53
57
|
multi_xml (~> 0.5)
|
54
58
|
rack (~> 1.2)
|
55
59
|
quick_magick (0.8.0)
|
56
|
-
rack (1.6.
|
60
|
+
rack (1.6.4)
|
57
61
|
rake (10.4.2)
|
58
62
|
rdoc (4.2.0)
|
59
63
|
json (~> 1.4)
|
60
|
-
rest-client (1.
|
64
|
+
rest-client (1.8.0)
|
65
|
+
http-cookie (>= 1.0.2, < 2.0)
|
61
66
|
mime-types (>= 1.16, < 3.0)
|
62
67
|
netrc (~> 0.7)
|
63
|
-
rmagick (2.
|
64
|
-
rspec (3.
|
65
|
-
rspec-core (~> 3.
|
66
|
-
rspec-expectations (~> 3.
|
67
|
-
rspec-mocks (~> 3.
|
68
|
-
rspec-core (3.
|
69
|
-
rspec-support (~> 3.
|
70
|
-
rspec-expectations (3.
|
68
|
+
rmagick (2.15.2)
|
69
|
+
rspec (3.3.0)
|
70
|
+
rspec-core (~> 3.3.0)
|
71
|
+
rspec-expectations (~> 3.3.0)
|
72
|
+
rspec-mocks (~> 3.3.0)
|
73
|
+
rspec-core (3.3.1)
|
74
|
+
rspec-support (~> 3.3.0)
|
75
|
+
rspec-expectations (3.3.0)
|
71
76
|
diff-lcs (>= 1.2.0, < 2.0)
|
72
|
-
rspec-support (~> 3.
|
73
|
-
rspec-mocks (3.
|
77
|
+
rspec-support (~> 3.3.0)
|
78
|
+
rspec-mocks (3.3.1)
|
74
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
75
|
-
rspec-support (~> 3.
|
76
|
-
rspec-support (3.
|
77
|
-
simplecov (0.
|
80
|
+
rspec-support (~> 3.3.0)
|
81
|
+
rspec-support (3.3.0)
|
82
|
+
simplecov (0.10.0)
|
78
83
|
docile (~> 1.1.0)
|
79
|
-
|
80
|
-
simplecov-html (~> 0.
|
81
|
-
simplecov-html (0.
|
82
|
-
term-ansicolor (1.3.
|
84
|
+
json (~> 1.8)
|
85
|
+
simplecov-html (~> 0.10.0)
|
86
|
+
simplecov-html (0.10.0)
|
87
|
+
term-ansicolor (1.3.2)
|
83
88
|
tins (~> 1.0)
|
84
89
|
thor (0.19.1)
|
85
|
-
thread_safe (0.3.
|
86
|
-
tins (1.
|
90
|
+
thread_safe (0.3.5)
|
91
|
+
tins (1.5.4)
|
92
|
+
unf (0.1.4)
|
93
|
+
unf_ext
|
94
|
+
unf_ext (0.0.7.1)
|
87
95
|
|
88
96
|
PLATFORMS
|
89
97
|
ruby
|
@@ -93,6 +101,7 @@ DEPENDENCIES
|
|
93
101
|
coveralls
|
94
102
|
jeweler (~> 2.0.1)
|
95
103
|
mini_magick
|
104
|
+
nokogiri
|
96
105
|
quick_magick
|
97
106
|
rdoc
|
98
107
|
rmagick
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.3.0
|
@@ -12,10 +12,10 @@ module MiniMagickProcessor
|
|
12
12
|
def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
|
13
13
|
tmp_file = Tempfile.new(['', '.tif'])
|
14
14
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
|
-
cat.format('tif')
|
15
|
+
cat.format('tif') do |c|
|
16
16
|
c.compress 'None'
|
17
|
-
c.alpha 'off'
|
18
|
-
|
17
|
+
c.alpha 'off'
|
18
|
+
end
|
19
19
|
cat.crop("#{w}x#{h}+#{x}+#{y}") unless [x, y, w, h].compact == []
|
20
20
|
cat.alpha 'off'
|
21
21
|
cat.write tmp_file.path.to_s
|
data/lib/processors/none.rb
CHANGED
@@ -8,13 +8,13 @@ module NoneProcessor
|
|
8
8
|
%w(none NoneProcessor).include?(name.to_s)
|
9
9
|
end
|
10
10
|
|
11
|
-
def self.image_to_tif(source,
|
11
|
+
def self.image_to_tif(source, _x = nil, _y = nil, _w = nil, _h = nil)
|
12
12
|
tmp_file = Tempfile.new(['', '.tif'])
|
13
13
|
tmp_file.write(self.read_with_processor(source))
|
14
14
|
tmp_file
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.need_crop?(
|
17
|
+
def self.need_crop?(*)
|
18
18
|
end
|
19
19
|
|
20
20
|
def self.read_with_processor(path)
|
@@ -14,8 +14,8 @@ module QuickMagickProcessor
|
|
14
14
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
15
|
cat.compress = 'None'
|
16
16
|
cat.format = 'tif'
|
17
|
-
cat.alpha =
|
18
|
-
cat.crop("#{w}x#{h}+#{x}+#{y}") if need_crop?(
|
17
|
+
cat.alpha = 'off'
|
18
|
+
cat.crop("#{w}x#{h}+#{x}+#{y}") if need_crop?(x, y, w, h)
|
19
19
|
cat.write tmp_file.path.to_s
|
20
20
|
tmp_file
|
21
21
|
end
|
data/lib/processors/rmagick.rb
CHANGED
data/lib/rtesseract.rb
CHANGED
@@ -4,6 +4,8 @@ require 'tempfile'
|
|
4
4
|
|
5
5
|
require 'rtesseract/errors'
|
6
6
|
require 'rtesseract/mixed'
|
7
|
+
require 'rtesseract/box'
|
8
|
+
require 'rtesseract/box_char'
|
7
9
|
|
8
10
|
# Processors
|
9
11
|
require 'processors/rmagick.rb'
|
@@ -13,14 +15,15 @@ require 'processors/none.rb'
|
|
13
15
|
|
14
16
|
# Ruby wrapper for Tesseract OCR
|
15
17
|
class RTesseract
|
18
|
+
attr_accessor :image_object
|
16
19
|
attr_accessor :options
|
17
|
-
attr_writer
|
18
|
-
attr_writer
|
19
|
-
attr_reader
|
20
|
+
attr_writer :lang
|
21
|
+
attr_writer :psm
|
22
|
+
attr_reader :processor
|
20
23
|
attr_accessor :options_cmd
|
21
24
|
|
22
25
|
OPTIONS = %w(command lang psm processor debug clear_console_output options)
|
23
|
-
|
26
|
+
# Aliases to languages names
|
24
27
|
LANGUAGES = {
|
25
28
|
'eng' => %w(en en-us english),
|
26
29
|
'ita' => %w(it),
|
@@ -33,6 +36,10 @@ class RTesseract
|
|
33
36
|
@value, @x, @y, @w, @h = ['']
|
34
37
|
@processor = RTesseract.choose_processor!(@processor)
|
35
38
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
39
|
+
initialize_hook
|
40
|
+
end
|
41
|
+
|
42
|
+
def initialize_hook
|
36
43
|
end
|
37
44
|
|
38
45
|
def fetch_option(options, name, default)
|
@@ -40,13 +47,13 @@ class RTesseract
|
|
40
47
|
end
|
41
48
|
|
42
49
|
def command_line_options(options)
|
43
|
-
@command
|
44
|
-
@lang
|
45
|
-
@psm
|
46
|
-
@processor
|
47
|
-
@debug
|
50
|
+
@command = fetch_option(options, :command, default_command)
|
51
|
+
@lang = fetch_option(options, :lang, '')
|
52
|
+
@psm = fetch_option(options, :psm, nil)
|
53
|
+
@processor = fetch_option(options, :processor, 'rmagick')
|
54
|
+
@debug = fetch_option(options, :debug, false)
|
48
55
|
@options_cmd = fetch_option(options, :options, [])
|
49
|
-
@options_cmd = [@options_cmd] unless @options_cmd.
|
56
|
+
@options_cmd = [@options_cmd] unless @options_cmd.is_a?(Array)
|
50
57
|
|
51
58
|
# Disable clear console if debug mode
|
52
59
|
@clear_console_output = @debug ? false : fetch_option(options, :clear_console_output, true)
|
@@ -66,12 +73,19 @@ class RTesseract
|
|
66
73
|
processor = RTesseract.choose_processor!(options.delete(:processor) || options.delete('processor'))
|
67
74
|
image = processor.read_with_processor(src.to_s)
|
68
75
|
|
69
|
-
yield
|
76
|
+
yield(image)
|
70
77
|
object = RTesseract.new('', options)
|
71
78
|
object.from_blob(image.to_blob)
|
72
79
|
object
|
73
80
|
end
|
74
81
|
|
82
|
+
def read(&block)
|
83
|
+
image = @processor.read_with_processor(@source.to_s)
|
84
|
+
new_image = yield(image)
|
85
|
+
self.from_blob(new_image.to_blob, File.extname(@source.to_s))
|
86
|
+
self
|
87
|
+
end
|
88
|
+
|
75
89
|
def source=(src)
|
76
90
|
@value = ''
|
77
91
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
@@ -129,12 +143,17 @@ class RTesseract
|
|
129
143
|
''
|
130
144
|
end
|
131
145
|
|
146
|
+
def config_hook
|
147
|
+
end
|
148
|
+
|
132
149
|
def config
|
133
150
|
@options ||= {}
|
151
|
+
config_hook
|
134
152
|
@options.map { |k, v| "#{k} #{v}" }.join("\n")
|
135
153
|
end
|
136
154
|
|
137
155
|
def config_file
|
156
|
+
config_hook
|
138
157
|
return '' if @options == {}
|
139
158
|
conf = Tempfile.new('config')
|
140
159
|
conf.write(config)
|
@@ -142,7 +161,7 @@ class RTesseract
|
|
142
161
|
conf.path
|
143
162
|
end
|
144
163
|
|
145
|
-
#TODO: Clear console for MacOS or Windows
|
164
|
+
# TODO: Clear console for MacOS or Windows
|
146
165
|
def clear_console_output
|
147
166
|
return '' unless @clear_console_output
|
148
167
|
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
@@ -152,22 +171,42 @@ class RTesseract
|
|
152
171
|
(@image = @processor.image_to_tif(@source, @x, @y, @w, @h)).path
|
153
172
|
end
|
154
173
|
|
174
|
+
def file_ext
|
175
|
+
'.txt'
|
176
|
+
end
|
177
|
+
|
155
178
|
def text_file
|
156
|
-
@text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}
|
179
|
+
@text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}").to_s
|
180
|
+
end
|
181
|
+
|
182
|
+
def text_file_with_ext(ext = nil)
|
183
|
+
[@text_file, ext || file_ext].join('')
|
184
|
+
end
|
185
|
+
|
186
|
+
def convert_command
|
187
|
+
`#{@command} "#{image}" "#{text_file}" #{lang} #{psm} #{config_file} #{clear_console_output} #{@options_cmd.join(' ')}`
|
188
|
+
end
|
189
|
+
|
190
|
+
def convert_text
|
191
|
+
@value = File.read(text_file_with_ext).to_s
|
192
|
+
end
|
193
|
+
|
194
|
+
def after_convert_hook
|
157
195
|
end
|
158
196
|
|
159
197
|
# Convert image to string
|
160
198
|
def convert
|
161
|
-
|
162
|
-
|
163
|
-
|
199
|
+
convert_command
|
200
|
+
after_convert_hook
|
201
|
+
convert_text
|
202
|
+
remove_file([@image, text_file_with_ext])
|
164
203
|
rescue => error
|
165
204
|
raise RTesseract::ConversionError.new(error)
|
166
205
|
end
|
167
206
|
|
168
207
|
# Read image from memory blob
|
169
|
-
def from_blob(blob)
|
170
|
-
blob_file = Tempfile.new('blob', :encoding => 'ascii-8bit')
|
208
|
+
def from_blob(blob, ext = '')
|
209
|
+
blob_file = Tempfile.new(['blob', ext], :encoding => 'ascii-8bit')
|
171
210
|
blob_file.binmode
|
172
211
|
blob_file.write(blob)
|
173
212
|
blob_file.rewind
|
@@ -196,15 +235,16 @@ class RTesseract
|
|
196
235
|
end
|
197
236
|
|
198
237
|
def self.choose_processor!(processor)
|
199
|
-
processor =
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
238
|
+
processor =
|
239
|
+
if MiniMagickProcessor.a_name?(processor.to_s)
|
240
|
+
MiniMagickProcessor
|
241
|
+
elsif QuickMagickProcessor.a_name?(processor.to_s)
|
242
|
+
QuickMagickProcessor
|
243
|
+
elsif NoneProcessor.a_name?(processor.to_s)
|
244
|
+
NoneProcessor
|
245
|
+
else
|
246
|
+
RMagickProcessor
|
247
|
+
end
|
208
248
|
processor.setup
|
209
249
|
processor
|
210
250
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
class RTesseract
|
6
|
+
# Class to read char positions from an image
|
7
|
+
class Box < RTesseract
|
8
|
+
def initialize_hook
|
9
|
+
@value, @x, @y, @w, @h = [[]]
|
10
|
+
end
|
11
|
+
|
12
|
+
def config_hook
|
13
|
+
@options['tessedit_create_hocr'] = 1 # Split Words configuration
|
14
|
+
end
|
15
|
+
|
16
|
+
def words
|
17
|
+
convert if @value == []
|
18
|
+
@value
|
19
|
+
end
|
20
|
+
|
21
|
+
def file_ext
|
22
|
+
'.hocr'
|
23
|
+
end
|
24
|
+
|
25
|
+
def parse_file
|
26
|
+
Nokogiri::HTML(File.read(text_file_with_ext)).css('span.ocrx_word, span.ocr_word')
|
27
|
+
end
|
28
|
+
|
29
|
+
def convert_text
|
30
|
+
text_objects = []
|
31
|
+
parse_file.each do |word|
|
32
|
+
attributes = word.attributes['title'].value.to_s.gsub(';', '').split(' ')
|
33
|
+
text_objects << { :word => word.text, :x_start => attributes[1].to_i, :y_start => attributes[2].to_i , :x_end => attributes[3].to_i, :y_end => attributes[4].to_i }
|
34
|
+
end
|
35
|
+
@value = text_objects
|
36
|
+
end
|
37
|
+
|
38
|
+
def after_convert_hook
|
39
|
+
FileUtils.mv(text_file_with_ext('.html'), text_file_with_ext) rescue nil
|
40
|
+
end
|
41
|
+
|
42
|
+
# Output value
|
43
|
+
def to_s
|
44
|
+
return @value.map { |word| word[:word] } if @value != []
|
45
|
+
if @processor.image?(@source) || @source.file?
|
46
|
+
convert
|
47
|
+
@value.map { |word| word[:word] }.join(' ')
|
48
|
+
else
|
49
|
+
fail RTesseract::ImageNotSelectedError.new(@source)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
class RTesseract
|
3
|
+
# Class to read char positions from an image
|
4
|
+
class BoxChar < Box
|
5
|
+
def config_hook
|
6
|
+
@options['tessedit_create_boxfile'] = 1 # Split chars
|
7
|
+
end
|
8
|
+
|
9
|
+
alias_method :characters, :words
|
10
|
+
|
11
|
+
def file_ext
|
12
|
+
'.box'
|
13
|
+
end
|
14
|
+
|
15
|
+
def parse_file
|
16
|
+
File.read(text_file_with_ext).to_s
|
17
|
+
end
|
18
|
+
|
19
|
+
def convert_text
|
20
|
+
text_objects = []
|
21
|
+
parse_file.each_line do |line|
|
22
|
+
char, x_start, y_start, x_end, y_end, word = line.split(' ')
|
23
|
+
text_objects << { :char => char, :x_start => x_start.to_i, :y_start => y_start.to_i , :x_end => x_end.to_i, :y_end => y_end.to_i }
|
24
|
+
end
|
25
|
+
@value = text_objects
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 1.
|
5
|
+
# stub: rtesseract 1.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "1.
|
9
|
+
s.version = "1.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2015-
|
14
|
+
s.date = "2015-06-23"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -34,29 +34,36 @@ Gem::Specification.new do |s|
|
|
34
34
|
"lib/processors/quick_magick.rb",
|
35
35
|
"lib/processors/rmagick.rb",
|
36
36
|
"lib/rtesseract.rb",
|
37
|
+
"lib/rtesseract/box.rb",
|
38
|
+
"lib/rtesseract/box_char.rb",
|
37
39
|
"lib/rtesseract/errors.rb",
|
38
40
|
"lib/rtesseract/mixed.rb",
|
39
41
|
"rtesseract.gemspec",
|
40
42
|
"spec/images/mixed.tif",
|
43
|
+
"spec/images/orientation_reverse.png",
|
41
44
|
"spec/images/test with spaces.tif",
|
42
45
|
"spec/images/test.bmp",
|
43
46
|
"spec/images/test.jpg",
|
44
47
|
"spec/images/test.png",
|
45
48
|
"spec/images/test.tif",
|
46
49
|
"spec/images/test1.tif",
|
50
|
+
"spec/images/test_words.png",
|
51
|
+
"spec/rtesseract_box_char_spec.rb",
|
52
|
+
"spec/rtesseract_box_spec.rb",
|
47
53
|
"spec/rtesseract_mixed_spec.rb",
|
48
54
|
"spec/rtesseract_spec.rb",
|
49
55
|
"spec/spec_helper.rb"
|
50
56
|
]
|
51
57
|
s.homepage = "http://github.com/dannnylo/rtesseract"
|
52
58
|
s.licenses = ["MIT"]
|
53
|
-
s.rubygems_version = "2.2.
|
59
|
+
s.rubygems_version = "2.2.2"
|
54
60
|
s.summary = "Ruby library for working with the Tesseract OCR."
|
55
61
|
|
56
62
|
if s.respond_to? :specification_version then
|
57
63
|
s.specification_version = 4
|
58
64
|
|
59
65
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
66
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
60
67
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
61
68
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
62
69
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
@@ -64,6 +71,7 @@ Gem::Specification.new do |s|
|
|
64
71
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
65
72
|
s.add_development_dependency(%q<coveralls>, [">= 0"])
|
66
73
|
else
|
74
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
67
75
|
s.add_dependency(%q<rspec>, [">= 0"])
|
68
76
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
69
77
|
s.add_dependency(%q<bundler>, [">= 0"])
|
@@ -72,6 +80,7 @@ Gem::Specification.new do |s|
|
|
72
80
|
s.add_dependency(%q<coveralls>, [">= 0"])
|
73
81
|
end
|
74
82
|
else
|
83
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
75
84
|
s.add_dependency(%q<rspec>, [">= 0"])
|
76
85
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
77
86
|
s.add_dependency(%q<bundler>, [">= 0"])
|
Binary file
|
Binary file
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe "Rtesseract::BoxChar" do
|
5
|
+
before do
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb','')).expand_path
|
7
|
+
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
|
+
@words_image = @path.join('images', 'test_words.png').to_s
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
it "bounding box by char" do
|
13
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
14
|
+
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
15
|
+
{:char=>"4", :x_start=>145, :y_start=>14, :x_end=>159, :y_end=>33},
|
16
|
+
{:char=>"3", :x_start=>184, :y_start=>14, :x_end=>196, :y_end=>33},
|
17
|
+
{:char=>"X", :x_start=>222, :y_start=>14, :x_end=>238, :y_end=>32},
|
18
|
+
{:char=>"F", :x_start=>260, :y_start=>14, :x_end=>273, :y_end=>32}]
|
19
|
+
)
|
20
|
+
expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
|
21
|
+
{:char => 'I', :x_start=>52, :y_start=>91, :x_end=>54, :y_end=>104},
|
22
|
+
{:char => 'f', :x_start=>56, :y_start=>91, :x_end=>63, :y_end=>105},
|
23
|
+
{:char => 'y', :x_start=>69, :y_start=>87, :x_end=>79, :y_end=>101},
|
24
|
+
{:char => 'o', :x_start=>80, :y_start=>91, :x_end=>90, :y_end=>101},
|
25
|
+
{:char => 'u', :x_start=>92, :y_start=>91, :x_end=>100, :y_end=>101},
|
26
|
+
{:char => 'a', :x_start=>108, :y_start=>91, :x_end=>116, :y_end=>101},
|
27
|
+
{:char => 'r', :x_start=>119, :y_start=>91, :x_end=>125, :y_end=>101},
|
28
|
+
{:char => 'e', :x_start=>126, :y_start=>91, :x_end=>136, :y_end=>101},
|
29
|
+
{:char => 'a', :x_start=>143, :y_start=>91, :x_end=>151, :y_end=>101},
|
30
|
+
{:char => 'f', :x_start=>158, :y_start=>91, :x_end=>165, :y_end=>105},
|
31
|
+
{:char => 'r', :x_start=>166, :y_start=>91, :x_end=>172, :y_end=>101},
|
32
|
+
{:char => 'i', :x_start=>174, :y_start=>91, :x_end=>176, :y_end=>105},
|
33
|
+
{:char => 'e', :x_start=>178, :y_start=>91, :x_end=>188, :y_end=>101},
|
34
|
+
{:char => 'n', :x_start=>190, :y_start=>91, :x_end=>198, :y_end=>101},
|
35
|
+
{:char => 'd', :x_start=>200, :y_start=>91, :x_end=>209, :y_end=>105},
|
36
|
+
{:char => ',', :x_start=>211, :y_start=>89, :x_end=>214, :y_end=>93},
|
37
|
+
{:char => 'y', :x_start=>51, :y_start=>65, :x_end=>61, :y_end=>79},
|
38
|
+
{:char => 'o', :x_start=>62, :y_start=>69, :x_end=>72, :y_end=>79},
|
39
|
+
{:char => 'u', :x_start=>74, :y_start=>69, :x_end=>82, :y_end=>79},
|
40
|
+
{:char => 's', :x_start=>90, :y_start=>69, :x_end=>97, :y_end=>79},
|
41
|
+
{:char => 'p', :x_start=>99, :y_start=>65, :x_end=>108, :y_end=>79},
|
42
|
+
{:char => 'e', :x_start=>109, :y_start=>69, :x_end=>119, :y_end=>79},
|
43
|
+
{:char => 'a', :x_start=>120, :y_start=>69, :x_end=>128, :y_end=>79},
|
44
|
+
{:char => 'k', :x_start=>131, :y_start=>69, :x_end=>140, :y_end=>83},
|
45
|
+
{:char => 't', :x_start=>146, :y_start=>69, :x_end=>152, :y_end=>82},
|
46
|
+
{:char => 'h', :x_start=>154, :y_start=>69, :x_end=>162, :y_end=>83},
|
47
|
+
{:char => 'e', :x_start=>164, :y_start=>69, :x_end=>174, :y_end=>79},
|
48
|
+
{:char => 'p', :x_start=>182, :y_start=>65, :x_end=>191, :y_end=>79},
|
49
|
+
{:char => 'a', :x_start=>192, :y_start=>69, :x_end=>200, :y_end=>79},
|
50
|
+
{:char => 's', :x_start=>202, :y_start=>69, :x_end=>209, :y_end=>79},
|
51
|
+
{:char => 's', :x_start=>210, :y_start=>69, :x_end=>217, :y_end=>79},
|
52
|
+
{:char => 'w', :x_start=>219, :y_start=>69, :x_end=>232, :y_end=>79},
|
53
|
+
{:char => 'o', :x_start=>234, :y_start=>69, :x_end=>244, :y_end=>79},
|
54
|
+
{:char => 'r', :x_start=>246, :y_start=>69, :x_end=>252, :y_end=>79},
|
55
|
+
{:char => 'd', :x_start=>253, :y_start=>69, :x_end=>262, :y_end=>83},
|
56
|
+
{:char => ',', :x_start=>264, :y_start=>67, :x_end=>267, :y_end=>71},
|
57
|
+
{:char => 'a', :x_start=>51, :y_start=>47, :x_end=>59, :y_end=>57},
|
58
|
+
{:char => 'n', :x_start=>62, :y_start=>47, :x_end=>70, :y_end=>57},
|
59
|
+
{:char => 'd', :x_start=>72, :y_start=>47, :x_end=>81, :y_end=>61},
|
60
|
+
{:char => 't', :x_start=>89, :y_start=>47, :x_end=>95, :y_end=>60},
|
61
|
+
{:char => 'h', :x_start=>97, :y_start=>47, :x_end=>105, :y_end=>61},
|
62
|
+
{:char => 'e', :x_start=>107, :y_start=>47, :x_end=>117, :y_end=>57},
|
63
|
+
{:char => 'd', :x_start=>124, :y_start=>47, :x_end=>133, :y_end=>61},
|
64
|
+
{:char => 'o', :x_start=>135, :y_start=>47, :x_end=>145, :y_end=>57},
|
65
|
+
{:char => 'o', :x_start=>146, :y_start=>47, :x_end=>156, :y_end=>57},
|
66
|
+
{:char => 'r', :x_start=>158, :y_start=>47, :x_end=>164, :y_end=>57},
|
67
|
+
{:char => 's', :x_start=>165, :y_start=>47, :x_end=>172, :y_end=>57},
|
68
|
+
{:char => 'w', :x_start=>180, :y_start=>47, :x_end=>193, :y_end=>57},
|
69
|
+
{:char => 'i', :x_start=>196, :y_start=>47, :x_end=>198, :y_end=>61},
|
70
|
+
{:char => 'l', :x_start=>201, :y_start=>47, :x_end=>203, :y_end=>61},
|
71
|
+
{:char => 'l', :x_start=>206, :y_start=>47, :x_end=>208, :y_end=>61},
|
72
|
+
{:char => 'o', :x_start=>216, :y_start=>47, :x_end=>226, :y_end=>57},
|
73
|
+
{:char => 'p', :x_start=>228, :y_start=>43, :x_end=>237, :y_end=>57},
|
74
|
+
{:char => 'e', :x_start=>238, :y_start=>47, :x_end=>248, :y_end=>57},
|
75
|
+
{:char => 'n', :x_start=>250, :y_start=>47, :x_end=>258, :y_end=>57},
|
76
|
+
{:char => '.', :x_start=>261, :y_start=>47, :x_end=>263, :y_end=>49}]
|
77
|
+
)
|
78
|
+
|
79
|
+
expect{RTesseract::BoxChar.new(@image_tiff, {:command => "tesseract_error"}).to_s }.to raise_error(RTesseract::ConversionError)
|
80
|
+
expect{RTesseract::BoxChar.new(@image_tiff + "_not_exist").to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe "Rtesseract::Box" do
|
5
|
+
before do
|
6
|
+
@path = Pathname.new(__FILE__.gsub("rtesseract_box_spec.rb","")).expand_path
|
7
|
+
@image_tiff = @path.join("images","test.tif").to_s
|
8
|
+
@words_image = @path.join("images","test_words.png").to_s
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
it "bounding box" do
|
13
|
+
expect(RTesseract.new(@words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
|
14
|
+
expect(RTesseract::Box.new(@words_image).words).to eql([
|
15
|
+
{:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
|
16
|
+
{:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
|
17
|
+
{:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
|
18
|
+
{:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
|
19
|
+
{:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
|
20
|
+
{:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
|
21
|
+
{:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
|
22
|
+
{:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
|
23
|
+
{:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
|
24
|
+
{:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
|
25
|
+
{:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
|
26
|
+
{:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
|
27
|
+
{:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
|
28
|
+
{:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
|
29
|
+
])
|
30
|
+
|
31
|
+
expect(RTesseract::Box.new(@image_tiff).words.is_a?(Array)).to eql(true)
|
32
|
+
expect(RTesseract::Box.new(@words_image).to_s).to eql("If you are a friend, you speak the password, and the doors will open.")
|
33
|
+
expect{RTesseract::Box.new(@image_tiff, {:command => "tesseract_error"}).to_s }.to raise_error(RTesseract::ConversionError)
|
34
|
+
expect{RTesseract::Box.new(@image_tiff + "_not_exist").to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
35
|
+
end
|
36
|
+
end
|
@@ -9,8 +9,8 @@ describe "Rtesseract::Mixed" do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should be instantiable" do
|
12
|
-
RTesseract::Mixed.new.class.
|
13
|
-
RTesseract::Mixed.new(@image_tif).class.
|
12
|
+
expect(RTesseract::Mixed.new.class).to eql(RTesseract::Mixed)
|
13
|
+
expect(RTesseract::Mixed.new(@image_tif).class).to eql(RTesseract::Mixed)
|
14
14
|
end
|
15
15
|
|
16
16
|
it "should translate parts of the image to text" do
|
@@ -20,9 +20,9 @@ describe "Rtesseract::Mixed" do
|
|
20
20
|
image.area(218, 22, 24, 28) # position of z
|
21
21
|
image.area(248, 24, 22, 22) # position of z
|
22
22
|
end
|
23
|
-
mix_block.to_s_without_spaces.
|
23
|
+
expect(mix_block.to_s_without_spaces).to eql("43FF")
|
24
24
|
mix_block.clear_areas
|
25
|
-
mix_block.areas.
|
25
|
+
expect(mix_block.areas).to eql([])
|
26
26
|
|
27
27
|
mix_block = RTesseract::Mixed.new(@image_tif,{:areas => [
|
28
28
|
{:x => 28, :y=>19, :width=>25, :height=>25 }, #position of 4
|
@@ -30,7 +30,7 @@ describe "Rtesseract::Mixed" do
|
|
30
30
|
{:x => 218, :y=>22, :width=>24, :height=>28}, # position of z
|
31
31
|
{:x => 248, :y=>24, :width=>22, :height=>22} # position of z
|
32
32
|
],:psm=>7})
|
33
|
-
mix_block.to_s_without_spaces.
|
33
|
+
expect(mix_block.to_s_without_spaces).to eql("43FF")
|
34
34
|
end
|
35
35
|
|
36
36
|
it " get a error" do
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -14,70 +14,70 @@ describe "Rtesseract" do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
it " be instantiable" do
|
17
|
-
RTesseract.new.class.
|
18
|
-
RTesseract.new("").class.
|
19
|
-
RTesseract.new(@image_tif).class.
|
17
|
+
expect(RTesseract.new.class).to eql(RTesseract)
|
18
|
+
expect(RTesseract.new("").class).to eql(RTesseract)
|
19
|
+
expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
|
20
20
|
end
|
21
21
|
|
22
22
|
it " translate image to text" do
|
23
|
-
RTesseract.new(@image_tif).to_s_without_spaces.
|
24
|
-
RTesseract.new(@image_tif, {:processor => 'mini_magick'}).to_s_without_spaces.
|
25
|
-
RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces.
|
26
|
-
RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces.
|
23
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql("43XF")
|
24
|
+
expect(RTesseract.new(@image_tif, {:processor => 'mini_magick'}).to_s_without_spaces).to eql("43XF")
|
25
|
+
expect(RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces).to eql("V2V4")
|
26
|
+
expect(RTesseract.new(@path.join("images","test with spaces.tif").to_s).to_s_without_spaces).to eql("V2V4")
|
27
27
|
|
28
28
|
end
|
29
29
|
|
30
30
|
it " translate images .png, .jpg, .bmp" do
|
31
|
-
RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces.
|
32
|
-
RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces.
|
33
|
-
RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces.
|
31
|
+
expect(RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces).to eql("HW9W")
|
32
|
+
expect(RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces).to eql("3R8F")
|
33
|
+
expect(RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces).to eql("FLA6")
|
34
34
|
end
|
35
35
|
|
36
36
|
it " support diferent processors" do
|
37
37
|
#Rmagick
|
38
|
-
RTesseract.new(@image_tif).to_s_without_spaces.
|
39
|
-
RTesseract.new(@image_tif, :processor => 'rmagick').to_s_without_spaces.
|
40
|
-
RTesseract.new(@path.join("images","test.png").to_s, :processor => 'rmagick').to_s_without_spaces.
|
38
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql("43XF")
|
39
|
+
expect(RTesseract.new(@image_tif, :processor => 'rmagick').to_s_without_spaces).to eql("43XF")
|
40
|
+
expect(RTesseract.new(@path.join("images","test.png").to_s, :processor => 'rmagick').to_s_without_spaces).to eql("HW9W")
|
41
41
|
|
42
42
|
#MiniMagick
|
43
|
-
RTesseract.new(@image_tif, :processor => 'mini_magick').to_s_without_spaces.
|
44
|
-
RTesseract.new(@path.join("images","test.png").to_s, :processor => 'mini_magick').to_s_without_spaces.
|
43
|
+
expect(RTesseract.new(@image_tif, :processor => 'mini_magick').to_s_without_spaces).to eql("43XF")
|
44
|
+
expect(RTesseract.new(@path.join("images","test.png").to_s, :processor => 'mini_magick').to_s_without_spaces).to eql("HW9W")
|
45
45
|
|
46
46
|
#QuickMagick
|
47
|
-
RTesseract.new(@image_tif, :processor => 'quick_magick').to_s_without_spaces.
|
48
|
-
RTesseract.new(@path.join("images","test.png").to_s, :processor => 'quick_magick').to_s_without_spaces.
|
47
|
+
expect(RTesseract.new(@image_tif, :processor => 'quick_magick').to_s_without_spaces).to eql("43XF")
|
48
|
+
expect(RTesseract.new(@path.join("images","test.png").to_s, :processor => 'quick_magick').to_s_without_spaces).to eql("HW9W")
|
49
49
|
|
50
50
|
#NoneMagick
|
51
|
-
RTesseract.new(@image_tif, :processor => 'none').to_s_without_spaces.
|
51
|
+
expect(RTesseract.new(@image_tif, :processor => 'none').to_s_without_spaces).to eql("43XF")
|
52
52
|
end
|
53
53
|
|
54
54
|
it " change the image" do
|
55
55
|
image = RTesseract.new(@image_tif)
|
56
|
-
image.to_s_without_spaces.
|
56
|
+
expect(image.to_s_without_spaces).to eql("43XF")
|
57
57
|
image.source = @path.join("images","test1.tif").to_s
|
58
|
-
image.to_s_without_spaces.
|
58
|
+
expect(image.to_s_without_spaces).to eql("V2V4")
|
59
59
|
end
|
60
60
|
|
61
61
|
it " select the language" do
|
62
62
|
#English
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
63
|
+
expect(RTesseract.new(@image_tif,{:lang=>"eng"}).lang).to eql(" -l eng ")
|
64
|
+
expect(RTesseract.new(@image_tif,{:lang=>"en"}).lang).to eql(" -l eng ")
|
65
|
+
expect(RTesseract.new(@image_tif,{:lang=>"en-US"}).lang).to eql(" -l eng ")
|
66
|
+
expect(RTesseract.new(@image_tif,{:lang=>"english"}).lang).to eql(" -l eng ")
|
67
67
|
|
68
68
|
#Portuguese
|
69
|
-
RTesseract.new(@image_tif,{:lang=>"por"}).lang.
|
70
|
-
RTesseract.new(@image_tif,{:lang=>"pt-BR"}).lang.
|
71
|
-
RTesseract.new(@image_tif,{:lang=>"pt-br"}).lang.
|
72
|
-
RTesseract.new(@image_tif,{:lang=>"pt"}).lang.
|
73
|
-
RTesseract.new(@image_tif,{:lang=>"portuguese"}).lang.
|
69
|
+
expect(RTesseract.new(@image_tif,{:lang=>"por"}).lang).to eql(" -l por ")
|
70
|
+
expect(RTesseract.new(@image_tif,{:lang=>"pt-BR"}).lang).to eql(" -l por ")
|
71
|
+
expect(RTesseract.new(@image_tif,{:lang=>"pt-br"}).lang).to eql(" -l por ")
|
72
|
+
expect(RTesseract.new(@image_tif,{:lang=>"pt"}).lang).to eql(" -l por ")
|
73
|
+
expect(RTesseract.new(@image_tif,{:lang=>"portuguese"}).lang).to eql(" -l por ")
|
74
74
|
|
75
|
-
RTesseract.new(@image_tif,{:lang=>"eng"}).to_s_without_spaces.
|
76
|
-
#RTesseract.new(@image_tif,{:lang=>"por"}).to_s_without_spaces.
|
77
|
-
RTesseract.new(@image_tif,{:lang=>"eng"}).lang.
|
75
|
+
expect(RTesseract.new(@image_tif,{:lang=>"eng"}).to_s_without_spaces).to eql("43XF")
|
76
|
+
#RTesseract.new(@image_tif,{:lang=>"por"}).to_s_without_spaces).to eql("43XF")
|
77
|
+
expect(RTesseract.new(@image_tif,{:lang=>"eng"}).lang).to eql(" -l eng ")
|
78
78
|
|
79
79
|
#Inválid lang object
|
80
|
-
RTesseract.new(@image_tif,{:lang=>MakeStringError.new}).lang.
|
80
|
+
expect(RTesseract.new(@image_tif,{:lang=>MakeStringError.new}).lang).to eql("")
|
81
81
|
end
|
82
82
|
|
83
83
|
it " select options" do
|
@@ -88,17 +88,17 @@ describe "Rtesseract" do
|
|
88
88
|
end
|
89
89
|
|
90
90
|
it " be configurable" do
|
91
|
-
RTesseract.new(@image_tif,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config.
|
92
|
-
RTesseract.new(@image_tif,{:chop_enable=>0}).config.
|
93
|
-
RTesseract.new(@image_tif,{:chop_enable=>0,:enable_assoc=>0}).config.
|
94
|
-
RTesseract.new(@image_tif,{:chop_enable=>0}).to_s_without_spaces.
|
91
|
+
expect(RTesseract.new(@image_tif,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
|
92
|
+
expect(RTesseract.new(@image_tif,{:chop_enable=>0}).config).to eql("chop_enable 0")
|
93
|
+
expect(RTesseract.new(@image_tif,{:chop_enable=>0,:enable_assoc=>0}).config).to eql("chop_enable 0\nenable_assoc 0")
|
94
|
+
expect(RTesseract.new(@image_tif,{:chop_enable=>0}).to_s_without_spaces).to eql("43XF")
|
95
95
|
end
|
96
96
|
|
97
97
|
it " crop image" do
|
98
|
-
RTesseract.new(@image_tif,{:psm=>7}).crop!(140,10,36,40).to_s_without_spaces.
|
99
|
-
RTesseract.new(@image_tif,{:psm=>7}).crop!(180,10,36,40).to_s_without_spaces.
|
100
|
-
RTesseract.new(@image_tif,{:psm=>7}).crop!(216,10,20,40).to_s_without_spaces.
|
101
|
-
RTesseract.new(@image_tif,{:psm=>7}).crop!(240,10,30,40).to_s_without_spaces.
|
98
|
+
expect(RTesseract.new(@image_tif,{:psm=>7}).crop!(140,10,36,40).to_s_without_spaces).to eql("4")
|
99
|
+
expect(RTesseract.new(@image_tif,{:psm=>7}).crop!(180,10,36,40).to_s_without_spaces).to eql("3")
|
100
|
+
expect(RTesseract.new(@image_tif,{:psm=>7}).crop!(216,10,20,40).to_s_without_spaces).to eql("X")
|
101
|
+
expect(RTesseract.new(@image_tif,{:psm=>7}).crop!(240,10,30,40).to_s_without_spaces).to eql("F")
|
102
102
|
end
|
103
103
|
|
104
104
|
|
@@ -108,47 +108,53 @@ describe "Rtesseract" do
|
|
108
108
|
|
109
109
|
test = RTesseract.new("", {:psm => 7})
|
110
110
|
test.from_blob(blob)
|
111
|
-
test.to_s_without_spaces.
|
111
|
+
expect(test.to_s_without_spaces).to eql("HW9W")
|
112
112
|
|
113
113
|
test = RTesseract.new("", {:psm => 7})
|
114
114
|
expect{test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
115
115
|
end
|
116
116
|
|
117
117
|
it " use a instance" do
|
118
|
-
RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces.
|
119
|
-
RMagickProcessor.a_name?('teste').
|
120
|
-
RMagickProcessor.a_name?('rmagick').
|
121
|
-
RMagickProcessor.a_name?('RMagickProcessor').
|
122
|
-
|
123
|
-
MiniMagickProcessor.a_name?('
|
124
|
-
MiniMagickProcessor.a_name?('
|
125
|
-
|
126
|
-
|
127
|
-
QuickMagickProcessor.a_name?('
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
NoneProcessor.a_name?('none').should == true
|
132
|
-
NoneProcessor.a_name?('NoneProcessor').should == true
|
118
|
+
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql("43XF")
|
119
|
+
expect(RMagickProcessor.a_name?('teste')).to eql(false)
|
120
|
+
expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
|
121
|
+
expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
122
|
+
expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
|
123
|
+
expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
124
|
+
expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
125
|
+
expect(QuickMagickProcessor.a_name?('teste')).to eql(false)
|
126
|
+
expect(QuickMagickProcessor.a_name?('quick_magick')).to eql(true)
|
127
|
+
expect(QuickMagickProcessor.a_name?('QuickMagickProcessor')).to eql(true)
|
128
|
+
expect(NoneProcessor.a_name?('none')).to eql(true)
|
129
|
+
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
133
130
|
end
|
134
131
|
|
135
132
|
it " change image in a block" do
|
133
|
+
test = RTesseract.read(@path.join("images","test.png").to_s){}
|
134
|
+
expect(test.class).to eql(RTesseract)
|
135
|
+
|
136
|
+
test = RTesseract.new(@image_tif)
|
137
|
+
test.read do |image|
|
138
|
+
image = image.quantize(256, Magick::GRAYColorspace)
|
139
|
+
end
|
140
|
+
expect(test.to_s_without_spaces).to eql("43XF")
|
141
|
+
|
136
142
|
test = RTesseract.read(@path.join("images","test.png").to_s) do |image|
|
137
|
-
image = image.white_threshold(245)
|
138
|
-
image = image.quantize(256,Magick::GRAYColorspace)
|
143
|
+
#image = image.white_threshold(245)
|
144
|
+
#image = image.quantize(256,Magick::GRAYColorspace)
|
145
|
+
image.rotate(90)
|
139
146
|
end
|
140
|
-
test.to_s_without_spaces.
|
147
|
+
expect(test.to_s_without_spaces).to eql("HW9W")
|
141
148
|
|
142
149
|
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en'}) do |image|
|
143
150
|
image = image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
144
151
|
end
|
145
|
-
test.to_s_without_spaces.
|
152
|
+
expect(test.to_s_without_spaces).to eql("3R8F")
|
146
153
|
|
147
154
|
test = RTesseract.read(@path.join("images","test.jpg").to_s,{:lang=>'en', :processor => 'mini_magick'}) do |image|
|
148
|
-
#image.white_threshold(245)
|
149
155
|
image.gravity "south"
|
150
156
|
end
|
151
|
-
test.to_s_without_spaces.
|
157
|
+
expect(test.to_s_without_spaces).to eql("3R8F")
|
152
158
|
end
|
153
159
|
|
154
160
|
it " get a error" do
|
@@ -156,7 +162,7 @@ describe "Rtesseract" do
|
|
156
162
|
expect{ RTesseract.new(@path.join("images","test_not_exists.png").to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
157
163
|
|
158
164
|
#Inválid psm object
|
159
|
-
RTesseract.new(@image_tif,{:psm=>MakeStringError.new}).psm.
|
165
|
+
expect(RTesseract.new(@image_tif,{:psm=>MakeStringError.new}).psm).to eql("")
|
160
166
|
end
|
161
167
|
|
162
168
|
it "remove a file" do
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rspec
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -117,16 +131,22 @@ files:
|
|
117
131
|
- lib/processors/quick_magick.rb
|
118
132
|
- lib/processors/rmagick.rb
|
119
133
|
- lib/rtesseract.rb
|
134
|
+
- lib/rtesseract/box.rb
|
135
|
+
- lib/rtesseract/box_char.rb
|
120
136
|
- lib/rtesseract/errors.rb
|
121
137
|
- lib/rtesseract/mixed.rb
|
122
138
|
- rtesseract.gemspec
|
123
139
|
- spec/images/mixed.tif
|
140
|
+
- spec/images/orientation_reverse.png
|
124
141
|
- spec/images/test with spaces.tif
|
125
142
|
- spec/images/test.bmp
|
126
143
|
- spec/images/test.jpg
|
127
144
|
- spec/images/test.png
|
128
145
|
- spec/images/test.tif
|
129
146
|
- spec/images/test1.tif
|
147
|
+
- spec/images/test_words.png
|
148
|
+
- spec/rtesseract_box_char_spec.rb
|
149
|
+
- spec/rtesseract_box_spec.rb
|
130
150
|
- spec/rtesseract_mixed_spec.rb
|
131
151
|
- spec/rtesseract_spec.rb
|
132
152
|
- spec/spec_helper.rb
|
@@ -150,7 +170,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
170
|
version: '0'
|
151
171
|
requirements: []
|
152
172
|
rubyforge_project:
|
153
|
-
rubygems_version: 2.2.
|
173
|
+
rubygems_version: 2.2.2
|
154
174
|
signing_key:
|
155
175
|
specification_version: 4
|
156
176
|
summary: Ruby library for working with the Tesseract OCR.
|