rtesseract 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -4
- data/Gemfile +9 -10
- data/README.rdoc +27 -8
- data/Rakefile +12 -12
- data/VERSION +1 -1
- data/lib/processors/none.rb +2 -2
- data/lib/processors/rmagick.rb +9 -6
- data/lib/rtesseract.rb +28 -35
- data/lib/rtesseract/box.rb +22 -5
- data/lib/rtesseract/box_char.rb +2 -2
- data/lib/rtesseract/mixed.rb +2 -2
- data/lib/utils.rb +5 -0
- data/rtesseract.gemspec +7 -5
- data/spec/images/README.pdf +0 -0
- data/spec/images/blank.tif +0 -0
- data/spec/rtesseract_box_char_spec.rb +68 -68
- data/spec/rtesseract_box_spec.rb +24 -23
- data/spec/rtesseract_mixed_spec.rb +28 -25
- data/spec/rtesseract_spec.rb +106 -93
- data/spec/spec_helper.rb +4 -5
- metadata +6 -4
- data/.travis.sh +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c5a4dc5260f5eee845925b59f89fa712466f2a6
|
4
|
+
data.tar.gz: 21b1ad6d79f1d1f82483c46c199eb4d6fd06120e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a78a082daf437c8e7e20313973e93a7e04f2dec161c46d2bf7d37cce86718001d45b12995c12d63930f26ef93ab3c492e0413a6be64d30129ede2fa8f689276
|
7
|
+
data.tar.gz: b622c914be59d0f3fdfae141f5bdfdaadfb53fa22d03b4497cf20a1b12a689fef340e2446b67112565ca1d5510782dc3f3925614ee978e7ba2b9c2dc2d977e03
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,20 +1,19 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
# Add dependencies to develop your gem here.
|
3
3
|
# Include everything needed to run rake, tests, features, etc.
|
4
4
|
gem 'nokogiri'
|
5
5
|
|
6
6
|
group :development do
|
7
|
-
gem
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
11
|
-
gem
|
7
|
+
gem 'rspec'
|
8
|
+
gem 'rdoc'
|
9
|
+
gem 'bundler'
|
10
|
+
gem 'jeweler', '~> 2.0.1'
|
11
|
+
gem 'simplecov'
|
12
12
|
gem 'coveralls', require: false
|
13
13
|
end
|
14
14
|
|
15
15
|
group :test do
|
16
|
-
gem
|
17
|
-
gem
|
18
|
-
gem
|
16
|
+
gem 'rmagick'
|
17
|
+
gem 'mini_magick'
|
18
|
+
gem 'quick_magick'
|
19
19
|
end
|
20
|
-
|
data/README.rdoc
CHANGED
@@ -60,8 +60,8 @@ It's very simple to use rtesseract:
|
|
60
60
|
mix_block.to_s
|
61
61
|
|
62
62
|
=== OPTIONS
|
63
|
-
|
64
|
-
Processors Options (_Rmagick_ is default)
|
63
|
+
|
64
|
+
Processors Options (_Rmagick_ is default)
|
65
65
|
|
66
66
|
RTesseract.new("test.jpg", :processor => "mini_magick")
|
67
67
|
RTesseract.new("test.jpg", :processor => "quick_magick")
|
@@ -70,7 +70,7 @@ Processors Options (_Rmagick_ is default)
|
|
70
70
|
|
71
71
|
RTesseract.new("test.jpg", :processor => "none")
|
72
72
|
|
73
|
-
Language Options
|
73
|
+
Language Options
|
74
74
|
|
75
75
|
RTesseract.new("test.jpg", :lang => "deu")
|
76
76
|
* eng - English
|
@@ -83,18 +83,37 @@ Language Options
|
|
83
83
|
* spa - Spanish
|
84
84
|
* vie - Vietnamese
|
85
85
|
Note: Make sure you have installed the language to tesseract
|
86
|
-
|
86
|
+
|
87
87
|
Other Options
|
88
88
|
|
89
89
|
RTesseract.new("test.jpg", options: :digits) # Only digit recognition
|
90
|
-
|
90
|
+
|
91
91
|
OR
|
92
|
-
|
92
|
+
|
93
93
|
RTesseract.new("test.jpg", options: [:digits, :quiet])
|
94
|
-
|
94
|
+
|
95
|
+
=== BOUNDING BOX: TO GET WORDS WITH THEIR POSITIONS
|
96
|
+
|
97
|
+
RTesseract::Box.new('test_words.png').words
|
98
|
+
# => [
|
99
|
+
# {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
|
100
|
+
# {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
|
101
|
+
# {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
|
102
|
+
# {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
|
103
|
+
# {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
|
104
|
+
# {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
|
105
|
+
# {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
|
106
|
+
# {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
|
107
|
+
# {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
|
108
|
+
# {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
|
109
|
+
# {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
|
110
|
+
# {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
|
111
|
+
# {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
|
112
|
+
# {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
|
113
|
+
# ]
|
95
114
|
|
96
115
|
== Contributing to rtesseract
|
97
|
-
|
116
|
+
|
98
117
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
99
118
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
100
119
|
* Fork the project.
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
Bundler.setup(:default, :development)
|
7
7
|
rescue Bundler::BundlerError => e
|
8
8
|
$stderr.puts e.message
|
9
|
-
$stderr.puts
|
9
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
10
10
|
exit e.status_code
|
11
11
|
end
|
12
12
|
require 'rake'
|
@@ -14,13 +14,13 @@ require 'rake'
|
|
14
14
|
require 'jeweler'
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
17
|
-
gem.name =
|
18
|
-
gem.homepage =
|
19
|
-
gem.license =
|
20
|
-
gem.summary =
|
21
|
-
gem.description =
|
22
|
-
gem.email =
|
23
|
-
gem.authors = [
|
17
|
+
gem.name = 'rtesseract'
|
18
|
+
gem.homepage = 'http://github.com/dannnylo/rtesseract'
|
19
|
+
gem.license = 'MIT'
|
20
|
+
gem.summary = 'Ruby library for working with the Tesseract OCR.'
|
21
|
+
gem.description = 'Ruby library for working with the Tesseract OCR.'
|
22
|
+
gem.email = 'dannnylo@gmail.com'
|
23
|
+
gem.authors = ['Danilo Jeremias da Silva']
|
24
24
|
# dependencies defined in Gemfile
|
25
25
|
end
|
26
26
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -31,17 +31,17 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
31
31
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
32
|
end
|
33
33
|
|
34
|
-
desc
|
34
|
+
desc 'Code coverage detail'
|
35
35
|
task :simplecov do
|
36
|
-
ENV['COVERAGE'] =
|
36
|
+
ENV['COVERAGE'] = 'true'
|
37
37
|
Rake::Task['spec'].execute
|
38
38
|
end
|
39
39
|
|
40
|
-
task :
|
40
|
+
task default: :spec
|
41
41
|
|
42
42
|
require 'rdoc/task'
|
43
43
|
Rake::RDocTask.new do |rdoc|
|
44
|
-
version = File.exist?('VERSION') ? File.read('VERSION') :
|
44
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ''
|
45
45
|
|
46
46
|
rdoc.rdoc_dir = 'rdoc'
|
47
47
|
rdoc.title = "rtesseract #{version}"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.3.
|
1
|
+
1.3.1
|
data/lib/processors/none.rb
CHANGED
@@ -10,7 +10,7 @@ module NoneProcessor
|
|
10
10
|
|
11
11
|
def self.image_to_tif(source, _x = nil, _y = nil, _w = nil, _h = nil)
|
12
12
|
tmp_file = Tempfile.new(['', '.tif'])
|
13
|
-
tmp_file.write(
|
13
|
+
tmp_file.write(read_with_processor(source))
|
14
14
|
tmp_file
|
15
15
|
end
|
16
16
|
|
@@ -21,6 +21,6 @@ module NoneProcessor
|
|
21
21
|
File.read(path)
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.image?(
|
24
|
+
def self.image?(*)
|
25
25
|
end
|
26
26
|
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# Add to rtesseract a image manipulation with RMagick
|
3
3
|
module RMagickProcessor
|
4
4
|
def self.setup
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
require 'rmagick'
|
6
|
+
rescue LoadError
|
7
|
+
# :nocov:
|
8
|
+
require 'RMagick'
|
9
|
+
# :nocov:
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.a_name?(name)
|
@@ -18,7 +18,10 @@ module RMagickProcessor
|
|
18
18
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
19
19
|
cat.crop!(x, y, w, h) unless [x, y, w, h].compact == []
|
20
20
|
cat.alpha Magick::DeactivateAlphaChannel
|
21
|
-
cat.write(tmp_file.path.to_s) {
|
21
|
+
cat.write(tmp_file.path.to_s) {
|
22
|
+
# self.depth = 16
|
23
|
+
self.compression = Magick::NoCompression
|
24
|
+
}
|
22
25
|
tmp_file
|
23
26
|
end
|
24
27
|
|
data/lib/rtesseract.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'pathname'
|
3
3
|
require 'tempfile'
|
4
|
+
require 'utils'
|
4
5
|
|
5
6
|
require 'rtesseract/errors'
|
6
7
|
require 'rtesseract/mixed'
|
@@ -17,10 +18,11 @@ require 'processors/none.rb'
|
|
17
18
|
class RTesseract
|
18
19
|
attr_accessor :image_object
|
19
20
|
attr_accessor :options
|
21
|
+
attr_accessor :options_cmd
|
20
22
|
attr_writer :lang
|
21
23
|
attr_writer :psm
|
22
24
|
attr_reader :processor
|
23
|
-
|
25
|
+
attr_reader :source
|
24
26
|
|
25
27
|
OPTIONS = %w(command lang psm processor debug clear_console_output options)
|
26
28
|
# Aliases to languages names
|
@@ -32,8 +34,8 @@ class RTesseract
|
|
32
34
|
}
|
33
35
|
|
34
36
|
def initialize(src = '', options = {})
|
35
|
-
|
36
|
-
@value, @x, @y, @w, @h = [
|
37
|
+
command_line_options(options)
|
38
|
+
@value, @x, @y, @w, @h = [nil]
|
37
39
|
@processor = RTesseract.choose_processor!(@processor)
|
38
40
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
39
41
|
initialize_hook
|
@@ -42,24 +44,17 @@ class RTesseract
|
|
42
44
|
def initialize_hook
|
43
45
|
end
|
44
46
|
|
45
|
-
def fetch_option(options, name, default)
|
46
|
-
options.fetch(name.to_s, options.fetch(name, default))
|
47
|
-
end
|
48
|
-
|
49
47
|
def command_line_options(options)
|
50
|
-
@
|
51
|
-
@
|
52
|
-
@
|
53
|
-
@
|
54
|
-
@
|
55
|
-
@
|
48
|
+
@options = options
|
49
|
+
@command = @options.option(:command, default_command)
|
50
|
+
@lang = @options.option(:lang, '')
|
51
|
+
@psm = @options.option(:psm, nil)
|
52
|
+
@processor = @options.option(:processor, 'rmagick')
|
53
|
+
@debug = @options.option(:debug, false)
|
54
|
+
@options_cmd = @options.option(:options, [])
|
56
55
|
@options_cmd = [@options_cmd] unless @options_cmd.is_a?(Array)
|
57
|
-
|
58
56
|
# Disable clear console if debug mode
|
59
|
-
@clear_console_output = @debug ? false :
|
60
|
-
|
61
|
-
options.delete_if { |k, v| OPTIONS.include?(k.to_s) }
|
62
|
-
options
|
57
|
+
@clear_console_output = @debug ? false : options.option(:clear_console_output, true)
|
63
58
|
end
|
64
59
|
|
65
60
|
def default_command
|
@@ -68,32 +63,30 @@ class RTesseract
|
|
68
63
|
'tesseract'
|
69
64
|
end
|
70
65
|
|
71
|
-
def self.read(src = nil, options = {}
|
66
|
+
def self.read(src = nil, options = {})
|
72
67
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
73
|
-
processor = RTesseract.choose_processor!(options.
|
68
|
+
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
74
69
|
image = processor.read_with_processor(src.to_s)
|
75
|
-
|
76
70
|
yield(image)
|
77
|
-
object = RTesseract.new('', options)
|
78
|
-
object.from_blob(image.to_blob)
|
71
|
+
object = RTesseract.new('', options).from_blob(image.to_blob)
|
79
72
|
object
|
80
73
|
end
|
81
74
|
|
82
|
-
def read
|
75
|
+
def read
|
83
76
|
image = @processor.read_with_processor(@source.to_s)
|
84
77
|
new_image = yield(image)
|
85
|
-
|
78
|
+
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
86
79
|
self
|
87
80
|
end
|
88
81
|
|
89
82
|
def source=(src)
|
90
|
-
@value =
|
83
|
+
@value = nil
|
91
84
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
92
85
|
end
|
93
86
|
|
94
87
|
# Crop image to convert
|
95
88
|
def crop!(x, y, width, height)
|
96
|
-
@value =
|
89
|
+
@value = nil
|
97
90
|
@x, @y, @w, @h = x.to_i, y.to_i, width.to_i, height.to_i
|
98
91
|
self
|
99
92
|
end
|
@@ -110,7 +103,7 @@ class RTesseract
|
|
110
103
|
end
|
111
104
|
true
|
112
105
|
rescue => error
|
113
|
-
raise RTesseract::TempFilesNotRemovedError.new(:
|
106
|
+
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
114
107
|
end
|
115
108
|
|
116
109
|
# Select the language
|
@@ -201,26 +194,27 @@ class RTesseract
|
|
201
194
|
convert_text
|
202
195
|
remove_file([@image, text_file_with_ext])
|
203
196
|
rescue => error
|
204
|
-
raise RTesseract::ConversionError.new(error)
|
197
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
205
198
|
end
|
206
199
|
|
207
200
|
# Read image from memory blob
|
208
201
|
def from_blob(blob, ext = '')
|
209
|
-
blob_file = Tempfile.new(['blob', ext], :
|
210
|
-
blob_file.binmode
|
211
|
-
blob_file.write(blob)
|
202
|
+
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
203
|
+
blob_file.binmode.write(blob)
|
212
204
|
blob_file.rewind
|
213
205
|
blob_file.flush
|
214
206
|
self.source = blob_file.path
|
215
207
|
convert
|
216
208
|
remove_file([blob_file])
|
209
|
+
self
|
217
210
|
rescue => error
|
218
|
-
raise RTesseract::ConversionError.new(error)
|
211
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
219
212
|
end
|
220
213
|
|
221
214
|
# Output value
|
222
215
|
def to_s
|
223
|
-
return @value if @value !=
|
216
|
+
return @value if @value != nil
|
217
|
+
|
224
218
|
if @processor.image?(@source) || @source.file?
|
225
219
|
convert
|
226
220
|
@value
|
@@ -249,4 +243,3 @@ class RTesseract
|
|
249
243
|
processor
|
250
244
|
end
|
251
245
|
end
|
252
|
-
|
data/lib/rtesseract/box.rb
CHANGED
@@ -23,15 +23,13 @@ class RTesseract
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse_file
|
26
|
-
Nokogiri::HTML(File.read(text_file_with_ext))
|
26
|
+
html = Nokogiri::HTML(File.read(text_file_with_ext))
|
27
|
+
html.css('span.ocrx_word, span.ocr_word')
|
27
28
|
end
|
28
29
|
|
29
30
|
def convert_text
|
30
31
|
text_objects = []
|
31
|
-
parse_file.each
|
32
|
-
attributes = word.attributes['title'].value.to_s.gsub(';', '').split(' ')
|
33
|
-
text_objects << { :word => word.text, :x_start => attributes[1].to_i, :y_start => attributes[2].to_i , :x_end => attributes[3].to_i, :y_end => attributes[4].to_i }
|
34
|
-
end
|
32
|
+
parse_file.each { |word| text_objects << BoxParser.new(word).to_h }
|
35
33
|
@value = text_objects
|
36
34
|
end
|
37
35
|
|
@@ -49,5 +47,24 @@ class RTesseract
|
|
49
47
|
fail RTesseract::ImageNotSelectedError.new(@source)
|
50
48
|
end
|
51
49
|
end
|
50
|
+
|
51
|
+
# Parse word data from html.
|
52
|
+
class BoxParser
|
53
|
+
def initialize(word_html)
|
54
|
+
@word = word_html
|
55
|
+
title = @word.attributes['title'].value.to_s
|
56
|
+
@attributes = title.gsub(';', '').split(' ')
|
57
|
+
end
|
58
|
+
|
59
|
+
def to_h
|
60
|
+
{
|
61
|
+
word: @word.text,
|
62
|
+
x_start: @attributes[1].to_i,
|
63
|
+
y_start: @attributes[2].to_i,
|
64
|
+
x_end: @attributes[3].to_i,
|
65
|
+
y_end: @attributes[4].to_i
|
66
|
+
}
|
67
|
+
end
|
68
|
+
end
|
52
69
|
end
|
53
70
|
end
|
data/lib/rtesseract/box_char.rb
CHANGED
@@ -19,8 +19,8 @@ class RTesseract
|
|
19
19
|
def convert_text
|
20
20
|
text_objects = []
|
21
21
|
parse_file.each_line do |line|
|
22
|
-
char, x_start, y_start, x_end, y_end,
|
23
|
-
text_objects << { :
|
22
|
+
char, x_start, y_start, x_end, y_end, _word = line.split(' ')
|
23
|
+
text_objects << { char: char, x_start: x_start.to_i, y_start: y_start.to_i, x_end: x_end.to_i, y_end: y_end.to_i }
|
24
24
|
end
|
25
25
|
@value = text_objects
|
26
26
|
end
|
data/lib/rtesseract/mixed.rb
CHANGED
@@ -14,7 +14,7 @@ class RTesseract
|
|
14
14
|
|
15
15
|
def area(x, y, width, height)
|
16
16
|
@value = ''
|
17
|
-
@areas << { :
|
17
|
+
@areas << { x: x, y: y, width: width, height: height }
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear_areas
|
@@ -29,7 +29,7 @@ class RTesseract
|
|
29
29
|
@value << image.to_s
|
30
30
|
end
|
31
31
|
rescue => error
|
32
|
-
raise RTesseract::ConversionError.new(error)
|
32
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
33
33
|
end
|
34
34
|
|
35
35
|
# Output value
|
data/lib/utils.rb
ADDED
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 1.3.
|
5
|
+
# stub: rtesseract 1.3.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "1.3.
|
9
|
+
s.version = "1.3.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2015-
|
14
|
+
s.date = "2015-10-07"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.files = [
|
22
22
|
".document",
|
23
23
|
".rspec",
|
24
|
-
".travis.sh",
|
25
24
|
".travis.yml",
|
26
25
|
"Gemfile",
|
27
26
|
"Gemfile.lock",
|
@@ -38,7 +37,10 @@ Gem::Specification.new do |s|
|
|
38
37
|
"lib/rtesseract/box_char.rb",
|
39
38
|
"lib/rtesseract/errors.rb",
|
40
39
|
"lib/rtesseract/mixed.rb",
|
40
|
+
"lib/utils.rb",
|
41
41
|
"rtesseract.gemspec",
|
42
|
+
"spec/images/README.pdf",
|
43
|
+
"spec/images/blank.tif",
|
42
44
|
"spec/images/mixed.tif",
|
43
45
|
"spec/images/orientation_reverse.png",
|
44
46
|
"spec/images/test with spaces.tif",
|
@@ -56,7 +58,7 @@ Gem::Specification.new do |s|
|
|
56
58
|
]
|
57
59
|
s.homepage = "http://github.com/dannnylo/rtesseract"
|
58
60
|
s.licenses = ["MIT"]
|
59
|
-
s.rubygems_version = "2.
|
61
|
+
s.rubygems_version = "2.4.3"
|
60
62
|
s.summary = "Ruby library for working with the Tesseract OCR."
|
61
63
|
|
62
64
|
if s.respond_to? :specification_version then
|
Binary file
|
Binary file
|
@@ -1,82 +1,82 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::BoxChar' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb','')).expand_path
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
|
7
7
|
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
8
|
@words_image = @path.join('images', 'test_words.png').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
it "bounding box by char" do
|
11
|
+
it 'bounding box by char' do
|
13
12
|
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
14
13
|
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
15
|
-
{:
|
16
|
-
{:
|
17
|
-
{:
|
18
|
-
{:
|
19
|
-
|
14
|
+
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
15
|
+
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
16
|
+
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
17
|
+
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
18
|
+
|
20
19
|
expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
|
21
|
-
{:
|
22
|
-
{:
|
23
|
-
{:
|
24
|
-
{:
|
25
|
-
{:
|
26
|
-
{:
|
27
|
-
{:
|
28
|
-
{:
|
29
|
-
{:
|
30
|
-
{:
|
31
|
-
{:
|
32
|
-
{:
|
33
|
-
{:
|
34
|
-
{:
|
35
|
-
{:
|
36
|
-
{:
|
37
|
-
{:
|
38
|
-
{:
|
39
|
-
{:
|
40
|
-
{:
|
41
|
-
{:
|
42
|
-
{:
|
43
|
-
{:
|
44
|
-
{:
|
45
|
-
{:
|
46
|
-
{:
|
47
|
-
{:
|
48
|
-
{:
|
49
|
-
{:
|
50
|
-
{:
|
51
|
-
{:
|
52
|
-
{:
|
53
|
-
{:
|
54
|
-
{:
|
55
|
-
{:
|
56
|
-
{:
|
57
|
-
{:
|
58
|
-
{:
|
59
|
-
{:
|
60
|
-
{:
|
61
|
-
{:
|
62
|
-
{:
|
63
|
-
{:
|
64
|
-
{:
|
65
|
-
{:
|
66
|
-
{:
|
67
|
-
{:
|
68
|
-
{:
|
69
|
-
{:
|
70
|
-
{:
|
71
|
-
{:
|
72
|
-
{:
|
73
|
-
{:
|
74
|
-
{:
|
75
|
-
{:
|
76
|
-
{:
|
77
|
-
|
20
|
+
{ char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
|
21
|
+
{ char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
|
22
|
+
{ char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
|
23
|
+
{ char: 'o', x_start: 80, y_start: 91, x_end: 90, y_end: 101 },
|
24
|
+
{ char: 'u', x_start: 92, y_start: 91, x_end: 100, y_end: 101 },
|
25
|
+
{ char: 'a', x_start: 108, y_start: 91, x_end: 116, y_end: 101 },
|
26
|
+
{ char: 'r', x_start: 119, y_start: 91, x_end: 125, y_end: 101 },
|
27
|
+
{ char: 'e', x_start: 126, y_start: 91, x_end: 136, y_end: 101 },
|
28
|
+
{ char: 'a', x_start: 143, y_start: 91, x_end: 151, y_end: 101 },
|
29
|
+
{ char: 'f', x_start: 158, y_start: 91, x_end: 165, y_end: 105 },
|
30
|
+
{ char: 'r', x_start: 166, y_start: 91, x_end: 172, y_end: 101 },
|
31
|
+
{ char: 'i', x_start: 174, y_start: 91, x_end: 176, y_end: 105 },
|
32
|
+
{ char: 'e', x_start: 178, y_start: 91, x_end: 188, y_end: 101 },
|
33
|
+
{ char: 'n', x_start: 190, y_start: 91, x_end: 198, y_end: 101 },
|
34
|
+
{ char: 'd', x_start: 200, y_start: 91, x_end: 209, y_end: 105 },
|
35
|
+
{ char: ',', x_start: 211, y_start: 89, x_end: 214, y_end: 93 },
|
36
|
+
{ char: 'y', x_start: 51, y_start: 65, x_end: 61, y_end: 79 },
|
37
|
+
{ char: 'o', x_start: 62, y_start: 69, x_end: 72, y_end: 79 },
|
38
|
+
{ char: 'u', x_start: 74, y_start: 69, x_end: 82, y_end: 79 },
|
39
|
+
{ char: 's', x_start: 90, y_start: 69, x_end: 97, y_end: 79 },
|
40
|
+
{ char: 'p', x_start: 99, y_start: 65, x_end: 108, y_end: 79 },
|
41
|
+
{ char: 'e', x_start: 109, y_start: 69, x_end: 119, y_end: 79 },
|
42
|
+
{ char: 'a', x_start: 120, y_start: 69, x_end: 128, y_end: 79 },
|
43
|
+
{ char: 'k', x_start: 131, y_start: 69, x_end: 140, y_end: 83 },
|
44
|
+
{ char: 't', x_start: 146, y_start: 69, x_end: 152, y_end: 82 },
|
45
|
+
{ char: 'h', x_start: 154, y_start: 69, x_end: 162, y_end: 83 },
|
46
|
+
{ char: 'e', x_start: 164, y_start: 69, x_end: 174, y_end: 79 },
|
47
|
+
{ char: 'p', x_start: 182, y_start: 65, x_end: 191, y_end: 79 },
|
48
|
+
{ char: 'a', x_start: 192, y_start: 69, x_end: 200, y_end: 79 },
|
49
|
+
{ char: 's', x_start: 202, y_start: 69, x_end: 209, y_end: 79 },
|
50
|
+
{ char: 's', x_start: 210, y_start: 69, x_end: 217, y_end: 79 },
|
51
|
+
{ char: 'w', x_start: 219, y_start: 69, x_end: 232, y_end: 79 },
|
52
|
+
{ char: 'o', x_start: 234, y_start: 69, x_end: 244, y_end: 79 },
|
53
|
+
{ char: 'r', x_start: 246, y_start: 69, x_end: 252, y_end: 79 },
|
54
|
+
{ char: 'd', x_start: 253, y_start: 69, x_end: 262, y_end: 83 },
|
55
|
+
{ char: ',', x_start: 264, y_start: 67, x_end: 267, y_end: 71 },
|
56
|
+
{ char: 'a', x_start: 51, y_start: 47, x_end: 59, y_end: 57 },
|
57
|
+
{ char: 'n', x_start: 62, y_start: 47, x_end: 70, y_end: 57 },
|
58
|
+
{ char: 'd', x_start: 72, y_start: 47, x_end: 81, y_end: 61 },
|
59
|
+
{ char: 't', x_start: 89, y_start: 47, x_end: 95, y_end: 60 },
|
60
|
+
{ char: 'h', x_start: 97, y_start: 47, x_end: 105, y_end: 61 },
|
61
|
+
{ char: 'e', x_start: 107, y_start: 47, x_end: 117, y_end: 57 },
|
62
|
+
{ char: 'd', x_start: 124, y_start: 47, x_end: 133, y_end: 61 },
|
63
|
+
{ char: 'o', x_start: 135, y_start: 47, x_end: 145, y_end: 57 },
|
64
|
+
{ char: 'o', x_start: 146, y_start: 47, x_end: 156, y_end: 57 },
|
65
|
+
{ char: 'r', x_start: 158, y_start: 47, x_end: 164, y_end: 57 },
|
66
|
+
{ char: 's', x_start: 165, y_start: 47, x_end: 172, y_end: 57 },
|
67
|
+
{ char: 'w', x_start: 180, y_start: 47, x_end: 193, y_end: 57 },
|
68
|
+
{ char: 'i', x_start: 196, y_start: 47, x_end: 198, y_end: 61 },
|
69
|
+
{ char: 'l', x_start: 201, y_start: 47, x_end: 203, y_end: 61 },
|
70
|
+
{ char: 'l', x_start: 206, y_start: 47, x_end: 208, y_end: 61 },
|
71
|
+
{ char: 'o', x_start: 216, y_start: 47, x_end: 226, y_end: 57 },
|
72
|
+
{ char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
|
73
|
+
{ char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
|
74
|
+
{ char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
|
75
|
+
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }])
|
76
|
+
|
77
|
+
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
|
+
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
78
79
|
|
79
|
-
expect
|
80
|
-
expect{RTesseract::BoxChar.new(@image_tiff + "_not_exist").to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
80
|
+
#expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
81
81
|
end
|
82
82
|
end
|
data/spec/rtesseract_box_spec.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::Box' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub(
|
7
|
-
@image_tiff = @path.join(
|
8
|
-
@words_image = @path.join(
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_box_spec.rb', '')).expand_path
|
7
|
+
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
|
+
@words_image = @path.join('images', 'test_words.png').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
it "bounding box" do
|
11
|
+
it 'bounding box' do
|
13
12
|
expect(RTesseract.new(@words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
|
14
13
|
expect(RTesseract::Box.new(@words_image).words).to eql([
|
15
|
-
{:
|
16
|
-
{:
|
17
|
-
{:
|
18
|
-
{:
|
19
|
-
{:
|
20
|
-
{:
|
21
|
-
{:
|
22
|
-
{:
|
23
|
-
{:
|
24
|
-
{:
|
25
|
-
{:
|
26
|
-
{:
|
27
|
-
{:
|
28
|
-
{:
|
14
|
+
{ word: 'If', x_start: 52, y_start: 13, x_end: 63, y_end: 27 },
|
15
|
+
{ word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31 },
|
16
|
+
{ word: 'are', x_start: 108, y_start: 17, x_end: 136, y_end: 27 },
|
17
|
+
{ word: 'a', x_start: 143, y_start: 17, x_end: 151, y_end: 27 },
|
18
|
+
{ word: 'friend,', x_start: 158, y_start: 13, x_end: 214, y_end: 29 },
|
19
|
+
{ word: 'you', x_start: 51, y_start: 39, x_end: 82, y_end: 53 },
|
20
|
+
{ word: 'speak', x_start: 90, y_start: 35, x_end: 140, y_end: 53 },
|
21
|
+
{ word: 'the', x_start: 146, y_start: 35, x_end: 174, y_end: 49 },
|
22
|
+
{ word: 'password,', x_start: 182, y_start: 35, x_end: 267, y_end: 53 },
|
23
|
+
{ word: 'and', x_start: 51, y_start: 57, x_end: 81, y_end: 71 },
|
24
|
+
{ word: 'the', x_start: 89, y_start: 57, x_end: 117, y_end: 71 },
|
25
|
+
{ word: 'doors', x_start: 124, y_start: 57, x_end: 172, y_end: 71 },
|
26
|
+
{ word: 'will', x_start: 180, y_start: 57, x_end: 208, y_end: 71 },
|
27
|
+
{ word: 'open.', x_start: 216, y_start: 61, x_end: 263, y_end: 75 }
|
29
28
|
])
|
30
29
|
|
31
30
|
expect(RTesseract::Box.new(@image_tiff).words.is_a?(Array)).to eql(true)
|
32
|
-
expect(RTesseract::Box.new(@words_image).to_s).to eql(
|
33
|
-
expect{RTesseract::Box.new(@image_tiff,
|
34
|
-
expect{RTesseract::Box.new(@image_tiff +
|
31
|
+
expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
|
32
|
+
expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
33
|
+
expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
34
|
+
|
35
|
+
#expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
35
36
|
end
|
36
37
|
end
|
@@ -1,46 +1,49 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::Mixed' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub(
|
7
|
-
@image_tif = @path.join(
|
8
|
-
@image2_tif = @path.join(
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_mixed_spec.rb', '')).expand_path
|
7
|
+
@image_tif = @path.join('images', 'mixed.tif').to_s
|
8
|
+
@image2_tif = @path.join('images', 'mixed2.tif').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
it
|
11
|
+
it 'should be instantiable' do
|
12
12
|
expect(RTesseract::Mixed.new.class).to eql(RTesseract::Mixed)
|
13
13
|
expect(RTesseract::Mixed.new(@image_tif).class).to eql(RTesseract::Mixed)
|
14
14
|
end
|
15
15
|
|
16
|
-
it
|
17
|
-
mix_block = RTesseract::Mixed.new(@image_tif,
|
18
|
-
image.area(28, 19, 25, 25) #position of 4
|
16
|
+
it 'should translate parts of the image to text' do
|
17
|
+
mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
|
18
|
+
image.area(28, 19, 25, 25) # position of 4
|
19
19
|
image.area(180, 22, 20, 28) # position of 3
|
20
|
-
image.area(218, 22, 24, 28) # position of
|
21
|
-
image.area(248, 24, 22, 22) # position of
|
20
|
+
image.area(218, 22, 24, 28) # position of F
|
21
|
+
image.area(248, 24, 22, 22) # position of F
|
22
22
|
end
|
23
|
-
expect(mix_block.to_s_without_spaces).to eql(
|
23
|
+
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
24
24
|
mix_block.clear_areas
|
25
25
|
expect(mix_block.areas).to eql([])
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
27
|
+
@areas = []
|
28
|
+
@areas << { x: 28, y: 19, width: 25, height: 25 } # position of 4
|
29
|
+
@areas << { x: 180, y: 22, width: 20, height: 28 } # position of 3
|
30
|
+
@areas << { x: 218, y: 22, width: 24, height: 28 } # position of f
|
31
|
+
@areas << { x: 248, y: 24, width: 22, height: 22 } # position of f
|
32
|
+
|
33
|
+
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
|
34
|
+
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
35
|
+
|
36
|
+
mix_block = RTesseract::Mixed.new(@path.join('images', 'blank.tif').to_s, areas: @areas, psm: 7)
|
37
|
+
expect(mix_block.to_s_without_spaces).to eql('')
|
34
38
|
end
|
35
39
|
|
36
|
-
it
|
37
|
-
|
38
|
-
],:psm=>7})
|
39
|
-
expect{ mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
40
|
+
it ' get a error' do
|
41
|
+
@areas = [{ x: 28, y: 19, width: 25, height: 25 }]
|
40
42
|
|
43
|
+
mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
44
|
+
expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
41
45
|
|
42
|
-
mix_block = RTesseract::Mixed.new(@image_tif,
|
43
|
-
|
44
|
-
expect{ mix_block.to_s }.to raise_error(RTesseract::ConversionError)
|
46
|
+
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
|
47
|
+
expect { mix_block.to_s }.to raise_error(RTesseract::ConversionError)
|
45
48
|
end
|
46
49
|
end
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -1,121 +1,130 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
# encoding: UTF-8
|
3
3
|
require 'pathname'
|
4
|
+
|
5
|
+
# Class to rise error
|
4
6
|
class MakeStringError
|
5
7
|
def to_s
|
6
|
-
|
8
|
+
fail 'error'
|
7
9
|
end
|
8
10
|
end
|
9
11
|
|
10
|
-
describe
|
12
|
+
describe 'Rtesseract' do
|
11
13
|
before do
|
12
|
-
@path = Pathname.new(__FILE__.gsub(
|
13
|
-
@image_tif = @path.join(
|
14
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
|
15
|
+
@image_tif = @path.join('images', 'test.tif').to_s
|
14
16
|
end
|
15
17
|
|
16
|
-
it
|
18
|
+
it ' be instantiable' do
|
17
19
|
expect(RTesseract.new.class).to eql(RTesseract)
|
18
|
-
expect(RTesseract.new(
|
20
|
+
expect(RTesseract.new('').class).to eql(RTesseract)
|
19
21
|
expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
|
20
22
|
end
|
21
23
|
|
22
|
-
it
|
23
|
-
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql(
|
24
|
-
expect(RTesseract.new(@image_tif,
|
25
|
-
expect(RTesseract.new(@path.join(
|
26
|
-
expect(RTesseract.new(@path.join(
|
24
|
+
it ' translate image to text' do
|
25
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
|
26
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
27
|
+
expect(RTesseract.new(@path.join('images', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
|
28
|
+
expect(RTesseract.new(@path.join('images', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
|
29
|
+
end
|
27
30
|
|
31
|
+
it ' translate images .png, .jpg, .bmp' do
|
32
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
|
33
|
+
expect(RTesseract.new(@path.join('images', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
|
34
|
+
expect(RTesseract.new(@path.join('images', 'test.bmp').to_s).to_s_without_spaces).to eql('FLA6')
|
28
35
|
end
|
29
36
|
|
30
|
-
it
|
31
|
-
expect(RTesseract.new(@path.join(
|
32
|
-
expect(RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces).to eql("3R8F")
|
33
|
-
expect(RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces).to eql("FLA6")
|
37
|
+
it ' should not error with depth > 32' do
|
38
|
+
#expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
34
39
|
end
|
35
40
|
|
36
|
-
it
|
37
|
-
#Rmagick
|
38
|
-
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql(
|
39
|
-
expect(RTesseract.new(@image_tif, :
|
40
|
-
expect(RTesseract.new(@path.join(
|
41
|
+
it ' support diferent processors' do
|
42
|
+
# Rmagick
|
43
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
|
44
|
+
expect(RTesseract.new(@image_tif, processor: 'rmagick').to_s_without_spaces).to eql('43XF')
|
45
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'rmagick').to_s_without_spaces).to eql('HW9W')
|
41
46
|
|
42
|
-
#MiniMagick
|
43
|
-
expect(RTesseract.new(@image_tif, :
|
44
|
-
expect(RTesseract.new(@path.join(
|
47
|
+
# MiniMagick
|
48
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
49
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
|
45
50
|
|
46
|
-
#QuickMagick
|
47
|
-
expect(RTesseract.new(@image_tif, :
|
48
|
-
expect(RTesseract.new(@path.join(
|
51
|
+
# QuickMagick
|
52
|
+
expect(RTesseract.new(@image_tif, processor: 'quick_magick').to_s_without_spaces).to eql('43XF')
|
53
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'quick_magick').to_s_without_spaces).to eql('HW9W')
|
49
54
|
|
50
|
-
#NoneMagick
|
51
|
-
expect(RTesseract.new(@image_tif, :
|
55
|
+
# NoneMagick
|
56
|
+
expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
|
52
57
|
end
|
53
58
|
|
54
|
-
it
|
59
|
+
it ' change the image' do
|
55
60
|
image = RTesseract.new(@image_tif)
|
56
|
-
expect(image.to_s_without_spaces).to eql(
|
57
|
-
image.source = @path.join(
|
58
|
-
expect(image.to_s_without_spaces).to eql(
|
61
|
+
expect(image.to_s_without_spaces).to eql('43XF')
|
62
|
+
image.source = @path.join('images', 'test1.tif').to_s
|
63
|
+
expect(image.to_s_without_spaces).to eql('V2V4')
|
59
64
|
end
|
60
65
|
|
61
|
-
it
|
62
|
-
|
63
|
-
expect(
|
64
|
-
|
65
|
-
expect(RTesseract.new(@image_tif,{:lang=>"en-US"}).lang).to eql(" -l eng ")
|
66
|
-
expect(RTesseract.new(@image_tif,{:lang=>"english"}).lang).to eql(" -l eng ")
|
66
|
+
it ' returns the source' do
|
67
|
+
image = RTesseract.new(@image_tif)
|
68
|
+
expect(image.source).to eql(Pathname.new(@image_tif))
|
69
|
+
end
|
67
70
|
|
68
|
-
|
69
|
-
|
70
|
-
expect(RTesseract.new(@image_tif,
|
71
|
-
expect(RTesseract.new(@image_tif,
|
72
|
-
expect(RTesseract.new(@image_tif,
|
73
|
-
expect(RTesseract.new(@image_tif,
|
71
|
+
it ' select the language' do
|
72
|
+
# English
|
73
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
74
|
+
expect(RTesseract.new(@image_tif, lang: 'en').lang).to eql(' -l eng ')
|
75
|
+
expect(RTesseract.new(@image_tif, lang: 'en-US').lang).to eql(' -l eng ')
|
76
|
+
expect(RTesseract.new(@image_tif, lang: 'english').lang).to eql(' -l eng ')
|
74
77
|
|
75
|
-
|
76
|
-
|
77
|
-
expect(RTesseract.new(@image_tif,
|
78
|
+
# Portuguese
|
79
|
+
expect(RTesseract.new(@image_tif, lang: 'por').lang).to eql(' -l por ')
|
80
|
+
expect(RTesseract.new(@image_tif, lang: 'pt-BR').lang).to eql(' -l por ')
|
81
|
+
expect(RTesseract.new(@image_tif, lang: 'pt-br').lang).to eql(' -l por ')
|
82
|
+
expect(RTesseract.new(@image_tif, lang: 'pt').lang).to eql(' -l por ')
|
83
|
+
expect(RTesseract.new(@image_tif, lang: 'portuguese').lang).to eql(' -l por ')
|
78
84
|
|
79
|
-
|
80
|
-
expect(RTesseract.new(@image_tif,{:lang=>MakeStringError.new}).lang).to eql("")
|
81
|
-
end
|
85
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
|
82
86
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
|
87
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
88
|
+
|
89
|
+
# Invalid lang object
|
90
|
+
expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
|
88
91
|
end
|
89
92
|
|
90
|
-
it
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
93
|
+
it ' select options' do
|
94
|
+
expect(RTesseract.new(@image_tif).options_cmd).to eql([])
|
95
|
+
expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
|
96
|
+
expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
|
97
|
+
expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
|
95
98
|
end
|
96
99
|
|
97
|
-
it
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
it ' be configurable' do
|
101
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
|
102
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
|
103
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
|
104
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
|
102
105
|
end
|
103
106
|
|
107
|
+
it ' crop image' do
|
108
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(140, 10, 36, 40).to_s_without_spaces).to eql('4')
|
109
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(180, 10, 36, 40).to_s_without_spaces).to eql('3')
|
110
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(216, 10, 20, 40).to_s_without_spaces).to eql('X')
|
111
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(240, 10, 30, 40).to_s_without_spaces).to eql('F')
|
112
|
+
end
|
104
113
|
|
105
|
-
it
|
106
|
-
image = Magick::Image.read(@path.join(
|
107
|
-
blob = image.quantize(256,Magick::GRAYColorspace).to_blob
|
114
|
+
it ' read image from blob' do
|
115
|
+
image = Magick::Image.read(@path.join('images', 'test.png').to_s).first
|
116
|
+
blob = image.quantize(256, Magick::GRAYColorspace).to_blob
|
108
117
|
|
109
|
-
test = RTesseract.new(
|
118
|
+
test = RTesseract.new('', psm: 7)
|
110
119
|
test.from_blob(blob)
|
111
|
-
expect(test.to_s_without_spaces).to eql(
|
120
|
+
expect(test.to_s_without_spaces).to eql('HW9W')
|
112
121
|
|
113
|
-
test = RTesseract.new(
|
114
|
-
expect{test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
122
|
+
test = RTesseract.new('', psm: 7)
|
123
|
+
expect { test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
115
124
|
end
|
116
125
|
|
117
|
-
it
|
118
|
-
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql(
|
126
|
+
it ' use a instance' do
|
127
|
+
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
|
119
128
|
expect(RMagickProcessor.a_name?('teste')).to eql(false)
|
120
129
|
expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
|
121
130
|
expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
@@ -129,46 +138,50 @@ describe "Rtesseract" do
|
|
129
138
|
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
130
139
|
end
|
131
140
|
|
132
|
-
it
|
133
|
-
test = RTesseract.read(@path.join(
|
141
|
+
it ' change image in a block' do
|
142
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) {}
|
134
143
|
expect(test.class).to eql(RTesseract)
|
135
144
|
|
136
145
|
test = RTesseract.new(@image_tif)
|
137
146
|
test.read do |image|
|
138
147
|
image = image.quantize(256, Magick::GRAYColorspace)
|
139
148
|
end
|
140
|
-
expect(test.to_s_without_spaces).to eql(
|
149
|
+
expect(test.to_s_without_spaces).to eql('43XF')
|
150
|
+
|
151
|
+
test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
|
152
|
+
test.read do |image|
|
153
|
+
image
|
154
|
+
end
|
155
|
+
expect(test.to_s_without_spaces).to eql('')
|
141
156
|
|
142
|
-
test = RTesseract.read(@path.join(
|
143
|
-
#image = image.white_threshold(245)
|
144
|
-
#image = image.quantize(256,Magick::GRAYColorspace)
|
157
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |image|
|
145
158
|
image.rotate(90)
|
146
159
|
end
|
147
|
-
expect(test.to_s_without_spaces).to eql(
|
160
|
+
expect(test.to_s_without_spaces).to eql('HW9W')
|
148
161
|
|
149
|
-
test = RTesseract.read(@path.join(
|
162
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |image|
|
150
163
|
image = image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
151
164
|
end
|
152
|
-
expect(test.to_s_without_spaces).to eql(
|
165
|
+
expect(test.to_s_without_spaces).to eql('3R8F')
|
153
166
|
|
154
|
-
test = RTesseract.read(@path.join(
|
155
|
-
image.gravity
|
167
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |image|
|
168
|
+
image.gravity 'south'
|
156
169
|
end
|
157
|
-
expect(test.to_s_without_spaces).to eql(
|
170
|
+
expect(test.to_s_without_spaces).to eql('3R8F')
|
158
171
|
end
|
159
172
|
|
160
|
-
it
|
161
|
-
expect{ RTesseract.new(@path.join(
|
162
|
-
expect{ RTesseract.new(@path.join(
|
173
|
+
it ' get a error' do
|
174
|
+
expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
175
|
+
expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
163
176
|
|
164
|
-
#
|
165
|
-
expect(RTesseract.new(@image_tif,
|
177
|
+
# Invalid psm object
|
178
|
+
expect(RTesseract.new(@image_tif, psm: MakeStringError.new).psm).to eql('')
|
166
179
|
end
|
167
180
|
|
168
|
-
it
|
181
|
+
it 'remove a file' do
|
169
182
|
rtesseract = RTesseract.new('.')
|
170
183
|
rtesseract.remove_file(Tempfile.new('config'))
|
171
184
|
|
172
|
-
expect{ rtesseract.remove_file(Pathname.new(Dir.tmpdir).join(
|
185
|
+
expect { rtesseract.remove_file(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
|
173
186
|
end
|
174
187
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,17 +3,16 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
require 'rspec'
|
5
5
|
require 'coveralls'
|
6
|
-
require
|
6
|
+
require 'simplecov'
|
7
7
|
SimpleCov.start do
|
8
|
-
add_filter
|
8
|
+
add_filter '/spec/'
|
9
9
|
end
|
10
10
|
Coveralls.wear!
|
11
11
|
|
12
12
|
require 'rtesseract'
|
13
13
|
# Requires supporting files with custom matchers and macros, etc,
|
14
14
|
# in ./support/ and its subdirectories.
|
15
|
-
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
16
|
-
|
17
|
-
RSpec.configure do |config|
|
15
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
18
16
|
|
17
|
+
RSpec.configure do # |config|
|
19
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -118,7 +118,6 @@ extra_rdoc_files:
|
|
118
118
|
files:
|
119
119
|
- ".document"
|
120
120
|
- ".rspec"
|
121
|
-
- ".travis.sh"
|
122
121
|
- ".travis.yml"
|
123
122
|
- Gemfile
|
124
123
|
- Gemfile.lock
|
@@ -135,7 +134,10 @@ files:
|
|
135
134
|
- lib/rtesseract/box_char.rb
|
136
135
|
- lib/rtesseract/errors.rb
|
137
136
|
- lib/rtesseract/mixed.rb
|
137
|
+
- lib/utils.rb
|
138
138
|
- rtesseract.gemspec
|
139
|
+
- spec/images/README.pdf
|
140
|
+
- spec/images/blank.tif
|
139
141
|
- spec/images/mixed.tif
|
140
142
|
- spec/images/orientation_reverse.png
|
141
143
|
- spec/images/test with spaces.tif
|
@@ -170,7 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
172
|
version: '0'
|
171
173
|
requirements: []
|
172
174
|
rubyforge_project:
|
173
|
-
rubygems_version: 2.
|
175
|
+
rubygems_version: 2.4.3
|
174
176
|
signing_key:
|
175
177
|
specification_version: 4
|
176
178
|
summary: Ruby library for working with the Tesseract OCR.
|
data/.travis.sh
DELETED