rtesseract 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +7 -4
- data/Gemfile +9 -10
- data/README.rdoc +27 -8
- data/Rakefile +12 -12
- data/VERSION +1 -1
- data/lib/processors/none.rb +2 -2
- data/lib/processors/rmagick.rb +9 -6
- data/lib/rtesseract.rb +28 -35
- data/lib/rtesseract/box.rb +22 -5
- data/lib/rtesseract/box_char.rb +2 -2
- data/lib/rtesseract/mixed.rb +2 -2
- data/lib/utils.rb +5 -0
- data/rtesseract.gemspec +7 -5
- data/spec/images/README.pdf +0 -0
- data/spec/images/blank.tif +0 -0
- data/spec/rtesseract_box_char_spec.rb +68 -68
- data/spec/rtesseract_box_spec.rb +24 -23
- data/spec/rtesseract_mixed_spec.rb +28 -25
- data/spec/rtesseract_spec.rb +106 -93
- data/spec/spec_helper.rb +4 -5
- metadata +6 -4
- data/.travis.sh +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c5a4dc5260f5eee845925b59f89fa712466f2a6
|
4
|
+
data.tar.gz: 21b1ad6d79f1d1f82483c46c199eb4d6fd06120e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a78a082daf437c8e7e20313973e93a7e04f2dec161c46d2bf7d37cce86718001d45b12995c12d63930f26ef93ab3c492e0413a6be64d30129ede2fa8f689276
|
7
|
+
data.tar.gz: b622c914be59d0f3fdfae141f5bdfdaadfb53fa22d03b4497cf20a1b12a689fef340e2446b67112565ca1d5510782dc3f3925614ee978e7ba2b9c2dc2d977e03
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,20 +1,19 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
# Add dependencies to develop your gem here.
|
3
3
|
# Include everything needed to run rake, tests, features, etc.
|
4
4
|
gem 'nokogiri'
|
5
5
|
|
6
6
|
group :development do
|
7
|
-
gem
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
11
|
-
gem
|
7
|
+
gem 'rspec'
|
8
|
+
gem 'rdoc'
|
9
|
+
gem 'bundler'
|
10
|
+
gem 'jeweler', '~> 2.0.1'
|
11
|
+
gem 'simplecov'
|
12
12
|
gem 'coveralls', require: false
|
13
13
|
end
|
14
14
|
|
15
15
|
group :test do
|
16
|
-
gem
|
17
|
-
gem
|
18
|
-
gem
|
16
|
+
gem 'rmagick'
|
17
|
+
gem 'mini_magick'
|
18
|
+
gem 'quick_magick'
|
19
19
|
end
|
20
|
-
|
data/README.rdoc
CHANGED
@@ -60,8 +60,8 @@ It's very simple to use rtesseract:
|
|
60
60
|
mix_block.to_s
|
61
61
|
|
62
62
|
=== OPTIONS
|
63
|
-
|
64
|
-
Processors Options (_Rmagick_ is default)
|
63
|
+
|
64
|
+
Processors Options (_Rmagick_ is default)
|
65
65
|
|
66
66
|
RTesseract.new("test.jpg", :processor => "mini_magick")
|
67
67
|
RTesseract.new("test.jpg", :processor => "quick_magick")
|
@@ -70,7 +70,7 @@ Processors Options (_Rmagick_ is default)
|
|
70
70
|
|
71
71
|
RTesseract.new("test.jpg", :processor => "none")
|
72
72
|
|
73
|
-
Language Options
|
73
|
+
Language Options
|
74
74
|
|
75
75
|
RTesseract.new("test.jpg", :lang => "deu")
|
76
76
|
* eng - English
|
@@ -83,18 +83,37 @@ Language Options
|
|
83
83
|
* spa - Spanish
|
84
84
|
* vie - Vietnamese
|
85
85
|
Note: Make sure you have installed the language to tesseract
|
86
|
-
|
86
|
+
|
87
87
|
Other Options
|
88
88
|
|
89
89
|
RTesseract.new("test.jpg", options: :digits) # Only digit recognition
|
90
|
-
|
90
|
+
|
91
91
|
OR
|
92
|
-
|
92
|
+
|
93
93
|
RTesseract.new("test.jpg", options: [:digits, :quiet])
|
94
|
-
|
94
|
+
|
95
|
+
=== BOUNDING BOX: TO GET WORDS WITH THEIR POSITIONS
|
96
|
+
|
97
|
+
RTesseract::Box.new('test_words.png').words
|
98
|
+
# => [
|
99
|
+
# {:word => 'If', :x_start=>52, :y_start=>13, :x_end=>63, :y_end=>27},
|
100
|
+
# {:word => 'you', :x_start=>69, :y_start=>17, :x_end=>100, :y_end=>31},
|
101
|
+
# {:word => 'are', :x_start=>108, :y_start=>17, :x_end=>136, :y_end=>27},
|
102
|
+
# {:word => 'a', :x_start=>143, :y_start=>17, :x_end=>151, :y_end=>27},
|
103
|
+
# {:word => 'friend,', :x_start=>158, :y_start=>13, :x_end=>214, :y_end=>29},
|
104
|
+
# {:word => 'you', :x_start=>51, :y_start=>39, :x_end=>82, :y_end=>53},
|
105
|
+
# {:word => 'speak', :x_start=>90, :y_start=>35, :x_end=>140, :y_end=>53},
|
106
|
+
# {:word => 'the', :x_start=>146, :y_start=>35, :x_end=>174, :y_end=>49},
|
107
|
+
# {:word => 'password,', :x_start=>182, :y_start=>35, :x_end=>267, :y_end=>53},
|
108
|
+
# {:word => 'and', :x_start=>51, :y_start=>57, :x_end=>81, :y_end=>71},
|
109
|
+
# {:word => 'the', :x_start=>89, :y_start=>57, :x_end=>117, :y_end=>71},
|
110
|
+
# {:word => 'doors', :x_start=>124, :y_start=>57, :x_end=>172, :y_end=>71},
|
111
|
+
# {:word => 'will', :x_start=>180, :y_start=>57, :x_end=>208, :y_end=>71},
|
112
|
+
# {:word => 'open.', :x_start=>216, :y_start=>61, :x_end=>263, :y_end=>75}
|
113
|
+
# ]
|
95
114
|
|
96
115
|
== Contributing to rtesseract
|
97
|
-
|
116
|
+
|
98
117
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
99
118
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
100
119
|
* Fork the project.
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
Bundler.setup(:default, :development)
|
7
7
|
rescue Bundler::BundlerError => e
|
8
8
|
$stderr.puts e.message
|
9
|
-
$stderr.puts
|
9
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
10
10
|
exit e.status_code
|
11
11
|
end
|
12
12
|
require 'rake'
|
@@ -14,13 +14,13 @@ require 'rake'
|
|
14
14
|
require 'jeweler'
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
17
|
-
gem.name =
|
18
|
-
gem.homepage =
|
19
|
-
gem.license =
|
20
|
-
gem.summary =
|
21
|
-
gem.description =
|
22
|
-
gem.email =
|
23
|
-
gem.authors = [
|
17
|
+
gem.name = 'rtesseract'
|
18
|
+
gem.homepage = 'http://github.com/dannnylo/rtesseract'
|
19
|
+
gem.license = 'MIT'
|
20
|
+
gem.summary = 'Ruby library for working with the Tesseract OCR.'
|
21
|
+
gem.description = 'Ruby library for working with the Tesseract OCR.'
|
22
|
+
gem.email = 'dannnylo@gmail.com'
|
23
|
+
gem.authors = ['Danilo Jeremias da Silva']
|
24
24
|
# dependencies defined in Gemfile
|
25
25
|
end
|
26
26
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -31,17 +31,17 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
31
31
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
32
|
end
|
33
33
|
|
34
|
-
desc
|
34
|
+
desc 'Code coverage detail'
|
35
35
|
task :simplecov do
|
36
|
-
ENV['COVERAGE'] =
|
36
|
+
ENV['COVERAGE'] = 'true'
|
37
37
|
Rake::Task['spec'].execute
|
38
38
|
end
|
39
39
|
|
40
|
-
task :
|
40
|
+
task default: :spec
|
41
41
|
|
42
42
|
require 'rdoc/task'
|
43
43
|
Rake::RDocTask.new do |rdoc|
|
44
|
-
version = File.exist?('VERSION') ? File.read('VERSION') :
|
44
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ''
|
45
45
|
|
46
46
|
rdoc.rdoc_dir = 'rdoc'
|
47
47
|
rdoc.title = "rtesseract #{version}"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.3.
|
1
|
+
1.3.1
|
data/lib/processors/none.rb
CHANGED
@@ -10,7 +10,7 @@ module NoneProcessor
|
|
10
10
|
|
11
11
|
def self.image_to_tif(source, _x = nil, _y = nil, _w = nil, _h = nil)
|
12
12
|
tmp_file = Tempfile.new(['', '.tif'])
|
13
|
-
tmp_file.write(
|
13
|
+
tmp_file.write(read_with_processor(source))
|
14
14
|
tmp_file
|
15
15
|
end
|
16
16
|
|
@@ -21,6 +21,6 @@ module NoneProcessor
|
|
21
21
|
File.read(path)
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.image?(
|
24
|
+
def self.image?(*)
|
25
25
|
end
|
26
26
|
end
|
data/lib/processors/rmagick.rb
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# Add to rtesseract a image manipulation with RMagick
|
3
3
|
module RMagickProcessor
|
4
4
|
def self.setup
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
require 'rmagick'
|
6
|
+
rescue LoadError
|
7
|
+
# :nocov:
|
8
|
+
require 'RMagick'
|
9
|
+
# :nocov:
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.a_name?(name)
|
@@ -18,7 +18,10 @@ module RMagickProcessor
|
|
18
18
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
19
19
|
cat.crop!(x, y, w, h) unless [x, y, w, h].compact == []
|
20
20
|
cat.alpha Magick::DeactivateAlphaChannel
|
21
|
-
cat.write(tmp_file.path.to_s) {
|
21
|
+
cat.write(tmp_file.path.to_s) {
|
22
|
+
# self.depth = 16
|
23
|
+
self.compression = Magick::NoCompression
|
24
|
+
}
|
22
25
|
tmp_file
|
23
26
|
end
|
24
27
|
|
data/lib/rtesseract.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'pathname'
|
3
3
|
require 'tempfile'
|
4
|
+
require 'utils'
|
4
5
|
|
5
6
|
require 'rtesseract/errors'
|
6
7
|
require 'rtesseract/mixed'
|
@@ -17,10 +18,11 @@ require 'processors/none.rb'
|
|
17
18
|
class RTesseract
|
18
19
|
attr_accessor :image_object
|
19
20
|
attr_accessor :options
|
21
|
+
attr_accessor :options_cmd
|
20
22
|
attr_writer :lang
|
21
23
|
attr_writer :psm
|
22
24
|
attr_reader :processor
|
23
|
-
|
25
|
+
attr_reader :source
|
24
26
|
|
25
27
|
OPTIONS = %w(command lang psm processor debug clear_console_output options)
|
26
28
|
# Aliases to languages names
|
@@ -32,8 +34,8 @@ class RTesseract
|
|
32
34
|
}
|
33
35
|
|
34
36
|
def initialize(src = '', options = {})
|
35
|
-
|
36
|
-
@value, @x, @y, @w, @h = [
|
37
|
+
command_line_options(options)
|
38
|
+
@value, @x, @y, @w, @h = [nil]
|
37
39
|
@processor = RTesseract.choose_processor!(@processor)
|
38
40
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
39
41
|
initialize_hook
|
@@ -42,24 +44,17 @@ class RTesseract
|
|
42
44
|
def initialize_hook
|
43
45
|
end
|
44
46
|
|
45
|
-
def fetch_option(options, name, default)
|
46
|
-
options.fetch(name.to_s, options.fetch(name, default))
|
47
|
-
end
|
48
|
-
|
49
47
|
def command_line_options(options)
|
50
|
-
@
|
51
|
-
@
|
52
|
-
@
|
53
|
-
@
|
54
|
-
@
|
55
|
-
@
|
48
|
+
@options = options
|
49
|
+
@command = @options.option(:command, default_command)
|
50
|
+
@lang = @options.option(:lang, '')
|
51
|
+
@psm = @options.option(:psm, nil)
|
52
|
+
@processor = @options.option(:processor, 'rmagick')
|
53
|
+
@debug = @options.option(:debug, false)
|
54
|
+
@options_cmd = @options.option(:options, [])
|
56
55
|
@options_cmd = [@options_cmd] unless @options_cmd.is_a?(Array)
|
57
|
-
|
58
56
|
# Disable clear console if debug mode
|
59
|
-
@clear_console_output = @debug ? false :
|
60
|
-
|
61
|
-
options.delete_if { |k, v| OPTIONS.include?(k.to_s) }
|
62
|
-
options
|
57
|
+
@clear_console_output = @debug ? false : options.option(:clear_console_output, true)
|
63
58
|
end
|
64
59
|
|
65
60
|
def default_command
|
@@ -68,32 +63,30 @@ class RTesseract
|
|
68
63
|
'tesseract'
|
69
64
|
end
|
70
65
|
|
71
|
-
def self.read(src = nil, options = {}
|
66
|
+
def self.read(src = nil, options = {})
|
72
67
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
73
|
-
processor = RTesseract.choose_processor!(options.
|
68
|
+
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
74
69
|
image = processor.read_with_processor(src.to_s)
|
75
|
-
|
76
70
|
yield(image)
|
77
|
-
object = RTesseract.new('', options)
|
78
|
-
object.from_blob(image.to_blob)
|
71
|
+
object = RTesseract.new('', options).from_blob(image.to_blob)
|
79
72
|
object
|
80
73
|
end
|
81
74
|
|
82
|
-
def read
|
75
|
+
def read
|
83
76
|
image = @processor.read_with_processor(@source.to_s)
|
84
77
|
new_image = yield(image)
|
85
|
-
|
78
|
+
from_blob(new_image.to_blob, File.extname(@source.to_s))
|
86
79
|
self
|
87
80
|
end
|
88
81
|
|
89
82
|
def source=(src)
|
90
|
-
@value =
|
83
|
+
@value = nil
|
91
84
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
92
85
|
end
|
93
86
|
|
94
87
|
# Crop image to convert
|
95
88
|
def crop!(x, y, width, height)
|
96
|
-
@value =
|
89
|
+
@value = nil
|
97
90
|
@x, @y, @w, @h = x.to_i, y.to_i, width.to_i, height.to_i
|
98
91
|
self
|
99
92
|
end
|
@@ -110,7 +103,7 @@ class RTesseract
|
|
110
103
|
end
|
111
104
|
true
|
112
105
|
rescue => error
|
113
|
-
raise RTesseract::TempFilesNotRemovedError.new(:
|
106
|
+
raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files)
|
114
107
|
end
|
115
108
|
|
116
109
|
# Select the language
|
@@ -201,26 +194,27 @@ class RTesseract
|
|
201
194
|
convert_text
|
202
195
|
remove_file([@image, text_file_with_ext])
|
203
196
|
rescue => error
|
204
|
-
raise RTesseract::ConversionError.new(error)
|
197
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
205
198
|
end
|
206
199
|
|
207
200
|
# Read image from memory blob
|
208
201
|
def from_blob(blob, ext = '')
|
209
|
-
blob_file = Tempfile.new(['blob', ext], :
|
210
|
-
blob_file.binmode
|
211
|
-
blob_file.write(blob)
|
202
|
+
blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit')
|
203
|
+
blob_file.binmode.write(blob)
|
212
204
|
blob_file.rewind
|
213
205
|
blob_file.flush
|
214
206
|
self.source = blob_file.path
|
215
207
|
convert
|
216
208
|
remove_file([blob_file])
|
209
|
+
self
|
217
210
|
rescue => error
|
218
|
-
raise RTesseract::ConversionError.new(error)
|
211
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
219
212
|
end
|
220
213
|
|
221
214
|
# Output value
|
222
215
|
def to_s
|
223
|
-
return @value if @value !=
|
216
|
+
return @value if @value != nil
|
217
|
+
|
224
218
|
if @processor.image?(@source) || @source.file?
|
225
219
|
convert
|
226
220
|
@value
|
@@ -249,4 +243,3 @@ class RTesseract
|
|
249
243
|
processor
|
250
244
|
end
|
251
245
|
end
|
252
|
-
|
data/lib/rtesseract/box.rb
CHANGED
@@ -23,15 +23,13 @@ class RTesseract
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse_file
|
26
|
-
Nokogiri::HTML(File.read(text_file_with_ext))
|
26
|
+
html = Nokogiri::HTML(File.read(text_file_with_ext))
|
27
|
+
html.css('span.ocrx_word, span.ocr_word')
|
27
28
|
end
|
28
29
|
|
29
30
|
def convert_text
|
30
31
|
text_objects = []
|
31
|
-
parse_file.each
|
32
|
-
attributes = word.attributes['title'].value.to_s.gsub(';', '').split(' ')
|
33
|
-
text_objects << { :word => word.text, :x_start => attributes[1].to_i, :y_start => attributes[2].to_i , :x_end => attributes[3].to_i, :y_end => attributes[4].to_i }
|
34
|
-
end
|
32
|
+
parse_file.each { |word| text_objects << BoxParser.new(word).to_h }
|
35
33
|
@value = text_objects
|
36
34
|
end
|
37
35
|
|
@@ -49,5 +47,24 @@ class RTesseract
|
|
49
47
|
fail RTesseract::ImageNotSelectedError.new(@source)
|
50
48
|
end
|
51
49
|
end
|
50
|
+
|
51
|
+
# Parse word data from html.
|
52
|
+
class BoxParser
|
53
|
+
def initialize(word_html)
|
54
|
+
@word = word_html
|
55
|
+
title = @word.attributes['title'].value.to_s
|
56
|
+
@attributes = title.gsub(';', '').split(' ')
|
57
|
+
end
|
58
|
+
|
59
|
+
def to_h
|
60
|
+
{
|
61
|
+
word: @word.text,
|
62
|
+
x_start: @attributes[1].to_i,
|
63
|
+
y_start: @attributes[2].to_i,
|
64
|
+
x_end: @attributes[3].to_i,
|
65
|
+
y_end: @attributes[4].to_i
|
66
|
+
}
|
67
|
+
end
|
68
|
+
end
|
52
69
|
end
|
53
70
|
end
|
data/lib/rtesseract/box_char.rb
CHANGED
@@ -19,8 +19,8 @@ class RTesseract
|
|
19
19
|
def convert_text
|
20
20
|
text_objects = []
|
21
21
|
parse_file.each_line do |line|
|
22
|
-
char, x_start, y_start, x_end, y_end,
|
23
|
-
text_objects << { :
|
22
|
+
char, x_start, y_start, x_end, y_end, _word = line.split(' ')
|
23
|
+
text_objects << { char: char, x_start: x_start.to_i, y_start: y_start.to_i, x_end: x_end.to_i, y_end: y_end.to_i }
|
24
24
|
end
|
25
25
|
@value = text_objects
|
26
26
|
end
|
data/lib/rtesseract/mixed.rb
CHANGED
@@ -14,7 +14,7 @@ class RTesseract
|
|
14
14
|
|
15
15
|
def area(x, y, width, height)
|
16
16
|
@value = ''
|
17
|
-
@areas << { :
|
17
|
+
@areas << { x: x, y: y, width: width, height: height }
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear_areas
|
@@ -29,7 +29,7 @@ class RTesseract
|
|
29
29
|
@value << image.to_s
|
30
30
|
end
|
31
31
|
rescue => error
|
32
|
-
raise RTesseract::ConversionError.new(error)
|
32
|
+
raise RTesseract::ConversionError.new(error), error, caller
|
33
33
|
end
|
34
34
|
|
35
35
|
# Output value
|
data/lib/utils.rb
ADDED
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract 1.3.
|
5
|
+
# stub: rtesseract 1.3.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "1.3.
|
9
|
+
s.version = "1.3.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2015-
|
14
|
+
s.date = "2015-10-07"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.files = [
|
22
22
|
".document",
|
23
23
|
".rspec",
|
24
|
-
".travis.sh",
|
25
24
|
".travis.yml",
|
26
25
|
"Gemfile",
|
27
26
|
"Gemfile.lock",
|
@@ -38,7 +37,10 @@ Gem::Specification.new do |s|
|
|
38
37
|
"lib/rtesseract/box_char.rb",
|
39
38
|
"lib/rtesseract/errors.rb",
|
40
39
|
"lib/rtesseract/mixed.rb",
|
40
|
+
"lib/utils.rb",
|
41
41
|
"rtesseract.gemspec",
|
42
|
+
"spec/images/README.pdf",
|
43
|
+
"spec/images/blank.tif",
|
42
44
|
"spec/images/mixed.tif",
|
43
45
|
"spec/images/orientation_reverse.png",
|
44
46
|
"spec/images/test with spaces.tif",
|
@@ -56,7 +58,7 @@ Gem::Specification.new do |s|
|
|
56
58
|
]
|
57
59
|
s.homepage = "http://github.com/dannnylo/rtesseract"
|
58
60
|
s.licenses = ["MIT"]
|
59
|
-
s.rubygems_version = "2.
|
61
|
+
s.rubygems_version = "2.4.3"
|
60
62
|
s.summary = "Ruby library for working with the Tesseract OCR."
|
61
63
|
|
62
64
|
if s.respond_to? :specification_version then
|
Binary file
|
Binary file
|
@@ -1,82 +1,82 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::BoxChar' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb','')).expand_path
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_box_char_spec.rb', '')).expand_path
|
7
7
|
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
8
|
@words_image = @path.join('images', 'test_words.png').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
it "bounding box by char" do
|
11
|
+
it 'bounding box by char' do
|
13
12
|
expect(RTesseract::BoxChar.new(@image_tiff).characters.is_a?(Array)).to eql(true)
|
14
13
|
expect(RTesseract::BoxChar.new(@image_tiff).characters).to eql([
|
15
|
-
{:
|
16
|
-
{:
|
17
|
-
{:
|
18
|
-
{:
|
19
|
-
|
14
|
+
{ char: '4', x_start: 145, y_start: 14, x_end: 159, y_end: 33 },
|
15
|
+
{ char: '3', x_start: 184, y_start: 14, x_end: 196, y_end: 33 },
|
16
|
+
{ char: 'X', x_start: 222, y_start: 14, x_end: 238, y_end: 32 },
|
17
|
+
{ char: 'F', x_start: 260, y_start: 14, x_end: 273, y_end: 32 }])
|
18
|
+
|
20
19
|
expect(RTesseract::BoxChar.new(@words_image).characters).to eql([
|
21
|
-
{:
|
22
|
-
{:
|
23
|
-
{:
|
24
|
-
{:
|
25
|
-
{:
|
26
|
-
{:
|
27
|
-
{:
|
28
|
-
{:
|
29
|
-
{:
|
30
|
-
{:
|
31
|
-
{:
|
32
|
-
{:
|
33
|
-
{:
|
34
|
-
{:
|
35
|
-
{:
|
36
|
-
{:
|
37
|
-
{:
|
38
|
-
{:
|
39
|
-
{:
|
40
|
-
{:
|
41
|
-
{:
|
42
|
-
{:
|
43
|
-
{:
|
44
|
-
{:
|
45
|
-
{:
|
46
|
-
{:
|
47
|
-
{:
|
48
|
-
{:
|
49
|
-
{:
|
50
|
-
{:
|
51
|
-
{:
|
52
|
-
{:
|
53
|
-
{:
|
54
|
-
{:
|
55
|
-
{:
|
56
|
-
{:
|
57
|
-
{:
|
58
|
-
{:
|
59
|
-
{:
|
60
|
-
{:
|
61
|
-
{:
|
62
|
-
{:
|
63
|
-
{:
|
64
|
-
{:
|
65
|
-
{:
|
66
|
-
{:
|
67
|
-
{:
|
68
|
-
{:
|
69
|
-
{:
|
70
|
-
{:
|
71
|
-
{:
|
72
|
-
{:
|
73
|
-
{:
|
74
|
-
{:
|
75
|
-
{:
|
76
|
-
{:
|
77
|
-
|
20
|
+
{ char: 'I', x_start: 52, y_start: 91, x_end: 54, y_end: 104 },
|
21
|
+
{ char: 'f', x_start: 56, y_start: 91, x_end: 63, y_end: 105 },
|
22
|
+
{ char: 'y', x_start: 69, y_start: 87, x_end: 79, y_end: 101 },
|
23
|
+
{ char: 'o', x_start: 80, y_start: 91, x_end: 90, y_end: 101 },
|
24
|
+
{ char: 'u', x_start: 92, y_start: 91, x_end: 100, y_end: 101 },
|
25
|
+
{ char: 'a', x_start: 108, y_start: 91, x_end: 116, y_end: 101 },
|
26
|
+
{ char: 'r', x_start: 119, y_start: 91, x_end: 125, y_end: 101 },
|
27
|
+
{ char: 'e', x_start: 126, y_start: 91, x_end: 136, y_end: 101 },
|
28
|
+
{ char: 'a', x_start: 143, y_start: 91, x_end: 151, y_end: 101 },
|
29
|
+
{ char: 'f', x_start: 158, y_start: 91, x_end: 165, y_end: 105 },
|
30
|
+
{ char: 'r', x_start: 166, y_start: 91, x_end: 172, y_end: 101 },
|
31
|
+
{ char: 'i', x_start: 174, y_start: 91, x_end: 176, y_end: 105 },
|
32
|
+
{ char: 'e', x_start: 178, y_start: 91, x_end: 188, y_end: 101 },
|
33
|
+
{ char: 'n', x_start: 190, y_start: 91, x_end: 198, y_end: 101 },
|
34
|
+
{ char: 'd', x_start: 200, y_start: 91, x_end: 209, y_end: 105 },
|
35
|
+
{ char: ',', x_start: 211, y_start: 89, x_end: 214, y_end: 93 },
|
36
|
+
{ char: 'y', x_start: 51, y_start: 65, x_end: 61, y_end: 79 },
|
37
|
+
{ char: 'o', x_start: 62, y_start: 69, x_end: 72, y_end: 79 },
|
38
|
+
{ char: 'u', x_start: 74, y_start: 69, x_end: 82, y_end: 79 },
|
39
|
+
{ char: 's', x_start: 90, y_start: 69, x_end: 97, y_end: 79 },
|
40
|
+
{ char: 'p', x_start: 99, y_start: 65, x_end: 108, y_end: 79 },
|
41
|
+
{ char: 'e', x_start: 109, y_start: 69, x_end: 119, y_end: 79 },
|
42
|
+
{ char: 'a', x_start: 120, y_start: 69, x_end: 128, y_end: 79 },
|
43
|
+
{ char: 'k', x_start: 131, y_start: 69, x_end: 140, y_end: 83 },
|
44
|
+
{ char: 't', x_start: 146, y_start: 69, x_end: 152, y_end: 82 },
|
45
|
+
{ char: 'h', x_start: 154, y_start: 69, x_end: 162, y_end: 83 },
|
46
|
+
{ char: 'e', x_start: 164, y_start: 69, x_end: 174, y_end: 79 },
|
47
|
+
{ char: 'p', x_start: 182, y_start: 65, x_end: 191, y_end: 79 },
|
48
|
+
{ char: 'a', x_start: 192, y_start: 69, x_end: 200, y_end: 79 },
|
49
|
+
{ char: 's', x_start: 202, y_start: 69, x_end: 209, y_end: 79 },
|
50
|
+
{ char: 's', x_start: 210, y_start: 69, x_end: 217, y_end: 79 },
|
51
|
+
{ char: 'w', x_start: 219, y_start: 69, x_end: 232, y_end: 79 },
|
52
|
+
{ char: 'o', x_start: 234, y_start: 69, x_end: 244, y_end: 79 },
|
53
|
+
{ char: 'r', x_start: 246, y_start: 69, x_end: 252, y_end: 79 },
|
54
|
+
{ char: 'd', x_start: 253, y_start: 69, x_end: 262, y_end: 83 },
|
55
|
+
{ char: ',', x_start: 264, y_start: 67, x_end: 267, y_end: 71 },
|
56
|
+
{ char: 'a', x_start: 51, y_start: 47, x_end: 59, y_end: 57 },
|
57
|
+
{ char: 'n', x_start: 62, y_start: 47, x_end: 70, y_end: 57 },
|
58
|
+
{ char: 'd', x_start: 72, y_start: 47, x_end: 81, y_end: 61 },
|
59
|
+
{ char: 't', x_start: 89, y_start: 47, x_end: 95, y_end: 60 },
|
60
|
+
{ char: 'h', x_start: 97, y_start: 47, x_end: 105, y_end: 61 },
|
61
|
+
{ char: 'e', x_start: 107, y_start: 47, x_end: 117, y_end: 57 },
|
62
|
+
{ char: 'd', x_start: 124, y_start: 47, x_end: 133, y_end: 61 },
|
63
|
+
{ char: 'o', x_start: 135, y_start: 47, x_end: 145, y_end: 57 },
|
64
|
+
{ char: 'o', x_start: 146, y_start: 47, x_end: 156, y_end: 57 },
|
65
|
+
{ char: 'r', x_start: 158, y_start: 47, x_end: 164, y_end: 57 },
|
66
|
+
{ char: 's', x_start: 165, y_start: 47, x_end: 172, y_end: 57 },
|
67
|
+
{ char: 'w', x_start: 180, y_start: 47, x_end: 193, y_end: 57 },
|
68
|
+
{ char: 'i', x_start: 196, y_start: 47, x_end: 198, y_end: 61 },
|
69
|
+
{ char: 'l', x_start: 201, y_start: 47, x_end: 203, y_end: 61 },
|
70
|
+
{ char: 'l', x_start: 206, y_start: 47, x_end: 208, y_end: 61 },
|
71
|
+
{ char: 'o', x_start: 216, y_start: 47, x_end: 226, y_end: 57 },
|
72
|
+
{ char: 'p', x_start: 228, y_start: 43, x_end: 237, y_end: 57 },
|
73
|
+
{ char: 'e', x_start: 238, y_start: 47, x_end: 248, y_end: 57 },
|
74
|
+
{ char: 'n', x_start: 250, y_start: 47, x_end: 258, y_end: 57 },
|
75
|
+
{ char: '.', x_start: 261, y_start: 47, x_end: 263, y_end: 49 }])
|
76
|
+
|
77
|
+
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
|
+
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
78
79
|
|
79
|
-
expect
|
80
|
-
expect{RTesseract::BoxChar.new(@image_tiff + "_not_exist").to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
80
|
+
#expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
81
81
|
end
|
82
82
|
end
|
data/spec/rtesseract_box_spec.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::Box' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub(
|
7
|
-
@image_tiff = @path.join(
|
8
|
-
@words_image = @path.join(
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_box_spec.rb', '')).expand_path
|
7
|
+
@image_tiff = @path.join('images', 'test.tif').to_s
|
8
|
+
@words_image = @path.join('images', 'test_words.png').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
it "bounding box" do
|
11
|
+
it 'bounding box' do
|
13
12
|
expect(RTesseract.new(@words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\n")
|
14
13
|
expect(RTesseract::Box.new(@words_image).words).to eql([
|
15
|
-
{:
|
16
|
-
{:
|
17
|
-
{:
|
18
|
-
{:
|
19
|
-
{:
|
20
|
-
{:
|
21
|
-
{:
|
22
|
-
{:
|
23
|
-
{:
|
24
|
-
{:
|
25
|
-
{:
|
26
|
-
{:
|
27
|
-
{:
|
28
|
-
{:
|
14
|
+
{ word: 'If', x_start: 52, y_start: 13, x_end: 63, y_end: 27 },
|
15
|
+
{ word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31 },
|
16
|
+
{ word: 'are', x_start: 108, y_start: 17, x_end: 136, y_end: 27 },
|
17
|
+
{ word: 'a', x_start: 143, y_start: 17, x_end: 151, y_end: 27 },
|
18
|
+
{ word: 'friend,', x_start: 158, y_start: 13, x_end: 214, y_end: 29 },
|
19
|
+
{ word: 'you', x_start: 51, y_start: 39, x_end: 82, y_end: 53 },
|
20
|
+
{ word: 'speak', x_start: 90, y_start: 35, x_end: 140, y_end: 53 },
|
21
|
+
{ word: 'the', x_start: 146, y_start: 35, x_end: 174, y_end: 49 },
|
22
|
+
{ word: 'password,', x_start: 182, y_start: 35, x_end: 267, y_end: 53 },
|
23
|
+
{ word: 'and', x_start: 51, y_start: 57, x_end: 81, y_end: 71 },
|
24
|
+
{ word: 'the', x_start: 89, y_start: 57, x_end: 117, y_end: 71 },
|
25
|
+
{ word: 'doors', x_start: 124, y_start: 57, x_end: 172, y_end: 71 },
|
26
|
+
{ word: 'will', x_start: 180, y_start: 57, x_end: 208, y_end: 71 },
|
27
|
+
{ word: 'open.', x_start: 216, y_start: 61, x_end: 263, y_end: 75 }
|
29
28
|
])
|
30
29
|
|
31
30
|
expect(RTesseract::Box.new(@image_tiff).words.is_a?(Array)).to eql(true)
|
32
|
-
expect(RTesseract::Box.new(@words_image).to_s).to eql(
|
33
|
-
expect{RTesseract::Box.new(@image_tiff,
|
34
|
-
expect{RTesseract::Box.new(@image_tiff +
|
31
|
+
expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
|
32
|
+
expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
33
|
+
expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
34
|
+
|
35
|
+
#expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
35
36
|
end
|
36
37
|
end
|
@@ -1,46 +1,49 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
|
-
describe
|
4
|
+
describe 'Rtesseract::Mixed' do
|
5
5
|
before do
|
6
|
-
@path = Pathname.new(__FILE__.gsub(
|
7
|
-
@image_tif = @path.join(
|
8
|
-
@image2_tif = @path.join(
|
6
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_mixed_spec.rb', '')).expand_path
|
7
|
+
@image_tif = @path.join('images', 'mixed.tif').to_s
|
8
|
+
@image2_tif = @path.join('images', 'mixed2.tif').to_s
|
9
9
|
end
|
10
10
|
|
11
|
-
it
|
11
|
+
it 'should be instantiable' do
|
12
12
|
expect(RTesseract::Mixed.new.class).to eql(RTesseract::Mixed)
|
13
13
|
expect(RTesseract::Mixed.new(@image_tif).class).to eql(RTesseract::Mixed)
|
14
14
|
end
|
15
15
|
|
16
|
-
it
|
17
|
-
mix_block = RTesseract::Mixed.new(@image_tif,
|
18
|
-
image.area(28, 19, 25, 25) #position of 4
|
16
|
+
it 'should translate parts of the image to text' do
|
17
|
+
mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
|
18
|
+
image.area(28, 19, 25, 25) # position of 4
|
19
19
|
image.area(180, 22, 20, 28) # position of 3
|
20
|
-
image.area(218, 22, 24, 28) # position of
|
21
|
-
image.area(248, 24, 22, 22) # position of
|
20
|
+
image.area(218, 22, 24, 28) # position of F
|
21
|
+
image.area(248, 24, 22, 22) # position of F
|
22
22
|
end
|
23
|
-
expect(mix_block.to_s_without_spaces).to eql(
|
23
|
+
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
24
24
|
mix_block.clear_areas
|
25
25
|
expect(mix_block.areas).to eql([])
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
27
|
+
@areas = []
|
28
|
+
@areas << { x: 28, y: 19, width: 25, height: 25 } # position of 4
|
29
|
+
@areas << { x: 180, y: 22, width: 20, height: 28 } # position of 3
|
30
|
+
@areas << { x: 218, y: 22, width: 24, height: 28 } # position of f
|
31
|
+
@areas << { x: 248, y: 24, width: 22, height: 22 } # position of f
|
32
|
+
|
33
|
+
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
|
34
|
+
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
35
|
+
|
36
|
+
mix_block = RTesseract::Mixed.new(@path.join('images', 'blank.tif').to_s, areas: @areas, psm: 7)
|
37
|
+
expect(mix_block.to_s_without_spaces).to eql('')
|
34
38
|
end
|
35
39
|
|
36
|
-
it
|
37
|
-
|
38
|
-
],:psm=>7})
|
39
|
-
expect{ mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
40
|
+
it ' get a error' do
|
41
|
+
@areas = [{ x: 28, y: 19, width: 25, height: 25 }]
|
40
42
|
|
43
|
+
mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
44
|
+
expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
41
45
|
|
42
|
-
mix_block = RTesseract::Mixed.new(@image_tif,
|
43
|
-
|
44
|
-
expect{ mix_block.to_s }.to raise_error(RTesseract::ConversionError)
|
46
|
+
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7, command: 'tesseract_error')
|
47
|
+
expect { mix_block.to_s }.to raise_error(RTesseract::ConversionError)
|
45
48
|
end
|
46
49
|
end
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -1,121 +1,130 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
# encoding: UTF-8
|
3
3
|
require 'pathname'
|
4
|
+
|
5
|
+
# Class to rise error
|
4
6
|
class MakeStringError
|
5
7
|
def to_s
|
6
|
-
|
8
|
+
fail 'error'
|
7
9
|
end
|
8
10
|
end
|
9
11
|
|
10
|
-
describe
|
12
|
+
describe 'Rtesseract' do
|
11
13
|
before do
|
12
|
-
@path = Pathname.new(__FILE__.gsub(
|
13
|
-
@image_tif = @path.join(
|
14
|
+
@path = Pathname.new(__FILE__.gsub('rtesseract_spec.rb', '')).expand_path
|
15
|
+
@image_tif = @path.join('images', 'test.tif').to_s
|
14
16
|
end
|
15
17
|
|
16
|
-
it
|
18
|
+
it ' be instantiable' do
|
17
19
|
expect(RTesseract.new.class).to eql(RTesseract)
|
18
|
-
expect(RTesseract.new(
|
20
|
+
expect(RTesseract.new('').class).to eql(RTesseract)
|
19
21
|
expect(RTesseract.new(@image_tif).class).to eql(RTesseract)
|
20
22
|
end
|
21
23
|
|
22
|
-
it
|
23
|
-
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql(
|
24
|
-
expect(RTesseract.new(@image_tif,
|
25
|
-
expect(RTesseract.new(@path.join(
|
26
|
-
expect(RTesseract.new(@path.join(
|
24
|
+
it ' translate image to text' do
|
25
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
|
26
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
27
|
+
expect(RTesseract.new(@path.join('images', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
|
28
|
+
expect(RTesseract.new(@path.join('images', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
|
29
|
+
end
|
27
30
|
|
31
|
+
it ' translate images .png, .jpg, .bmp' do
|
32
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
|
33
|
+
expect(RTesseract.new(@path.join('images', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
|
34
|
+
expect(RTesseract.new(@path.join('images', 'test.bmp').to_s).to_s_without_spaces).to eql('FLA6')
|
28
35
|
end
|
29
36
|
|
30
|
-
it
|
31
|
-
expect(RTesseract.new(@path.join(
|
32
|
-
expect(RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces).to eql("3R8F")
|
33
|
-
expect(RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces).to eql("FLA6")
|
37
|
+
it ' should not error with depth > 32' do
|
38
|
+
#expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
34
39
|
end
|
35
40
|
|
36
|
-
it
|
37
|
-
#Rmagick
|
38
|
-
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql(
|
39
|
-
expect(RTesseract.new(@image_tif, :
|
40
|
-
expect(RTesseract.new(@path.join(
|
41
|
+
it ' support diferent processors' do
|
42
|
+
# Rmagick
|
43
|
+
expect(RTesseract.new(@image_tif).to_s_without_spaces).to eql('43XF')
|
44
|
+
expect(RTesseract.new(@image_tif, processor: 'rmagick').to_s_without_spaces).to eql('43XF')
|
45
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'rmagick').to_s_without_spaces).to eql('HW9W')
|
41
46
|
|
42
|
-
#MiniMagick
|
43
|
-
expect(RTesseract.new(@image_tif, :
|
44
|
-
expect(RTesseract.new(@path.join(
|
47
|
+
# MiniMagick
|
48
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
49
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
|
45
50
|
|
46
|
-
#QuickMagick
|
47
|
-
expect(RTesseract.new(@image_tif, :
|
48
|
-
expect(RTesseract.new(@path.join(
|
51
|
+
# QuickMagick
|
52
|
+
expect(RTesseract.new(@image_tif, processor: 'quick_magick').to_s_without_spaces).to eql('43XF')
|
53
|
+
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'quick_magick').to_s_without_spaces).to eql('HW9W')
|
49
54
|
|
50
|
-
#NoneMagick
|
51
|
-
expect(RTesseract.new(@image_tif, :
|
55
|
+
# NoneMagick
|
56
|
+
expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
|
52
57
|
end
|
53
58
|
|
54
|
-
it
|
59
|
+
it ' change the image' do
|
55
60
|
image = RTesseract.new(@image_tif)
|
56
|
-
expect(image.to_s_without_spaces).to eql(
|
57
|
-
image.source = @path.join(
|
58
|
-
expect(image.to_s_without_spaces).to eql(
|
61
|
+
expect(image.to_s_without_spaces).to eql('43XF')
|
62
|
+
image.source = @path.join('images', 'test1.tif').to_s
|
63
|
+
expect(image.to_s_without_spaces).to eql('V2V4')
|
59
64
|
end
|
60
65
|
|
61
|
-
it
|
62
|
-
|
63
|
-
expect(
|
64
|
-
|
65
|
-
expect(RTesseract.new(@image_tif,{:lang=>"en-US"}).lang).to eql(" -l eng ")
|
66
|
-
expect(RTesseract.new(@image_tif,{:lang=>"english"}).lang).to eql(" -l eng ")
|
66
|
+
it ' returns the source' do
|
67
|
+
image = RTesseract.new(@image_tif)
|
68
|
+
expect(image.source).to eql(Pathname.new(@image_tif))
|
69
|
+
end
|
67
70
|
|
68
|
-
|
69
|
-
|
70
|
-
expect(RTesseract.new(@image_tif,
|
71
|
-
expect(RTesseract.new(@image_tif,
|
72
|
-
expect(RTesseract.new(@image_tif,
|
73
|
-
expect(RTesseract.new(@image_tif,
|
71
|
+
it ' select the language' do
|
72
|
+
# English
|
73
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
74
|
+
expect(RTesseract.new(@image_tif, lang: 'en').lang).to eql(' -l eng ')
|
75
|
+
expect(RTesseract.new(@image_tif, lang: 'en-US').lang).to eql(' -l eng ')
|
76
|
+
expect(RTesseract.new(@image_tif, lang: 'english').lang).to eql(' -l eng ')
|
74
77
|
|
75
|
-
|
76
|
-
|
77
|
-
expect(RTesseract.new(@image_tif,
|
78
|
+
# Portuguese
|
79
|
+
expect(RTesseract.new(@image_tif, lang: 'por').lang).to eql(' -l por ')
|
80
|
+
expect(RTesseract.new(@image_tif, lang: 'pt-BR').lang).to eql(' -l por ')
|
81
|
+
expect(RTesseract.new(@image_tif, lang: 'pt-br').lang).to eql(' -l por ')
|
82
|
+
expect(RTesseract.new(@image_tif, lang: 'pt').lang).to eql(' -l por ')
|
83
|
+
expect(RTesseract.new(@image_tif, lang: 'portuguese').lang).to eql(' -l por ')
|
78
84
|
|
79
|
-
|
80
|
-
expect(RTesseract.new(@image_tif,{:lang=>MakeStringError.new}).lang).to eql("")
|
81
|
-
end
|
85
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').to_s_without_spaces).to eql('43XF')
|
82
86
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
|
87
|
+
expect(RTesseract.new(@image_tif, lang: 'eng').lang).to eql(' -l eng ')
|
88
|
+
|
89
|
+
# Invalid lang object
|
90
|
+
expect(RTesseract.new(@image_tif, lang: MakeStringError.new).lang).to eql('')
|
88
91
|
end
|
89
92
|
|
90
|
-
it
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
93
|
+
it ' select options' do
|
94
|
+
expect(RTesseract.new(@image_tif).options_cmd).to eql([])
|
95
|
+
expect(RTesseract.new(@image_tif, options: 'digits').options_cmd).to eql(['digits'])
|
96
|
+
expect(RTesseract.new(@image_tif, options: :digits).options_cmd).to eql([:digits])
|
97
|
+
expect(RTesseract.new(@image_tif, options: [:digits, :quiet]).options_cmd).to eql([:digits, :quiet])
|
95
98
|
end
|
96
99
|
|
97
|
-
it
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
it ' be configurable' do
|
101
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0, display_text: 0).config).to eql("chop_enable 0\nenable_assoc 0\ndisplay_text 0")
|
102
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0).config).to eql('chop_enable 0')
|
103
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0, enable_assoc: 0).config).to eql("chop_enable 0\nenable_assoc 0")
|
104
|
+
expect(RTesseract.new(@image_tif, chop_enable: 0).to_s_without_spaces).to eql('43XF')
|
102
105
|
end
|
103
106
|
|
107
|
+
it ' crop image' do
|
108
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(140, 10, 36, 40).to_s_without_spaces).to eql('4')
|
109
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(180, 10, 36, 40).to_s_without_spaces).to eql('3')
|
110
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(216, 10, 20, 40).to_s_without_spaces).to eql('X')
|
111
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(240, 10, 30, 40).to_s_without_spaces).to eql('F')
|
112
|
+
end
|
104
113
|
|
105
|
-
it
|
106
|
-
image = Magick::Image.read(@path.join(
|
107
|
-
blob = image.quantize(256,Magick::GRAYColorspace).to_blob
|
114
|
+
it ' read image from blob' do
|
115
|
+
image = Magick::Image.read(@path.join('images', 'test.png').to_s).first
|
116
|
+
blob = image.quantize(256, Magick::GRAYColorspace).to_blob
|
108
117
|
|
109
|
-
test = RTesseract.new(
|
118
|
+
test = RTesseract.new('', psm: 7)
|
110
119
|
test.from_blob(blob)
|
111
|
-
expect(test.to_s_without_spaces).to eql(
|
120
|
+
expect(test.to_s_without_spaces).to eql('HW9W')
|
112
121
|
|
113
|
-
test = RTesseract.new(
|
114
|
-
expect{test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
122
|
+
test = RTesseract.new('', psm: 7)
|
123
|
+
expect { test.from_blob('') }.to raise_error(RTesseract::ConversionError)
|
115
124
|
end
|
116
125
|
|
117
|
-
it
|
118
|
-
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql(
|
126
|
+
it ' use a instance' do
|
127
|
+
expect(RTesseract.new(Magick::Image.read(@image_tif.to_s).first).to_s_without_spaces).to eql('43XF')
|
119
128
|
expect(RMagickProcessor.a_name?('teste')).to eql(false)
|
120
129
|
expect(RMagickProcessor.a_name?('rmagick')).to eql(true)
|
121
130
|
expect(RMagickProcessor.a_name?('RMagickProcessor')).to eql(true)
|
@@ -129,46 +138,50 @@ describe "Rtesseract" do
|
|
129
138
|
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
130
139
|
end
|
131
140
|
|
132
|
-
it
|
133
|
-
test = RTesseract.read(@path.join(
|
141
|
+
it ' change image in a block' do
|
142
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) {}
|
134
143
|
expect(test.class).to eql(RTesseract)
|
135
144
|
|
136
145
|
test = RTesseract.new(@image_tif)
|
137
146
|
test.read do |image|
|
138
147
|
image = image.quantize(256, Magick::GRAYColorspace)
|
139
148
|
end
|
140
|
-
expect(test.to_s_without_spaces).to eql(
|
149
|
+
expect(test.to_s_without_spaces).to eql('43XF')
|
150
|
+
|
151
|
+
test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
|
152
|
+
test.read do |image|
|
153
|
+
image
|
154
|
+
end
|
155
|
+
expect(test.to_s_without_spaces).to eql('')
|
141
156
|
|
142
|
-
test = RTesseract.read(@path.join(
|
143
|
-
#image = image.white_threshold(245)
|
144
|
-
#image = image.quantize(256,Magick::GRAYColorspace)
|
157
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |image|
|
145
158
|
image.rotate(90)
|
146
159
|
end
|
147
|
-
expect(test.to_s_without_spaces).to eql(
|
160
|
+
expect(test.to_s_without_spaces).to eql('HW9W')
|
148
161
|
|
149
|
-
test = RTesseract.read(@path.join(
|
162
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |image|
|
150
163
|
image = image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
151
164
|
end
|
152
|
-
expect(test.to_s_without_spaces).to eql(
|
165
|
+
expect(test.to_s_without_spaces).to eql('3R8F')
|
153
166
|
|
154
|
-
test = RTesseract.read(@path.join(
|
155
|
-
image.gravity
|
167
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |image|
|
168
|
+
image.gravity 'south'
|
156
169
|
end
|
157
|
-
expect(test.to_s_without_spaces).to eql(
|
170
|
+
expect(test.to_s_without_spaces).to eql('3R8F')
|
158
171
|
end
|
159
172
|
|
160
|
-
it
|
161
|
-
expect{ RTesseract.new(@path.join(
|
162
|
-
expect{ RTesseract.new(@path.join(
|
173
|
+
it ' get a error' do
|
174
|
+
expect { RTesseract.new(@path.join('images', 'test.jpg').to_s, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
175
|
+
expect { RTesseract.new(@path.join('images', 'test_not_exists.png').to_s).to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
163
176
|
|
164
|
-
#
|
165
|
-
expect(RTesseract.new(@image_tif,
|
177
|
+
# Invalid psm object
|
178
|
+
expect(RTesseract.new(@image_tif, psm: MakeStringError.new).psm).to eql('')
|
166
179
|
end
|
167
180
|
|
168
|
-
it
|
181
|
+
it 'remove a file' do
|
169
182
|
rtesseract = RTesseract.new('.')
|
170
183
|
rtesseract.remove_file(Tempfile.new('config'))
|
171
184
|
|
172
|
-
expect{ rtesseract.remove_file(Pathname.new(Dir.tmpdir).join(
|
185
|
+
expect { rtesseract.remove_file(Pathname.new(Dir.tmpdir).join('test_not_exists')) }.to raise_error(RTesseract::TempFilesNotRemovedError)
|
173
186
|
end
|
174
187
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,17 +3,16 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
require 'rspec'
|
5
5
|
require 'coveralls'
|
6
|
-
require
|
6
|
+
require 'simplecov'
|
7
7
|
SimpleCov.start do
|
8
|
-
add_filter
|
8
|
+
add_filter '/spec/'
|
9
9
|
end
|
10
10
|
Coveralls.wear!
|
11
11
|
|
12
12
|
require 'rtesseract'
|
13
13
|
# Requires supporting files with custom matchers and macros, etc,
|
14
14
|
# in ./support/ and its subdirectories.
|
15
|
-
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
16
|
-
|
17
|
-
RSpec.configure do |config|
|
15
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
18
16
|
|
17
|
+
RSpec.configure do # |config|
|
19
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -118,7 +118,6 @@ extra_rdoc_files:
|
|
118
118
|
files:
|
119
119
|
- ".document"
|
120
120
|
- ".rspec"
|
121
|
-
- ".travis.sh"
|
122
121
|
- ".travis.yml"
|
123
122
|
- Gemfile
|
124
123
|
- Gemfile.lock
|
@@ -135,7 +134,10 @@ files:
|
|
135
134
|
- lib/rtesseract/box_char.rb
|
136
135
|
- lib/rtesseract/errors.rb
|
137
136
|
- lib/rtesseract/mixed.rb
|
137
|
+
- lib/utils.rb
|
138
138
|
- rtesseract.gemspec
|
139
|
+
- spec/images/README.pdf
|
140
|
+
- spec/images/blank.tif
|
139
141
|
- spec/images/mixed.tif
|
140
142
|
- spec/images/orientation_reverse.png
|
141
143
|
- spec/images/test with spaces.tif
|
@@ -170,7 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
172
|
version: '0'
|
171
173
|
requirements: []
|
172
174
|
rubyforge_project:
|
173
|
-
rubygems_version: 2.
|
175
|
+
rubygems_version: 2.4.3
|
174
176
|
signing_key:
|
175
177
|
specification_version: 4
|
176
178
|
summary: Ruby library for working with the Tesseract OCR.
|
data/.travis.sh
DELETED