rtesseract 1.3.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile +1 -2
- data/Gemfile.lock +14 -15
- data/README.rdoc +5 -6
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +2 -2
- data/lib/processors/none.rb +1 -1
- data/lib/processors/rmagick.rb +4 -4
- data/lib/rtesseract/box.rb +1 -1
- data/lib/rtesseract/configuration.rb +57 -0
- data/lib/rtesseract/mixed.rb +3 -3
- data/lib/rtesseract.rb +39 -68
- data/rtesseract.gemspec +7 -7
- data/spec/rtesseract_box_char_spec.rb +1 -2
- data/spec/rtesseract_box_spec.rb +1 -2
- data/spec/rtesseract_mixed_spec.rb +9 -9
- data/spec/rtesseract_spec.rb +41 -34
- metadata +5 -5
- data/lib/processors/quick_magick.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 695108fd1fe3b6bb921444dc8daadb248466dd13
|
4
|
+
data.tar.gz: d2e8b38f6a54c7ffd004863de72a9d880be9bdbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c28af3ffb9d288fb580d22f68f99d3e159919284735ff6cf84a91ba8da636d4c771568019c0db4ae968bbad030cfc8e187100ca075b7904fb05eda5658ca8c0d
|
7
|
+
data.tar.gz: 3fdb3195471c7b0a3674c000d6ebafe4a7474cb6336911ff93e898233f872bb220b56b309be2e1148a19eb2abd1a764fa60ef09bfcfea66a103bdb35f836d8bd
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -7,7 +7,7 @@ group :development do
|
|
7
7
|
gem 'rspec'
|
8
8
|
gem 'rdoc'
|
9
9
|
gem 'bundler'
|
10
|
-
gem 'jeweler', '~> 2.
|
10
|
+
gem 'jeweler', '~> 2.1.1'
|
11
11
|
gem 'simplecov'
|
12
12
|
gem 'coveralls', require: false
|
13
13
|
end
|
@@ -15,5 +15,4 @@ end
|
|
15
15
|
group :test do
|
16
16
|
gem 'rmagick'
|
17
17
|
gem 'mini_magick'
|
18
|
-
gem 'quick_magick'
|
19
18
|
end
|
data/Gemfile.lock
CHANGED
@@ -18,20 +18,19 @@ GEM
|
|
18
18
|
unf (>= 0.0.5, < 1.0.0)
|
19
19
|
faraday (0.9.2)
|
20
20
|
multipart-post (>= 1.2, < 3)
|
21
|
-
git (1.
|
22
|
-
github_api (0.13.
|
23
|
-
addressable (~> 2.
|
21
|
+
git (1.3.0)
|
22
|
+
github_api (0.13.1)
|
23
|
+
addressable (~> 2.4.0)
|
24
24
|
descendants_tracker (~> 0.0.4)
|
25
25
|
faraday (~> 0.8, < 0.10)
|
26
26
|
hashie (>= 3.4)
|
27
27
|
multi_json (>= 1.7.5, < 2.0)
|
28
|
-
nokogiri (~> 1.6.6)
|
29
28
|
oauth2
|
30
29
|
hashie (3.4.3)
|
31
30
|
highline (1.7.8)
|
32
31
|
http-cookie (1.0.2)
|
33
32
|
domain_name (~> 0.5)
|
34
|
-
jeweler (2.
|
33
|
+
jeweler (2.1.1)
|
35
34
|
builder
|
36
35
|
bundler (>= 1.0)
|
37
36
|
git (>= 1.2.5)
|
@@ -40,8 +39,9 @@ GEM
|
|
40
39
|
nokogiri (>= 1.5.10)
|
41
40
|
rake
|
42
41
|
rdoc
|
42
|
+
semver
|
43
43
|
json (1.8.3)
|
44
|
-
jwt (1.5.
|
44
|
+
jwt (1.5.1)
|
45
45
|
mime-types (2.99)
|
46
46
|
mini_magick (4.3.6)
|
47
47
|
mini_portile2 (2.0.0)
|
@@ -49,18 +49,17 @@ GEM
|
|
49
49
|
multi_xml (0.5.5)
|
50
50
|
multipart-post (2.0.0)
|
51
51
|
netrc (0.11.0)
|
52
|
-
nokogiri (1.6.7)
|
52
|
+
nokogiri (1.6.7.2)
|
53
53
|
mini_portile2 (~> 2.0.0.rc2)
|
54
|
-
oauth2 (1.
|
54
|
+
oauth2 (1.1.0)
|
55
55
|
faraday (>= 0.8, < 0.10)
|
56
|
-
jwt (~> 1.0)
|
56
|
+
jwt (~> 1.0, < 1.5.2)
|
57
57
|
multi_json (~> 1.3)
|
58
58
|
multi_xml (~> 0.5)
|
59
|
-
rack (
|
60
|
-
quick_magick (0.8.0)
|
59
|
+
rack (>= 1.2, < 3)
|
61
60
|
rack (1.6.4)
|
62
|
-
rake (
|
63
|
-
rdoc (4.2.
|
61
|
+
rake (11.1.2)
|
62
|
+
rdoc (4.2.2)
|
64
63
|
json (~> 1.4)
|
65
64
|
rest-client (1.8.0)
|
66
65
|
http-cookie (>= 1.0.2, < 2.0)
|
@@ -80,6 +79,7 @@ GEM
|
|
80
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
81
80
|
rspec-support (~> 3.4.0)
|
82
81
|
rspec-support (3.4.1)
|
82
|
+
semver (1.0.1)
|
83
83
|
simplecov (0.11.1)
|
84
84
|
docile (~> 1.1.0)
|
85
85
|
json (~> 1.8)
|
@@ -100,10 +100,9 @@ PLATFORMS
|
|
100
100
|
DEPENDENCIES
|
101
101
|
bundler
|
102
102
|
coveralls
|
103
|
-
jeweler (~> 2.
|
103
|
+
jeweler (~> 2.1.1)
|
104
104
|
mini_magick
|
105
105
|
nokogiri
|
106
|
-
quick_magick
|
107
106
|
rdoc
|
108
107
|
rmagick
|
109
108
|
rspec
|
data/README.rdoc
CHANGED
@@ -12,7 +12,7 @@ Ruby library for working with the Tesseract OCR.
|
|
12
12
|
To work properly rtesseract are needed:
|
13
13
|
* Tesseract - Program
|
14
14
|
* ImageMagick - Program
|
15
|
-
* RMagick or mini_magick
|
15
|
+
* RMagick or mini_magick - Gem
|
16
16
|
|
17
17
|
Atention: Version 1.0.0 works fine with Ruby 2.0 and tesseract 3.0 and lower versions of rtesseract works fine with Ruby 1.8 and tesseract 2.0.4.
|
18
18
|
|
@@ -52,10 +52,10 @@ It's very simple to use rtesseract:
|
|
52
52
|
OR
|
53
53
|
|
54
54
|
mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
|
55
|
-
{:x => 28, :y=>19, :
|
56
|
-
{:x => 180, :y=>22, :
|
57
|
-
{:x => 218, :y=>22, :
|
58
|
-
{:x => 248, :y=>24, :
|
55
|
+
{:x => 28, :y=>19, :w=>25, :h=>25 },
|
56
|
+
{:x => 180, :y=>22, :w=>20, :h=>28},
|
57
|
+
{:x => 218, :y=>22, :w=>24, :h=>28},
|
58
|
+
{:x => 248, :y=>24, :w=>22, :h=>22}
|
59
59
|
]})
|
60
60
|
mix_block.to_s
|
61
61
|
|
@@ -64,7 +64,6 @@ It's very simple to use rtesseract:
|
|
64
64
|
Processors Options (_Rmagick_ is default)
|
65
65
|
|
66
66
|
RTesseract.new("test.jpg", :processor => "mini_magick")
|
67
|
-
RTesseract.new("test.jpg", :processor => "quick_magick")
|
68
67
|
|
69
68
|
Note: For non process the image use NoneProcessor
|
70
69
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
@@ -9,14 +9,14 @@ module MiniMagickProcessor
|
|
9
9
|
%w(mini_magick MiniMagickProcessor).include?(name.to_s)
|
10
10
|
end
|
11
11
|
|
12
|
-
def self.image_to_tif(source,
|
12
|
+
def self.image_to_tif(source, _points = {})
|
13
13
|
tmp_file = Tempfile.new(['', '.tif'])
|
14
14
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
15
|
cat.format('tif') do |c|
|
16
16
|
c.compress 'None'
|
17
17
|
c.alpha 'off'
|
18
18
|
end
|
19
|
-
cat.crop("#{w}x#{h}+#{x}+#{y}")
|
19
|
+
cat.crop("#{_points[:w]}x#{_points[:h]}+#{_points[:x]}+#{_points[:y]}") if _points.is_a?(Hash) && _points.values.compact != []
|
20
20
|
cat.alpha 'off'
|
21
21
|
cat.write tmp_file.path.to_s
|
22
22
|
tmp_file
|
data/lib/processors/none.rb
CHANGED
@@ -8,7 +8,7 @@ module NoneProcessor
|
|
8
8
|
%w(none NoneProcessor).include?(name.to_s)
|
9
9
|
end
|
10
10
|
|
11
|
-
def self.image_to_tif(source,
|
11
|
+
def self.image_to_tif(source, _points = {})
|
12
12
|
tmp_file = Tempfile.new(['', '.tif'])
|
13
13
|
tmp_file.write(read_with_processor(source))
|
14
14
|
tmp_file
|
data/lib/processors/rmagick.rb
CHANGED
@@ -13,15 +13,15 @@ module RMagickProcessor
|
|
13
13
|
%w(rmagick RMagickProcessor).include?(name.to_s)
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.image_to_tif(source,
|
16
|
+
def self.image_to_tif(source, _points = {})
|
17
17
|
tmp_file = Tempfile.new(['', '.tif'])
|
18
18
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
19
|
-
cat.crop!(x, y, w, h)
|
19
|
+
cat.crop!(_points[:x], _points[:y], _points[:w], _points[:h]) if _points.is_a?(Hash) && _points.values.compact != []
|
20
20
|
cat.alpha Magick::DeactivateAlphaChannel
|
21
|
-
cat.write(tmp_file.path.to_s)
|
21
|
+
cat.write(tmp_file.path.to_s) do
|
22
22
|
# self.depth = 16
|
23
23
|
self.compression = Magick::NoCompression
|
24
|
-
|
24
|
+
end
|
25
25
|
tmp_file
|
26
26
|
end
|
27
27
|
|
data/lib/rtesseract/box.rb
CHANGED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Configuration
|
2
|
+
class RTesseract
|
3
|
+
# Aliases to languages names
|
4
|
+
LANGUAGES = {
|
5
|
+
'eng' => %w(en en-us english),
|
6
|
+
'ita' => %w(it),
|
7
|
+
'por' => %w(pt pt-br portuguese),
|
8
|
+
'spa' => %w(sp)
|
9
|
+
}
|
10
|
+
|
11
|
+
# Configuration class
|
12
|
+
class Configuration
|
13
|
+
attr_accessor :processor, :lang, :psm, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
@processor = 'rmagick'
|
17
|
+
end
|
18
|
+
|
19
|
+
def parent
|
20
|
+
@parent ||= RTesseract.configuration || RTesseract::Configuration.new
|
21
|
+
end
|
22
|
+
|
23
|
+
def option(options, name, default = nil)
|
24
|
+
self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_options(options, names = [])
|
28
|
+
names.each{ |name| option(options, name, nil) }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class << self
|
33
|
+
attr_accessor :configuration
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.configure
|
37
|
+
self.configuration ||= Configuration.new
|
38
|
+
yield(configuration)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.default_command
|
42
|
+
TesseractBin::Executables[:tesseract] || 'tesseract'
|
43
|
+
rescue
|
44
|
+
'tesseract'
|
45
|
+
end
|
46
|
+
|
47
|
+
# Local config to instance
|
48
|
+
def self.local_config(options = {})
|
49
|
+
RTesseract::Configuration.new.tap do |config|
|
50
|
+
config.command = config.option(options, :command, RTesseract.default_command)
|
51
|
+
config.processor = config.option(options, :processor, 'rmagick')
|
52
|
+
config.load_options(options, [ :lang, :psm, :tessdata_dir, :user_words, :user_patterns ])
|
53
|
+
config.debug = config.option(options, :debug, false)
|
54
|
+
config.options_cmd = [options.option(:options, nil)].flatten.compact
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/rtesseract/mixed.rb
CHANGED
@@ -12,9 +12,9 @@ class RTesseract
|
|
12
12
|
yield self if block_given?
|
13
13
|
end
|
14
14
|
|
15
|
-
def area(
|
15
|
+
def area(_points)
|
16
16
|
@value = ''
|
17
|
-
@areas << { x: x, y: y, width: width, height: height }
|
17
|
+
@areas << _points # { x: x, y: y, width: width, height: height }
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear_areas
|
@@ -25,7 +25,7 @@ class RTesseract
|
|
25
25
|
def convert
|
26
26
|
@value = []
|
27
27
|
@areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
|
28
|
-
image.crop!(area[:x], area[:y], area[:width], area[:height])
|
28
|
+
image.crop!(area) # area[:x], area[:y], area[:width], area[:height])
|
29
29
|
@value << image.to_s
|
30
30
|
end
|
31
31
|
rescue => error
|
data/lib/rtesseract.rb
CHANGED
@@ -3,6 +3,7 @@ require 'pathname'
|
|
3
3
|
require 'tempfile'
|
4
4
|
require 'utils'
|
5
5
|
|
6
|
+
require 'rtesseract/configuration'
|
6
7
|
require 'rtesseract/errors'
|
7
8
|
require 'rtesseract/mixed'
|
8
9
|
require 'rtesseract/box'
|
@@ -11,53 +12,19 @@ require 'rtesseract/box_char'
|
|
11
12
|
# Processors
|
12
13
|
require 'processors/rmagick.rb'
|
13
14
|
require 'processors/mini_magick.rb'
|
14
|
-
require 'processors/quick_magick.rb'
|
15
15
|
require 'processors/none.rb'
|
16
16
|
|
17
17
|
# Ruby wrapper for Tesseract OCR
|
18
18
|
class RTesseract
|
19
|
-
attr_accessor :
|
20
|
-
attr_accessor :options
|
21
|
-
attr_accessor :options_cmd
|
22
|
-
attr_writer :lang
|
23
|
-
attr_writer :psm
|
19
|
+
attr_accessor :configuration
|
24
20
|
attr_reader :processor
|
25
21
|
attr_reader :source
|
26
22
|
|
27
|
-
OPTIONS = %w(command lang psm processor debug clear_console_output options)
|
28
|
-
# Aliases to languages names
|
29
|
-
LANGUAGES = {
|
30
|
-
'eng' => %w(en en-us english),
|
31
|
-
'ita' => %w(it),
|
32
|
-
'por' => %w(pt pt-br portuguese),
|
33
|
-
'spa' => %w(sp)
|
34
|
-
}
|
35
|
-
|
36
|
-
class << self
|
37
|
-
attr_accessor :configuration
|
38
|
-
end
|
39
|
-
|
40
|
-
def self.configure
|
41
|
-
self.configuration ||= Configuration.new
|
42
|
-
yield(configuration)
|
43
|
-
end
|
44
|
-
|
45
|
-
class Configuration
|
46
|
-
attr_accessor :processor, :lang, :psm
|
47
|
-
|
48
|
-
def initialize
|
49
|
-
@processor = 'rmagick'
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_hash
|
53
|
-
{processor: @processor, lang: lang, psm: psm}
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
23
|
def initialize(src = '', options = {})
|
58
|
-
|
59
|
-
@
|
60
|
-
@
|
24
|
+
self.configuration = RTesseract.local_config(options)
|
25
|
+
@options = options || {}
|
26
|
+
@value, @points = [nil, {}]
|
27
|
+
@processor = RTesseract.choose_processor!(self.configuration.processor)
|
61
28
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
62
29
|
initialize_hook
|
63
30
|
end
|
@@ -65,26 +32,6 @@ class RTesseract
|
|
65
32
|
def initialize_hook
|
66
33
|
end
|
67
34
|
|
68
|
-
def command_line_options(options)
|
69
|
-
default_config = RTesseract.configuration ? RTesseract.configuration.to_hash : {}
|
70
|
-
@options = default_config.merge(options)
|
71
|
-
@command = @options.option(:command, default_command)
|
72
|
-
@lang = @options.option(:lang, '')
|
73
|
-
@psm = @options.option(:psm, nil)
|
74
|
-
@processor = @options.option(:processor, 'rmagick')
|
75
|
-
@debug = @options.option(:debug, false)
|
76
|
-
@options_cmd = @options.option(:options, [])
|
77
|
-
@options_cmd = [@options_cmd] unless @options_cmd.is_a?(Array)
|
78
|
-
# Disable clear console if debug mode
|
79
|
-
@clear_console_output = @debug ? false : options.option(:clear_console_output, true)
|
80
|
-
end
|
81
|
-
|
82
|
-
def default_command
|
83
|
-
TesseractBin::Executables[:tesseract] || 'tesseract'
|
84
|
-
rescue
|
85
|
-
'tesseract'
|
86
|
-
end
|
87
|
-
|
88
35
|
def self.read(src = nil, options = {})
|
89
36
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
90
37
|
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
@@ -107,9 +54,9 @@ class RTesseract
|
|
107
54
|
end
|
108
55
|
|
109
56
|
# Crop image to convert
|
110
|
-
def crop!(
|
57
|
+
def crop!(_points = {})
|
111
58
|
@value = nil
|
112
|
-
@
|
59
|
+
@points = _points
|
113
60
|
self
|
114
61
|
end
|
115
62
|
|
@@ -141,7 +88,7 @@ class RTesseract
|
|
141
88
|
## * vie - Vietnamese
|
142
89
|
## Note: Make sure you have installed the language to tesseract
|
143
90
|
def lang
|
144
|
-
language = "#{
|
91
|
+
language = "#{self.configuration.lang}".strip.downcase
|
145
92
|
LANGUAGES.each do |value, names|
|
146
93
|
return " -l #{value} " if names.include? language
|
147
94
|
end
|
@@ -153,11 +100,37 @@ class RTesseract
|
|
153
100
|
|
154
101
|
# Page Segment Mode
|
155
102
|
def psm
|
156
|
-
(
|
103
|
+
(self.configuration.psm.nil? ? '' : " -psm #{self.configuration.psm} ")
|
157
104
|
rescue
|
158
105
|
''
|
159
106
|
end
|
160
107
|
|
108
|
+
# Tessdata Dir
|
109
|
+
def tessdata_dir
|
110
|
+
(self.configuration.tessdata_dir.nil? ? '' : " --tessdata-dir #{self.configuration.tessdata_dir} ")
|
111
|
+
rescue
|
112
|
+
''
|
113
|
+
end
|
114
|
+
|
115
|
+
# User Words
|
116
|
+
def user_words
|
117
|
+
(self.configuration.user_words.nil? ? '' : " --user-words #{self.configuration.user_words} ")
|
118
|
+
rescue
|
119
|
+
''
|
120
|
+
end
|
121
|
+
|
122
|
+
# User Patterns
|
123
|
+
def user_patterns
|
124
|
+
(self.configuration.user_patterns.nil? ? '' : " --user-patterns #{self.configuration.user_patterns} ")
|
125
|
+
rescue
|
126
|
+
''
|
127
|
+
end
|
128
|
+
|
129
|
+
# Options on line
|
130
|
+
def options_cmd
|
131
|
+
self.configuration.options_cmd
|
132
|
+
end
|
133
|
+
|
161
134
|
def config_hook
|
162
135
|
end
|
163
136
|
|
@@ -178,12 +151,12 @@ class RTesseract
|
|
178
151
|
|
179
152
|
# TODO: Clear console for MacOS or Windows
|
180
153
|
def clear_console_output
|
181
|
-
return ''
|
154
|
+
return '' if self.configuration.debug
|
182
155
|
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
183
156
|
end
|
184
157
|
|
185
158
|
def image
|
186
|
-
(@image = @processor.image_to_tif(@source, @
|
159
|
+
(@image = @processor.image_to_tif(@source, @points)).path
|
187
160
|
end
|
188
161
|
|
189
162
|
def file_ext
|
@@ -199,7 +172,7 @@ class RTesseract
|
|
199
172
|
end
|
200
173
|
|
201
174
|
def convert_command
|
202
|
-
`#{
|
175
|
+
`#{self.configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{self.configuration.options_cmd.join(' ')}`
|
203
176
|
end
|
204
177
|
|
205
178
|
def convert_text
|
@@ -254,8 +227,6 @@ class RTesseract
|
|
254
227
|
processor =
|
255
228
|
if MiniMagickProcessor.a_name?(processor.to_s)
|
256
229
|
MiniMagickProcessor
|
257
|
-
elsif QuickMagickProcessor.a_name?(processor.to_s)
|
258
|
-
QuickMagickProcessor
|
259
230
|
elsif NoneProcessor.a_name?(processor.to_s)
|
260
231
|
NoneProcessor
|
261
232
|
else
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract
|
5
|
+
# stub: rtesseract 2.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "
|
9
|
+
s.version = "2.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-04-19"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -30,11 +30,11 @@ Gem::Specification.new do |s|
|
|
30
30
|
"VERSION",
|
31
31
|
"lib/processors/mini_magick.rb",
|
32
32
|
"lib/processors/none.rb",
|
33
|
-
"lib/processors/quick_magick.rb",
|
34
33
|
"lib/processors/rmagick.rb",
|
35
34
|
"lib/rtesseract.rb",
|
36
35
|
"lib/rtesseract/box.rb",
|
37
36
|
"lib/rtesseract/box_char.rb",
|
37
|
+
"lib/rtesseract/configuration.rb",
|
38
38
|
"lib/rtesseract/errors.rb",
|
39
39
|
"lib/rtesseract/mixed.rb",
|
40
40
|
"lib/utils.rb",
|
@@ -69,7 +69,7 @@ Gem::Specification.new do |s|
|
|
69
69
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
70
70
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
71
71
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
72
|
-
s.add_development_dependency(%q<jeweler>, ["~> 2.
|
72
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.1.1"])
|
73
73
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
74
74
|
s.add_development_dependency(%q<coveralls>, [">= 0"])
|
75
75
|
else
|
@@ -77,7 +77,7 @@ Gem::Specification.new do |s|
|
|
77
77
|
s.add_dependency(%q<rspec>, [">= 0"])
|
78
78
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
79
79
|
s.add_dependency(%q<bundler>, [">= 0"])
|
80
|
-
s.add_dependency(%q<jeweler>, ["~> 2.
|
80
|
+
s.add_dependency(%q<jeweler>, ["~> 2.1.1"])
|
81
81
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
82
82
|
s.add_dependency(%q<coveralls>, [">= 0"])
|
83
83
|
end
|
@@ -86,7 +86,7 @@ Gem::Specification.new do |s|
|
|
86
86
|
s.add_dependency(%q<rspec>, [">= 0"])
|
87
87
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
88
88
|
s.add_dependency(%q<bundler>, [">= 0"])
|
89
|
-
s.add_dependency(%q<jeweler>, ["~> 2.
|
89
|
+
s.add_dependency(%q<jeweler>, ["~> 2.1.1"])
|
90
90
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
91
91
|
s.add_dependency(%q<coveralls>, [">= 0"])
|
92
92
|
end
|
@@ -76,7 +76,6 @@ describe 'Rtesseract::BoxChar' do
|
|
76
76
|
|
77
77
|
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
78
|
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
79
|
-
|
80
|
-
#expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
79
|
+
# expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
81
80
|
end
|
82
81
|
end
|
data/spec/rtesseract_box_spec.rb
CHANGED
@@ -31,7 +31,6 @@ describe 'Rtesseract::Box' do
|
|
31
31
|
expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
|
32
32
|
expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
33
33
|
expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
34
|
-
|
35
|
-
#expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
34
|
+
# expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
36
35
|
end
|
37
36
|
end
|
@@ -15,20 +15,20 @@ describe 'Rtesseract::Mixed' do
|
|
15
15
|
|
16
16
|
it 'should translate parts of the image to text' do
|
17
17
|
mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
|
18
|
-
image.area(28,
|
19
|
-
image.area(180, 22, 20, 28) # position of 3
|
20
|
-
image.area(218, 22, 24, 28) # position of F
|
21
|
-
image.area(248, 24, 22, 22) # position of F
|
18
|
+
image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
|
19
|
+
image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
|
20
|
+
image.area(x: 218, y: 22, w: 24, h: 28) # position of F
|
21
|
+
image.area(x: 248, y: 24, w: 22, h: 22) # position of F
|
22
22
|
end
|
23
23
|
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
24
24
|
mix_block.clear_areas
|
25
25
|
expect(mix_block.areas).to eql([])
|
26
26
|
|
27
27
|
@areas = []
|
28
|
-
@areas << { x: 28, y: 19,
|
29
|
-
@areas << { x: 180, y: 22,
|
30
|
-
@areas << { x: 218, y: 22,
|
31
|
-
@areas << { x: 248, y: 24,
|
28
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
29
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
30
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
31
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
32
32
|
|
33
33
|
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
|
34
34
|
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
@@ -38,7 +38,7 @@ describe 'Rtesseract::Mixed' do
|
|
38
38
|
end
|
39
39
|
|
40
40
|
it ' get a error' do
|
41
|
-
@areas = [{ x: 28, y: 19,
|
41
|
+
@areas = [{ x: 28, y: 19, w: 25, h: 25 }]
|
42
42
|
|
43
43
|
mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
44
44
|
expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -35,7 +35,7 @@ describe 'Rtesseract' do
|
|
35
35
|
end
|
36
36
|
|
37
37
|
it ' should not error with depth > 32' do
|
38
|
-
#expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
38
|
+
# expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
39
39
|
end
|
40
40
|
|
41
41
|
it ' support different processors' do
|
@@ -48,15 +48,10 @@ describe 'Rtesseract' do
|
|
48
48
|
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
49
49
|
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
|
50
50
|
|
51
|
-
# QuickMagick
|
52
|
-
expect(RTesseract.new(@image_tif, processor: 'quick_magick').to_s_without_spaces).to eql('43XF')
|
53
|
-
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'quick_magick').to_s_without_spaces).to eql('HW9W')
|
54
|
-
|
55
51
|
# NoneMagick
|
56
52
|
expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
|
57
53
|
end
|
58
54
|
|
59
|
-
|
60
55
|
it ' change the image' do
|
61
56
|
image = RTesseract.new(@image_tif)
|
62
57
|
expect(image.to_s_without_spaces).to eql('43XF')
|
@@ -106,10 +101,10 @@ describe 'Rtesseract' do
|
|
106
101
|
end
|
107
102
|
|
108
103
|
it ' crop image' do
|
109
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
110
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
111
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
112
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
104
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
|
105
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
|
106
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 20, h: 40, x: 216, y: 10).to_s_without_spaces).to eql('X')
|
107
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 30, h: 40, x: 240, y: 10).to_s_without_spaces).to eql('F')
|
113
108
|
end
|
114
109
|
|
115
110
|
it ' read image from blob' do
|
@@ -132,9 +127,6 @@ describe 'Rtesseract' do
|
|
132
127
|
expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
|
133
128
|
expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
134
129
|
expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
135
|
-
expect(QuickMagickProcessor.a_name?('teste')).to eql(false)
|
136
|
-
expect(QuickMagickProcessor.a_name?('quick_magick')).to eql(true)
|
137
|
-
expect(QuickMagickProcessor.a_name?('QuickMagickProcessor')).to eql(true)
|
138
130
|
expect(NoneProcessor.a_name?('none')).to eql(true)
|
139
131
|
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
140
132
|
end
|
@@ -144,29 +136,29 @@ describe 'Rtesseract' do
|
|
144
136
|
expect(test.class).to eql(RTesseract)
|
145
137
|
|
146
138
|
test = RTesseract.new(@image_tif)
|
147
|
-
test.read do |
|
148
|
-
|
139
|
+
test.read do |_image|
|
140
|
+
_image = _image.quantize(256, Magick::GRAYColorspace)
|
149
141
|
end
|
150
142
|
expect(test.to_s_without_spaces).to eql('43XF')
|
151
143
|
|
152
144
|
test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
|
153
|
-
test.read do |
|
154
|
-
|
145
|
+
test.read do |_image|
|
146
|
+
_image
|
155
147
|
end
|
156
148
|
expect(test.to_s_without_spaces).to eql('')
|
157
149
|
|
158
|
-
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |
|
159
|
-
|
150
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |_image|
|
151
|
+
_image.rotate(90)
|
160
152
|
end
|
161
153
|
expect(test.to_s_without_spaces).to eql('HW9W')
|
162
154
|
|
163
|
-
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |
|
164
|
-
|
155
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |_image|
|
156
|
+
_image = _image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
165
157
|
end
|
166
158
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
167
159
|
|
168
|
-
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |
|
169
|
-
|
160
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |_image|
|
161
|
+
_image.gravity 'south'
|
170
162
|
end
|
171
163
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
172
164
|
end
|
@@ -188,30 +180,45 @@ describe 'Rtesseract' do
|
|
188
180
|
|
189
181
|
it ' support default config processors' do
|
190
182
|
# Rmagick
|
191
|
-
RTesseract.configure {|config| config.processor = 'rmagick' }
|
183
|
+
RTesseract.configure { |config| config.processor = 'rmagick' }
|
192
184
|
expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)
|
193
185
|
|
194
186
|
# MiniMagick
|
195
|
-
RTesseract.configure {|config| config.processor = 'mini_magick' }
|
187
|
+
RTesseract.configure { |config| config.processor = 'mini_magick' }
|
196
188
|
expect(RTesseract.new(@image_tif).processor.a_name?('mini_magick')).to eql(true)
|
197
189
|
|
198
|
-
# QuickMagick
|
199
|
-
RTesseract.configure {|config| config.processor = 'quick_magick' }
|
200
|
-
expect(RTesseract.new(@image_tif).processor.a_name?('quick_magick')).to eql(true)
|
201
|
-
|
202
190
|
# NoneMagick
|
203
|
-
RTesseract.configure {|config| config.processor = 'none' }
|
191
|
+
RTesseract.configure { |config| config.processor = 'none' }
|
204
192
|
expect(RTesseract.new(@image_tif).processor.a_name?('none')).to eql(true)
|
205
193
|
|
206
194
|
# overwrite default
|
207
|
-
RTesseract.configure {|config| config.processor = '
|
208
|
-
expect(RTesseract.new(@image_tif, processor: '
|
195
|
+
RTesseract.configure { |config| config.processor = 'rmagick' }
|
196
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').processor.a_name?('mini_magick')).to eql(true)
|
209
197
|
|
210
|
-
RTesseract.configure {|config| config.lang = 'portuguese' }
|
198
|
+
RTesseract.configure { |config| config.lang = 'portuguese' }
|
211
199
|
expect(RTesseract.new(@image_tif).lang).to eql(' -l por ')
|
212
200
|
|
213
|
-
RTesseract.configure {|config| config.psm = 7 }
|
201
|
+
RTesseract.configure { |config| config.psm = 7 }
|
214
202
|
expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
|
203
|
+
|
204
|
+
|
205
|
+
RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
|
206
|
+
expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
207
|
+
|
208
|
+
RTesseract.configure { |config| config.user_words = '/tmp/test' }
|
209
|
+
expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')
|
210
|
+
|
211
|
+
RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
|
212
|
+
expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
|
215
213
|
end
|
216
214
|
|
215
|
+
it ' support new configs' do
|
216
|
+
expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
217
|
+
expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')
|
218
|
+
expect(RTesseract.new(@image_tif, user_patterns: '/tmp/test').user_patterns).to eql(' --user-patterns /tmp/test ')
|
219
|
+
|
220
|
+
expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
|
221
|
+
expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
|
222
|
+
expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
|
223
|
+
end
|
217
224
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: 2.1.1
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: 2.1.1
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: simplecov
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,11 +127,11 @@ files:
|
|
127
127
|
- VERSION
|
128
128
|
- lib/processors/mini_magick.rb
|
129
129
|
- lib/processors/none.rb
|
130
|
-
- lib/processors/quick_magick.rb
|
131
130
|
- lib/processors/rmagick.rb
|
132
131
|
- lib/rtesseract.rb
|
133
132
|
- lib/rtesseract/box.rb
|
134
133
|
- lib/rtesseract/box_char.rb
|
134
|
+
- lib/rtesseract/configuration.rb
|
135
135
|
- lib/rtesseract/errors.rb
|
136
136
|
- lib/rtesseract/mixed.rb
|
137
137
|
- lib/utils.rb
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
# Add to rtesseract a image manipulation with QuickMagick
|
3
|
-
module QuickMagickProcessor
|
4
|
-
def self.setup
|
5
|
-
require 'quick_magick'
|
6
|
-
end
|
7
|
-
|
8
|
-
def self.a_name?(name)
|
9
|
-
%w(quick_magick QuickMagickProcessor).include?(name.to_s)
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
|
13
|
-
tmp_file = Tempfile.new(['', '.tif'])
|
14
|
-
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
|
-
cat.compress = 'None'
|
16
|
-
cat.format = 'tif'
|
17
|
-
cat.alpha = 'off'
|
18
|
-
cat.crop("#{w}x#{h}+#{x}+#{y}") if need_crop?(x, y, w, h)
|
19
|
-
cat.write tmp_file.path.to_s
|
20
|
-
tmp_file
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.need_crop?(x = nil, y = nil, w = nil, h = nil)
|
24
|
-
x.to_f + y.to_f + w.to_f + h.to_f > 0
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.read_with_processor(path)
|
28
|
-
QuickMagick::Image.read(path.to_s).first
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.image?(object)
|
32
|
-
object.class == QuickMagick::Image
|
33
|
-
end
|
34
|
-
end
|