rtesseract 1.3.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile +1 -2
- data/Gemfile.lock +14 -15
- data/README.rdoc +5 -6
- data/VERSION +1 -1
- data/lib/processors/mini_magick.rb +2 -2
- data/lib/processors/none.rb +1 -1
- data/lib/processors/rmagick.rb +4 -4
- data/lib/rtesseract/box.rb +1 -1
- data/lib/rtesseract/configuration.rb +57 -0
- data/lib/rtesseract/mixed.rb +3 -3
- data/lib/rtesseract.rb +39 -68
- data/rtesseract.gemspec +7 -7
- data/spec/rtesseract_box_char_spec.rb +1 -2
- data/spec/rtesseract_box_spec.rb +1 -2
- data/spec/rtesseract_mixed_spec.rb +9 -9
- data/spec/rtesseract_spec.rb +41 -34
- metadata +5 -5
- data/lib/processors/quick_magick.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 695108fd1fe3b6bb921444dc8daadb248466dd13
|
4
|
+
data.tar.gz: d2e8b38f6a54c7ffd004863de72a9d880be9bdbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c28af3ffb9d288fb580d22f68f99d3e159919284735ff6cf84a91ba8da636d4c771568019c0db4ae968bbad030cfc8e187100ca075b7904fb05eda5658ca8c0d
|
7
|
+
data.tar.gz: 3fdb3195471c7b0a3674c000d6ebafe4a7474cb6336911ff93e898233f872bb220b56b309be2e1148a19eb2abd1a764fa60ef09bfcfea66a103bdb35f836d8bd
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -7,7 +7,7 @@ group :development do
|
|
7
7
|
gem 'rspec'
|
8
8
|
gem 'rdoc'
|
9
9
|
gem 'bundler'
|
10
|
-
gem 'jeweler', '~> 2.
|
10
|
+
gem 'jeweler', '~> 2.1.1'
|
11
11
|
gem 'simplecov'
|
12
12
|
gem 'coveralls', require: false
|
13
13
|
end
|
@@ -15,5 +15,4 @@ end
|
|
15
15
|
group :test do
|
16
16
|
gem 'rmagick'
|
17
17
|
gem 'mini_magick'
|
18
|
-
gem 'quick_magick'
|
19
18
|
end
|
data/Gemfile.lock
CHANGED
@@ -18,20 +18,19 @@ GEM
|
|
18
18
|
unf (>= 0.0.5, < 1.0.0)
|
19
19
|
faraday (0.9.2)
|
20
20
|
multipart-post (>= 1.2, < 3)
|
21
|
-
git (1.
|
22
|
-
github_api (0.13.
|
23
|
-
addressable (~> 2.
|
21
|
+
git (1.3.0)
|
22
|
+
github_api (0.13.1)
|
23
|
+
addressable (~> 2.4.0)
|
24
24
|
descendants_tracker (~> 0.0.4)
|
25
25
|
faraday (~> 0.8, < 0.10)
|
26
26
|
hashie (>= 3.4)
|
27
27
|
multi_json (>= 1.7.5, < 2.0)
|
28
|
-
nokogiri (~> 1.6.6)
|
29
28
|
oauth2
|
30
29
|
hashie (3.4.3)
|
31
30
|
highline (1.7.8)
|
32
31
|
http-cookie (1.0.2)
|
33
32
|
domain_name (~> 0.5)
|
34
|
-
jeweler (2.
|
33
|
+
jeweler (2.1.1)
|
35
34
|
builder
|
36
35
|
bundler (>= 1.0)
|
37
36
|
git (>= 1.2.5)
|
@@ -40,8 +39,9 @@ GEM
|
|
40
39
|
nokogiri (>= 1.5.10)
|
41
40
|
rake
|
42
41
|
rdoc
|
42
|
+
semver
|
43
43
|
json (1.8.3)
|
44
|
-
jwt (1.5.
|
44
|
+
jwt (1.5.1)
|
45
45
|
mime-types (2.99)
|
46
46
|
mini_magick (4.3.6)
|
47
47
|
mini_portile2 (2.0.0)
|
@@ -49,18 +49,17 @@ GEM
|
|
49
49
|
multi_xml (0.5.5)
|
50
50
|
multipart-post (2.0.0)
|
51
51
|
netrc (0.11.0)
|
52
|
-
nokogiri (1.6.7)
|
52
|
+
nokogiri (1.6.7.2)
|
53
53
|
mini_portile2 (~> 2.0.0.rc2)
|
54
|
-
oauth2 (1.
|
54
|
+
oauth2 (1.1.0)
|
55
55
|
faraday (>= 0.8, < 0.10)
|
56
|
-
jwt (~> 1.0)
|
56
|
+
jwt (~> 1.0, < 1.5.2)
|
57
57
|
multi_json (~> 1.3)
|
58
58
|
multi_xml (~> 0.5)
|
59
|
-
rack (
|
60
|
-
quick_magick (0.8.0)
|
59
|
+
rack (>= 1.2, < 3)
|
61
60
|
rack (1.6.4)
|
62
|
-
rake (
|
63
|
-
rdoc (4.2.
|
61
|
+
rake (11.1.2)
|
62
|
+
rdoc (4.2.2)
|
64
63
|
json (~> 1.4)
|
65
64
|
rest-client (1.8.0)
|
66
65
|
http-cookie (>= 1.0.2, < 2.0)
|
@@ -80,6 +79,7 @@ GEM
|
|
80
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
81
80
|
rspec-support (~> 3.4.0)
|
82
81
|
rspec-support (3.4.1)
|
82
|
+
semver (1.0.1)
|
83
83
|
simplecov (0.11.1)
|
84
84
|
docile (~> 1.1.0)
|
85
85
|
json (~> 1.8)
|
@@ -100,10 +100,9 @@ PLATFORMS
|
|
100
100
|
DEPENDENCIES
|
101
101
|
bundler
|
102
102
|
coveralls
|
103
|
-
jeweler (~> 2.
|
103
|
+
jeweler (~> 2.1.1)
|
104
104
|
mini_magick
|
105
105
|
nokogiri
|
106
|
-
quick_magick
|
107
106
|
rdoc
|
108
107
|
rmagick
|
109
108
|
rspec
|
data/README.rdoc
CHANGED
@@ -12,7 +12,7 @@ Ruby library for working with the Tesseract OCR.
|
|
12
12
|
To work properly rtesseract are needed:
|
13
13
|
* Tesseract - Program
|
14
14
|
* ImageMagick - Program
|
15
|
-
* RMagick or mini_magick
|
15
|
+
* RMagick or mini_magick - Gem
|
16
16
|
|
17
17
|
Atention: Version 1.0.0 works fine with Ruby 2.0 and tesseract 3.0 and lower versions of rtesseract works fine with Ruby 1.8 and tesseract 2.0.4.
|
18
18
|
|
@@ -52,10 +52,10 @@ It's very simple to use rtesseract:
|
|
52
52
|
OR
|
53
53
|
|
54
54
|
mix_block = RTesseract::Mixed.new("test.jpg",{:areas => [
|
55
|
-
{:x => 28, :y=>19, :
|
56
|
-
{:x => 180, :y=>22, :
|
57
|
-
{:x => 218, :y=>22, :
|
58
|
-
{:x => 248, :y=>24, :
|
55
|
+
{:x => 28, :y=>19, :w=>25, :h=>25 },
|
56
|
+
{:x => 180, :y=>22, :w=>20, :h=>28},
|
57
|
+
{:x => 218, :y=>22, :w=>24, :h=>28},
|
58
|
+
{:x => 248, :y=>24, :w=>22, :h=>22}
|
59
59
|
]})
|
60
60
|
mix_block.to_s
|
61
61
|
|
@@ -64,7 +64,6 @@ It's very simple to use rtesseract:
|
|
64
64
|
Processors Options (_Rmagick_ is default)
|
65
65
|
|
66
66
|
RTesseract.new("test.jpg", :processor => "mini_magick")
|
67
|
-
RTesseract.new("test.jpg", :processor => "quick_magick")
|
68
67
|
|
69
68
|
Note: For non process the image use NoneProcessor
|
70
69
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
data/lib/processors/mini_magick.rb
CHANGED
@@ -9,14 +9,14 @@ module MiniMagickProcessor
|
|
9
9
|
%w(mini_magick MiniMagickProcessor).include?(name.to_s)
|
10
10
|
end
|
11
11
|
|
12
|
-
def self.image_to_tif(source,
|
12
|
+
def self.image_to_tif(source, _points = {})
|
13
13
|
tmp_file = Tempfile.new(['', '.tif'])
|
14
14
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
15
|
cat.format('tif') do |c|
|
16
16
|
c.compress 'None'
|
17
17
|
c.alpha 'off'
|
18
18
|
end
|
19
|
-
cat.crop("#{w}x#{h}+#{x}+#{y}")
|
19
|
+
cat.crop("#{_points[:w]}x#{_points[:h]}+#{_points[:x]}+#{_points[:y]}") if _points.is_a?(Hash) && _points.values.compact != []
|
20
20
|
cat.alpha 'off'
|
21
21
|
cat.write tmp_file.path.to_s
|
22
22
|
tmp_file
|
data/lib/processors/none.rb
CHANGED
@@ -8,7 +8,7 @@ module NoneProcessor
|
|
8
8
|
%w(none NoneProcessor).include?(name.to_s)
|
9
9
|
end
|
10
10
|
|
11
|
-
def self.image_to_tif(source,
|
11
|
+
def self.image_to_tif(source, _points = {})
|
12
12
|
tmp_file = Tempfile.new(['', '.tif'])
|
13
13
|
tmp_file.write(read_with_processor(source))
|
14
14
|
tmp_file
|
data/lib/processors/rmagick.rb
CHANGED
@@ -13,15 +13,15 @@ module RMagickProcessor
|
|
13
13
|
%w(rmagick RMagickProcessor).include?(name.to_s)
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.image_to_tif(source,
|
16
|
+
def self.image_to_tif(source, _points = {})
|
17
17
|
tmp_file = Tempfile.new(['', '.tif'])
|
18
18
|
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
19
|
-
cat.crop!(x, y, w, h)
|
19
|
+
cat.crop!(_points[:x], _points[:y], _points[:w], _points[:h]) if _points.is_a?(Hash) && _points.values.compact != []
|
20
20
|
cat.alpha Magick::DeactivateAlphaChannel
|
21
|
-
cat.write(tmp_file.path.to_s)
|
21
|
+
cat.write(tmp_file.path.to_s) do
|
22
22
|
# self.depth = 16
|
23
23
|
self.compression = Magick::NoCompression
|
24
|
-
|
24
|
+
end
|
25
25
|
tmp_file
|
26
26
|
end
|
27
27
|
|
data/lib/rtesseract/box.rb
CHANGED
data/lib/rtesseract/configuration.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Configuration
|
2
|
+
class RTesseract
|
3
|
+
# Aliases to languages names
|
4
|
+
LANGUAGES = {
|
5
|
+
'eng' => %w(en en-us english),
|
6
|
+
'ita' => %w(it),
|
7
|
+
'por' => %w(pt pt-br portuguese),
|
8
|
+
'spa' => %w(sp)
|
9
|
+
}
|
10
|
+
|
11
|
+
# Configuration class
|
12
|
+
class Configuration
|
13
|
+
attr_accessor :processor, :lang, :psm, :tessdata_dir, :user_words, :user_patterns, :command, :debug, :options_cmd
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
@processor = 'rmagick'
|
17
|
+
end
|
18
|
+
|
19
|
+
def parent
|
20
|
+
@parent ||= RTesseract.configuration || RTesseract::Configuration.new
|
21
|
+
end
|
22
|
+
|
23
|
+
def option(options, name, default = nil)
|
24
|
+
self.instance_variable_set("@#{name}", options.option(name, parent.send(name)) || default)
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_options(options, names = [])
|
28
|
+
names.each{ |name| option(options, name, nil) }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class << self
|
33
|
+
attr_accessor :configuration
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.configure
|
37
|
+
self.configuration ||= Configuration.new
|
38
|
+
yield(configuration)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.default_command
|
42
|
+
TesseractBin::Executables[:tesseract] || 'tesseract'
|
43
|
+
rescue
|
44
|
+
'tesseract'
|
45
|
+
end
|
46
|
+
|
47
|
+
# Local config to instance
|
48
|
+
def self.local_config(options = {})
|
49
|
+
RTesseract::Configuration.new.tap do |config|
|
50
|
+
config.command = config.option(options, :command, RTesseract.default_command)
|
51
|
+
config.processor = config.option(options, :processor, 'rmagick')
|
52
|
+
config.load_options(options, [ :lang, :psm, :tessdata_dir, :user_words, :user_patterns ])
|
53
|
+
config.debug = config.option(options, :debug, false)
|
54
|
+
config.options_cmd = [options.option(:options, nil)].flatten.compact
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/rtesseract/mixed.rb
CHANGED
@@ -12,9 +12,9 @@ class RTesseract
|
|
12
12
|
yield self if block_given?
|
13
13
|
end
|
14
14
|
|
15
|
-
def area(
|
15
|
+
def area(_points)
|
16
16
|
@value = ''
|
17
|
-
@areas << { x: x, y: y, width: width, height: height }
|
17
|
+
@areas << _points # { x: x, y: y, width: width, height: height }
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear_areas
|
@@ -25,7 +25,7 @@ class RTesseract
|
|
25
25
|
def convert
|
26
26
|
@value = []
|
27
27
|
@areas.each_with_object(RTesseract.new(@source.to_s, @options.dup)) do |area, image|
|
28
|
-
image.crop!(area[:x], area[:y], area[:width], area[:height])
|
28
|
+
image.crop!(area) # area[:x], area[:y], area[:width], area[:height])
|
29
29
|
@value << image.to_s
|
30
30
|
end
|
31
31
|
rescue => error
|
data/lib/rtesseract.rb
CHANGED
@@ -3,6 +3,7 @@ require 'pathname'
|
|
3
3
|
require 'tempfile'
|
4
4
|
require 'utils'
|
5
5
|
|
6
|
+
require 'rtesseract/configuration'
|
6
7
|
require 'rtesseract/errors'
|
7
8
|
require 'rtesseract/mixed'
|
8
9
|
require 'rtesseract/box'
|
@@ -11,53 +12,19 @@ require 'rtesseract/box_char'
|
|
11
12
|
# Processors
|
12
13
|
require 'processors/rmagick.rb'
|
13
14
|
require 'processors/mini_magick.rb'
|
14
|
-
require 'processors/quick_magick.rb'
|
15
15
|
require 'processors/none.rb'
|
16
16
|
|
17
17
|
# Ruby wrapper for Tesseract OCR
|
18
18
|
class RTesseract
|
19
|
-
attr_accessor :
|
20
|
-
attr_accessor :options
|
21
|
-
attr_accessor :options_cmd
|
22
|
-
attr_writer :lang
|
23
|
-
attr_writer :psm
|
19
|
+
attr_accessor :configuration
|
24
20
|
attr_reader :processor
|
25
21
|
attr_reader :source
|
26
22
|
|
27
|
-
OPTIONS = %w(command lang psm processor debug clear_console_output options)
|
28
|
-
# Aliases to languages names
|
29
|
-
LANGUAGES = {
|
30
|
-
'eng' => %w(en en-us english),
|
31
|
-
'ita' => %w(it),
|
32
|
-
'por' => %w(pt pt-br portuguese),
|
33
|
-
'spa' => %w(sp)
|
34
|
-
}
|
35
|
-
|
36
|
-
class << self
|
37
|
-
attr_accessor :configuration
|
38
|
-
end
|
39
|
-
|
40
|
-
def self.configure
|
41
|
-
self.configuration ||= Configuration.new
|
42
|
-
yield(configuration)
|
43
|
-
end
|
44
|
-
|
45
|
-
class Configuration
|
46
|
-
attr_accessor :processor, :lang, :psm
|
47
|
-
|
48
|
-
def initialize
|
49
|
-
@processor = 'rmagick'
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_hash
|
53
|
-
{processor: @processor, lang: lang, psm: psm}
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
23
|
def initialize(src = '', options = {})
|
58
|
-
|
59
|
-
@
|
60
|
-
@
|
24
|
+
self.configuration = RTesseract.local_config(options)
|
25
|
+
@options = options || {}
|
26
|
+
@value, @points = [nil, {}]
|
27
|
+
@processor = RTesseract.choose_processor!(self.configuration.processor)
|
61
28
|
@source = @processor.image?(src) ? src : Pathname.new(src)
|
62
29
|
initialize_hook
|
63
30
|
end
|
@@ -65,26 +32,6 @@ class RTesseract
|
|
65
32
|
def initialize_hook
|
66
33
|
end
|
67
34
|
|
68
|
-
def command_line_options(options)
|
69
|
-
default_config = RTesseract.configuration ? RTesseract.configuration.to_hash : {}
|
70
|
-
@options = default_config.merge(options)
|
71
|
-
@command = @options.option(:command, default_command)
|
72
|
-
@lang = @options.option(:lang, '')
|
73
|
-
@psm = @options.option(:psm, nil)
|
74
|
-
@processor = @options.option(:processor, 'rmagick')
|
75
|
-
@debug = @options.option(:debug, false)
|
76
|
-
@options_cmd = @options.option(:options, [])
|
77
|
-
@options_cmd = [@options_cmd] unless @options_cmd.is_a?(Array)
|
78
|
-
# Disable clear console if debug mode
|
79
|
-
@clear_console_output = @debug ? false : options.option(:clear_console_output, true)
|
80
|
-
end
|
81
|
-
|
82
|
-
def default_command
|
83
|
-
TesseractBin::Executables[:tesseract] || 'tesseract'
|
84
|
-
rescue
|
85
|
-
'tesseract'
|
86
|
-
end
|
87
|
-
|
88
35
|
def self.read(src = nil, options = {})
|
89
36
|
fail RTesseract::ImageNotSelectedError if src.nil?
|
90
37
|
processor = RTesseract.choose_processor!(options.option(:processor, nil))
|
@@ -107,9 +54,9 @@ class RTesseract
|
|
107
54
|
end
|
108
55
|
|
109
56
|
# Crop image to convert
|
110
|
-
def crop!(
|
57
|
+
def crop!(_points = {})
|
111
58
|
@value = nil
|
112
|
-
@
|
59
|
+
@points = _points
|
113
60
|
self
|
114
61
|
end
|
115
62
|
|
@@ -141,7 +88,7 @@ class RTesseract
|
|
141
88
|
## * vie - Vietnamese
|
142
89
|
## Note: Make sure you have installed the language to tesseract
|
143
90
|
def lang
|
144
|
-
language = "#{
|
91
|
+
language = "#{self.configuration.lang}".strip.downcase
|
145
92
|
LANGUAGES.each do |value, names|
|
146
93
|
return " -l #{value} " if names.include? language
|
147
94
|
end
|
@@ -153,11 +100,37 @@ class RTesseract
|
|
153
100
|
|
154
101
|
# Page Segment Mode
|
155
102
|
def psm
|
156
|
-
(
|
103
|
+
(self.configuration.psm.nil? ? '' : " -psm #{self.configuration.psm} ")
|
157
104
|
rescue
|
158
105
|
''
|
159
106
|
end
|
160
107
|
|
108
|
+
# Tessdata Dir
|
109
|
+
def tessdata_dir
|
110
|
+
(self.configuration.tessdata_dir.nil? ? '' : " --tessdata-dir #{self.configuration.tessdata_dir} ")
|
111
|
+
rescue
|
112
|
+
''
|
113
|
+
end
|
114
|
+
|
115
|
+
# User Words
|
116
|
+
def user_words
|
117
|
+
(self.configuration.user_words.nil? ? '' : " --user-words #{self.configuration.user_words} ")
|
118
|
+
rescue
|
119
|
+
''
|
120
|
+
end
|
121
|
+
|
122
|
+
# User Patterns
|
123
|
+
def user_patterns
|
124
|
+
(self.configuration.user_patterns.nil? ? '' : " --user-patterns #{self.configuration.user_patterns} ")
|
125
|
+
rescue
|
126
|
+
''
|
127
|
+
end
|
128
|
+
|
129
|
+
# Options on line
|
130
|
+
def options_cmd
|
131
|
+
self.configuration.options_cmd
|
132
|
+
end
|
133
|
+
|
161
134
|
def config_hook
|
162
135
|
end
|
163
136
|
|
@@ -178,12 +151,12 @@ class RTesseract
|
|
178
151
|
|
179
152
|
# TODO: Clear console for MacOS or Windows
|
180
153
|
def clear_console_output
|
181
|
-
return ''
|
154
|
+
return '' if self.configuration.debug
|
182
155
|
return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear
|
183
156
|
end
|
184
157
|
|
185
158
|
def image
|
186
|
-
(@image = @processor.image_to_tif(@source, @
|
159
|
+
(@image = @processor.image_to_tif(@source, @points)).path
|
187
160
|
end
|
188
161
|
|
189
162
|
def file_ext
|
@@ -199,7 +172,7 @@ class RTesseract
|
|
199
172
|
end
|
200
173
|
|
201
174
|
def convert_command
|
202
|
-
`#{
|
175
|
+
`#{self.configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{self.configuration.options_cmd.join(' ')}`
|
203
176
|
end
|
204
177
|
|
205
178
|
def convert_text
|
@@ -254,8 +227,6 @@ class RTesseract
|
|
254
227
|
processor =
|
255
228
|
if MiniMagickProcessor.a_name?(processor.to_s)
|
256
229
|
MiniMagickProcessor
|
257
|
-
elsif QuickMagickProcessor.a_name?(processor.to_s)
|
258
|
-
QuickMagickProcessor
|
259
230
|
elsif NoneProcessor.a_name?(processor.to_s)
|
260
231
|
NoneProcessor
|
261
232
|
else
|
data/rtesseract.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: rtesseract
|
5
|
+
# stub: rtesseract 2.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "rtesseract"
|
9
|
-
s.version = "
|
9
|
+
s.version = "2.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Danilo Jeremias da Silva"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-04-19"
|
15
15
|
s.description = "Ruby library for working with the Tesseract OCR."
|
16
16
|
s.email = "dannnylo@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -30,11 +30,11 @@ Gem::Specification.new do |s|
|
|
30
30
|
"VERSION",
|
31
31
|
"lib/processors/mini_magick.rb",
|
32
32
|
"lib/processors/none.rb",
|
33
|
-
"lib/processors/quick_magick.rb",
|
34
33
|
"lib/processors/rmagick.rb",
|
35
34
|
"lib/rtesseract.rb",
|
36
35
|
"lib/rtesseract/box.rb",
|
37
36
|
"lib/rtesseract/box_char.rb",
|
37
|
+
"lib/rtesseract/configuration.rb",
|
38
38
|
"lib/rtesseract/errors.rb",
|
39
39
|
"lib/rtesseract/mixed.rb",
|
40
40
|
"lib/utils.rb",
|
@@ -69,7 +69,7 @@ Gem::Specification.new do |s|
|
|
69
69
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
70
70
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
71
71
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
72
|
-
s.add_development_dependency(%q<jeweler>, ["~> 2.
|
72
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.1.1"])
|
73
73
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
74
74
|
s.add_development_dependency(%q<coveralls>, [">= 0"])
|
75
75
|
else
|
@@ -77,7 +77,7 @@ Gem::Specification.new do |s|
|
|
77
77
|
s.add_dependency(%q<rspec>, [">= 0"])
|
78
78
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
79
79
|
s.add_dependency(%q<bundler>, [">= 0"])
|
80
|
-
s.add_dependency(%q<jeweler>, ["~> 2.
|
80
|
+
s.add_dependency(%q<jeweler>, ["~> 2.1.1"])
|
81
81
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
82
82
|
s.add_dependency(%q<coveralls>, [">= 0"])
|
83
83
|
end
|
@@ -86,7 +86,7 @@ Gem::Specification.new do |s|
|
|
86
86
|
s.add_dependency(%q<rspec>, [">= 0"])
|
87
87
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
88
88
|
s.add_dependency(%q<bundler>, [">= 0"])
|
89
|
-
s.add_dependency(%q<jeweler>, ["~> 2.
|
89
|
+
s.add_dependency(%q<jeweler>, ["~> 2.1.1"])
|
90
90
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
91
91
|
s.add_dependency(%q<coveralls>, [">= 0"])
|
92
92
|
end
|
data/spec/rtesseract_box_char_spec.rb
CHANGED
@@ -76,7 +76,6 @@ describe 'Rtesseract::BoxChar' do
|
|
76
76
|
|
77
77
|
expect { RTesseract::BoxChar.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
78
78
|
expect { RTesseract::BoxChar.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
79
|
-
|
80
|
-
#expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
79
|
+
# expect(RTesseract::BoxChar.new(@path.join('images', 'blank.tif').to_s, options: :digits).characters).to eql([])
|
81
80
|
end
|
82
81
|
end
|
data/spec/rtesseract_box_spec.rb
CHANGED
@@ -31,7 +31,6 @@ describe 'Rtesseract::Box' do
|
|
31
31
|
expect(RTesseract::Box.new(@words_image).to_s).to eql('If you are a friend, you speak the password, and the doors will open.')
|
32
32
|
expect { RTesseract::Box.new(@image_tiff, command: 'tesseract_error').to_s }.to raise_error(RTesseract::ConversionError)
|
33
33
|
expect { RTesseract::Box.new(@image_tiff + '_not_exist').to_s }.to raise_error(RTesseract::ImageNotSelectedError)
|
34
|
-
|
35
|
-
#expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
34
|
+
# expect(RTesseract::Box.new(@path.join('images', 'blank.tif').to_s, options: :digits).words).to eql([])
|
36
35
|
end
|
37
36
|
end
|
data/spec/rtesseract_mixed_spec.rb
CHANGED
@@ -15,20 +15,20 @@ describe 'Rtesseract::Mixed' do
|
|
15
15
|
|
16
16
|
it 'should translate parts of the image to text' do
|
17
17
|
mix_block = RTesseract::Mixed.new(@image_tif, psm: 7) do |image|
|
18
|
-
image.area(28,
|
19
|
-
image.area(180, 22, 20, 28) # position of 3
|
20
|
-
image.area(218, 22, 24, 28) # position of F
|
21
|
-
image.area(248, 24, 22, 22) # position of F
|
18
|
+
image.area(x: 28, y: 19, w: 25, h: 25) # position of 4
|
19
|
+
image.area(x: 180, y: 22, w: 20, h: 28) # position of 3
|
20
|
+
image.area(x: 218, y: 22, w: 24, h: 28) # position of F
|
21
|
+
image.area(x: 248, y: 24, w: 22, h: 22) # position of F
|
22
22
|
end
|
23
23
|
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
24
24
|
mix_block.clear_areas
|
25
25
|
expect(mix_block.areas).to eql([])
|
26
26
|
|
27
27
|
@areas = []
|
28
|
-
@areas << { x: 28, y: 19,
|
29
|
-
@areas << { x: 180, y: 22,
|
30
|
-
@areas << { x: 218, y: 22,
|
31
|
-
@areas << { x: 248, y: 24,
|
28
|
+
@areas << { x: 28, y: 19, w: 25, h: 25 } # position of 4
|
29
|
+
@areas << { x: 180, y: 22, w: 20, h: 28 } # position of 3
|
30
|
+
@areas << { x: 218, y: 22, w: 24, h: 28 } # position of f
|
31
|
+
@areas << { x: 248, y: 24, w: 22, h: 22 } # position of f
|
32
32
|
|
33
33
|
mix_block = RTesseract::Mixed.new(@image_tif, areas: @areas, psm: 7)
|
34
34
|
expect(mix_block.to_s_without_spaces).to eql('43FF')
|
@@ -38,7 +38,7 @@ describe 'Rtesseract::Mixed' do
|
|
38
38
|
end
|
39
39
|
|
40
40
|
it ' get a error' do
|
41
|
-
@areas = [{ x: 28, y: 19,
|
41
|
+
@areas = [{ x: 28, y: 19, w: 25, h: 25 }]
|
42
42
|
|
43
43
|
mix_block = RTesseract::Mixed.new(@path.join('images', 'test_not_exists.png').to_s, areas: @areas, psm: 7)
|
44
44
|
expect { mix_block.to_s_without_spaces }.to raise_error(RTesseract::ImageNotSelectedError)
|
data/spec/rtesseract_spec.rb
CHANGED
@@ -35,7 +35,7 @@ describe 'Rtesseract' do
|
|
35
35
|
end
|
36
36
|
|
37
37
|
it ' should not error with depth > 32' do
|
38
|
-
#expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
38
|
+
# expect(RTesseract.new(@path.join('images', 'README.pdf').to_s, debug: true).to_s_without_spaces).to eql('')
|
39
39
|
end
|
40
40
|
|
41
41
|
it ' support different processors' do
|
@@ -48,15 +48,10 @@ describe 'Rtesseract' do
|
|
48
48
|
expect(RTesseract.new(@image_tif, processor: 'mini_magick').to_s_without_spaces).to eql('43XF')
|
49
49
|
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'mini_magick').to_s_without_spaces).to eql('HW9W')
|
50
50
|
|
51
|
-
# QuickMagick
|
52
|
-
expect(RTesseract.new(@image_tif, processor: 'quick_magick').to_s_without_spaces).to eql('43XF')
|
53
|
-
expect(RTesseract.new(@path.join('images', 'test.png').to_s, processor: 'quick_magick').to_s_without_spaces).to eql('HW9W')
|
54
|
-
|
55
51
|
# NoneMagick
|
56
52
|
expect(RTesseract.new(@image_tif, processor: 'none').to_s_without_spaces).to eql('43XF')
|
57
53
|
end
|
58
54
|
|
59
|
-
|
60
55
|
it ' change the image' do
|
61
56
|
image = RTesseract.new(@image_tif)
|
62
57
|
expect(image.to_s_without_spaces).to eql('43XF')
|
@@ -106,10 +101,10 @@ describe 'Rtesseract' do
|
|
106
101
|
end
|
107
102
|
|
108
103
|
it ' crop image' do
|
109
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
110
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
111
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
112
|
-
expect(RTesseract.new(@image_tif, psm: 7).crop!(
|
104
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 140, y: 10).to_s_without_spaces).to eql('4')
|
105
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 36, h: 40, x: 180, y: 10).to_s_without_spaces).to eql('3')
|
106
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 20, h: 40, x: 216, y: 10).to_s_without_spaces).to eql('X')
|
107
|
+
expect(RTesseract.new(@image_tif, psm: 7).crop!(w: 30, h: 40, x: 240, y: 10).to_s_without_spaces).to eql('F')
|
113
108
|
end
|
114
109
|
|
115
110
|
it ' read image from blob' do
|
@@ -132,9 +127,6 @@ describe 'Rtesseract' do
|
|
132
127
|
expect(MiniMagickProcessor.a_name?('teste')).to eql(false)
|
133
128
|
expect(MiniMagickProcessor.a_name?('mini_magick')).to eql(true)
|
134
129
|
expect(MiniMagickProcessor.a_name?('MiniMagickProcessor')).to eql(true)
|
135
|
-
expect(QuickMagickProcessor.a_name?('teste')).to eql(false)
|
136
|
-
expect(QuickMagickProcessor.a_name?('quick_magick')).to eql(true)
|
137
|
-
expect(QuickMagickProcessor.a_name?('QuickMagickProcessor')).to eql(true)
|
138
130
|
expect(NoneProcessor.a_name?('none')).to eql(true)
|
139
131
|
expect(NoneProcessor.a_name?('NoneProcessor')).to eql(true)
|
140
132
|
end
|
@@ -144,29 +136,29 @@ describe 'Rtesseract' do
|
|
144
136
|
expect(test.class).to eql(RTesseract)
|
145
137
|
|
146
138
|
test = RTesseract.new(@image_tif)
|
147
|
-
test.read do |
|
148
|
-
|
139
|
+
test.read do |_image|
|
140
|
+
_image = _image.quantize(256, Magick::GRAYColorspace)
|
149
141
|
end
|
150
142
|
expect(test.to_s_without_spaces).to eql('43XF')
|
151
143
|
|
152
144
|
test = RTesseract.new(@path.join('images', 'blank.tif').to_s)
|
153
|
-
test.read do |
|
154
|
-
|
145
|
+
test.read do |_image|
|
146
|
+
_image
|
155
147
|
end
|
156
148
|
expect(test.to_s_without_spaces).to eql('')
|
157
149
|
|
158
|
-
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |
|
159
|
-
|
150
|
+
test = RTesseract.read(@path.join('images', 'test.png').to_s) do |_image|
|
151
|
+
_image.rotate(90)
|
160
152
|
end
|
161
153
|
expect(test.to_s_without_spaces).to eql('HW9W')
|
162
154
|
|
163
|
-
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |
|
164
|
-
|
155
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en') do |_image|
|
156
|
+
_image = _image.white_threshold(245).quantize(256, Magick::GRAYColorspace)
|
165
157
|
end
|
166
158
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
167
159
|
|
168
|
-
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |
|
169
|
-
|
160
|
+
test = RTesseract.read(@path.join('images', 'test.jpg').to_s, lang: 'en', processor: 'mini_magick') do |_image|
|
161
|
+
_image.gravity 'south'
|
170
162
|
end
|
171
163
|
expect(test.to_s_without_spaces).to eql('3R8F')
|
172
164
|
end
|
@@ -188,30 +180,45 @@ describe 'Rtesseract' do
|
|
188
180
|
|
189
181
|
it ' support default config processors' do
|
190
182
|
# Rmagick
|
191
|
-
RTesseract.configure {|config| config.processor = 'rmagick' }
|
183
|
+
RTesseract.configure { |config| config.processor = 'rmagick' }
|
192
184
|
expect(RTesseract.new(@image_tif).processor.a_name?('rmagick')).to eql(true)
|
193
185
|
|
194
186
|
# MiniMagick
|
195
|
-
RTesseract.configure {|config| config.processor = 'mini_magick' }
|
187
|
+
RTesseract.configure { |config| config.processor = 'mini_magick' }
|
196
188
|
expect(RTesseract.new(@image_tif).processor.a_name?('mini_magick')).to eql(true)
|
197
189
|
|
198
|
-
# QuickMagick
|
199
|
-
RTesseract.configure {|config| config.processor = 'quick_magick' }
|
200
|
-
expect(RTesseract.new(@image_tif).processor.a_name?('quick_magick')).to eql(true)
|
201
|
-
|
202
190
|
# NoneMagick
|
203
|
-
RTesseract.configure {|config| config.processor = 'none' }
|
191
|
+
RTesseract.configure { |config| config.processor = 'none' }
|
204
192
|
expect(RTesseract.new(@image_tif).processor.a_name?('none')).to eql(true)
|
205
193
|
|
206
194
|
# overwrite default
|
207
|
-
RTesseract.configure {|config| config.processor = '
|
208
|
-
expect(RTesseract.new(@image_tif, processor: '
|
195
|
+
RTesseract.configure { |config| config.processor = 'rmagick' }
|
196
|
+
expect(RTesseract.new(@image_tif, processor: 'mini_magick').processor.a_name?('mini_magick')).to eql(true)
|
209
197
|
|
210
|
-
RTesseract.configure {|config| config.lang = 'portuguese' }
|
198
|
+
RTesseract.configure { |config| config.lang = 'portuguese' }
|
211
199
|
expect(RTesseract.new(@image_tif).lang).to eql(' -l por ')
|
212
200
|
|
213
|
-
RTesseract.configure {|config| config.psm = 7 }
|
201
|
+
RTesseract.configure { |config| config.psm = 7 }
|
214
202
|
expect(RTesseract.new(@image_tif).psm).to eql(' -psm 7 ')
|
203
|
+
|
204
|
+
|
205
|
+
RTesseract.configure { |config| config.tessdata_dir = '/tmp/test' }
|
206
|
+
expect(RTesseract.new(@image_tif).tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
207
|
+
|
208
|
+
RTesseract.configure { |config| config.user_words = '/tmp/test' }
|
209
|
+
expect(RTesseract.new(@image_tif).user_words).to eql(' --user-words /tmp/test ')
|
210
|
+
|
211
|
+
RTesseract.configure { |config| config.user_patterns = '/tmp/test' }
|
212
|
+
expect(RTesseract.new(@image_tif).user_patterns).to eql(' --user-patterns /tmp/test ')
|
215
213
|
end
|
216
214
|
|
215
|
+
it ' support new configs' do
|
216
|
+
expect(RTesseract.new(@image_tif, tessdata_dir: '/tmp/test').tessdata_dir).to eql(' --tessdata-dir /tmp/test ')
|
217
|
+
expect(RTesseract.new(@image_tif, user_words: '/tmp/test').user_words).to eql(' --user-words /tmp/test ')
|
218
|
+
expect(RTesseract.new(@image_tif, user_patterns: '/tmp/test').user_patterns).to eql(' --user-patterns /tmp/test ')
|
219
|
+
|
220
|
+
expect(RTesseract.new(@image_tif, tessdata_dir: MakeStringError.new).tessdata_dir).to eql('')
|
221
|
+
expect(RTesseract.new(@image_tif, user_words: MakeStringError.new).user_words).to eql('')
|
222
|
+
expect(RTesseract.new(@image_tif, user_patterns: MakeStringError.new).user_patterns).to eql('')
|
223
|
+
end
|
217
224
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rtesseract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danilo Jeremias da Silva
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: 2.1.1
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: 2.1.1
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: simplecov
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,11 +127,11 @@ files:
|
|
127
127
|
- VERSION
|
128
128
|
- lib/processors/mini_magick.rb
|
129
129
|
- lib/processors/none.rb
|
130
|
-
- lib/processors/quick_magick.rb
|
131
130
|
- lib/processors/rmagick.rb
|
132
131
|
- lib/rtesseract.rb
|
133
132
|
- lib/rtesseract/box.rb
|
134
133
|
- lib/rtesseract/box_char.rb
|
134
|
+
- lib/rtesseract/configuration.rb
|
135
135
|
- lib/rtesseract/errors.rb
|
136
136
|
- lib/rtesseract/mixed.rb
|
137
137
|
- lib/utils.rb
|
data/lib/processors/quick_magick.rb
REMOVED
@@ -1,34 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
# Add to rtesseract a image manipulation with QuickMagick
|
3
|
-
module QuickMagickProcessor
|
4
|
-
def self.setup
|
5
|
-
require 'quick_magick'
|
6
|
-
end
|
7
|
-
|
8
|
-
def self.a_name?(name)
|
9
|
-
%w(quick_magick QuickMagickProcessor).include?(name.to_s)
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.image_to_tif(source, x = nil, y = nil, w = nil, h = nil)
|
13
|
-
tmp_file = Tempfile.new(['', '.tif'])
|
14
|
-
cat = source.is_a?(Pathname) ? read_with_processor(source.to_s) : source
|
15
|
-
cat.compress = 'None'
|
16
|
-
cat.format = 'tif'
|
17
|
-
cat.alpha = 'off'
|
18
|
-
cat.crop("#{w}x#{h}+#{x}+#{y}") if need_crop?(x, y, w, h)
|
19
|
-
cat.write tmp_file.path.to_s
|
20
|
-
tmp_file
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.need_crop?(x = nil, y = nil, w = nil, h = nil)
|
24
|
-
x.to_f + y.to_f + w.to_f + h.to_f > 0
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.read_with_processor(path)
|
28
|
-
QuickMagick::Image.read(path.to_s).first
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.image?(object)
|
32
|
-
object.class == QuickMagick::Image
|
33
|
-
end
|
34
|
-
end
|