tesseract-ocr 0.0.1.3 → 0.0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/tesseract/api.rb +10 -3
- data/lib/tesseract/engine.rb +17 -9
- data/lib/tesseract/version.rb +1 -1
- data/test/tesseract_spec.rb +39 -0
- metadata +9 -9
data/lib/tesseract/api.rb
CHANGED
@@ -22,7 +22,6 @@
|
|
22
22
|
# or implied, of meh.
|
23
23
|
#++
|
24
24
|
|
25
|
-
require 'namedic'
|
26
25
|
require 'iso-639'
|
27
26
|
require 'tesseract/c'
|
28
27
|
|
@@ -42,6 +41,16 @@ class API
|
|
42
41
|
else
|
43
42
|
raise ArgumentError, 'invalid image'
|
44
43
|
end.tap {|image|
|
44
|
+
class << image
|
45
|
+
def width
|
46
|
+
C::pix_get_width(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def height
|
50
|
+
C::pix_get_height(self)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
45
54
|
ObjectSpace.define_finalizer image, image_finalizer(image)
|
46
55
|
}
|
47
56
|
end
|
@@ -139,8 +148,6 @@ class API
|
|
139
148
|
C::set_image(to_ffi, pix)
|
140
149
|
end
|
141
150
|
|
142
|
-
namedic :left, :top, :width, :height,
|
143
|
-
:alias => { :l => :left, :t => :top, :w => :width, :h => :height }
|
144
151
|
def set_rectangle (left, top, width, height)
|
145
152
|
C::set_rectangle(to_ffi, left, top, width, height)
|
146
153
|
end
|
data/lib/tesseract/engine.rb
CHANGED
@@ -93,17 +93,17 @@ class Engine
|
|
93
93
|
|
94
94
|
def blacklist (what = nil)
|
95
95
|
if what
|
96
|
-
set
|
96
|
+
set('tessedit_char_blacklist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
|
97
97
|
else
|
98
|
-
get
|
98
|
+
get('tessedit_char_blacklist').chars.to_a
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
102
|
def whitelist (what = nil)
|
103
103
|
if what
|
104
|
-
set
|
104
|
+
set('tessedit_char_whitelist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
|
105
105
|
else
|
106
|
-
get
|
106
|
+
get('tessedit_char_whitelist').chars.to_a
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
@@ -132,8 +132,12 @@ class Engine
|
|
132
132
|
|
133
133
|
x ||= 0
|
134
134
|
y ||= 0
|
135
|
-
width ||=
|
136
|
-
height ||=
|
135
|
+
width ||= image.width
|
136
|
+
height ||= image.height
|
137
|
+
|
138
|
+
if (x + width) > image.width || (y + height) > image.height
|
139
|
+
raise IndexError, 'image access out of boundaries'
|
140
|
+
end
|
137
141
|
|
138
142
|
@api.set_image(image)
|
139
143
|
@api.set_rectangle(x, y, width, height)
|
@@ -175,8 +179,12 @@ class Engine
|
|
175
179
|
page ||= 0
|
176
180
|
x ||= 0
|
177
181
|
y ||= 0
|
178
|
-
width ||=
|
179
|
-
height ||=
|
182
|
+
width ||= image.width
|
183
|
+
height ||= image.height
|
184
|
+
|
185
|
+
if (x + width) > image.width || (y + height) > image.height
|
186
|
+
raise IndexError, 'image access out of boundaries'
|
187
|
+
end
|
180
188
|
|
181
189
|
@api.set_image(image)
|
182
190
|
@api.set_rectangle(x, y, width, height)
|
@@ -189,7 +197,7 @@ class Engine
|
|
189
197
|
@y = y.to_i
|
190
198
|
@width = width.to_i
|
191
199
|
@height = height.to_i
|
192
|
-
@page = page.to_i
|
200
|
+
@page = page.to_i
|
193
201
|
|
194
202
|
class << self
|
195
203
|
attr_reader :x, :y, :width, :height, :page
|
data/lib/tesseract/version.rb
CHANGED
data/test/tesseract_spec.rb
CHANGED
@@ -15,6 +15,12 @@ describe Tesseract::Engine do
|
|
15
15
|
it 'can read the second test image' do
|
16
16
|
engine.text_for('second.png').strip.should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
17
17
|
end
|
18
|
+
|
19
|
+
it 'raises when going out of the image boundaries' do
|
20
|
+
expect {
|
21
|
+
engine.text_for('second.png', 0, 0, 1000, 1000)
|
22
|
+
}.should raise_error
|
23
|
+
end
|
18
24
|
end
|
19
25
|
|
20
26
|
describe '#text_at' do
|
@@ -27,6 +33,13 @@ describe Tesseract::Engine do
|
|
27
33
|
engine.image = 'second.png'
|
28
34
|
engine.text_at(242, 191, 129, 31).strip.should == 'OH HAI 1234'
|
29
35
|
end
|
36
|
+
|
37
|
+
it 'raises when going out of the image boundaries' do
|
38
|
+
expect {
|
39
|
+
engine.image = 'second.png'
|
40
|
+
engine.text_at(10, 20, 1000, 1000)
|
41
|
+
}.should raise_error
|
42
|
+
end
|
30
43
|
end
|
31
44
|
|
32
45
|
describe '#words_for' do
|
@@ -37,6 +50,12 @@ describe Tesseract::Engine do
|
|
37
50
|
it 'can read the second test image' do
|
38
51
|
engine.words_for('second.png').should == %w(|'m 12 and what is this. INSTALL GENTOO OH HAI 1234)
|
39
52
|
end
|
53
|
+
|
54
|
+
it 'raises when going out of the image boundaries' do
|
55
|
+
expect {
|
56
|
+
engine.words_for('second.png', 0, 0, 1000, 1000)
|
57
|
+
}.should raise_error
|
58
|
+
end
|
40
59
|
end
|
41
60
|
|
42
61
|
describe '#words_at' do
|
@@ -49,6 +68,13 @@ describe Tesseract::Engine do
|
|
49
68
|
engine.image = 'second.png'
|
50
69
|
engine.words_at(242, 191, 129, 31).should == %w(OH HAI 1234)
|
51
70
|
end
|
71
|
+
|
72
|
+
it 'raises when going out of the image boundaries' do
|
73
|
+
expect {
|
74
|
+
engine.image = 'second.png'
|
75
|
+
engine.words_at(10, 20, 1000, 1000)
|
76
|
+
}.should raise_error
|
77
|
+
end
|
52
78
|
end
|
53
79
|
|
54
80
|
describe '#chars_for' do
|
@@ -59,6 +85,12 @@ describe Tesseract::Engine do
|
|
59
85
|
it 'can read the second test image' do
|
60
86
|
engine.chars_for('second.png').should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234".gsub(/\s+/, '').split('')
|
61
87
|
end
|
88
|
+
|
89
|
+
it 'raises when going out of the image boundaries' do
|
90
|
+
expect {
|
91
|
+
engine.chars_for('second.png', 0, 0, 1000, 1000)
|
92
|
+
}.should raise_error
|
93
|
+
end
|
62
94
|
end
|
63
95
|
|
64
96
|
describe '#chars_at' do
|
@@ -75,6 +107,13 @@ describe Tesseract::Engine do
|
|
75
107
|
engine.image = 'second.png'
|
76
108
|
engine.chars_at(242, 191, 129, 31).should == 'OH HAI 1234'.gsub(/\s+/, '').split('')
|
77
109
|
end
|
110
|
+
|
111
|
+
it 'raises when going out of the image boundaries' do
|
112
|
+
expect {
|
113
|
+
engine.image = 'second.png'
|
114
|
+
engine.words_at(10, 20, 1000, 1000)
|
115
|
+
}.should raise_error
|
116
|
+
end
|
78
117
|
end
|
79
118
|
|
80
119
|
describe '#blacklist' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.3
|
4
|
+
version: 0.0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-11-27 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: namedic
|
16
|
-
requirement: &
|
16
|
+
requirement: &14007960 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *14007960
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: iso-639
|
27
|
-
requirement: &
|
27
|
+
requirement: &14007440 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *14007440
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ffi-extra
|
38
|
-
requirement: &
|
38
|
+
requirement: &14006760 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *14006760
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ffi-inliner
|
49
|
-
requirement: &
|
49
|
+
requirement: &14005940 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *14005940
|
58
58
|
description:
|
59
59
|
email: meh@paranoici.org
|
60
60
|
executables:
|