tesseract-ocr 0.0.1.3 → 0.0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tesseract/api.rb +10 -3
- data/lib/tesseract/engine.rb +17 -9
- data/lib/tesseract/version.rb +1 -1
- data/test/tesseract_spec.rb +39 -0
- metadata +9 -9
data/lib/tesseract/api.rb
CHANGED
@@ -22,7 +22,6 @@
|
|
22
22
|
# or implied, of meh.
|
23
23
|
#++
|
24
24
|
|
25
|
-
require 'namedic'
|
26
25
|
require 'iso-639'
|
27
26
|
require 'tesseract/c'
|
28
27
|
|
@@ -42,6 +41,16 @@ class API
|
|
42
41
|
else
|
43
42
|
raise ArgumentError, 'invalid image'
|
44
43
|
end.tap {|image|
|
44
|
+
class << image
|
45
|
+
def width
|
46
|
+
C::pix_get_width(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def height
|
50
|
+
C::pix_get_height(self)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
45
54
|
ObjectSpace.define_finalizer image, image_finalizer(image)
|
46
55
|
}
|
47
56
|
end
|
@@ -139,8 +148,6 @@ class API
|
|
139
148
|
C::set_image(to_ffi, pix)
|
140
149
|
end
|
141
150
|
|
142
|
-
namedic :left, :top, :width, :height,
|
143
|
-
:alias => { :l => :left, :t => :top, :w => :width, :h => :height }
|
144
151
|
def set_rectangle (left, top, width, height)
|
145
152
|
C::set_rectangle(to_ffi, left, top, width, height)
|
146
153
|
end
|
data/lib/tesseract/engine.rb
CHANGED
@@ -93,17 +93,17 @@ class Engine
|
|
93
93
|
|
94
94
|
def blacklist (what = nil)
|
95
95
|
if what
|
96
|
-
set
|
96
|
+
set('tessedit_char_blacklist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
|
97
97
|
else
|
98
|
-
get
|
98
|
+
get('tessedit_char_blacklist').chars.to_a
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
102
|
def whitelist (what = nil)
|
103
103
|
if what
|
104
|
-
set
|
104
|
+
set('tessedit_char_whitelist', what.respond_to?(:to_a) ? what.to_a.join : what.to_s)
|
105
105
|
else
|
106
|
-
get
|
106
|
+
get('tessedit_char_whitelist').chars.to_a
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
@@ -132,8 +132,12 @@ class Engine
|
|
132
132
|
|
133
133
|
x ||= 0
|
134
134
|
y ||= 0
|
135
|
-
width ||=
|
136
|
-
height ||=
|
135
|
+
width ||= image.width
|
136
|
+
height ||= image.height
|
137
|
+
|
138
|
+
if (x + width) > image.width || (y + height) > image.height
|
139
|
+
raise IndexError, 'image access out of boundaries'
|
140
|
+
end
|
137
141
|
|
138
142
|
@api.set_image(image)
|
139
143
|
@api.set_rectangle(x, y, width, height)
|
@@ -175,8 +179,12 @@ class Engine
|
|
175
179
|
page ||= 0
|
176
180
|
x ||= 0
|
177
181
|
y ||= 0
|
178
|
-
width ||=
|
179
|
-
height ||=
|
182
|
+
width ||= image.width
|
183
|
+
height ||= image.height
|
184
|
+
|
185
|
+
if (x + width) > image.width || (y + height) > image.height
|
186
|
+
raise IndexError, 'image access out of boundaries'
|
187
|
+
end
|
180
188
|
|
181
189
|
@api.set_image(image)
|
182
190
|
@api.set_rectangle(x, y, width, height)
|
@@ -189,7 +197,7 @@ class Engine
|
|
189
197
|
@y = y.to_i
|
190
198
|
@width = width.to_i
|
191
199
|
@height = height.to_i
|
192
|
-
@page = page.to_i
|
200
|
+
@page = page.to_i
|
193
201
|
|
194
202
|
class << self
|
195
203
|
attr_reader :x, :y, :width, :height, :page
|
data/lib/tesseract/version.rb
CHANGED
data/test/tesseract_spec.rb
CHANGED
@@ -15,6 +15,12 @@ describe Tesseract::Engine do
|
|
15
15
|
it 'can read the second test image' do
|
16
16
|
engine.text_for('second.png').strip.should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
17
17
|
end
|
18
|
+
|
19
|
+
it 'raises when going out of the image boundaries' do
|
20
|
+
expect {
|
21
|
+
engine.text_for('second.png', 0, 0, 1000, 1000)
|
22
|
+
}.should raise_error
|
23
|
+
end
|
18
24
|
end
|
19
25
|
|
20
26
|
describe '#text_at' do
|
@@ -27,6 +33,13 @@ describe Tesseract::Engine do
|
|
27
33
|
engine.image = 'second.png'
|
28
34
|
engine.text_at(242, 191, 129, 31).strip.should == 'OH HAI 1234'
|
29
35
|
end
|
36
|
+
|
37
|
+
it 'raises when going out of the image boundaries' do
|
38
|
+
expect {
|
39
|
+
engine.image = 'second.png'
|
40
|
+
engine.text_at(10, 20, 1000, 1000)
|
41
|
+
}.should raise_error
|
42
|
+
end
|
30
43
|
end
|
31
44
|
|
32
45
|
describe '#words_for' do
|
@@ -37,6 +50,12 @@ describe Tesseract::Engine do
|
|
37
50
|
it 'can read the second test image' do
|
38
51
|
engine.words_for('second.png').should == %w(|'m 12 and what is this. INSTALL GENTOO OH HAI 1234)
|
39
52
|
end
|
53
|
+
|
54
|
+
it 'raises when going out of the image boundaries' do
|
55
|
+
expect {
|
56
|
+
engine.words_for('second.png', 0, 0, 1000, 1000)
|
57
|
+
}.should raise_error
|
58
|
+
end
|
40
59
|
end
|
41
60
|
|
42
61
|
describe '#words_at' do
|
@@ -49,6 +68,13 @@ describe Tesseract::Engine do
|
|
49
68
|
engine.image = 'second.png'
|
50
69
|
engine.words_at(242, 191, 129, 31).should == %w(OH HAI 1234)
|
51
70
|
end
|
71
|
+
|
72
|
+
it 'raises when going out of the image boundaries' do
|
73
|
+
expect {
|
74
|
+
engine.image = 'second.png'
|
75
|
+
engine.words_at(10, 20, 1000, 1000)
|
76
|
+
}.should raise_error
|
77
|
+
end
|
52
78
|
end
|
53
79
|
|
54
80
|
describe '#chars_for' do
|
@@ -59,6 +85,12 @@ describe Tesseract::Engine do
|
|
59
85
|
it 'can read the second test image' do
|
60
86
|
engine.chars_for('second.png').should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234".gsub(/\s+/, '').split('')
|
61
87
|
end
|
88
|
+
|
89
|
+
it 'raises when going out of the image boundaries' do
|
90
|
+
expect {
|
91
|
+
engine.chars_for('second.png', 0, 0, 1000, 1000)
|
92
|
+
}.should raise_error
|
93
|
+
end
|
62
94
|
end
|
63
95
|
|
64
96
|
describe '#chars_at' do
|
@@ -75,6 +107,13 @@ describe Tesseract::Engine do
|
|
75
107
|
engine.image = 'second.png'
|
76
108
|
engine.chars_at(242, 191, 129, 31).should == 'OH HAI 1234'.gsub(/\s+/, '').split('')
|
77
109
|
end
|
110
|
+
|
111
|
+
it 'raises when going out of the image boundaries' do
|
112
|
+
expect {
|
113
|
+
engine.image = 'second.png'
|
114
|
+
engine.words_at(10, 20, 1000, 1000)
|
115
|
+
}.should raise_error
|
116
|
+
end
|
78
117
|
end
|
79
118
|
|
80
119
|
describe '#blacklist' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.3
|
4
|
+
version: 0.0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-11-27 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: namedic
|
16
|
-
requirement: &
|
16
|
+
requirement: &14007960 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *14007960
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: iso-639
|
27
|
-
requirement: &
|
27
|
+
requirement: &14007440 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *14007440
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ffi-extra
|
38
|
-
requirement: &
|
38
|
+
requirement: &14006760 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *14006760
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: ffi-inliner
|
49
|
-
requirement: &
|
49
|
+
requirement: &14005940 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *14005940
|
58
58
|
description:
|
59
59
|
email: meh@paranoici.org
|
60
60
|
executables:
|