tesseract-ocr 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +18 -7
- data/Rakefile +5 -1
- data/examples/nerdz-captcha-breaker.rb +56 -0
- data/lib/tesseract/api.rb +36 -65
- data/lib/tesseract/api/image.rb +84 -0
- data/lib/tesseract/api/iterator.rb +122 -0
- data/lib/tesseract/c.rb +7 -274
- data/lib/tesseract/c/baseapi.rb +237 -0
- data/lib/tesseract/c/iterator.rb +282 -0
- data/lib/tesseract/c/leptonica.rb +88 -0
- data/lib/tesseract/engine.rb +71 -81
- data/lib/tesseract/engine/baseline.rb +43 -0
- data/lib/tesseract/engine/bounding_box.rb +54 -0
- data/lib/tesseract/engine/font_attributes.rb +50 -0
- data/lib/tesseract/engine/iterator.rb +161 -0
- data/lib/tesseract/engine/orientation.rb +45 -0
- data/lib/tesseract/extensions.rb +2 -12
- data/lib/tesseract/iterator.rb +38 -0
- data/lib/tesseract/version.rb +1 -1
- data/tesseract-ocr.gemspec +1 -0
- data/test/tesseract_bench.rb +31 -0
- data/test/tesseract_spec.rb +16 -60
- metadata +35 -10
data/test/tesseract_spec.rb
CHANGED
@@ -42,89 +42,45 @@ describe Tesseract::Engine do
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
describe '#words_for' do
|
46
|
-
it 'can read the first test image' do
|
47
|
-
engine.words_for('first.png').should == ['ABC']
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'can read the second test image' do
|
51
|
-
engine.words_for('second.png').should == %w(|'m 12 and what is this. INSTALL GENTOO OH HAI 1234)
|
52
|
-
end
|
53
|
-
|
54
|
-
it 'raises when going out of the image boundaries' do
|
55
|
-
expect {
|
56
|
-
engine.words_for('second.png', 0, 0, 1000, 1000)
|
57
|
-
}.should raise_error
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
describe '#words_at' do
|
45
|
+
describe '#text' do
|
62
46
|
it 'can read the first test image' do
|
63
47
|
engine.image = 'first.png'
|
64
|
-
engine.
|
48
|
+
engine.select 2, 2, 2, 2
|
49
|
+
|
50
|
+
engine.text.strip.should == ''
|
65
51
|
end
|
66
52
|
|
67
53
|
it 'can read the second test image' do
|
68
54
|
engine.image = 'second.png'
|
69
|
-
engine.
|
55
|
+
engine.select 242, 191, 129, 31
|
56
|
+
engine.text.strip.should == 'OH HAI 1234'
|
70
57
|
end
|
71
58
|
|
72
59
|
it 'raises when going out of the image boundaries' do
|
73
60
|
expect {
|
74
61
|
engine.image = 'second.png'
|
75
|
-
engine.words_at(10, 20, 1000, 1000)
|
62
|
+
engine.select 10, 20, 1000, 1000
|
63
|
+
engine.text
|
76
64
|
}.should raise_error
|
77
65
|
end
|
78
|
-
end
|
79
|
-
|
80
|
-
describe '#chars_for' do
|
81
|
-
it 'can read the first test image' do
|
82
|
-
engine.chars_for('first.png').should == 'ABC'.split('')
|
83
|
-
end
|
84
66
|
|
85
|
-
it 'can read the second test image' do
|
86
|
-
engine.chars_for('second.png').should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234".gsub(/\s+/, '').split('')
|
87
|
-
end
|
88
|
-
|
89
|
-
it 'raises when going out of the image boundaries' do
|
90
|
-
expect {
|
91
|
-
engine.chars_for('second.png', 0, 0, 1000, 1000)
|
92
|
-
}.should raise_error
|
93
|
-
end
|
94
67
|
end
|
95
68
|
|
96
|
-
describe '#chars_at' do
|
97
|
-
it 'can read the first test image' do
|
98
|
-
pending 'weird results'
|
99
|
-
|
100
|
-
engine.image = 'first.png'
|
101
|
-
engine.chars_at(2, 2, 2, 2).should == []
|
102
|
-
end
|
103
|
-
|
104
|
-
it 'can read the second test image' do
|
105
|
-
pending 'weird results'
|
106
|
-
|
107
|
-
engine.image = 'second.png'
|
108
|
-
engine.chars_at(242, 191, 129, 31).should == 'OH HAI 1234'.gsub(/\s+/, '').split('')
|
109
|
-
end
|
110
|
-
|
111
|
-
it 'raises when going out of the image boundaries' do
|
112
|
-
expect {
|
113
|
-
engine.image = 'second.png'
|
114
|
-
engine.words_at(10, 20, 1000, 1000)
|
115
|
-
}.should raise_error
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
69
|
describe '#blacklist' do
|
120
70
|
it 'works with removing weird signs' do
|
121
|
-
engine.with { |e| e.blacklist '|' }.text_for('second.png').strip.should == "I'm 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
71
|
+
engine.with { |e| e.blacklist = '|' }.text_for('second.png').strip.should == "I'm 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
122
72
|
end
|
123
73
|
end
|
124
74
|
|
125
75
|
describe '#whitelist' do
|
126
76
|
it 'makes everything into a number' do
|
127
|
-
engine.with { |e| e.whitelist '1234567890' }.text_for('second.png').strip.should == "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234"
|
77
|
+
engine.with { |e| e.whitelist = '1234567890' }.text_for('second.png').strip.should == "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe '#each_block' do
|
82
|
+
it 'works properly with first image' do
|
83
|
+
|
128
84
|
end
|
129
85
|
end
|
130
86
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: namedic
|
16
|
-
requirement: &
|
16
|
+
requirement: &3194800 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *3194800
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: memoized
|
27
|
+
requirement: &3193660 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *3193660
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: iso-639
|
27
|
-
requirement: &
|
38
|
+
requirement: &3207460 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: '0'
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *3207460
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: ffi-extra
|
38
|
-
requirement: &
|
49
|
+
requirement: &3205940 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>='
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: '0'
|
44
55
|
type: :runtime
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *3205940
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: ffi-inliner
|
49
|
-
requirement: &
|
60
|
+
requirement: &3205380 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
@@ -54,7 +65,7 @@ dependencies:
|
|
54
65
|
version: '0'
|
55
66
|
type: :runtime
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *3205380
|
58
69
|
description:
|
59
70
|
email: meh@paranoici.org
|
60
71
|
executables:
|
@@ -65,16 +76,29 @@ files:
|
|
65
76
|
- README.md
|
66
77
|
- Rakefile
|
67
78
|
- bin/tesseract.rb
|
79
|
+
- examples/nerdz-captcha-breaker.rb
|
68
80
|
- lib/tesseract-ocr.rb
|
69
81
|
- lib/tesseract.rb
|
70
82
|
- lib/tesseract/api.rb
|
83
|
+
- lib/tesseract/api/image.rb
|
84
|
+
- lib/tesseract/api/iterator.rb
|
71
85
|
- lib/tesseract/c.rb
|
86
|
+
- lib/tesseract/c/baseapi.rb
|
87
|
+
- lib/tesseract/c/iterator.rb
|
88
|
+
- lib/tesseract/c/leptonica.rb
|
72
89
|
- lib/tesseract/engine.rb
|
90
|
+
- lib/tesseract/engine/baseline.rb
|
91
|
+
- lib/tesseract/engine/bounding_box.rb
|
92
|
+
- lib/tesseract/engine/font_attributes.rb
|
93
|
+
- lib/tesseract/engine/iterator.rb
|
94
|
+
- lib/tesseract/engine/orientation.rb
|
73
95
|
- lib/tesseract/extensions.rb
|
96
|
+
- lib/tesseract/iterator.rb
|
74
97
|
- lib/tesseract/version.rb
|
75
98
|
- tesseract-ocr.gemspec
|
76
99
|
- test/first.png
|
77
100
|
- test/second.png
|
101
|
+
- test/tesseract_bench.rb
|
78
102
|
- test/tesseract_spec.rb
|
79
103
|
- test/test-european.jpg
|
80
104
|
- test/test.png
|
@@ -105,6 +129,7 @@ summary: A wrapper library to the tesseract-ocr API.
|
|
105
129
|
test_files:
|
106
130
|
- test/first.png
|
107
131
|
- test/second.png
|
132
|
+
- test/tesseract_bench.rb
|
108
133
|
- test/tesseract_spec.rb
|
109
134
|
- test/test-european.jpg
|
110
135
|
- test/test.png
|