tesseract-ocr 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +18 -7
- data/Rakefile +5 -1
- data/examples/nerdz-captcha-breaker.rb +56 -0
- data/lib/tesseract/api.rb +36 -65
- data/lib/tesseract/api/image.rb +84 -0
- data/lib/tesseract/api/iterator.rb +122 -0
- data/lib/tesseract/c.rb +7 -274
- data/lib/tesseract/c/baseapi.rb +237 -0
- data/lib/tesseract/c/iterator.rb +282 -0
- data/lib/tesseract/c/leptonica.rb +88 -0
- data/lib/tesseract/engine.rb +71 -81
- data/lib/tesseract/engine/baseline.rb +43 -0
- data/lib/tesseract/engine/bounding_box.rb +54 -0
- data/lib/tesseract/engine/font_attributes.rb +50 -0
- data/lib/tesseract/engine/iterator.rb +161 -0
- data/lib/tesseract/engine/orientation.rb +45 -0
- data/lib/tesseract/extensions.rb +2 -12
- data/lib/tesseract/iterator.rb +38 -0
- data/lib/tesseract/version.rb +1 -1
- data/tesseract-ocr.gemspec +1 -0
- data/test/tesseract_bench.rb +31 -0
- data/test/tesseract_spec.rb +16 -60
- metadata +35 -10
data/test/tesseract_spec.rb
CHANGED
@@ -42,89 +42,45 @@ describe Tesseract::Engine do
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
describe '#words_for' do
|
46
|
-
it 'can read the first test image' do
|
47
|
-
engine.words_for('first.png').should == ['ABC']
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'can read the second test image' do
|
51
|
-
engine.words_for('second.png').should == %w(|'m 12 and what is this. INSTALL GENTOO OH HAI 1234)
|
52
|
-
end
|
53
|
-
|
54
|
-
it 'raises when going out of the image boundaries' do
|
55
|
-
expect {
|
56
|
-
engine.words_for('second.png', 0, 0, 1000, 1000)
|
57
|
-
}.should raise_error
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
describe '#words_at' do
|
45
|
+
describe '#text' do
|
62
46
|
it 'can read the first test image' do
|
63
47
|
engine.image = 'first.png'
|
64
|
-
engine.
|
48
|
+
engine.select 2, 2, 2, 2
|
49
|
+
|
50
|
+
engine.text.strip.should == ''
|
65
51
|
end
|
66
52
|
|
67
53
|
it 'can read the second test image' do
|
68
54
|
engine.image = 'second.png'
|
69
|
-
engine.
|
55
|
+
engine.select 242, 191, 129, 31
|
56
|
+
engine.text.strip.should == 'OH HAI 1234'
|
70
57
|
end
|
71
58
|
|
72
59
|
it 'raises when going out of the image boundaries' do
|
73
60
|
expect {
|
74
61
|
engine.image = 'second.png'
|
75
|
-
engine.words_at(10, 20, 1000, 1000)
|
62
|
+
engine.select 10, 20, 1000, 1000
|
63
|
+
engine.text
|
76
64
|
}.should raise_error
|
77
65
|
end
|
78
|
-
end
|
79
|
-
|
80
|
-
describe '#chars_for' do
|
81
|
-
it 'can read the first test image' do
|
82
|
-
engine.chars_for('first.png').should == 'ABC'.split('')
|
83
|
-
end
|
84
66
|
|
85
|
-
it 'can read the second test image' do
|
86
|
-
engine.chars_for('second.png').should == "|'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234".gsub(/\s+/, '').split('')
|
87
|
-
end
|
88
|
-
|
89
|
-
it 'raises when going out of the image boundaries' do
|
90
|
-
expect {
|
91
|
-
engine.chars_for('second.png', 0, 0, 1000, 1000)
|
92
|
-
}.should raise_error
|
93
|
-
end
|
94
67
|
end
|
95
68
|
|
96
|
-
describe '#chars_at' do
|
97
|
-
it 'can read the first test image' do
|
98
|
-
pending 'weird results'
|
99
|
-
|
100
|
-
engine.image = 'first.png'
|
101
|
-
engine.chars_at(2, 2, 2, 2).should == []
|
102
|
-
end
|
103
|
-
|
104
|
-
it 'can read the second test image' do
|
105
|
-
pending 'weird results'
|
106
|
-
|
107
|
-
engine.image = 'second.png'
|
108
|
-
engine.chars_at(242, 191, 129, 31).should == 'OH HAI 1234'.gsub(/\s+/, '').split('')
|
109
|
-
end
|
110
|
-
|
111
|
-
it 'raises when going out of the image boundaries' do
|
112
|
-
expect {
|
113
|
-
engine.image = 'second.png'
|
114
|
-
engine.words_at(10, 20, 1000, 1000)
|
115
|
-
}.should raise_error
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
69
|
describe '#blacklist' do
|
120
70
|
it 'works with removing weird signs' do
|
121
|
-
engine.with { |e| e.blacklist '|' }.text_for('second.png').strip.should == "I'm 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
71
|
+
engine.with { |e| e.blacklist = '|' }.text_for('second.png').strip.should == "I'm 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
122
72
|
end
|
123
73
|
end
|
124
74
|
|
125
75
|
describe '#whitelist' do
|
126
76
|
it 'makes everything into a number' do
|
127
|
-
engine.with { |e| e.whitelist '1234567890' }.text_for('second.png').strip.should == "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234"
|
77
|
+
engine.with { |e| e.whitelist = '1234567890' }.text_for('second.png').strip.should == "11111 12 3116 1111113115111151\n11157411 6511700\n014 11141 1234"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe '#each_block' do
|
82
|
+
it 'works properly with first image' do
|
83
|
+
|
128
84
|
end
|
129
85
|
end
|
130
86
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tesseract-ocr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: namedic
|
16
|
-
requirement: &
|
16
|
+
requirement: &3194800 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *3194800
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: memoized
|
27
|
+
requirement: &3193660 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *3193660
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: iso-639
|
27
|
-
requirement: &
|
38
|
+
requirement: &3207460 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: '0'
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *3207460
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: ffi-extra
|
38
|
-
requirement: &
|
49
|
+
requirement: &3205940 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>='
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: '0'
|
44
55
|
type: :runtime
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *3205940
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: ffi-inliner
|
49
|
-
requirement: &
|
60
|
+
requirement: &3205380 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
@@ -54,7 +65,7 @@ dependencies:
|
|
54
65
|
version: '0'
|
55
66
|
type: :runtime
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *3205380
|
58
69
|
description:
|
59
70
|
email: meh@paranoici.org
|
60
71
|
executables:
|
@@ -65,16 +76,29 @@ files:
|
|
65
76
|
- README.md
|
66
77
|
- Rakefile
|
67
78
|
- bin/tesseract.rb
|
79
|
+
- examples/nerdz-captcha-breaker.rb
|
68
80
|
- lib/tesseract-ocr.rb
|
69
81
|
- lib/tesseract.rb
|
70
82
|
- lib/tesseract/api.rb
|
83
|
+
- lib/tesseract/api/image.rb
|
84
|
+
- lib/tesseract/api/iterator.rb
|
71
85
|
- lib/tesseract/c.rb
|
86
|
+
- lib/tesseract/c/baseapi.rb
|
87
|
+
- lib/tesseract/c/iterator.rb
|
88
|
+
- lib/tesseract/c/leptonica.rb
|
72
89
|
- lib/tesseract/engine.rb
|
90
|
+
- lib/tesseract/engine/baseline.rb
|
91
|
+
- lib/tesseract/engine/bounding_box.rb
|
92
|
+
- lib/tesseract/engine/font_attributes.rb
|
93
|
+
- lib/tesseract/engine/iterator.rb
|
94
|
+
- lib/tesseract/engine/orientation.rb
|
73
95
|
- lib/tesseract/extensions.rb
|
96
|
+
- lib/tesseract/iterator.rb
|
74
97
|
- lib/tesseract/version.rb
|
75
98
|
- tesseract-ocr.gemspec
|
76
99
|
- test/first.png
|
77
100
|
- test/second.png
|
101
|
+
- test/tesseract_bench.rb
|
78
102
|
- test/tesseract_spec.rb
|
79
103
|
- test/test-european.jpg
|
80
104
|
- test/test.png
|
@@ -105,6 +129,7 @@ summary: A wrapper library to the tesseract-ocr API.
|
|
105
129
|
test_files:
|
106
130
|
- test/first.png
|
107
131
|
- test/second.png
|
132
|
+
- test/tesseract_bench.rb
|
108
133
|
- test/tesseract_spec.rb
|
109
134
|
- test/test-european.jpg
|
110
135
|
- test/test.png
|