tesseract-ocr 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tesseract/api.rb +2 -2
- data/lib/tesseract/c/baseapi.rb +28 -6
- data/lib/tesseract/version.rb +1 -1
- data/test/tesseract_spec.rb +6 -6
- metadata +1 -1
data/lib/tesseract/api.rb
CHANGED
@@ -105,8 +105,8 @@ class API
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
-
def read_config_file (path
|
109
|
-
C::BaseAPI.read_config_file(to_ffi, path
|
108
|
+
def read_config_file (path)
|
109
|
+
C::BaseAPI.read_config_file(to_ffi, path)
|
110
110
|
end
|
111
111
|
|
112
112
|
def get_page_seg_mode
|
data/lib/tesseract/c/baseapi.rb
CHANGED
@@ -154,12 +154,6 @@ module BaseAPI
|
|
154
154
|
}
|
155
155
|
}
|
156
156
|
|
157
|
-
cpp.function %{
|
158
|
-
void read_config_file (TessBaseAPI* api, const char* filename, bool init_only) {
|
159
|
-
api->ReadConfigFile(filename, init_only);
|
160
|
-
}
|
161
|
-
}
|
162
|
-
|
163
157
|
cpp.function %{
|
164
158
|
void set_page_seg_mode (TessBaseAPI* api, PageSegMode mode) {
|
165
159
|
api->SetPageSegMode(mode);
|
@@ -244,6 +238,34 @@ module BaseAPI
|
|
244
238
|
}
|
245
239
|
}
|
246
240
|
end
|
241
|
+
|
242
|
+
begin
|
243
|
+
inline 'C++' do |cpp|
|
244
|
+
cpp.include 'tesseract/baseapi.h'
|
245
|
+
cpp.libraries 'tesseract'
|
246
|
+
|
247
|
+
cpp.raw 'using namespace tesseract;'
|
248
|
+
|
249
|
+
cpp.function %{
|
250
|
+
void read_config_file (TessBaseAPI* api, const char* filename) {
|
251
|
+
api->ReadConfigFile(filename, false);
|
252
|
+
}
|
253
|
+
}
|
254
|
+
end
|
255
|
+
rescue CompilationError
|
256
|
+
inline 'C++' do |cpp|
|
257
|
+
cpp.include 'tesseract/baseapi.h'
|
258
|
+
cpp.libraries 'tesseract'
|
259
|
+
|
260
|
+
cpp.raw 'using namespace tesseract;'
|
261
|
+
|
262
|
+
cpp.function %{
|
263
|
+
void read_config_file (TessBaseAPI* api, const char* filename) {
|
264
|
+
api->ReadConfigFile(filename);
|
265
|
+
}
|
266
|
+
}
|
267
|
+
end
|
268
|
+
end
|
247
269
|
end
|
248
270
|
|
249
271
|
end; end
|
data/lib/tesseract/version.rb
CHANGED
data/test/tesseract_spec.rb
CHANGED
@@ -13,7 +13,7 @@ describe Tesseract::Engine do
|
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'can read the second test image' do
|
16
|
-
engine.text_for('second.png').strip.should == "
|
16
|
+
engine.text_for('second.png').strip.should == "#{Tesseract::API.new.version == '3.01' ? ?| : ?I}'m 12 and what is this.\nINSTALL GENTOO\nOH HAI 1234"
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'raises when going out of the image boundaries' do
|
@@ -74,14 +74,14 @@ describe Tesseract::Engine do
|
|
74
74
|
|
75
75
|
describe '#whitelist' do
|
76
76
|
it 'makes everything into a number' do
|
77
|
-
engine.with { |e| e.whitelist = '1234567890' }.text_for('second.png').strip.should
|
77
|
+
engine.with { |e| e.whitelist = '1234567890' }.text_for('second.png').strip.should match(/^[\d\s]*$/)
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
81
|
describe '#page_segmentation_mode' do
|
82
82
|
it 'sets it correctly' do
|
83
83
|
engine.with {|e|
|
84
|
-
e.page_segmentation_mode =
|
84
|
+
e.page_segmentation_mode = :single_line
|
85
85
|
e.whitelist = [*'a'..'z', *'A'..'Z', *0..9, " ."].join
|
86
86
|
}.text_for('jsmj.png').strip.should == 'JSmj'
|
87
87
|
end
|
@@ -89,19 +89,19 @@ describe Tesseract::Engine do
|
|
89
89
|
|
90
90
|
describe '#blocks' do
|
91
91
|
it 'works properly with first image' do
|
92
|
-
engine.blocks_for('first.png').first.to_s.should ==
|
92
|
+
engine.blocks_for('first.png').first.to_s.strip.should == 'ABC'
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
96
|
describe '#paragraphs' do
|
97
97
|
it 'works properly with first image' do
|
98
|
-
engine.paragraphs_for('first.png').first.to_s.should ==
|
98
|
+
engine.paragraphs_for('first.png').first.to_s.strip.should == 'ABC'
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
102
|
describe '#lines' do
|
103
103
|
it 'works properly with first image' do
|
104
|
-
engine.lines_for('first.png').first.to_s.should ==
|
104
|
+
engine.lines_for('first.png').first.to_s.strip.should == 'ABC'
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|