tesseract-ocr 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/tesseract/c.rb CHANGED
@@ -32,191 +32,6 @@ module C
32
32
  extend FFI::Inliner
33
33
 
34
34
  inline 'C++' do |cpp|
35
- # leptonica wrapping
36
- cpp.include 'leptonica/allheaders.h'
37
- cpp.libraries 'lept'
38
-
39
- cpp.function %{
40
- Pix* pix_read (const char* path) {
41
- return pixRead(path);
42
- }
43
- }
44
-
45
- cpp.function %{
46
- Pix* pix_read_fd (int fd) {
47
- return pixReadStream(fdopen(fd, "rb"), 0);
48
- }
49
- }
50
-
51
- cpp.function %{
52
- Pix* pix_read_mem (const l_uint8* data, size_t size) {
53
- return pixReadMem(data, size);
54
- }
55
- }
56
-
57
- cpp.function %{
58
- void pix_destroy (Pix* pix) {
59
- pixDestroy(&pix);
60
- }
61
- }
62
-
63
- cpp.function %{
64
- int32_t pix_get_width (Pix* pix) {
65
- return pixGetWidth(pix);
66
- }
67
- }
68
-
69
- cpp.function %{
70
- int32_t pix_get_height (Pix* pix) {
71
- return pixGetHeight(pix);
72
- }
73
- }
74
-
75
- # tesseract wrapping
76
- cpp.include 'tesseract/baseapi.h'
77
- cpp.libraries 'tesseract'
78
-
79
- cpp.raw 'using namespace tesseract;'
80
-
81
- cpp.eval {
82
- enum :OcrEngineMode, [
83
- :TESSERACT_ONLY, :CUBE_ONLY, :TESSERACT_CUBE_COMBINED, :DEFAULT
84
- ]
85
-
86
- enum :PageSegMode, [
87
- :OSD_ONLY, :AUTO_OSD,
88
- :AUTO_ONLY, :AUTO, :SINGLE_COLUMN, :SINGLE_BLOCK_VERT_TEXT,
89
- :SINGLE_BLOCK, :SINGLE_LINE, :SINGLE_WORD, :CIRCLE_WORD, :SINGLE_CHAR,
90
- :COUNT
91
- ]
92
- }
93
-
94
- cpp.function %{
95
- TessBaseAPI* create (void) {
96
- return new TessBaseAPI();
97
- }
98
- }
99
-
100
- cpp.function %{
101
- void destroy (TessBaseAPI* api) {
102
- delete api;
103
- }
104
- }
105
-
106
- cpp.function %{
107
- const char* version (TessBaseAPI* api) {
108
- return api->Version();
109
- }
110
- }, return: :string
111
-
112
- cpp.function %{
113
- void set_input_name (TessBaseAPI* api, const char* name) {
114
- api->SetInputName(name);
115
- }
116
- }
117
-
118
- cpp.function %{
119
- void set_output_name (TessBaseAPI* api, const char* name) {
120
- api->SetOutputName(name);
121
- }
122
- }
123
-
124
- cpp.function %{
125
- bool set_variable (TessBaseAPI* api, const char* name, const char* value) {
126
- return api->SetVariable(name, value);
127
- }
128
- }
129
-
130
- cpp.function %{
131
- bool has_int_variable (TessBaseAPI* api, const char* name) {
132
- int tmp;
133
-
134
- return api->GetIntVariable(name, &tmp);
135
- }
136
- }
137
-
138
- cpp.function %{
139
- bool has_bool_variable (TessBaseAPI* api, const char* name) {
140
- bool tmp;
141
-
142
- return api->GetBoolVariable(name, &tmp);
143
- }
144
- }
145
-
146
- cpp.function %{
147
- bool has_double_variable (TessBaseAPI* api, const char* name) {
148
- double tmp;
149
-
150
- return api->GetDoubleVariable(name, &tmp);
151
- }
152
- }
153
-
154
- cpp.function %{
155
- bool has_string_variable (TessBaseAPI* api, const char* name) {
156
- return api->GetStringVariable(name) != NULL;
157
- }
158
- }
159
-
160
- cpp.function %{
161
- int get_int_variable (TessBaseAPI* api, const char* name) {
162
- int result = 0;
163
-
164
- api->GetIntVariable(name, &result);
165
-
166
- return result;
167
- }
168
- }
169
-
170
- cpp.function %{
171
- bool get_bool_variable (TessBaseAPI* api, const char* name) {
172
- bool result = false;
173
-
174
- api->GetBoolVariable(name, &result);
175
-
176
- return result;
177
- }
178
- }
179
-
180
- cpp.function %{
181
- double get_double_variable (TessBaseAPI* api, const char* name) {
182
- double result = 0;
183
-
184
- api->GetDoubleVariable(name, &result);
185
-
186
- return result;
187
- }
188
- }
189
-
190
- cpp.function %{
191
- const char* get_string_variable (TessBaseAPI* api, const char* name) {
192
- return api->GetStringVariable(name);
193
- }
194
- }, return: :string
195
-
196
- cpp.function %{
197
- int init (TessBaseAPI* api, const char* datapath, const char* language, OcrEngineMode oem) {
198
- return api->Init(datapath, language, oem);
199
- }
200
- }
201
-
202
- cpp.function %{
203
- void read_config_file (TessBaseAPI* api, const char* filename, bool init_only) {
204
- api->ReadConfigFile(filename, init_only);
205
- }
206
- }
207
-
208
- cpp.function %{
209
- void set_page_seg_mode (TessBaseAPI* api, PageSegMode mode) {
210
- api->SetPageSegMode(mode);
211
- }
212
- }
213
-
214
- cpp.function %{
215
- PageSegMode get_page_seg_mode (TessBaseAPI* api) {
216
- return api->GetPageSegMode();
217
- }
218
- }
219
-
220
35
  cpp.function %{
221
36
  void free_string (char* pointer) {
222
37
  delete [] pointer;
@@ -228,97 +43,15 @@ module C
228
43
  delete [] pointer;
229
44
  }
230
45
  }
46
+ end
231
47
 
232
- cpp.function %{
233
- void set_image (TessBaseAPI* api, const Pix* pix) {
234
- api->SetImage(pix);
235
- }
236
- }
237
-
238
- cpp.function %{
239
- void set_rectangle (TessBaseAPI* api, int left, int top, int width, int height) {
240
- api->SetRectangle(left, top, width, height);
241
- }
242
- }
243
-
244
- cpp.function %{
245
- Pix* get_thresholded_image (TessBaseAPI* api) {
246
- return api->GetThresholdedImage();
247
- }
248
- }
249
-
250
- cpp.function %{
251
- Boxa* get_regions (TessBaseAPI* api, Pixa** pixa) {
252
- return api->GetRegions(pixa);
253
- }
254
- }
255
-
256
- cpp.function %{
257
- Boxa* get_textlines (TessBaseAPI* api, Pixa** pixa, int** blockids) {
258
- return api->GetTextlines(pixa, blockids);
259
- }
260
- }
261
-
262
- cpp.function %{
263
- Boxa* get_words (TessBaseAPI* api, Pixa** pixa) {
264
- return api->GetWords(pixa);
265
- }
266
- }
267
-
268
- cpp.function %{
269
- Boxa* get_connected_components (TessBaseAPI* api, Pixa** cc) {
270
- return api->GetConnectedComponents(cc);
271
- }
272
- }
273
-
274
- cpp.function %{
275
- char* get_utf8_text (TessBaseAPI* api) {
276
- return api->GetUTF8Text();
277
- }
278
- }
279
-
280
- cpp.function %{
281
- char* get_hocr_text (TessBaseAPI* api, int page_number) {
282
- return api->GetHOCRText(page_number);
283
- }
284
- }
285
-
286
- cpp.function %{
287
- char* get_box_text (TessBaseAPI* api, int page_number) {
288
- return api->GetBoxText(page_number);
289
- }
290
- }
291
-
292
- cpp.function %{
293
- char* get_unlv_text (TessBaseAPI* api) {
294
- return api->GetUNLVText();
295
- }
296
- }
297
-
298
- cpp.function %{
299
- int mean_text_conf (TessBaseAPI* api) {
300
- return api->MeanTextConf();
301
- }
302
- }
303
-
304
- cpp.function %{
305
- int* all_word_confidences (TessBaseAPI* api) {
306
- return api->AllWordConfidences();
307
- }
308
- }
309
-
310
- cpp.function %{
311
- void clear (TessBaseAPI* api) {
312
- api->Clear();
313
- }
314
- }
315
-
316
- cpp.function %{
317
- void end (TessBaseAPI* api) {
318
- api->End();
319
- }
320
- }
48
+ def self.for_enum (what)
49
+ what.is_a?(Integer) ? what : what.to_s.upcase.to_sym
321
50
  end
322
51
  end
323
52
 
324
53
  end
54
+
55
+ require 'tesseract/c/leptonica'
56
+ require 'tesseract/c/baseapi'
57
+ require 'tesseract/c/iterator'
@@ -0,0 +1,237 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ module Tesseract; module C
26
+
27
+ module BaseAPI
28
+ extend FFI::Inliner
29
+
30
+ inline 'C++' do |cpp|
31
+ cpp.include 'tesseract/baseapi.h'
32
+ cpp.libraries 'tesseract'
33
+
34
+ cpp.raw 'using namespace tesseract;'
35
+
36
+ cpp.eval {
37
+ enum :OcrEngineMode, [
38
+ :TESSERACT_ONLY, :CUBE_ONLY, :TESSERACT_CUBE_COMBINED, :DEFAULT
39
+ ]
40
+
41
+ enum :PageSegMode, [
42
+ :OSD_ONLY, :AUTO_OSD,
43
+ :AUTO_ONLY, :AUTO, :SINGLE_COLUMN, :SINGLE_BLOCK_VERT_TEXT,
44
+ :SINGLE_BLOCK, :SINGLE_LINE, :SINGLE_WORD, :CIRCLE_WORD, :SINGLE_CHAR,
45
+ :COUNT
46
+ ]
47
+ }
48
+
49
+ cpp.function %{
50
+ TessBaseAPI* create (void) {
51
+ return new TessBaseAPI();
52
+ }
53
+ }
54
+
55
+ cpp.function %{
56
+ void destroy (TessBaseAPI* api) {
57
+ delete api;
58
+ }
59
+ }
60
+
61
+ cpp.function %{
62
+ const char* version (TessBaseAPI* api) {
63
+ return api->Version();
64
+ }
65
+ }, return: :string
66
+
67
+ cpp.function %{
68
+ void set_input_name (TessBaseAPI* api, const char* name) {
69
+ api->SetInputName(name);
70
+ }
71
+ }
72
+
73
+ cpp.function %{
74
+ void set_output_name (TessBaseAPI* api, const char* name) {
75
+ api->SetOutputName(name);
76
+ }
77
+ }
78
+
79
+ cpp.function %{
80
+ bool set_variable (TessBaseAPI* api, const char* name, const char* value) {
81
+ return api->SetVariable(name, value);
82
+ }
83
+ }
84
+
85
+ cpp.function %{
86
+ bool has_int_variable (TessBaseAPI* api, const char* name) {
87
+ int tmp;
88
+
89
+ return api->GetIntVariable(name, &tmp);
90
+ }
91
+ }
92
+
93
+ cpp.function %{
94
+ bool has_bool_variable (TessBaseAPI* api, const char* name) {
95
+ bool tmp;
96
+
97
+ return api->GetBoolVariable(name, &tmp);
98
+ }
99
+ }
100
+
101
+ cpp.function %{
102
+ bool has_double_variable (TessBaseAPI* api, const char* name) {
103
+ double tmp;
104
+
105
+ return api->GetDoubleVariable(name, &tmp);
106
+ }
107
+ }
108
+
109
+ cpp.function %{
110
+ bool has_string_variable (TessBaseAPI* api, const char* name) {
111
+ return api->GetStringVariable(name) != NULL;
112
+ }
113
+ }
114
+
115
+ cpp.function %{
116
+ int get_int_variable (TessBaseAPI* api, const char* name) {
117
+ int result = 0;
118
+
119
+ api->GetIntVariable(name, &result);
120
+
121
+ return result;
122
+ }
123
+ }
124
+
125
+ cpp.function %{
126
+ bool get_bool_variable (TessBaseAPI* api, const char* name) {
127
+ bool result = false;
128
+
129
+ api->GetBoolVariable(name, &result);
130
+
131
+ return result;
132
+ }
133
+ }
134
+
135
+ cpp.function %{
136
+ double get_double_variable (TessBaseAPI* api, const char* name) {
137
+ double result = 0;
138
+
139
+ api->GetDoubleVariable(name, &result);
140
+
141
+ return result;
142
+ }
143
+ }
144
+
145
+ cpp.function %{
146
+ const char* get_string_variable (TessBaseAPI* api, const char* name) {
147
+ return api->GetStringVariable(name);
148
+ }
149
+ }, return: :string
150
+
151
+ cpp.function %{
152
+ int init (TessBaseAPI* api, const char* datapath, const char* language, OcrEngineMode oem) {
153
+ return api->Init(datapath, language, oem);
154
+ }
155
+ }
156
+
157
+ cpp.function %{
158
+ void read_config_file (TessBaseAPI* api, const char* filename, bool init_only) {
159
+ api->ReadConfigFile(filename, init_only);
160
+ }
161
+ }
162
+
163
+ cpp.function %{
164
+ void set_page_seg_mode (TessBaseAPI* api, PageSegMode mode) {
165
+ api->SetPageSegMode(mode);
166
+ }
167
+ }
168
+
169
+ cpp.function %{
170
+ PageSegMode get_page_seg_mode (TessBaseAPI* api) {
171
+ return api->GetPageSegMode();
172
+ }
173
+ }
174
+
175
+ cpp.function %{
176
+ void set_image (TessBaseAPI* api, const Pix* pix) {
177
+ api->SetImage(pix);
178
+ }
179
+ }
180
+
181
+ cpp.function %{
182
+ void set_rectangle (TessBaseAPI* api, int left, int top, int width, int height) {
183
+ api->SetRectangle(left, top, width, height);
184
+ }
185
+ }
186
+
187
+ cpp.function %{
188
+ ResultIterator* get_iterator (TessBaseAPI* api) {
189
+ return api->GetIterator();
190
+ }
191
+ }
192
+
193
+ cpp.function %{
194
+ char* get_utf8_text (TessBaseAPI* api) {
195
+ return api->GetUTF8Text();
196
+ }
197
+ }
198
+
199
+ cpp.function %{
200
+ char* get_box_text (TessBaseAPI* api, int page_number) {
201
+ return api->GetBoxText(page_number);
202
+ }
203
+ }
204
+
205
+ cpp.function %{
206
+ char* get_unlv_text (TessBaseAPI* api) {
207
+ return api->GetUNLVText();
208
+ }
209
+ }
210
+
211
+ cpp.function %{
212
+ int mean_text_conf (TessBaseAPI* api) {
213
+ return api->MeanTextConf();
214
+ }
215
+ }
216
+
217
+ cpp.function %{
218
+ int* all_word_confidences (TessBaseAPI* api) {
219
+ return api->AllWordConfidences();
220
+ }
221
+ }
222
+
223
+ cpp.function %{
224
+ void clear (TessBaseAPI* api) {
225
+ api->Clear();
226
+ }
227
+ }
228
+
229
+ cpp.function %{
230
+ void end (TessBaseAPI* api) {
231
+ api->End();
232
+ }
233
+ }
234
+ end
235
+ end
236
+
237
+ end; end