tesseract-ocr 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tesseract/c.rb CHANGED
@@ -32,191 +32,6 @@ module C
32
32
  extend FFI::Inliner
33
33
 
34
34
  inline 'C++' do |cpp|
35
- # leptonica wrapping
36
- cpp.include 'leptonica/allheaders.h'
37
- cpp.libraries 'lept'
38
-
39
- cpp.function %{
40
- Pix* pix_read (const char* path) {
41
- return pixRead(path);
42
- }
43
- }
44
-
45
- cpp.function %{
46
- Pix* pix_read_fd (int fd) {
47
- return pixReadStream(fdopen(fd, "rb"), 0);
48
- }
49
- }
50
-
51
- cpp.function %{
52
- Pix* pix_read_mem (const l_uint8* data, size_t size) {
53
- return pixReadMem(data, size);
54
- }
55
- }
56
-
57
- cpp.function %{
58
- void pix_destroy (Pix* pix) {
59
- pixDestroy(&pix);
60
- }
61
- }
62
-
63
- cpp.function %{
64
- int32_t pix_get_width (Pix* pix) {
65
- return pixGetWidth(pix);
66
- }
67
- }
68
-
69
- cpp.function %{
70
- int32_t pix_get_height (Pix* pix) {
71
- return pixGetHeight(pix);
72
- }
73
- }
74
-
75
- # tesseract wrapping
76
- cpp.include 'tesseract/baseapi.h'
77
- cpp.libraries 'tesseract'
78
-
79
- cpp.raw 'using namespace tesseract;'
80
-
81
- cpp.eval {
82
- enum :OcrEngineMode, [
83
- :TESSERACT_ONLY, :CUBE_ONLY, :TESSERACT_CUBE_COMBINED, :DEFAULT
84
- ]
85
-
86
- enum :PageSegMode, [
87
- :OSD_ONLY, :AUTO_OSD,
88
- :AUTO_ONLY, :AUTO, :SINGLE_COLUMN, :SINGLE_BLOCK_VERT_TEXT,
89
- :SINGLE_BLOCK, :SINGLE_LINE, :SINGLE_WORD, :CIRCLE_WORD, :SINGLE_CHAR,
90
- :COUNT
91
- ]
92
- }
93
-
94
- cpp.function %{
95
- TessBaseAPI* create (void) {
96
- return new TessBaseAPI();
97
- }
98
- }
99
-
100
- cpp.function %{
101
- void destroy (TessBaseAPI* api) {
102
- delete api;
103
- }
104
- }
105
-
106
- cpp.function %{
107
- const char* version (TessBaseAPI* api) {
108
- return api->Version();
109
- }
110
- }, return: :string
111
-
112
- cpp.function %{
113
- void set_input_name (TessBaseAPI* api, const char* name) {
114
- api->SetInputName(name);
115
- }
116
- }
117
-
118
- cpp.function %{
119
- void set_output_name (TessBaseAPI* api, const char* name) {
120
- api->SetOutputName(name);
121
- }
122
- }
123
-
124
- cpp.function %{
125
- bool set_variable (TessBaseAPI* api, const char* name, const char* value) {
126
- return api->SetVariable(name, value);
127
- }
128
- }
129
-
130
- cpp.function %{
131
- bool has_int_variable (TessBaseAPI* api, const char* name) {
132
- int tmp;
133
-
134
- return api->GetIntVariable(name, &tmp);
135
- }
136
- }
137
-
138
- cpp.function %{
139
- bool has_bool_variable (TessBaseAPI* api, const char* name) {
140
- bool tmp;
141
-
142
- return api->GetBoolVariable(name, &tmp);
143
- }
144
- }
145
-
146
- cpp.function %{
147
- bool has_double_variable (TessBaseAPI* api, const char* name) {
148
- double tmp;
149
-
150
- return api->GetDoubleVariable(name, &tmp);
151
- }
152
- }
153
-
154
- cpp.function %{
155
- bool has_string_variable (TessBaseAPI* api, const char* name) {
156
- return api->GetStringVariable(name) != NULL;
157
- }
158
- }
159
-
160
- cpp.function %{
161
- int get_int_variable (TessBaseAPI* api, const char* name) {
162
- int result = 0;
163
-
164
- api->GetIntVariable(name, &result);
165
-
166
- return result;
167
- }
168
- }
169
-
170
- cpp.function %{
171
- bool get_bool_variable (TessBaseAPI* api, const char* name) {
172
- bool result = false;
173
-
174
- api->GetBoolVariable(name, &result);
175
-
176
- return result;
177
- }
178
- }
179
-
180
- cpp.function %{
181
- double get_double_variable (TessBaseAPI* api, const char* name) {
182
- double result = 0;
183
-
184
- api->GetDoubleVariable(name, &result);
185
-
186
- return result;
187
- }
188
- }
189
-
190
- cpp.function %{
191
- const char* get_string_variable (TessBaseAPI* api, const char* name) {
192
- return api->GetStringVariable(name);
193
- }
194
- }, return: :string
195
-
196
- cpp.function %{
197
- int init (TessBaseAPI* api, const char* datapath, const char* language, OcrEngineMode oem) {
198
- return api->Init(datapath, language, oem);
199
- }
200
- }
201
-
202
- cpp.function %{
203
- void read_config_file (TessBaseAPI* api, const char* filename, bool init_only) {
204
- api->ReadConfigFile(filename, init_only);
205
- }
206
- }
207
-
208
- cpp.function %{
209
- void set_page_seg_mode (TessBaseAPI* api, PageSegMode mode) {
210
- api->SetPageSegMode(mode);
211
- }
212
- }
213
-
214
- cpp.function %{
215
- PageSegMode get_page_seg_mode (TessBaseAPI* api) {
216
- return api->GetPageSegMode();
217
- }
218
- }
219
-
220
35
  cpp.function %{
221
36
  void free_string (char* pointer) {
222
37
  delete [] pointer;
@@ -228,97 +43,15 @@ module C
228
43
  delete [] pointer;
229
44
  }
230
45
  }
46
+ end
231
47
 
232
- cpp.function %{
233
- void set_image (TessBaseAPI* api, const Pix* pix) {
234
- api->SetImage(pix);
235
- }
236
- }
237
-
238
- cpp.function %{
239
- void set_rectangle (TessBaseAPI* api, int left, int top, int width, int height) {
240
- api->SetRectangle(left, top, width, height);
241
- }
242
- }
243
-
244
- cpp.function %{
245
- Pix* get_thresholded_image (TessBaseAPI* api) {
246
- return api->GetThresholdedImage();
247
- }
248
- }
249
-
250
- cpp.function %{
251
- Boxa* get_regions (TessBaseAPI* api, Pixa** pixa) {
252
- return api->GetRegions(pixa);
253
- }
254
- }
255
-
256
- cpp.function %{
257
- Boxa* get_textlines (TessBaseAPI* api, Pixa** pixa, int** blockids) {
258
- return api->GetTextlines(pixa, blockids);
259
- }
260
- }
261
-
262
- cpp.function %{
263
- Boxa* get_words (TessBaseAPI* api, Pixa** pixa) {
264
- return api->GetWords(pixa);
265
- }
266
- }
267
-
268
- cpp.function %{
269
- Boxa* get_connected_components (TessBaseAPI* api, Pixa** cc) {
270
- return api->GetConnectedComponents(cc);
271
- }
272
- }
273
-
274
- cpp.function %{
275
- char* get_utf8_text (TessBaseAPI* api) {
276
- return api->GetUTF8Text();
277
- }
278
- }
279
-
280
- cpp.function %{
281
- char* get_hocr_text (TessBaseAPI* api, int page_number) {
282
- return api->GetHOCRText(page_number);
283
- }
284
- }
285
-
286
- cpp.function %{
287
- char* get_box_text (TessBaseAPI* api, int page_number) {
288
- return api->GetBoxText(page_number);
289
- }
290
- }
291
-
292
- cpp.function %{
293
- char* get_unlv_text (TessBaseAPI* api) {
294
- return api->GetUNLVText();
295
- }
296
- }
297
-
298
- cpp.function %{
299
- int mean_text_conf (TessBaseAPI* api) {
300
- return api->MeanTextConf();
301
- }
302
- }
303
-
304
- cpp.function %{
305
- int* all_word_confidences (TessBaseAPI* api) {
306
- return api->AllWordConfidences();
307
- }
308
- }
309
-
310
- cpp.function %{
311
- void clear (TessBaseAPI* api) {
312
- api->Clear();
313
- }
314
- }
315
-
316
- cpp.function %{
317
- void end (TessBaseAPI* api) {
318
- api->End();
319
- }
320
- }
48
+ def self.for_enum (what)
49
+ what.is_a?(Integer) ? what : what.to_s.upcase.to_sym
321
50
  end
322
51
  end
323
52
 
324
53
  end
54
+
55
+ require 'tesseract/c/leptonica'
56
+ require 'tesseract/c/baseapi'
57
+ require 'tesseract/c/iterator'
data/lib/tesseract/c/baseapi.rb ADDED
@@ -0,0 +1,237 @@
1
+ #--
2
+ # Copyright 2011 meh. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification, are
5
+ # permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice, this list of
8
+ # conditions and the following disclaimer.
9
+ #
10
+ # THIS SOFTWARE IS PROVIDED BY meh ''AS IS'' AND ANY EXPRESS OR IMPLIED
11
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
12
+ # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL meh OR
13
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
14
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
15
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
16
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
17
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
18
+ # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
+ #
20
+ # The views and conclusions contained in the software and documentation are those of the
21
+ # authors and should not be interpreted as representing official policies, either expressed
22
+ # or implied, of meh.
23
+ #++
24
+
25
+ module Tesseract; module C
26
+
27
+ module BaseAPI
28
+ extend FFI::Inliner
29
+
30
+ inline 'C++' do |cpp|
31
+ cpp.include 'tesseract/baseapi.h'
32
+ cpp.libraries 'tesseract'
33
+
34
+ cpp.raw 'using namespace tesseract;'
35
+
36
+ cpp.eval {
37
+ enum :OcrEngineMode, [
38
+ :TESSERACT_ONLY, :CUBE_ONLY, :TESSERACT_CUBE_COMBINED, :DEFAULT
39
+ ]
40
+
41
+ enum :PageSegMode, [
42
+ :OSD_ONLY, :AUTO_OSD,
43
+ :AUTO_ONLY, :AUTO, :SINGLE_COLUMN, :SINGLE_BLOCK_VERT_TEXT,
44
+ :SINGLE_BLOCK, :SINGLE_LINE, :SINGLE_WORD, :CIRCLE_WORD, :SINGLE_CHAR,
45
+ :COUNT
46
+ ]
47
+ }
48
+
49
+ cpp.function %{
50
+ TessBaseAPI* create (void) {
51
+ return new TessBaseAPI();
52
+ }
53
+ }
54
+
55
+ cpp.function %{
56
+ void destroy (TessBaseAPI* api) {
57
+ delete api;
58
+ }
59
+ }
60
+
61
+ cpp.function %{
62
+ const char* version (TessBaseAPI* api) {
63
+ return api->Version();
64
+ }
65
+ }, return: :string
66
+
67
+ cpp.function %{
68
+ void set_input_name (TessBaseAPI* api, const char* name) {
69
+ api->SetInputName(name);
70
+ }
71
+ }
72
+
73
+ cpp.function %{
74
+ void set_output_name (TessBaseAPI* api, const char* name) {
75
+ api->SetOutputName(name);
76
+ }
77
+ }
78
+
79
+ cpp.function %{
80
+ bool set_variable (TessBaseAPI* api, const char* name, const char* value) {
81
+ return api->SetVariable(name, value);
82
+ }
83
+ }
84
+
85
+ cpp.function %{
86
+ bool has_int_variable (TessBaseAPI* api, const char* name) {
87
+ int tmp;
88
+
89
+ return api->GetIntVariable(name, &tmp);
90
+ }
91
+ }
92
+
93
+ cpp.function %{
94
+ bool has_bool_variable (TessBaseAPI* api, const char* name) {
95
+ bool tmp;
96
+
97
+ return api->GetBoolVariable(name, &tmp);
98
+ }
99
+ }
100
+
101
+ cpp.function %{
102
+ bool has_double_variable (TessBaseAPI* api, const char* name) {
103
+ double tmp;
104
+
105
+ return api->GetDoubleVariable(name, &tmp);
106
+ }
107
+ }
108
+
109
+ cpp.function %{
110
+ bool has_string_variable (TessBaseAPI* api, const char* name) {
111
+ return api->GetStringVariable(name) != NULL;
112
+ }
113
+ }
114
+
115
+ cpp.function %{
116
+ int get_int_variable (TessBaseAPI* api, const char* name) {
117
+ int result = 0;
118
+
119
+ api->GetIntVariable(name, &result);
120
+
121
+ return result;
122
+ }
123
+ }
124
+
125
+ cpp.function %{
126
+ bool get_bool_variable (TessBaseAPI* api, const char* name) {
127
+ bool result = false;
128
+
129
+ api->GetBoolVariable(name, &result);
130
+
131
+ return result;
132
+ }
133
+ }
134
+
135
+ cpp.function %{
136
+ double get_double_variable (TessBaseAPI* api, const char* name) {
137
+ double result = 0;
138
+
139
+ api->GetDoubleVariable(name, &result);
140
+
141
+ return result;
142
+ }
143
+ }
144
+
145
+ cpp.function %{
146
+ const char* get_string_variable (TessBaseAPI* api, const char* name) {
147
+ return api->GetStringVariable(name);
148
+ }
149
+ }, return: :string
150
+
151
+ cpp.function %{
152
+ int init (TessBaseAPI* api, const char* datapath, const char* language, OcrEngineMode oem) {
153
+ return api->Init(datapath, language, oem);
154
+ }
155
+ }
156
+
157
+ cpp.function %{
158
+ void read_config_file (TessBaseAPI* api, const char* filename, bool init_only) {
159
+ api->ReadConfigFile(filename, init_only);
160
+ }
161
+ }
162
+
163
+ cpp.function %{
164
+ void set_page_seg_mode (TessBaseAPI* api, PageSegMode mode) {
165
+ api->SetPageSegMode(mode);
166
+ }
167
+ }
168
+
169
+ cpp.function %{
170
+ PageSegMode get_page_seg_mode (TessBaseAPI* api) {
171
+ return api->GetPageSegMode();
172
+ }
173
+ }
174
+
175
+ cpp.function %{
176
+ void set_image (TessBaseAPI* api, const Pix* pix) {
177
+ api->SetImage(pix);
178
+ }
179
+ }
180
+
181
+ cpp.function %{
182
+ void set_rectangle (TessBaseAPI* api, int left, int top, int width, int height) {
183
+ api->SetRectangle(left, top, width, height);
184
+ }
185
+ }
186
+
187
+ cpp.function %{
188
+ ResultIterator* get_iterator (TessBaseAPI* api) {
189
+ return api->GetIterator();
190
+ }
191
+ }
192
+
193
+ cpp.function %{
194
+ char* get_utf8_text (TessBaseAPI* api) {
195
+ return api->GetUTF8Text();
196
+ }
197
+ }
198
+
199
+ cpp.function %{
200
+ char* get_box_text (TessBaseAPI* api, int page_number) {
201
+ return api->GetBoxText(page_number);
202
+ }
203
+ }
204
+
205
+ cpp.function %{
206
+ char* get_unlv_text (TessBaseAPI* api) {
207
+ return api->GetUNLVText();
208
+ }
209
+ }
210
+
211
+ cpp.function %{
212
+ int mean_text_conf (TessBaseAPI* api) {
213
+ return api->MeanTextConf();
214
+ }
215
+ }
216
+
217
+ cpp.function %{
218
+ int* all_word_confidences (TessBaseAPI* api) {
219
+ return api->AllWordConfidences();
220
+ }
221
+ }
222
+
223
+ cpp.function %{
224
+ void clear (TessBaseAPI* api) {
225
+ api->Clear();
226
+ }
227
+ }
228
+
229
+ cpp.function %{
230
+ void end (TessBaseAPI* api) {
231
+ api->End();
232
+ }
233
+ }
234
+ end
235
+ end
236
+
237
+ end; end