@luii/node-tesseract-ocr 2.0.13 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/README.md +547 -153
- package/binding-options.js +4 -0
- package/dist/cjs/index.cjs +144 -18
- package/dist/cjs/index.d.ts +6 -859
- package/dist/cjs/types.d.ts +1272 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils.d.ts +1 -0
- package/dist/cjs/utils.js +38 -0
- package/dist/esm/index.d.ts +6 -859
- package/dist/esm/index.mjs +129 -14
- package/dist/esm/types.d.ts +1272 -0
- package/dist/esm/types.js +16 -0
- package/dist/esm/utils.d.ts +1 -0
- package/dist/esm/utils.js +25 -0
- package/package.json +15 -10
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +656 -88
- package/src/tesseract_wrapper.cpp +643 -187
- package/src/tesseract_wrapper.hpp +27 -4
- package/src/worker_thread.cpp +146 -2
- package/src/worker_thread.hpp +4 -1
package/src/commands.hpp
CHANGED
|
@@ -18,15 +18,21 @@
|
|
|
18
18
|
|
|
19
19
|
#include "monitor.hpp"
|
|
20
20
|
#include "utils.hpp"
|
|
21
|
+
#include <allheaders.h>
|
|
22
|
+
#include <atomic>
|
|
23
|
+
#include <cstddef>
|
|
24
|
+
#include <cstdint>
|
|
25
|
+
#include <exception>
|
|
21
26
|
#include <iostream>
|
|
22
27
|
#include <memory>
|
|
23
28
|
#include <napi.h>
|
|
24
29
|
#include <optional>
|
|
25
|
-
#include <
|
|
30
|
+
#include <ostream>
|
|
26
31
|
#include <string>
|
|
27
32
|
#include <tesseract/baseapi.h>
|
|
28
33
|
#include <tesseract/ocrclass.h>
|
|
29
34
|
#include <tesseract/publictypes.h>
|
|
35
|
+
#include <tesseract/renderer.h>
|
|
30
36
|
#include <unordered_map>
|
|
31
37
|
#include <variant>
|
|
32
38
|
#include <vector>
|
|
@@ -53,20 +59,27 @@ struct ResultString {
|
|
|
53
59
|
std::string value;
|
|
54
60
|
};
|
|
55
61
|
|
|
62
|
+
// Result alternative that carries a raw byte payload.
struct ResultBuffer {
  // Bytes handed back to the caller; empty by default.
  std::vector<uint8_t> value{};
};
|
|
65
|
+
|
|
56
66
|
using ObjectValue = std::variant<bool, int, double, float, std::string,
|
|
57
|
-
std::vector<std::string
|
|
67
|
+
std::vector<std::string>, std::vector<uint8_t>,
|
|
68
|
+
std::vector<int>>;
|
|
58
69
|
|
|
59
70
|
struct ResultObject {
|
|
60
71
|
std::unordered_map<std::string, ObjectValue> value;
|
|
61
72
|
};
|
|
62
73
|
|
|
74
|
+
using ArrayValue = std::variant<std::vector<int>, std::vector<std::string>>;
|
|
75
|
+
|
|
63
76
|
struct ResultArray {
|
|
64
|
-
|
|
77
|
+
ArrayValue value;
|
|
65
78
|
};
|
|
66
79
|
|
|
67
80
|
using Result =
|
|
68
81
|
std::variant<ResultVoid, ResultBool, ResultInt, ResultDouble, ResultFloat,
|
|
69
|
-
ResultString, ResultArray, ResultObject>;
|
|
82
|
+
ResultString, ResultArray, ResultBuffer, ResultObject>;
|
|
70
83
|
|
|
71
84
|
template <class... Ts> struct match : Ts... {
|
|
72
85
|
using Ts::operator()...;
|
|
@@ -74,6 +87,15 @@ template <class... Ts> struct match : Ts... {
|
|
|
74
87
|
|
|
75
88
|
template <class... Ts> match(Ts...) -> match<Ts...>;
|
|
76
89
|
|
|
90
|
+
template <typename T>
|
|
91
|
+
static Napi::Array VectorToNapiArray(Napi::Env env, const std::vector<T> &vec) {
|
|
92
|
+
Napi::Array arr = Napi::Array::New(env, vec.size());
|
|
93
|
+
for (size_t i = 0; i < vec.size(); ++i) {
|
|
94
|
+
arr.Set(static_cast<uint32_t>(i), vec[i]);
|
|
95
|
+
}
|
|
96
|
+
return arr;
|
|
97
|
+
}
|
|
98
|
+
|
|
77
99
|
static Napi::Value ToNapiValue(Napi::Env env, const ObjectValue &v) {
|
|
78
100
|
return std::visit(
|
|
79
101
|
match{
|
|
@@ -81,15 +103,18 @@ static Napi::Value ToNapiValue(Napi::Env env, const ObjectValue &v) {
|
|
|
81
103
|
[&](int i) -> Napi::Value { return Napi::Number::New(env, i); },
|
|
82
104
|
[&](double d) -> Napi::Value { return Napi::Number::New(env, d); },
|
|
83
105
|
[&](float f) -> Napi::Value { return Napi::Number::New(env, f); },
|
|
84
|
-
[&](const std::string &s) -> Napi::Value {
|
|
106
|
+
[&](const std::string &s) -> Napi::Value { // String
|
|
85
107
|
return Napi::String::New(env, s);
|
|
86
108
|
},
|
|
87
|
-
[&](const std::vector<
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
109
|
+
[&](const std::vector<uint8_t> &vec) -> Napi::Value { // Buffer
|
|
110
|
+
return Napi::Buffer<uint8_t>::Copy(env, vec.data(), vec.size());
|
|
111
|
+
},
|
|
112
|
+
[&](const std::vector<int> &vec) -> Napi::Value {
|
|
113
|
+
return VectorToNapiArray(env, vec);
|
|
114
|
+
},
|
|
115
|
+
[&](const std::vector<std::string> &vec)
|
|
116
|
+
-> Napi::Value { // string array
|
|
117
|
+
return VectorToNapiArray(env, vec);
|
|
93
118
|
},
|
|
94
119
|
},
|
|
95
120
|
v);
|
|
@@ -113,19 +138,16 @@ inline Napi::Value MatchResult(Napi::Env env, const Result &r) {
|
|
|
113
138
|
[&](const ResultString &v) -> Napi::Value {
|
|
114
139
|
return Napi::String::New(env, v.value);
|
|
115
140
|
},
|
|
141
|
+
[&](const ResultBuffer &v) -> Napi::Value {
|
|
142
|
+
return Napi::Buffer<uint8_t>::Copy(env, v.value.data(),
|
|
143
|
+
v.value.size());
|
|
144
|
+
},
|
|
116
145
|
[&](const ResultArray &v) -> Napi::Value {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
Napi::Array array = Napi::Array::New(env, v.value.size());
|
|
125
|
-
for (size_t i = 0; i < v.value.size(); i++) {
|
|
126
|
-
array.Set(static_cast<uint32_t>(i), v.value[i]);
|
|
127
|
-
}
|
|
128
|
-
return array;
|
|
146
|
+
return std::visit(
|
|
147
|
+
[&](const auto &vec) -> Napi::Value {
|
|
148
|
+
return VectorToNapiArray(env, vec);
|
|
149
|
+
},
|
|
150
|
+
v.value);
|
|
129
151
|
},
|
|
130
152
|
[&](const ResultObject &v) -> Napi::Value {
|
|
131
153
|
Napi::Object obj = Napi::Object::New(env);
|
|
@@ -138,6 +160,179 @@ inline Napi::Value MatchResult(Napi::Env env, const Result &r) {
|
|
|
138
160
|
r);
|
|
139
161
|
}
|
|
140
162
|
|
|
163
|
+
inline void RequireInitialized(const std::atomic<bool> &initialized,
|
|
164
|
+
const char *method) {
|
|
165
|
+
if (!initialized.load(std::memory_order_acquire)) {
|
|
166
|
+
throw_runtime("{}: call init(...) first", method);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
struct CommandVersion {
|
|
171
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
172
|
+
return ResultString{api.Version()};
|
|
173
|
+
}
|
|
174
|
+
};
|
|
175
|
+
|
|
176
|
+
struct CommandIsInitialized {
|
|
177
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
178
|
+
const std::atomic<bool> &initialized) const {
|
|
179
|
+
return ResultBool{initialized.load(std::memory_order_acquire)};
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
struct CommandSetInputName {
|
|
184
|
+
std::string input_name;
|
|
185
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
186
|
+
api.SetInputName(input_name.c_str());
|
|
187
|
+
return ResultVoid{};
|
|
188
|
+
}
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
struct CommandGetInputName {
|
|
192
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
193
|
+
return ResultString{api.GetInputName()};
|
|
194
|
+
}
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
struct CommandSetInputImage {
|
|
198
|
+
std::vector<uint8_t> bytes;
|
|
199
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
200
|
+
const std::atomic<bool> &initialized) const {
|
|
201
|
+
RequireInitialized(initialized, "setInputImage");
|
|
202
|
+
if (bytes.size() == 0) {
|
|
203
|
+
throw_runtime("setInputImage: input buffer is empty");
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
Pix *pix = pixReadMem(bytes.data(), bytes.size());
|
|
207
|
+
if (pix == nullptr) {
|
|
208
|
+
throw_runtime("setInputImage: failed to decode image buffer");
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// TessBaseAPI::SetInputImage takes ownership of pix.
|
|
212
|
+
api.SetInputImage(pix);
|
|
213
|
+
return ResultVoid{};
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
struct CommandGetInputImage {
|
|
218
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
219
|
+
const std::atomic<bool> &initialized) const {
|
|
220
|
+
RequireInitialized(initialized, "getInputImage");
|
|
221
|
+
Pix *source = api.GetInputImage();
|
|
222
|
+
|
|
223
|
+
std::cout << source << std::endl;
|
|
224
|
+
|
|
225
|
+
if (source == nullptr) {
|
|
226
|
+
throw_runtime("getInputImage: TessBaseAPI::GetInputImage returned null");
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// GetInputImage has no caller-ownership contract; work on a clone.
|
|
230
|
+
Pix *pix = pixClone(source);
|
|
231
|
+
if (pix == nullptr) {
|
|
232
|
+
throw_runtime("getInputImage: failed to clone source image");
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
l_uint32 *data = pixGetData(pix);
|
|
236
|
+
l_int32 wpl = pixGetWpl(pix);
|
|
237
|
+
l_int32 h = pixGetHeight(pix);
|
|
238
|
+
|
|
239
|
+
size_t bytecount = wpl * 4 * h;
|
|
240
|
+
const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
|
|
241
|
+
std::vector<uint8_t> buffer(start, start + bytecount);
|
|
242
|
+
pixDestroy(&pix);
|
|
243
|
+
|
|
244
|
+
return ResultBuffer{buffer};
|
|
245
|
+
}
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
struct CommandGetSourceYResolution {
|
|
249
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
250
|
+
const std::atomic<bool> &initialized) const {
|
|
251
|
+
RequireInitialized(initialized, "getSourceYResolution");
|
|
252
|
+
int source_y_resolution = api.GetSourceYResolution();
|
|
253
|
+
return ResultInt{source_y_resolution};
|
|
254
|
+
}
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
struct CommandGetDataPath {
|
|
258
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
259
|
+
const std::atomic<bool> &initialized) const {
|
|
260
|
+
RequireInitialized(initialized, "getDataPath");
|
|
261
|
+
const char *data_path = api.GetDatapath();
|
|
262
|
+
|
|
263
|
+
if (data_path == nullptr) {
|
|
264
|
+
throw_runtime("getDataPath: TessBaseAPI::GetDatapath returned null");
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
return ResultString{data_path};
|
|
268
|
+
}
|
|
269
|
+
};
|
|
270
|
+
|
|
271
|
+
struct CommandSetOutputName {
|
|
272
|
+
std::string output_name;
|
|
273
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
274
|
+
const std::atomic<bool> &initialized) const {
|
|
275
|
+
RequireInitialized(initialized, "setOutputName");
|
|
276
|
+
if (output_name.empty()) {
|
|
277
|
+
throw_runtime("setOutputName: output name is empty");
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
api.SetOutputName(output_name.c_str());
|
|
281
|
+
return ResultVoid{};
|
|
282
|
+
}
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
struct CommandClearPersistentCache {
|
|
286
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
287
|
+
const std::atomic<bool> &initialized) const {
|
|
288
|
+
RequireInitialized(initialized, "clearPersistentCache");
|
|
289
|
+
api.ClearPersistentCache();
|
|
290
|
+
return ResultVoid{};
|
|
291
|
+
}
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
struct CommandClearAdaptiveClassifier {
|
|
295
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
296
|
+
const std::atomic<bool> &initialized) const {
|
|
297
|
+
RequireInitialized(initialized, "clearAdaptiveClassifier");
|
|
298
|
+
api.ClearAdaptiveClassifier();
|
|
299
|
+
return ResultVoid{};
|
|
300
|
+
}
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
struct CommandGetThresholdedImage {
|
|
304
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
305
|
+
const std::atomic<bool> &initialized) const {
|
|
306
|
+
RequireInitialized(initialized, "getThresholdedImage");
|
|
307
|
+
Pix *pix = api.GetThresholdedImage();
|
|
308
|
+
|
|
309
|
+
if (pix == nullptr) {
|
|
310
|
+
throw_runtime("getThresholdedImage: TessBaseAPI::GetThresholdedImage "
|
|
311
|
+
"returned null");
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
l_uint32 *data = pixGetData(pix);
|
|
315
|
+
l_int32 wpl = pixGetWpl(pix);
|
|
316
|
+
l_int32 h = pixGetHeight(pix);
|
|
317
|
+
|
|
318
|
+
size_t bytecount = wpl * 4 * h;
|
|
319
|
+
const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
|
|
320
|
+
std::vector<uint8_t> buffer(start, start + bytecount);
|
|
321
|
+
pixDestroy(&pix);
|
|
322
|
+
|
|
323
|
+
return ResultBuffer{buffer};
|
|
324
|
+
}
|
|
325
|
+
};
|
|
326
|
+
|
|
327
|
+
struct CommandGetThresholdedImageScaleFactor {
|
|
328
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
329
|
+
const std::atomic<bool> &initialized) const {
|
|
330
|
+
RequireInitialized(initialized, "getThresholdedImageScaleFactor");
|
|
331
|
+
int scale_factor = api.GetThresholdedImageScaleFactor();
|
|
332
|
+
return ResultInt{scale_factor};
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
|
|
141
336
|
struct CommandInit {
|
|
142
337
|
std::string data_path, language;
|
|
143
338
|
tesseract::OcrEngineMode oem{tesseract::OEM_DEFAULT};
|
|
@@ -147,15 +342,17 @@ struct CommandInit {
|
|
|
147
342
|
std::vector<std::string> vars_values;
|
|
148
343
|
bool set_only_non_debug_params{false};
|
|
149
344
|
|
|
150
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
345
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
346
|
+
std::atomic<bool> &initialized) const {
|
|
151
347
|
const std::vector<std::string> *vv = vars_vec.empty() ? nullptr : &vars_vec;
|
|
152
348
|
const std::vector<std::string> *vval =
|
|
153
349
|
vars_values.empty() ? nullptr : &vars_values;
|
|
154
350
|
|
|
155
351
|
if ((vv == nullptr) != (vval == nullptr) ||
|
|
156
352
|
(vv && vv->size() != vval->size())) {
|
|
157
|
-
|
|
158
|
-
"vars_vec and vars_values must both be
|
|
353
|
+
throw_runtime(
|
|
354
|
+
"init: vars_vec and vars_values must either both be empty or have "
|
|
355
|
+
"the same length");
|
|
159
356
|
}
|
|
160
357
|
|
|
161
358
|
if (api.Init(data_path.empty() ? nullptr : data_path.c_str(),
|
|
@@ -164,15 +361,18 @@ struct CommandInit {
|
|
|
164
361
|
: const_cast<char **>(configs.data()),
|
|
165
362
|
static_cast<int>(configs.size()), vv, vval,
|
|
166
363
|
set_only_non_debug_params) != 0) {
|
|
167
|
-
|
|
364
|
+
throw_runtime("init: TessBaseAPI::Init returned non-zero status");
|
|
168
365
|
}
|
|
169
366
|
|
|
367
|
+
initialized.store(true, std::memory_order_release);
|
|
170
368
|
return ResultVoid{};
|
|
171
369
|
}
|
|
172
370
|
};
|
|
173
371
|
|
|
174
372
|
struct CommandInitForAnalysePage {
|
|
175
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
373
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
374
|
+
const std::atomic<bool> &initialized) const {
|
|
375
|
+
RequireInitialized(initialized, "initForAnalysePage");
|
|
176
376
|
api.InitForAnalysePage();
|
|
177
377
|
return ResultVoid{};
|
|
178
378
|
}
|
|
@@ -180,13 +380,15 @@ struct CommandInitForAnalysePage {
|
|
|
180
380
|
|
|
181
381
|
struct CommandAnalyseLayout {
|
|
182
382
|
bool merge_similar_words = false;
|
|
183
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
383
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
384
|
+
const std::atomic<bool> &initialized) const {
|
|
385
|
+
RequireInitialized(initialized, "analyseLayout");
|
|
184
386
|
|
|
185
387
|
tesseract::PageIterator *p_iter = api.AnalyseLayout(merge_similar_words);
|
|
186
388
|
|
|
187
389
|
// returns nullptr on error or empty page
|
|
188
390
|
if (p_iter == nullptr) {
|
|
189
|
-
|
|
391
|
+
throw_runtime("analyseLayout: TessBaseAPI::AnalyseLayout returned null");
|
|
190
392
|
}
|
|
191
393
|
|
|
192
394
|
// Convert PageIterator to a feasible object here
|
|
@@ -195,21 +397,242 @@ struct CommandAnalyseLayout {
|
|
|
195
397
|
}
|
|
196
398
|
};
|
|
197
399
|
|
|
400
|
+
// Holds the bytes of an encoded image; CommandAddProcessPage decodes them
// with pixReadMem.
struct EncodedImageBuffer {
  std::vector<uint8_t> bytes{};
};
|
|
403
|
+
|
|
404
|
+
struct ProcessPagesSession {
|
|
405
|
+
std::unique_ptr<tesseract::TessPDFRenderer> renderer;
|
|
406
|
+
std::string output_base;
|
|
407
|
+
int timeout_millisec{0};
|
|
408
|
+
bool textonly{false};
|
|
409
|
+
int next_page_index{0};
|
|
410
|
+
};
|
|
411
|
+
|
|
412
|
+
struct CommandBeginProcessPages {
|
|
413
|
+
std::string output_base;
|
|
414
|
+
std::string title;
|
|
415
|
+
int timeout_millisec{0}; // 0 = unlimited timeout
|
|
416
|
+
bool textonly{false};
|
|
417
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
418
|
+
std::optional<ProcessPagesSession> &session,
|
|
419
|
+
const std::atomic<bool> &initialized) const {
|
|
420
|
+
RequireInitialized(initialized, "beginProcessPages");
|
|
421
|
+
if (session.has_value()) {
|
|
422
|
+
throw_runtime(
|
|
423
|
+
"beginProcessPages called while a session is already active");
|
|
424
|
+
}
|
|
425
|
+
if (title.empty()) {
|
|
426
|
+
throw_runtime("beginProcessPages: title cannot be empty");
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
const char *input_name = api.GetInputName();
|
|
430
|
+
std::string effective_output_base = output_base;
|
|
431
|
+
if (effective_output_base.empty()) {
|
|
432
|
+
if (input_name == nullptr || *input_name == '\0') {
|
|
433
|
+
throw_runtime("beginProcessPages: output_base is empty and "
|
|
434
|
+
"TessBaseAPI::GetInputName() returned null/empty");
|
|
435
|
+
}
|
|
436
|
+
effective_output_base = input_name;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
auto renderer = std::make_unique<tesseract::TessPDFRenderer>(
|
|
440
|
+
effective_output_base.c_str(), api.GetDatapath(), textonly);
|
|
441
|
+
if (!renderer->happy()) {
|
|
442
|
+
throw_runtime("beginProcessPages: renderer is not healthy");
|
|
443
|
+
}
|
|
444
|
+
if (!renderer->BeginDocument(title.c_str())) {
|
|
445
|
+
throw_runtime("beginProcessPages: could not begin document");
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
session.emplace();
|
|
449
|
+
session->renderer = std::move(renderer);
|
|
450
|
+
session->output_base = std::move(effective_output_base);
|
|
451
|
+
session->timeout_millisec = timeout_millisec;
|
|
452
|
+
session->textonly = textonly;
|
|
453
|
+
session->next_page_index = 0;
|
|
454
|
+
return ResultVoid{};
|
|
455
|
+
}
|
|
456
|
+
};
|
|
457
|
+
|
|
458
|
+
struct CommandAddProcessPage {
|
|
459
|
+
EncodedImageBuffer page;
|
|
460
|
+
std::string filename;
|
|
461
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
462
|
+
std::optional<ProcessPagesSession> &session,
|
|
463
|
+
const std::atomic<bool> &initialized) const {
|
|
464
|
+
RequireInitialized(initialized, "addProcessPage");
|
|
465
|
+
if (!session.has_value()) {
|
|
466
|
+
throw_runtime("addProcessPage: called without an active session");
|
|
467
|
+
}
|
|
468
|
+
if (!session->renderer->happy()) {
|
|
469
|
+
throw_runtime("addProcessPage: renderer is not healthy");
|
|
470
|
+
}
|
|
471
|
+
if (page.bytes.empty()) {
|
|
472
|
+
throw_runtime("addProcessPage: buffer is empty");
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
Pix *pix = pixReadMem(page.bytes.data(), page.bytes.size());
|
|
476
|
+
if (pix == nullptr) {
|
|
477
|
+
throw_runtime("addProcessPage: failed to decode image buffer");
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
if (pixGetColormap(pix) != nullptr) {
|
|
481
|
+
Pix *no_cmap = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
|
|
482
|
+
if (no_cmap == nullptr) {
|
|
483
|
+
pixDestroy(&pix);
|
|
484
|
+
throw_runtime("addProcessPage: failed to remove image colormap");
|
|
485
|
+
}
|
|
486
|
+
if (no_cmap != pix) {
|
|
487
|
+
pixDestroy(&pix);
|
|
488
|
+
pix = no_cmap;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if (pixGetSpp(pix) == 4) {
|
|
493
|
+
Pix *no_alpha = pixRemoveAlpha(pix);
|
|
494
|
+
if (no_alpha == nullptr) {
|
|
495
|
+
pixDestroy(&pix);
|
|
496
|
+
throw_runtime("addProcessPage: failed to remove alpha channel");
|
|
497
|
+
}
|
|
498
|
+
if (no_alpha != pix) {
|
|
499
|
+
pixDestroy(&pix);
|
|
500
|
+
pix = no_alpha;
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
const int depth = pixGetDepth(pix);
|
|
505
|
+
if (depth > 0 && depth < 8) {
|
|
506
|
+
Pix *normalized = pixConvertTo8(pix, false);
|
|
507
|
+
if (normalized == nullptr) {
|
|
508
|
+
pixDestroy(&pix);
|
|
509
|
+
throw_runtime(
|
|
510
|
+
"addProcessPage: failed to normalize low-bit-depth image");
|
|
511
|
+
}
|
|
512
|
+
if (normalized != pix) {
|
|
513
|
+
pixDestroy(&pix);
|
|
514
|
+
pix = normalized;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
const int x_res = pixGetXRes(pix);
|
|
519
|
+
const int y_res = pixGetYRes(pix);
|
|
520
|
+
if (x_res <= 0 || y_res <= 0) {
|
|
521
|
+
pixSetResolution(pix, 300, 300);
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
const char *effective_filename =
|
|
525
|
+
filename.empty() ? nullptr : filename.c_str();
|
|
526
|
+
|
|
527
|
+
bool success = api.ProcessPage(
|
|
528
|
+
pix, session->next_page_index, effective_filename, nullptr,
|
|
529
|
+
session->timeout_millisec, session->renderer.get());
|
|
530
|
+
pixDestroy(&pix);
|
|
531
|
+
|
|
532
|
+
if (!success) {
|
|
533
|
+
throw_runtime("addProcessPage: ProcessPage failed at page {}",
|
|
534
|
+
session->next_page_index);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
session->next_page_index++;
|
|
538
|
+
return ResultVoid{};
|
|
539
|
+
}
|
|
540
|
+
};
|
|
541
|
+
|
|
542
|
+
struct CommandFinishProcessPages {
|
|
543
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
544
|
+
std::optional<ProcessPagesSession> &session,
|
|
545
|
+
const std::atomic<bool> &initialized) const {
|
|
546
|
+
RequireInitialized(initialized, "finishProcessPages");
|
|
547
|
+
if (!session.has_value()) {
|
|
548
|
+
throw_runtime("finishProcessPages: called without an active session");
|
|
549
|
+
}
|
|
550
|
+
if (!session->renderer->happy()) {
|
|
551
|
+
throw_runtime("finishProcessPages: renderer is not healthy");
|
|
552
|
+
}
|
|
553
|
+
if (!session->renderer->EndDocument()) {
|
|
554
|
+
throw_runtime("finishProcessPages: could not finalize document");
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
std::string output_filepath = session->output_base + ".pdf";
|
|
558
|
+
session.reset();
|
|
559
|
+
return ResultString{std::move(output_filepath)};
|
|
560
|
+
}
|
|
561
|
+
};
|
|
562
|
+
|
|
563
|
+
struct CommandAbortProcessPages {
|
|
564
|
+
std::string reason;
|
|
565
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
566
|
+
std::optional<ProcessPagesSession> &session) const {
|
|
567
|
+
session.reset();
|
|
568
|
+
return ResultVoid{};
|
|
569
|
+
}
|
|
570
|
+
};
|
|
571
|
+
|
|
572
|
+
struct CommandGetProcessPagesStatus {
|
|
573
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
574
|
+
std::optional<ProcessPagesSession> &session) const {
|
|
575
|
+
if (!session.has_value()) {
|
|
576
|
+
return ResultObject{{
|
|
577
|
+
{"active", false},
|
|
578
|
+
{"healthy", false},
|
|
579
|
+
{"processedPages", 0},
|
|
580
|
+
{"nextPageIndex", 0},
|
|
581
|
+
{"outputBase", std::string{}},
|
|
582
|
+
{"timeoutMillisec", 0},
|
|
583
|
+
{"textonly", false},
|
|
584
|
+
}};
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
return ResultObject{{
|
|
588
|
+
{"active", true},
|
|
589
|
+
{"healthy", session->renderer->happy()},
|
|
590
|
+
{"processedPages", session->next_page_index},
|
|
591
|
+
{"nextPageIndex", session->next_page_index},
|
|
592
|
+
{"outputBase", session->output_base},
|
|
593
|
+
{"timeoutMillisec", session->timeout_millisec},
|
|
594
|
+
{"textonly", session->textonly},
|
|
595
|
+
}};
|
|
596
|
+
}
|
|
597
|
+
};
|
|
598
|
+
|
|
599
|
+
struct CommandSetDebugVariable {
|
|
600
|
+
std::string name, value;
|
|
601
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
602
|
+
const std::atomic<bool> &initialized) const {
|
|
603
|
+
RequireInitialized(initialized, "setDebugVariable");
|
|
604
|
+
if (name.empty()) {
|
|
605
|
+
throw_runtime("setDebugVariable: variable name is empty");
|
|
606
|
+
} else if (value.empty()) {
|
|
607
|
+
throw_runtime("setDebugVariable: variable value is empty");
|
|
608
|
+
}
|
|
609
|
+
return ResultBool{api.SetDebugVariable(name.c_str(), value.c_str())};
|
|
610
|
+
}
|
|
611
|
+
};
|
|
612
|
+
|
|
198
613
|
struct CommandSetVariable {
|
|
199
614
|
std::string name, value;
|
|
200
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
615
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
616
|
+
const std::atomic<bool> &initialized) const {
|
|
617
|
+
RequireInitialized(initialized, "setVariable");
|
|
618
|
+
if (name.empty()) {
|
|
619
|
+
throw_runtime("setVariable: variable name is empty");
|
|
620
|
+
} else if (value.empty()) {
|
|
621
|
+
throw_runtime("setVariable: variable value is empty");
|
|
622
|
+
}
|
|
201
623
|
return ResultBool{api.SetVariable(name.c_str(), value.c_str())};
|
|
202
624
|
}
|
|
203
625
|
};
|
|
204
626
|
|
|
205
627
|
struct CommandGetIntVariable {
|
|
206
628
|
std::string name;
|
|
207
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
629
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
630
|
+
const std::atomic<bool> &initialized) const {
|
|
631
|
+
RequireInitialized(initialized, "getIntVariable");
|
|
208
632
|
int value;
|
|
209
633
|
if (!api.GetIntVariable(name.c_str(), &value)) {
|
|
210
|
-
throw_runtime(
|
|
211
|
-
|
|
212
|
-
name.c_str());
|
|
634
|
+
throw_runtime("getIntVariable: variable '{}' was not found",
|
|
635
|
+
name.c_str());
|
|
213
636
|
}
|
|
214
637
|
|
|
215
638
|
return ResultInt{value};
|
|
@@ -218,11 +641,12 @@ struct CommandGetIntVariable {
|
|
|
218
641
|
|
|
219
642
|
struct CommandGetBoolVariable {
|
|
220
643
|
std::string name;
|
|
221
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
644
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
645
|
+
const std::atomic<bool> &initialized) const {
|
|
646
|
+
RequireInitialized(initialized, "getBoolVariable");
|
|
222
647
|
bool value;
|
|
223
648
|
if (!api.GetBoolVariable(name.c_str(), &value)) {
|
|
224
|
-
throw_runtime("
|
|
225
|
-
"was not found",
|
|
649
|
+
throw_runtime("getBoolVariable: variable '{}' was not found",
|
|
226
650
|
name.c_str());
|
|
227
651
|
}
|
|
228
652
|
return ResultBool{value};
|
|
@@ -231,11 +655,12 @@ struct CommandGetBoolVariable {
|
|
|
231
655
|
|
|
232
656
|
struct CommandGetDoubleVariable {
|
|
233
657
|
std::string name;
|
|
234
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
658
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
659
|
+
const std::atomic<bool> &initialized) const {
|
|
660
|
+
RequireInitialized(initialized, "getDoubleVariable");
|
|
235
661
|
double value;
|
|
236
662
|
if (!api.GetDoubleVariable(name.c_str(), &value)) {
|
|
237
|
-
throw_runtime("
|
|
238
|
-
"was not found",
|
|
663
|
+
throw_runtime("getDoubleVariable: variable '{}' was not found",
|
|
239
664
|
name.c_str());
|
|
240
665
|
}
|
|
241
666
|
return ResultDouble{value};
|
|
@@ -244,30 +669,27 @@ struct CommandGetDoubleVariable {
|
|
|
244
669
|
|
|
245
670
|
struct CommandGetStringVariable {
|
|
246
671
|
std::string name;
|
|
247
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
672
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
673
|
+
const std::atomic<bool> &initialized) const {
|
|
674
|
+
RequireInitialized(initialized, "getStringVariable");
|
|
248
675
|
auto value = api.GetStringVariable(name.c_str());
|
|
249
676
|
if (value == nullptr) {
|
|
250
|
-
throw_runtime("
|
|
251
|
-
"was not found",
|
|
677
|
+
throw_runtime("getStringVariable: variable '{}' was not found",
|
|
252
678
|
name.c_str());
|
|
253
679
|
}
|
|
254
680
|
return ResultString{value};
|
|
255
681
|
}
|
|
256
682
|
};
|
|
257
683
|
|
|
258
|
-
// struct CommandPrintVariables {
|
|
259
|
-
// Result invoke(tesseract::TessBaseAPI &api) const {
|
|
260
|
-
// api.PrintVariables(FILE *fp);
|
|
261
|
-
// }
|
|
262
|
-
// };
|
|
263
|
-
|
|
264
684
|
struct CommandSetImage {
|
|
265
685
|
std::vector<uint8_t> bytes;
|
|
266
686
|
int width = 0;
|
|
267
687
|
int height = 0;
|
|
268
688
|
int bytes_per_pixel = 0; // bpp/8
|
|
269
689
|
int bytes_per_line = 0;
|
|
270
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
690
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
691
|
+
const std::atomic<bool> &initialized) const {
|
|
692
|
+
RequireInitialized(initialized, "setImage");
|
|
271
693
|
api.SetImage(bytes.data(), width, height, bytes_per_pixel, bytes_per_line);
|
|
272
694
|
return ResultVoid{};
|
|
273
695
|
}
|
|
@@ -275,12 +697,14 @@ struct CommandSetImage {
|
|
|
275
697
|
|
|
276
698
|
struct CommandSetPageMode {
|
|
277
699
|
tesseract::PageSegMode psm;
|
|
278
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
700
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
701
|
+
const std::atomic<bool> &initialized) const {
|
|
702
|
+
RequireInitialized(initialized, "setPageMode");
|
|
279
703
|
if (psm < 0 || psm >= tesseract::PageSegMode::PSM_COUNT) {
|
|
280
704
|
|
|
281
|
-
throw_runtime(
|
|
282
|
-
|
|
283
|
-
|
|
705
|
+
throw_runtime("setPageMode: page segmentation mode is out of range; "
|
|
706
|
+
"received {}",
|
|
707
|
+
static_cast<int>(psm));
|
|
284
708
|
}
|
|
285
709
|
api.SetPageSegMode(psm);
|
|
286
710
|
return ResultVoid{};
|
|
@@ -289,7 +713,9 @@ struct CommandSetPageMode {
|
|
|
289
713
|
|
|
290
714
|
struct CommandSetRectangle {
|
|
291
715
|
int left, top, width, height;
|
|
292
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
716
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
717
|
+
const std::atomic<bool> &initialized) const {
|
|
718
|
+
RequireInitialized(initialized, "setRectangle");
|
|
293
719
|
api.SetRectangle(left, top, width, height);
|
|
294
720
|
return ResultVoid{};
|
|
295
721
|
}
|
|
@@ -297,7 +723,9 @@ struct CommandSetRectangle {
|
|
|
297
723
|
|
|
298
724
|
struct CommandSetSourceResolution {
|
|
299
725
|
int ppi;
|
|
300
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
726
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
727
|
+
const std::atomic<bool> &initialized) const {
|
|
728
|
+
RequireInitialized(initialized, "setSourceResolution");
|
|
301
729
|
api.SetSourceResolution(ppi);
|
|
302
730
|
return ResultVoid{};
|
|
303
731
|
}
|
|
@@ -305,11 +733,14 @@ struct CommandSetSourceResolution {
|
|
|
305
733
|
|
|
306
734
|
struct CommandRecognize {
|
|
307
735
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
308
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
736
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
737
|
+
const std::atomic<bool> &initialized) const {
|
|
738
|
+
RequireInitialized(initialized, "recognize");
|
|
309
739
|
MonitorHandle handle{monitor_context};
|
|
310
740
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
311
741
|
if (api.Recognize(monitor) != 0) {
|
|
312
|
-
|
|
742
|
+
throw_runtime(
|
|
743
|
+
"recognize: TessBaseAPI::Recognize returned non-zero status");
|
|
313
744
|
}
|
|
314
745
|
return ResultVoid{};
|
|
315
746
|
}
|
|
@@ -322,7 +753,9 @@ struct CommandRecognize {
|
|
|
322
753
|
// };
|
|
323
754
|
|
|
324
755
|
struct CommandDetectOrientationScript {
|
|
325
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
756
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
757
|
+
const std::atomic<bool> &initialized) const {
|
|
758
|
+
RequireInitialized(initialized, "detectOrientationScript");
|
|
326
759
|
int orient_deg;
|
|
327
760
|
float orient_conf;
|
|
328
761
|
const char *script_name;
|
|
@@ -330,8 +763,9 @@ struct CommandDetectOrientationScript {
|
|
|
330
763
|
|
|
331
764
|
if (!api.DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
|
|
332
765
|
&script_conf)) {
|
|
333
|
-
|
|
334
|
-
"
|
|
766
|
+
throw_runtime(
|
|
767
|
+
"detectOrientationScript: TessBaseAPI::DetectOrientationScript "
|
|
768
|
+
"returned false");
|
|
335
769
|
}
|
|
336
770
|
|
|
337
771
|
return ResultObject{{
|
|
@@ -344,16 +778,122 @@ struct CommandDetectOrientationScript {
|
|
|
344
778
|
};
|
|
345
779
|
|
|
346
780
|
struct CommandMeanTextConf {
|
|
347
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
781
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
782
|
+
const std::atomic<bool> &initialized) const {
|
|
783
|
+
RequireInitialized(initialized, "meanTextConf");
|
|
348
784
|
return ResultInt{api.MeanTextConf()};
|
|
349
785
|
}
|
|
350
786
|
};
|
|
351
787
|
|
|
788
|
+
struct CommandGetPAGEText {
|
|
789
|
+
int page_number;
|
|
790
|
+
std::shared_ptr<MonitorContext> monitor_context;
|
|
791
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
792
|
+
const std::atomic<bool> &initialized) const {
|
|
793
|
+
RequireInitialized(initialized, "getPAGEText");
|
|
794
|
+
MonitorHandle handle{monitor_context};
|
|
795
|
+
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
796
|
+
char *page_text = api.GetPAGEText(monitor, page_number);
|
|
797
|
+
if (!page_text) {
|
|
798
|
+
throw_runtime("getPAGEText: TessBaseAPI::GetPAGEText returned null");
|
|
799
|
+
}
|
|
800
|
+
std::string text = std::string{page_text};
|
|
801
|
+
|
|
802
|
+
delete[] page_text;
|
|
803
|
+
return ResultString(text);
|
|
804
|
+
}
|
|
805
|
+
};
|
|
806
|
+
|
|
807
|
+
struct CommandGetLSTMBoxText {
|
|
808
|
+
int page_number;
|
|
809
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
810
|
+
const std::atomic<bool> &initialized) const {
|
|
811
|
+
RequireInitialized(initialized, "getLSTMBoxText");
|
|
812
|
+
char *lstm_box_text = api.GetLSTMBoxText(page_number);
|
|
813
|
+
if (!lstm_box_text) {
|
|
814
|
+
throw_runtime(
|
|
815
|
+
"getLSTMBoxText: TessBaseAPI::GetLSTMBoxText returned null");
|
|
816
|
+
}
|
|
817
|
+
std::string text = std::string{lstm_box_text};
|
|
818
|
+
|
|
819
|
+
delete[] lstm_box_text;
|
|
820
|
+
return ResultString(text);
|
|
821
|
+
}
|
|
822
|
+
};
|
|
823
|
+
|
|
824
|
+
struct CommandGetBoxText {
|
|
825
|
+
int page_number;
|
|
826
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
827
|
+
const std::atomic<bool> &initialized) const {
|
|
828
|
+
RequireInitialized(initialized, "getBoxText");
|
|
829
|
+
char *box_text = api.GetBoxText(page_number);
|
|
830
|
+
if (!box_text) {
|
|
831
|
+
throw_runtime("getBoxText: TessBaseAPI::GetBoxText returned null");
|
|
832
|
+
}
|
|
833
|
+
std::string text = std::string{box_text};
|
|
834
|
+
|
|
835
|
+
delete[] box_text;
|
|
836
|
+
return ResultString(text);
|
|
837
|
+
}
|
|
838
|
+
};
|
|
839
|
+
|
|
840
|
+
struct CommandGetWordStrBoxText {
|
|
841
|
+
int page_number;
|
|
842
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
843
|
+
const std::atomic<bool> &initialized) const {
|
|
844
|
+
RequireInitialized(initialized, "getWordStrBoxText");
|
|
845
|
+
char *word_str_box_text = api.GetWordStrBoxText(page_number);
|
|
846
|
+
if (!word_str_box_text) {
|
|
847
|
+
throw_runtime(
|
|
848
|
+
"getWordStrBoxText: TessBaseAPI::GetWordStrBoxText returned null");
|
|
849
|
+
}
|
|
850
|
+
std::string text = std::string{word_str_box_text};
|
|
851
|
+
|
|
852
|
+
delete[] word_str_box_text;
|
|
853
|
+
return ResultString(text);
|
|
854
|
+
}
|
|
855
|
+
};
|
|
856
|
+
|
|
857
|
+
struct CommandGetOSDText {
|
|
858
|
+
int page_number;
|
|
859
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
860
|
+
const std::atomic<bool> &initialized) const {
|
|
861
|
+
RequireInitialized(initialized, "getOSDText");
|
|
862
|
+
char *ost_text = api.GetOsdText(page_number);
|
|
863
|
+
if (!ost_text) {
|
|
864
|
+
throw_runtime("getOSDText: TessBaseAPI::GetOsdText returned null");
|
|
865
|
+
}
|
|
866
|
+
std::string text = std::string{ost_text};
|
|
867
|
+
|
|
868
|
+
delete[] ost_text;
|
|
869
|
+
return ResultString(text);
|
|
870
|
+
}
|
|
871
|
+
};
|
|
872
|
+
|
|
873
|
+
struct CommandAllWordConfidences {
|
|
874
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
875
|
+
const std::atomic<bool> &initialized) const {
|
|
876
|
+
RequireInitialized(initialized, "allWordConfidences");
|
|
877
|
+
int *all_word_confidences = api.AllWordConfidences();
|
|
878
|
+
|
|
879
|
+
std::vector<int> confidences;
|
|
880
|
+
if (all_word_confidences != nullptr) {
|
|
881
|
+
for (int i = 0; all_word_confidences[i] != -1; ++i) {
|
|
882
|
+
confidences.push_back(all_word_confidences[i]);
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
delete[] all_word_confidences;
|
|
886
|
+
return ResultArray{confidences};
|
|
887
|
+
}
|
|
888
|
+
};
|
|
889
|
+
|
|
352
890
|
struct CommandGetUTF8Text {
|
|
353
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
891
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
892
|
+
const std::atomic<bool> &initialized) const {
|
|
893
|
+
RequireInitialized(initialized, "getUTF8Text");
|
|
354
894
|
char *utf8_text = api.GetUTF8Text();
|
|
355
895
|
if (!utf8_text) {
|
|
356
|
-
throw_runtime("GetUTF8Text returned null");
|
|
896
|
+
throw_runtime("getUTF8Text: TessBaseAPI::GetUTF8Text returned null");
|
|
357
897
|
}
|
|
358
898
|
std::string text = std::string{utf8_text};
|
|
359
899
|
|
|
@@ -365,13 +905,15 @@ struct CommandGetUTF8Text {
|
|
|
365
905
|
struct CommandGetHOCRText {
|
|
366
906
|
int page_number;
|
|
367
907
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
368
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
908
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
909
|
+
const std::atomic<bool> &initialized) const {
|
|
910
|
+
RequireInitialized(initialized, "getHOCRText");
|
|
369
911
|
|
|
370
912
|
MonitorHandle handle{monitor_context};
|
|
371
913
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
372
914
|
char *hocr_text = api.GetHOCRText(monitor, page_number);
|
|
373
915
|
if (!hocr_text) {
|
|
374
|
-
throw_runtime("GetHOCRText returned null");
|
|
916
|
+
throw_runtime("getHOCRText: TessBaseAPI::GetHOCRText returned null");
|
|
375
917
|
}
|
|
376
918
|
|
|
377
919
|
std::string text = std::string{hocr_text};
|
|
@@ -383,10 +925,12 @@ struct CommandGetHOCRText {
|
|
|
383
925
|
|
|
384
926
|
struct CommandGetTSVText {
|
|
385
927
|
int page_number;
|
|
386
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
928
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
929
|
+
const std::atomic<bool> &initialized) const {
|
|
930
|
+
RequireInitialized(initialized, "getTSVText");
|
|
387
931
|
char *tsv_text = api.GetTSVText(page_number);
|
|
388
932
|
if (!tsv_text) {
|
|
389
|
-
throw_runtime("GetTSVText returned null");
|
|
933
|
+
throw_runtime("getTSVText: TessBaseAPI::GetTSVText returned null");
|
|
390
934
|
}
|
|
391
935
|
std::string text = std::string{tsv_text};
|
|
392
936
|
|
|
@@ -396,10 +940,12 @@ struct CommandGetTSVText {
|
|
|
396
940
|
};
|
|
397
941
|
|
|
398
942
|
struct CommandGetUNLVText {
|
|
399
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
943
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
944
|
+
const std::atomic<bool> &initialized) const {
|
|
945
|
+
RequireInitialized(initialized, "getUNLVText");
|
|
400
946
|
char *unlv_text = api.GetUNLVText();
|
|
401
947
|
if (!unlv_text) {
|
|
402
|
-
throw_runtime("GetUNLVText returned null");
|
|
948
|
+
throw_runtime("getUNLVText: TessBaseAPI::GetUNLVText returned null");
|
|
403
949
|
}
|
|
404
950
|
std::string text = std::string{unlv_text};
|
|
405
951
|
delete[] unlv_text;
|
|
@@ -410,12 +956,14 @@ struct CommandGetUNLVText {
|
|
|
410
956
|
struct CommandGetALTOText {
|
|
411
957
|
int page_number;
|
|
412
958
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
413
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
959
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
960
|
+
const std::atomic<bool> &initialized) const {
|
|
961
|
+
RequireInitialized(initialized, "getALTOText");
|
|
414
962
|
MonitorHandle handle{monitor_context};
|
|
415
963
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
416
964
|
char *alto_text = api.GetAltoText(monitor, page_number);
|
|
417
965
|
if (!alto_text) {
|
|
418
|
-
throw_runtime("
|
|
966
|
+
throw_runtime("getALTOText: TessBaseAPI::GetAltoText returned null");
|
|
419
967
|
}
|
|
420
968
|
std::string text = std::string{alto_text};
|
|
421
969
|
delete[] alto_text;
|
|
@@ -425,12 +973,15 @@ struct CommandGetALTOText {
|
|
|
425
973
|
};
|
|
426
974
|
|
|
427
975
|
struct CommandGetInitLanguages {
|
|
428
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
976
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
977
|
+
const std::atomic<bool> &initialized) const {
|
|
978
|
+
RequireInitialized(initialized, "getInitLanguages");
|
|
429
979
|
const char *p_init_languages = api.GetInitLanguagesAsString();
|
|
430
980
|
|
|
431
981
|
if (p_init_languages == nullptr) {
|
|
432
|
-
|
|
433
|
-
|
|
982
|
+
throw_runtime("getInitLanguages: TessBaseAPI::GetInitLanguagesAsString "
|
|
983
|
+
"returned null; call init(...) first with at least one "
|
|
984
|
+
"valid language");
|
|
434
985
|
}
|
|
435
986
|
|
|
436
987
|
std::string init_languages = std::string{p_init_languages};
|
|
@@ -440,7 +991,9 @@ struct CommandGetInitLanguages {
|
|
|
440
991
|
};
|
|
441
992
|
|
|
442
993
|
struct CommandGetLoadedLanguages {
|
|
443
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
994
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
995
|
+
const std::atomic<bool> &initialized) const {
|
|
996
|
+
RequireInitialized(initialized, "getLoadedLanguages");
|
|
444
997
|
std::vector<std::string> langs;
|
|
445
998
|
api.GetLoadedLanguagesAsVector(&langs);
|
|
446
999
|
return ResultArray{langs};
|
|
@@ -456,29 +1009,42 @@ struct CommandGetAvailableLanguages {
|
|
|
456
1009
|
};
|
|
457
1010
|
|
|
458
1011
|
struct CommandClear {
|
|
459
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
1012
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
1013
|
+
const std::atomic<bool> &initialized) const {
|
|
1014
|
+
RequireInitialized(initialized, "clear");
|
|
460
1015
|
api.Clear();
|
|
461
1016
|
return ResultVoid{};
|
|
462
1017
|
}
|
|
463
1018
|
};
|
|
464
1019
|
|
|
465
1020
|
struct CommandEnd {
|
|
466
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
1021
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
1022
|
+
std::atomic<bool> &initialized) const {
|
|
467
1023
|
api.End();
|
|
1024
|
+
initialized.store(false, std::memory_order_release);
|
|
468
1025
|
return ResultVoid{};
|
|
469
1026
|
}
|
|
470
1027
|
};
|
|
471
1028
|
|
|
472
1029
|
using Command = std::variant<
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
1030
|
+
CommandVersion, CommandIsInitialized, CommandInit,
|
|
1031
|
+
CommandInitForAnalysePage, CommandSetVariable, CommandSetDebugVariable,
|
|
1032
|
+
CommandGetIntVariable, CommandGetBoolVariable, CommandGetDoubleVariable,
|
|
1033
|
+
CommandGetStringVariable, CommandSetInputName, CommandGetInputName,
|
|
1034
|
+
CommandSetOutputName, CommandGetDataPath, CommandSetInputImage,
|
|
1035
|
+
CommandGetInputImage, CommandSetPageMode, CommandSetRectangle,
|
|
1036
|
+
CommandSetSourceResolution, CommandGetSourceYResolution, CommandSetImage,
|
|
1037
|
+
CommandGetThresholdedImage, CommandGetThresholdedImageScaleFactor,
|
|
1038
|
+
CommandRecognize, CommandAnalyseLayout, CommandDetectOrientationScript,
|
|
1039
|
+
CommandMeanTextConf, CommandAllWordConfidences, CommandGetUTF8Text,
|
|
1040
|
+
CommandGetHOCRText, CommandGetTSVText, CommandGetUNLVText,
|
|
1041
|
+
CommandGetALTOText, CommandGetPAGEText, CommandGetLSTMBoxText,
|
|
1042
|
+
CommandGetBoxText, CommandGetWordStrBoxText, CommandGetOSDText,
|
|
1043
|
+
CommandBeginProcessPages, CommandAddProcessPage, CommandFinishProcessPages,
|
|
1044
|
+
CommandAbortProcessPages, CommandGetProcessPagesStatus,
|
|
1045
|
+
CommandGetInitLanguages, CommandGetLoadedLanguages,
|
|
1046
|
+
CommandGetAvailableLanguages, CommandClearPersistentCache,
|
|
1047
|
+
CommandClearAdaptiveClassifier, CommandClear, CommandEnd>;
|
|
482
1048
|
|
|
483
1049
|
struct Job {
|
|
484
1050
|
Command command;
|
|
@@ -486,4 +1052,6 @@ struct Job {
|
|
|
486
1052
|
|
|
487
1053
|
std::optional<Result> result;
|
|
488
1054
|
std::optional<std::string> error;
|
|
1055
|
+
std::optional<std::string> error_code;
|
|
1056
|
+
std::optional<std::string> error_method;
|
|
489
1057
|
};
|