@luii/node-tesseract-ocr 2.1.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/README.md +461 -104
- package/binding-options.js +4 -0
- package/dist/cjs/index.cjs +21 -9
- package/dist/cjs/index.d.ts +4 -926
- package/dist/cjs/types.d.ts +1272 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils.js +15 -0
- package/dist/esm/index.d.ts +4 -926
- package/dist/esm/index.mjs +16 -9
- package/dist/esm/types.d.ts +1272 -0
- package/dist/esm/types.js +16 -0
- package/dist/esm/utils.js +15 -0
- package/package.json +6 -3
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +657 -88
- package/src/tesseract_wrapper.cpp +630 -187
- package/src/tesseract_wrapper.hpp +27 -2
- package/src/worker_thread.cpp +146 -2
- package/src/worker_thread.hpp +4 -1
package/src/commands.hpp
CHANGED
|
@@ -18,14 +18,21 @@
|
|
|
18
18
|
|
|
19
19
|
#include "monitor.hpp"
|
|
20
20
|
#include "utils.hpp"
|
|
21
|
+
#include <allheaders.h>
|
|
22
|
+
#include <atomic>
|
|
23
|
+
#include <cstddef>
|
|
24
|
+
#include <cstdint>
|
|
25
|
+
#include <exception>
|
|
26
|
+
#include <iostream>
|
|
21
27
|
#include <memory>
|
|
22
28
|
#include <napi.h>
|
|
23
29
|
#include <optional>
|
|
24
|
-
#include <
|
|
30
|
+
#include <ostream>
|
|
25
31
|
#include <string>
|
|
26
32
|
#include <tesseract/baseapi.h>
|
|
27
33
|
#include <tesseract/ocrclass.h>
|
|
28
34
|
#include <tesseract/publictypes.h>
|
|
35
|
+
#include <tesseract/renderer.h>
|
|
29
36
|
#include <unordered_map>
|
|
30
37
|
#include <variant>
|
|
31
38
|
#include <vector>
|
|
@@ -52,20 +59,27 @@ struct ResultString {
|
|
|
52
59
|
std::string value;
|
|
53
60
|
};
|
|
54
61
|
|
|
62
|
+
/// Command result carrying raw bytes; MatchResult copies them into a
/// Napi::Buffer<uint8_t> for the JavaScript side.
struct ResultBuffer {
  std::vector<uint8_t> value; ///< Owned byte payload.
};
|
|
65
|
+
|
|
55
66
|
using ObjectValue = std::variant<bool, int, double, float, std::string,
|
|
56
|
-
std::vector<std::string
|
|
67
|
+
std::vector<std::string>, std::vector<uint8_t>,
|
|
68
|
+
std::vector<int>>;
|
|
57
69
|
|
|
58
70
|
struct ResultObject {
|
|
59
71
|
std::unordered_map<std::string, ObjectValue> value;
|
|
60
72
|
};
|
|
61
73
|
|
|
74
|
+
/// Payload alternatives for an array result: a list of ints or a list of
/// strings.
using ArrayValue =
    std::variant<std::vector<int>, std::vector<std::string>>;
|
|
75
|
+
|
|
62
76
|
struct ResultArray {
|
|
63
|
-
|
|
77
|
+
ArrayValue value;
|
|
64
78
|
};
|
|
65
79
|
|
|
66
80
|
using Result =
|
|
67
81
|
std::variant<ResultVoid, ResultBool, ResultInt, ResultDouble, ResultFloat,
|
|
68
|
-
ResultString, ResultArray, ResultObject>;
|
|
82
|
+
ResultString, ResultArray, ResultBuffer, ResultObject>;
|
|
69
83
|
|
|
70
84
|
template <class... Ts> struct match : Ts... {
|
|
71
85
|
using Ts::operator()...;
|
|
@@ -73,6 +87,15 @@ template <class... Ts> struct match : Ts... {
|
|
|
73
87
|
|
|
74
88
|
template <class... Ts> match(Ts...) -> match<Ts...>;
|
|
75
89
|
|
|
90
|
+
template <typename T>
|
|
91
|
+
static Napi::Array VectorToNapiArray(Napi::Env env, const std::vector<T> &vec) {
|
|
92
|
+
Napi::Array arr = Napi::Array::New(env, vec.size());
|
|
93
|
+
for (size_t i = 0; i < vec.size(); ++i) {
|
|
94
|
+
arr.Set(static_cast<uint32_t>(i), vec[i]);
|
|
95
|
+
}
|
|
96
|
+
return arr;
|
|
97
|
+
}
|
|
98
|
+
|
|
76
99
|
static Napi::Value ToNapiValue(Napi::Env env, const ObjectValue &v) {
|
|
77
100
|
return std::visit(
|
|
78
101
|
match{
|
|
@@ -80,15 +103,18 @@ static Napi::Value ToNapiValue(Napi::Env env, const ObjectValue &v) {
|
|
|
80
103
|
[&](int i) -> Napi::Value { return Napi::Number::New(env, i); },
|
|
81
104
|
[&](double d) -> Napi::Value { return Napi::Number::New(env, d); },
|
|
82
105
|
[&](float f) -> Napi::Value { return Napi::Number::New(env, f); },
|
|
83
|
-
[&](const std::string &s) -> Napi::Value {
|
|
106
|
+
[&](const std::string &s) -> Napi::Value { // String
|
|
84
107
|
return Napi::String::New(env, s);
|
|
85
108
|
},
|
|
86
|
-
[&](const std::vector<
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
109
|
+
[&](const std::vector<uint8_t> &vec) -> Napi::Value { // Buffer
|
|
110
|
+
return Napi::Buffer<uint8_t>::Copy(env, vec.data(), vec.size());
|
|
111
|
+
},
|
|
112
|
+
[&](const std::vector<int> &vec) -> Napi::Value {
|
|
113
|
+
return VectorToNapiArray(env, vec);
|
|
114
|
+
},
|
|
115
|
+
[&](const std::vector<std::string> &vec)
|
|
116
|
+
-> Napi::Value { // string array
|
|
117
|
+
return VectorToNapiArray(env, vec);
|
|
92
118
|
},
|
|
93
119
|
},
|
|
94
120
|
v);
|
|
@@ -112,19 +138,16 @@ inline Napi::Value MatchResult(Napi::Env env, const Result &r) {
|
|
|
112
138
|
[&](const ResultString &v) -> Napi::Value {
|
|
113
139
|
return Napi::String::New(env, v.value);
|
|
114
140
|
},
|
|
141
|
+
[&](const ResultBuffer &v) -> Napi::Value {
|
|
142
|
+
return Napi::Buffer<uint8_t>::Copy(env, v.value.data(),
|
|
143
|
+
v.value.size());
|
|
144
|
+
},
|
|
115
145
|
[&](const ResultArray &v) -> Napi::Value {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
Napi::Array array = Napi::Array::New(env, v.value.size());
|
|
124
|
-
for (size_t i = 0; i < v.value.size(); i++) {
|
|
125
|
-
array.Set(static_cast<uint32_t>(i), v.value[i]);
|
|
126
|
-
}
|
|
127
|
-
return array;
|
|
146
|
+
return std::visit(
|
|
147
|
+
[&](const auto &vec) -> Napi::Value {
|
|
148
|
+
return VectorToNapiArray(env, vec);
|
|
149
|
+
},
|
|
150
|
+
v.value);
|
|
128
151
|
},
|
|
129
152
|
[&](const ResultObject &v) -> Napi::Value {
|
|
130
153
|
Napi::Object obj = Napi::Object::New(env);
|
|
@@ -137,6 +160,179 @@ inline Napi::Value MatchResult(Napi::Env env, const Result &r) {
|
|
|
137
160
|
r);
|
|
138
161
|
}
|
|
139
162
|
|
|
163
|
+
inline void RequireInitialized(const std::atomic<bool> &initialized,
|
|
164
|
+
const char *method) {
|
|
165
|
+
if (!initialized.load(std::memory_order_acquire)) {
|
|
166
|
+
throw_runtime("{}: call init(...) first", method);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
struct CommandVersion {
|
|
171
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
172
|
+
return ResultString{api.Version()};
|
|
173
|
+
}
|
|
174
|
+
};
|
|
175
|
+
|
|
176
|
+
struct CommandIsInitialized {
|
|
177
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
178
|
+
const std::atomic<bool> &initialized) const {
|
|
179
|
+
return ResultBool{initialized.load(std::memory_order_acquire)};
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
struct CommandSetInputName {
|
|
184
|
+
std::string input_name;
|
|
185
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
186
|
+
api.SetInputName(input_name.c_str());
|
|
187
|
+
return ResultVoid{};
|
|
188
|
+
}
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
struct CommandGetInputName {
|
|
192
|
+
Result invoke(tesseract::TessBaseAPI &api) const {
|
|
193
|
+
return ResultString{api.GetInputName()};
|
|
194
|
+
}
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
struct CommandSetInputImage {
|
|
198
|
+
std::vector<uint8_t> bytes;
|
|
199
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
200
|
+
const std::atomic<bool> &initialized) const {
|
|
201
|
+
RequireInitialized(initialized, "setInputImage");
|
|
202
|
+
if (bytes.size() == 0) {
|
|
203
|
+
throw_runtime("setInputImage: input buffer is empty");
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
Pix *pix = pixReadMem(bytes.data(), bytes.size());
|
|
207
|
+
if (pix == nullptr) {
|
|
208
|
+
throw_runtime("setInputImage: failed to decode image buffer");
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// TessBaseAPI::SetInputImage takes ownership of pix.
|
|
212
|
+
api.SetInputImage(pix);
|
|
213
|
+
return ResultVoid{};
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
struct CommandGetInputImage {
|
|
218
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
219
|
+
const std::atomic<bool> &initialized) const {
|
|
220
|
+
RequireInitialized(initialized, "getInputImage");
|
|
221
|
+
Pix *source = api.GetInputImage();
|
|
222
|
+
|
|
223
|
+
std::cout << source << std::endl;
|
|
224
|
+
|
|
225
|
+
if (source == nullptr) {
|
|
226
|
+
throw_runtime("getInputImage: TessBaseAPI::GetInputImage returned null");
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// GetInputImage has no caller-ownership contract; work on a clone.
|
|
230
|
+
Pix *pix = pixClone(source);
|
|
231
|
+
if (pix == nullptr) {
|
|
232
|
+
throw_runtime("getInputImage: failed to clone source image");
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
l_uint32 *data = pixGetData(pix);
|
|
236
|
+
l_int32 wpl = pixGetWpl(pix);
|
|
237
|
+
l_int32 h = pixGetHeight(pix);
|
|
238
|
+
|
|
239
|
+
size_t bytecount = wpl * 4 * h;
|
|
240
|
+
const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
|
|
241
|
+
std::vector<uint8_t> buffer(start, start + bytecount);
|
|
242
|
+
pixDestroy(&pix);
|
|
243
|
+
|
|
244
|
+
return ResultBuffer{buffer};
|
|
245
|
+
}
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
struct CommandGetSourceYResolution {
|
|
249
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
250
|
+
const std::atomic<bool> &initialized) const {
|
|
251
|
+
RequireInitialized(initialized, "getSourceYResolution");
|
|
252
|
+
int source_y_resolution = api.GetSourceYResolution();
|
|
253
|
+
return ResultInt{source_y_resolution};
|
|
254
|
+
}
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
struct CommandGetDataPath {
|
|
258
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
259
|
+
const std::atomic<bool> &initialized) const {
|
|
260
|
+
RequireInitialized(initialized, "getDataPath");
|
|
261
|
+
const char *data_path = api.GetDatapath();
|
|
262
|
+
|
|
263
|
+
if (data_path == nullptr) {
|
|
264
|
+
throw_runtime("getDataPath: TessBaseAPI::GetDatapath returned null");
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
return ResultString{data_path};
|
|
268
|
+
}
|
|
269
|
+
};
|
|
270
|
+
|
|
271
|
+
struct CommandSetOutputName {
|
|
272
|
+
std::string output_name;
|
|
273
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
274
|
+
const std::atomic<bool> &initialized) const {
|
|
275
|
+
RequireInitialized(initialized, "setOutputName");
|
|
276
|
+
if (output_name.empty()) {
|
|
277
|
+
throw_runtime("setOutputName: output name is empty");
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
api.SetOutputName(output_name.c_str());
|
|
281
|
+
return ResultVoid{};
|
|
282
|
+
}
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
struct CommandClearPersistentCache {
|
|
286
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
287
|
+
const std::atomic<bool> &initialized) const {
|
|
288
|
+
RequireInitialized(initialized, "clearPersistentCache");
|
|
289
|
+
api.ClearPersistentCache();
|
|
290
|
+
return ResultVoid{};
|
|
291
|
+
}
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
struct CommandClearAdaptiveClassifier {
|
|
295
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
296
|
+
const std::atomic<bool> &initialized) const {
|
|
297
|
+
RequireInitialized(initialized, "clearAdaptiveClassifier");
|
|
298
|
+
api.ClearAdaptiveClassifier();
|
|
299
|
+
return ResultVoid{};
|
|
300
|
+
}
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
struct CommandGetThresholdedImage {
|
|
304
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
305
|
+
const std::atomic<bool> &initialized) const {
|
|
306
|
+
RequireInitialized(initialized, "getThresholdedImage");
|
|
307
|
+
Pix *pix = api.GetThresholdedImage();
|
|
308
|
+
|
|
309
|
+
if (pix == nullptr) {
|
|
310
|
+
throw_runtime("getThresholdedImage: TessBaseAPI::GetThresholdedImage "
|
|
311
|
+
"returned null");
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
l_uint32 *data = pixGetData(pix);
|
|
315
|
+
l_int32 wpl = pixGetWpl(pix);
|
|
316
|
+
l_int32 h = pixGetHeight(pix);
|
|
317
|
+
|
|
318
|
+
size_t bytecount = wpl * 4 * h;
|
|
319
|
+
const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
|
|
320
|
+
std::vector<uint8_t> buffer(start, start + bytecount);
|
|
321
|
+
pixDestroy(&pix);
|
|
322
|
+
|
|
323
|
+
return ResultBuffer{buffer};
|
|
324
|
+
}
|
|
325
|
+
};
|
|
326
|
+
|
|
327
|
+
struct CommandGetThresholdedImageScaleFactor {
|
|
328
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
329
|
+
const std::atomic<bool> &initialized) const {
|
|
330
|
+
RequireInitialized(initialized, "getThresholdedImageScaleFactor");
|
|
331
|
+
int scale_factor = api.GetThresholdedImageScaleFactor();
|
|
332
|
+
return ResultInt{scale_factor};
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
|
|
140
336
|
struct CommandInit {
|
|
141
337
|
std::string data_path, language;
|
|
142
338
|
tesseract::OcrEngineMode oem{tesseract::OEM_DEFAULT};
|
|
@@ -146,15 +342,17 @@ struct CommandInit {
|
|
|
146
342
|
std::vector<std::string> vars_values;
|
|
147
343
|
bool set_only_non_debug_params{false};
|
|
148
344
|
|
|
149
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
345
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
346
|
+
std::atomic<bool> &initialized) const {
|
|
150
347
|
const std::vector<std::string> *vv = vars_vec.empty() ? nullptr : &vars_vec;
|
|
151
348
|
const std::vector<std::string> *vval =
|
|
152
349
|
vars_values.empty() ? nullptr : &vars_values;
|
|
153
350
|
|
|
154
351
|
if ((vv == nullptr) != (vval == nullptr) ||
|
|
155
352
|
(vv && vv->size() != vval->size())) {
|
|
156
|
-
|
|
157
|
-
"vars_vec and vars_values must both be
|
|
353
|
+
throw_runtime(
|
|
354
|
+
"init: vars_vec and vars_values must either both be empty or have "
|
|
355
|
+
"the same length");
|
|
158
356
|
}
|
|
159
357
|
|
|
160
358
|
if (api.Init(data_path.empty() ? nullptr : data_path.c_str(),
|
|
@@ -163,15 +361,18 @@ struct CommandInit {
|
|
|
163
361
|
: const_cast<char **>(configs.data()),
|
|
164
362
|
static_cast<int>(configs.size()), vv, vval,
|
|
165
363
|
set_only_non_debug_params) != 0) {
|
|
166
|
-
|
|
364
|
+
throw_runtime("init: TessBaseAPI::Init returned non-zero status");
|
|
167
365
|
}
|
|
168
366
|
|
|
367
|
+
initialized.store(true, std::memory_order_release);
|
|
169
368
|
return ResultVoid{};
|
|
170
369
|
}
|
|
171
370
|
};
|
|
172
371
|
|
|
173
372
|
struct CommandInitForAnalysePage {
|
|
174
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
373
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
374
|
+
const std::atomic<bool> &initialized) const {
|
|
375
|
+
RequireInitialized(initialized, "initForAnalysePage");
|
|
175
376
|
api.InitForAnalysePage();
|
|
176
377
|
return ResultVoid{};
|
|
177
378
|
}
|
|
@@ -179,13 +380,15 @@ struct CommandInitForAnalysePage {
|
|
|
179
380
|
|
|
180
381
|
struct CommandAnalyseLayout {
|
|
181
382
|
bool merge_similar_words = false;
|
|
182
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
383
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
384
|
+
const std::atomic<bool> &initialized) const {
|
|
385
|
+
RequireInitialized(initialized, "analyseLayout");
|
|
183
386
|
|
|
184
387
|
tesseract::PageIterator *p_iter = api.AnalyseLayout(merge_similar_words);
|
|
185
388
|
|
|
186
389
|
// returns nullptr on error or empty page
|
|
187
390
|
if (p_iter == nullptr) {
|
|
188
|
-
|
|
391
|
+
throw_runtime("analyseLayout: TessBaseAPI::AnalyseLayout returned null");
|
|
189
392
|
}
|
|
190
393
|
|
|
191
394
|
// Convert PageIterator to a feasible object here
|
|
@@ -194,21 +397,242 @@ struct CommandAnalyseLayout {
|
|
|
194
397
|
}
|
|
195
398
|
};
|
|
196
399
|
|
|
400
|
+
/// Bytes of one encoded image; addProcessPage decodes them with pixReadMem.
struct EncodedImageBuffer {
  std::vector<uint8_t> bytes; ///< Encoded image contents.
};
|
|
403
|
+
|
|
404
|
+
struct ProcessPagesSession {
|
|
405
|
+
std::unique_ptr<tesseract::TessPDFRenderer> renderer;
|
|
406
|
+
std::string output_base;
|
|
407
|
+
int timeout_millisec{0};
|
|
408
|
+
bool textonly{false};
|
|
409
|
+
int next_page_index{0};
|
|
410
|
+
};
|
|
411
|
+
|
|
412
|
+
struct CommandBeginProcessPages {
|
|
413
|
+
std::string output_base;
|
|
414
|
+
std::string title;
|
|
415
|
+
int timeout_millisec{0}; // 0 = unlimited timeout
|
|
416
|
+
bool textonly{false};
|
|
417
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
418
|
+
std::optional<ProcessPagesSession> &session,
|
|
419
|
+
const std::atomic<bool> &initialized) const {
|
|
420
|
+
RequireInitialized(initialized, "beginProcessPages");
|
|
421
|
+
if (session.has_value()) {
|
|
422
|
+
throw_runtime(
|
|
423
|
+
"beginProcessPages called while a session is already active");
|
|
424
|
+
}
|
|
425
|
+
if (title.empty()) {
|
|
426
|
+
throw_runtime("beginProcessPages: title cannot be empty");
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
const char *input_name = api.GetInputName();
|
|
430
|
+
std::string effective_output_base = output_base;
|
|
431
|
+
if (effective_output_base.empty()) {
|
|
432
|
+
if (input_name == nullptr || *input_name == '\0') {
|
|
433
|
+
throw_runtime("beginProcessPages: output_base is empty and "
|
|
434
|
+
"TessBaseAPI::GetInputName() returned null/empty");
|
|
435
|
+
}
|
|
436
|
+
effective_output_base = input_name;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
auto renderer = std::make_unique<tesseract::TessPDFRenderer>(
|
|
440
|
+
effective_output_base.c_str(), api.GetDatapath(), textonly);
|
|
441
|
+
if (!renderer->happy()) {
|
|
442
|
+
throw_runtime("beginProcessPages: renderer is not healthy");
|
|
443
|
+
}
|
|
444
|
+
if (!renderer->BeginDocument(title.c_str())) {
|
|
445
|
+
throw_runtime("beginProcessPages: could not begin document");
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
session.emplace();
|
|
449
|
+
session->renderer = std::move(renderer);
|
|
450
|
+
session->output_base = std::move(effective_output_base);
|
|
451
|
+
session->timeout_millisec = timeout_millisec;
|
|
452
|
+
session->textonly = textonly;
|
|
453
|
+
session->next_page_index = 0;
|
|
454
|
+
return ResultVoid{};
|
|
455
|
+
}
|
|
456
|
+
};
|
|
457
|
+
|
|
458
|
+
struct CommandAddProcessPage {
|
|
459
|
+
EncodedImageBuffer page;
|
|
460
|
+
std::string filename;
|
|
461
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
462
|
+
std::optional<ProcessPagesSession> &session,
|
|
463
|
+
const std::atomic<bool> &initialized) const {
|
|
464
|
+
RequireInitialized(initialized, "addProcessPage");
|
|
465
|
+
if (!session.has_value()) {
|
|
466
|
+
throw_runtime("addProcessPage: called without an active session");
|
|
467
|
+
}
|
|
468
|
+
if (!session->renderer->happy()) {
|
|
469
|
+
throw_runtime("addProcessPage: renderer is not healthy");
|
|
470
|
+
}
|
|
471
|
+
if (page.bytes.empty()) {
|
|
472
|
+
throw_runtime("addProcessPage: buffer is empty");
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
Pix *pix = pixReadMem(page.bytes.data(), page.bytes.size());
|
|
476
|
+
if (pix == nullptr) {
|
|
477
|
+
throw_runtime("addProcessPage: failed to decode image buffer");
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
if (pixGetColormap(pix) != nullptr) {
|
|
481
|
+
Pix *no_cmap = pixRemoveColormap(pix, REMOVE_CMAP_BASED_ON_SRC);
|
|
482
|
+
if (no_cmap == nullptr) {
|
|
483
|
+
pixDestroy(&pix);
|
|
484
|
+
throw_runtime("addProcessPage: failed to remove image colormap");
|
|
485
|
+
}
|
|
486
|
+
if (no_cmap != pix) {
|
|
487
|
+
pixDestroy(&pix);
|
|
488
|
+
pix = no_cmap;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if (pixGetSpp(pix) == 4) {
|
|
493
|
+
Pix *no_alpha = pixRemoveAlpha(pix);
|
|
494
|
+
if (no_alpha == nullptr) {
|
|
495
|
+
pixDestroy(&pix);
|
|
496
|
+
throw_runtime("addProcessPage: failed to remove alpha channel");
|
|
497
|
+
}
|
|
498
|
+
if (no_alpha != pix) {
|
|
499
|
+
pixDestroy(&pix);
|
|
500
|
+
pix = no_alpha;
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
const int depth = pixGetDepth(pix);
|
|
505
|
+
if (depth > 0 && depth < 8) {
|
|
506
|
+
Pix *normalized = pixConvertTo8(pix, false);
|
|
507
|
+
if (normalized == nullptr) {
|
|
508
|
+
pixDestroy(&pix);
|
|
509
|
+
throw_runtime(
|
|
510
|
+
"addProcessPage: failed to normalize low-bit-depth image");
|
|
511
|
+
}
|
|
512
|
+
if (normalized != pix) {
|
|
513
|
+
pixDestroy(&pix);
|
|
514
|
+
pix = normalized;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
const int x_res = pixGetXRes(pix);
|
|
519
|
+
const int y_res = pixGetYRes(pix);
|
|
520
|
+
if (x_res <= 0 || y_res <= 0) {
|
|
521
|
+
pixSetResolution(pix, 300, 300);
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
const char *effective_filename =
|
|
525
|
+
filename.empty() ? nullptr : filename.c_str();
|
|
526
|
+
|
|
527
|
+
bool success = api.ProcessPage(
|
|
528
|
+
pix, session->next_page_index, effective_filename, nullptr,
|
|
529
|
+
session->timeout_millisec, session->renderer.get());
|
|
530
|
+
pixDestroy(&pix);
|
|
531
|
+
|
|
532
|
+
if (!success) {
|
|
533
|
+
throw_runtime("addProcessPage: ProcessPage failed at page {}",
|
|
534
|
+
session->next_page_index);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
session->next_page_index++;
|
|
538
|
+
return ResultVoid{};
|
|
539
|
+
}
|
|
540
|
+
};
|
|
541
|
+
|
|
542
|
+
struct CommandFinishProcessPages {
|
|
543
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
544
|
+
std::optional<ProcessPagesSession> &session,
|
|
545
|
+
const std::atomic<bool> &initialized) const {
|
|
546
|
+
RequireInitialized(initialized, "finishProcessPages");
|
|
547
|
+
if (!session.has_value()) {
|
|
548
|
+
throw_runtime("finishProcessPages: called without an active session");
|
|
549
|
+
}
|
|
550
|
+
if (!session->renderer->happy()) {
|
|
551
|
+
throw_runtime("finishProcessPages: renderer is not healthy");
|
|
552
|
+
}
|
|
553
|
+
if (!session->renderer->EndDocument()) {
|
|
554
|
+
throw_runtime("finishProcessPages: could not finalize document");
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
std::string output_filepath = session->output_base + ".pdf";
|
|
558
|
+
session.reset();
|
|
559
|
+
return ResultString{std::move(output_filepath)};
|
|
560
|
+
}
|
|
561
|
+
};
|
|
562
|
+
|
|
563
|
+
struct CommandAbortProcessPages {
|
|
564
|
+
std::string reason;
|
|
565
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
566
|
+
std::optional<ProcessPagesSession> &session) const {
|
|
567
|
+
session.reset();
|
|
568
|
+
return ResultVoid{};
|
|
569
|
+
}
|
|
570
|
+
};
|
|
571
|
+
|
|
572
|
+
struct CommandGetProcessPagesStatus {
|
|
573
|
+
Result invoke(tesseract::TessBaseAPI &,
|
|
574
|
+
std::optional<ProcessPagesSession> &session) const {
|
|
575
|
+
if (!session.has_value()) {
|
|
576
|
+
return ResultObject{{
|
|
577
|
+
{"active", false},
|
|
578
|
+
{"healthy", false},
|
|
579
|
+
{"processedPages", 0},
|
|
580
|
+
{"nextPageIndex", 0},
|
|
581
|
+
{"outputBase", std::string{}},
|
|
582
|
+
{"timeoutMillisec", 0},
|
|
583
|
+
{"textonly", false},
|
|
584
|
+
}};
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
return ResultObject{{
|
|
588
|
+
{"active", true},
|
|
589
|
+
{"healthy", session->renderer->happy()},
|
|
590
|
+
{"processedPages", session->next_page_index},
|
|
591
|
+
{"nextPageIndex", session->next_page_index},
|
|
592
|
+
{"outputBase", session->output_base},
|
|
593
|
+
{"timeoutMillisec", session->timeout_millisec},
|
|
594
|
+
{"textonly", session->textonly},
|
|
595
|
+
}};
|
|
596
|
+
}
|
|
597
|
+
};
|
|
598
|
+
|
|
599
|
+
struct CommandSetDebugVariable {
|
|
600
|
+
std::string name, value;
|
|
601
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
602
|
+
const std::atomic<bool> &initialized) const {
|
|
603
|
+
RequireInitialized(initialized, "setDebugVariable");
|
|
604
|
+
if (name.empty()) {
|
|
605
|
+
throw_runtime("setDebugVariable: variable name is empty");
|
|
606
|
+
} else if (value.empty()) {
|
|
607
|
+
throw_runtime("setDebugVariable: variable value is empty");
|
|
608
|
+
}
|
|
609
|
+
return ResultBool{api.SetDebugVariable(name.c_str(), value.c_str())};
|
|
610
|
+
}
|
|
611
|
+
};
|
|
612
|
+
|
|
197
613
|
struct CommandSetVariable {
|
|
198
614
|
std::string name, value;
|
|
199
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
615
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
616
|
+
const std::atomic<bool> &initialized) const {
|
|
617
|
+
RequireInitialized(initialized, "setVariable");
|
|
618
|
+
if (name.empty()) {
|
|
619
|
+
throw_runtime("setVariable: variable name is empty");
|
|
620
|
+
} else if (value.empty()) {
|
|
621
|
+
throw_runtime("setVariable: variable value is empty");
|
|
622
|
+
}
|
|
200
623
|
return ResultBool{api.SetVariable(name.c_str(), value.c_str())};
|
|
201
624
|
}
|
|
202
625
|
};
|
|
203
626
|
|
|
204
627
|
struct CommandGetIntVariable {
|
|
205
628
|
std::string name;
|
|
206
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
629
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
630
|
+
const std::atomic<bool> &initialized) const {
|
|
631
|
+
RequireInitialized(initialized, "getIntVariable");
|
|
207
632
|
int value;
|
|
208
633
|
if (!api.GetIntVariable(name.c_str(), &value)) {
|
|
209
|
-
throw_runtime(
|
|
210
|
-
|
|
211
|
-
name.c_str());
|
|
634
|
+
throw_runtime("getIntVariable: variable '{}' was not found",
|
|
635
|
+
name.c_str());
|
|
212
636
|
}
|
|
213
637
|
|
|
214
638
|
return ResultInt{value};
|
|
@@ -217,11 +641,12 @@ struct CommandGetIntVariable {
|
|
|
217
641
|
|
|
218
642
|
struct CommandGetBoolVariable {
|
|
219
643
|
std::string name;
|
|
220
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
644
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
645
|
+
const std::atomic<bool> &initialized) const {
|
|
646
|
+
RequireInitialized(initialized, "getBoolVariable");
|
|
221
647
|
bool value;
|
|
222
648
|
if (!api.GetBoolVariable(name.c_str(), &value)) {
|
|
223
|
-
throw_runtime("
|
|
224
|
-
"was not found",
|
|
649
|
+
throw_runtime("getBoolVariable: variable '{}' was not found",
|
|
225
650
|
name.c_str());
|
|
226
651
|
}
|
|
227
652
|
return ResultBool{value};
|
|
@@ -230,11 +655,12 @@ struct CommandGetBoolVariable {
|
|
|
230
655
|
|
|
231
656
|
struct CommandGetDoubleVariable {
|
|
232
657
|
std::string name;
|
|
233
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
658
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
659
|
+
const std::atomic<bool> &initialized) const {
|
|
660
|
+
RequireInitialized(initialized, "getDoubleVariable");
|
|
234
661
|
double value;
|
|
235
662
|
if (!api.GetDoubleVariable(name.c_str(), &value)) {
|
|
236
|
-
throw_runtime("
|
|
237
|
-
"was not found",
|
|
663
|
+
throw_runtime("getDoubleVariable: variable '{}' was not found",
|
|
238
664
|
name.c_str());
|
|
239
665
|
}
|
|
240
666
|
return ResultDouble{value};
|
|
@@ -243,30 +669,27 @@ struct CommandGetDoubleVariable {
|
|
|
243
669
|
|
|
244
670
|
struct CommandGetStringVariable {
|
|
245
671
|
std::string name;
|
|
246
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
672
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
673
|
+
const std::atomic<bool> &initialized) const {
|
|
674
|
+
RequireInitialized(initialized, "getStringVariable");
|
|
247
675
|
auto value = api.GetStringVariable(name.c_str());
|
|
248
676
|
if (value == nullptr) {
|
|
249
|
-
throw_runtime("
|
|
250
|
-
"was not found",
|
|
677
|
+
throw_runtime("getStringVariable: variable '{}' was not found",
|
|
251
678
|
name.c_str());
|
|
252
679
|
}
|
|
253
680
|
return ResultString{value};
|
|
254
681
|
}
|
|
255
682
|
};
|
|
256
683
|
|
|
257
|
-
// struct CommandPrintVariables {
|
|
258
|
-
// Result invoke(tesseract::TessBaseAPI &api) const {
|
|
259
|
-
// api.PrintVariables(FILE *fp);
|
|
260
|
-
// }
|
|
261
|
-
// };
|
|
262
|
-
|
|
263
684
|
struct CommandSetImage {
|
|
264
685
|
std::vector<uint8_t> bytes;
|
|
265
686
|
int width = 0;
|
|
266
687
|
int height = 0;
|
|
267
688
|
int bytes_per_pixel = 0; // bpp/8
|
|
268
689
|
int bytes_per_line = 0;
|
|
269
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
690
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
691
|
+
const std::atomic<bool> &initialized) const {
|
|
692
|
+
RequireInitialized(initialized, "setImage");
|
|
270
693
|
api.SetImage(bytes.data(), width, height, bytes_per_pixel, bytes_per_line);
|
|
271
694
|
return ResultVoid{};
|
|
272
695
|
}
|
|
@@ -274,12 +697,14 @@ struct CommandSetImage {
|
|
|
274
697
|
|
|
275
698
|
struct CommandSetPageMode {
|
|
276
699
|
tesseract::PageSegMode psm;
|
|
277
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
700
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
701
|
+
const std::atomic<bool> &initialized) const {
|
|
702
|
+
RequireInitialized(initialized, "setPageMode");
|
|
278
703
|
if (psm < 0 || psm >= tesseract::PageSegMode::PSM_COUNT) {
|
|
279
704
|
|
|
280
|
-
throw_runtime(
|
|
281
|
-
|
|
282
|
-
|
|
705
|
+
throw_runtime("setPageMode: page segmentation mode is out of range; "
|
|
706
|
+
"received {}",
|
|
707
|
+
static_cast<int>(psm));
|
|
283
708
|
}
|
|
284
709
|
api.SetPageSegMode(psm);
|
|
285
710
|
return ResultVoid{};
|
|
@@ -288,7 +713,9 @@ struct CommandSetPageMode {
|
|
|
288
713
|
|
|
289
714
|
struct CommandSetRectangle {
|
|
290
715
|
int left, top, width, height;
|
|
291
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
716
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
717
|
+
const std::atomic<bool> &initialized) const {
|
|
718
|
+
RequireInitialized(initialized, "setRectangle");
|
|
292
719
|
api.SetRectangle(left, top, width, height);
|
|
293
720
|
return ResultVoid{};
|
|
294
721
|
}
|
|
@@ -296,7 +723,9 @@ struct CommandSetRectangle {
|
|
|
296
723
|
|
|
297
724
|
struct CommandSetSourceResolution {
|
|
298
725
|
int ppi;
|
|
299
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
726
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
727
|
+
const std::atomic<bool> &initialized) const {
|
|
728
|
+
RequireInitialized(initialized, "setSourceResolution");
|
|
300
729
|
api.SetSourceResolution(ppi);
|
|
301
730
|
return ResultVoid{};
|
|
302
731
|
}
|
|
@@ -304,11 +733,14 @@ struct CommandSetSourceResolution {
|
|
|
304
733
|
|
|
305
734
|
struct CommandRecognize {
|
|
306
735
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
307
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
736
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
737
|
+
const std::atomic<bool> &initialized) const {
|
|
738
|
+
RequireInitialized(initialized, "recognize");
|
|
308
739
|
MonitorHandle handle{monitor_context};
|
|
309
740
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
310
741
|
if (api.Recognize(monitor) != 0) {
|
|
311
|
-
|
|
742
|
+
throw_runtime(
|
|
743
|
+
"recognize: TessBaseAPI::Recognize returned non-zero status");
|
|
312
744
|
}
|
|
313
745
|
return ResultVoid{};
|
|
314
746
|
}
|
|
@@ -321,7 +753,9 @@ struct CommandRecognize {
|
|
|
321
753
|
// };
|
|
322
754
|
|
|
323
755
|
struct CommandDetectOrientationScript {
|
|
324
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
756
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
757
|
+
const std::atomic<bool> &initialized) const {
|
|
758
|
+
RequireInitialized(initialized, "detectOrientationScript");
|
|
325
759
|
int orient_deg;
|
|
326
760
|
float orient_conf;
|
|
327
761
|
const char *script_name;
|
|
@@ -329,8 +763,9 @@ struct CommandDetectOrientationScript {
|
|
|
329
763
|
|
|
330
764
|
if (!api.DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
|
|
331
765
|
&script_conf)) {
|
|
332
|
-
|
|
333
|
-
"
|
|
766
|
+
throw_runtime(
|
|
767
|
+
"detectOrientationScript: TessBaseAPI::DetectOrientationScript "
|
|
768
|
+
"returned false");
|
|
334
769
|
}
|
|
335
770
|
|
|
336
771
|
return ResultObject{{
|
|
@@ -343,16 +778,122 @@ struct CommandDetectOrientationScript {
|
|
|
343
778
|
};
|
|
344
779
|
|
|
345
780
|
struct CommandMeanTextConf {
|
|
346
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
781
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
782
|
+
const std::atomic<bool> &initialized) const {
|
|
783
|
+
RequireInitialized(initialized, "meanTextConf");
|
|
347
784
|
return ResultInt{api.MeanTextConf()};
|
|
348
785
|
}
|
|
349
786
|
};
|
|
350
787
|
|
|
788
|
+
struct CommandGetPAGEText {
|
|
789
|
+
int page_number;
|
|
790
|
+
std::shared_ptr<MonitorContext> monitor_context;
|
|
791
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
792
|
+
const std::atomic<bool> &initialized) const {
|
|
793
|
+
RequireInitialized(initialized, "getPAGEText");
|
|
794
|
+
MonitorHandle handle{monitor_context};
|
|
795
|
+
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
796
|
+
char *page_text = api.GetPAGEText(monitor, page_number);
|
|
797
|
+
if (!page_text) {
|
|
798
|
+
throw_runtime("getPAGEText: TessBaseAPI::GetPAGEText returned null");
|
|
799
|
+
}
|
|
800
|
+
std::string text = std::string{page_text};
|
|
801
|
+
|
|
802
|
+
delete[] page_text;
|
|
803
|
+
return ResultString(text);
|
|
804
|
+
}
|
|
805
|
+
};
|
|
806
|
+
|
|
807
|
+
struct CommandGetLSTMBoxText {
|
|
808
|
+
int page_number;
|
|
809
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
810
|
+
const std::atomic<bool> &initialized) const {
|
|
811
|
+
RequireInitialized(initialized, "getLSTMBoxText");
|
|
812
|
+
char *lstm_box_text = api.GetLSTMBoxText(page_number);
|
|
813
|
+
if (!lstm_box_text) {
|
|
814
|
+
throw_runtime(
|
|
815
|
+
"getLSTMBoxText: TessBaseAPI::GetLSTMBoxText returned null");
|
|
816
|
+
}
|
|
817
|
+
std::string text = std::string{lstm_box_text};
|
|
818
|
+
|
|
819
|
+
delete[] lstm_box_text;
|
|
820
|
+
return ResultString(text);
|
|
821
|
+
}
|
|
822
|
+
};
|
|
823
|
+
|
|
824
|
+
struct CommandGetBoxText {
|
|
825
|
+
int page_number;
|
|
826
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
827
|
+
const std::atomic<bool> &initialized) const {
|
|
828
|
+
RequireInitialized(initialized, "getBoxText");
|
|
829
|
+
char *box_text = api.GetBoxText(page_number);
|
|
830
|
+
if (!box_text) {
|
|
831
|
+
throw_runtime("getBoxText: TessBaseAPI::GetBoxText returned null");
|
|
832
|
+
}
|
|
833
|
+
std::string text = std::string{box_text};
|
|
834
|
+
|
|
835
|
+
delete[] box_text;
|
|
836
|
+
return ResultString(text);
|
|
837
|
+
}
|
|
838
|
+
};
|
|
839
|
+
|
|
840
|
+
struct CommandGetWordStrBoxText {
|
|
841
|
+
int page_number;
|
|
842
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
843
|
+
const std::atomic<bool> &initialized) const {
|
|
844
|
+
RequireInitialized(initialized, "getWordStrBoxText");
|
|
845
|
+
char *word_str_box_text = api.GetWordStrBoxText(page_number);
|
|
846
|
+
if (!word_str_box_text) {
|
|
847
|
+
throw_runtime(
|
|
848
|
+
"getWordStrBoxText: TessBaseAPI::GetWordStrBoxText returned null");
|
|
849
|
+
}
|
|
850
|
+
std::string text = std::string{word_str_box_text};
|
|
851
|
+
|
|
852
|
+
delete[] word_str_box_text;
|
|
853
|
+
return ResultString(text);
|
|
854
|
+
}
|
|
855
|
+
};
|
|
856
|
+
|
|
857
|
+
struct CommandGetOSDText {
|
|
858
|
+
int page_number;
|
|
859
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
860
|
+
const std::atomic<bool> &initialized) const {
|
|
861
|
+
RequireInitialized(initialized, "getOSDText");
|
|
862
|
+
char *ost_text = api.GetOsdText(page_number);
|
|
863
|
+
if (!ost_text) {
|
|
864
|
+
throw_runtime("getOSDText: TessBaseAPI::GetOsdText returned null");
|
|
865
|
+
}
|
|
866
|
+
std::string text = std::string{ost_text};
|
|
867
|
+
|
|
868
|
+
delete[] ost_text;
|
|
869
|
+
return ResultString(text);
|
|
870
|
+
}
|
|
871
|
+
};
|
|
872
|
+
|
|
873
|
+
struct CommandAllWordConfidences {
|
|
874
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
875
|
+
const std::atomic<bool> &initialized) const {
|
|
876
|
+
RequireInitialized(initialized, "allWordConfidences");
|
|
877
|
+
int *all_word_confidences = api.AllWordConfidences();
|
|
878
|
+
|
|
879
|
+
std::vector<int> confidences;
|
|
880
|
+
if (all_word_confidences != nullptr) {
|
|
881
|
+
for (int i = 0; all_word_confidences[i] != -1; ++i) {
|
|
882
|
+
confidences.push_back(all_word_confidences[i]);
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
delete[] all_word_confidences;
|
|
886
|
+
return ResultArray{confidences};
|
|
887
|
+
}
|
|
888
|
+
};
|
|
889
|
+
|
|
351
890
|
struct CommandGetUTF8Text {
|
|
352
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
891
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
892
|
+
const std::atomic<bool> &initialized) const {
|
|
893
|
+
RequireInitialized(initialized, "getUTF8Text");
|
|
353
894
|
char *utf8_text = api.GetUTF8Text();
|
|
354
895
|
if (!utf8_text) {
|
|
355
|
-
throw_runtime("GetUTF8Text returned null");
|
|
896
|
+
throw_runtime("getUTF8Text: TessBaseAPI::GetUTF8Text returned null");
|
|
356
897
|
}
|
|
357
898
|
std::string text = std::string{utf8_text};
|
|
358
899
|
|
|
@@ -364,13 +905,15 @@ struct CommandGetUTF8Text {
|
|
|
364
905
|
struct CommandGetHOCRText {
|
|
365
906
|
int page_number;
|
|
366
907
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
367
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
908
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
909
|
+
const std::atomic<bool> &initialized) const {
|
|
910
|
+
RequireInitialized(initialized, "getHOCRText");
|
|
368
911
|
|
|
369
912
|
MonitorHandle handle{monitor_context};
|
|
370
913
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
371
914
|
char *hocr_text = api.GetHOCRText(monitor, page_number);
|
|
372
915
|
if (!hocr_text) {
|
|
373
|
-
throw_runtime("GetHOCRText returned null");
|
|
916
|
+
throw_runtime("getHOCRText: TessBaseAPI::GetHOCRText returned null");
|
|
374
917
|
}
|
|
375
918
|
|
|
376
919
|
std::string text = std::string{hocr_text};
|
|
@@ -382,10 +925,12 @@ struct CommandGetHOCRText {
|
|
|
382
925
|
|
|
383
926
|
struct CommandGetTSVText {
|
|
384
927
|
int page_number;
|
|
385
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
928
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
929
|
+
const std::atomic<bool> &initialized) const {
|
|
930
|
+
RequireInitialized(initialized, "getTSVText");
|
|
386
931
|
char *tsv_text = api.GetTSVText(page_number);
|
|
387
932
|
if (!tsv_text) {
|
|
388
|
-
throw_runtime("GetTSVText returned null");
|
|
933
|
+
throw_runtime("getTSVText: TessBaseAPI::GetTSVText returned null");
|
|
389
934
|
}
|
|
390
935
|
std::string text = std::string{tsv_text};
|
|
391
936
|
|
|
@@ -395,10 +940,12 @@ struct CommandGetTSVText {
|
|
|
395
940
|
};
|
|
396
941
|
|
|
397
942
|
struct CommandGetUNLVText {
|
|
398
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
943
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
944
|
+
const std::atomic<bool> &initialized) const {
|
|
945
|
+
RequireInitialized(initialized, "getUNLVText");
|
|
399
946
|
char *unlv_text = api.GetUNLVText();
|
|
400
947
|
if (!unlv_text) {
|
|
401
|
-
throw_runtime("GetUNLVText returned null");
|
|
948
|
+
throw_runtime("getUNLVText: TessBaseAPI::GetUNLVText returned null");
|
|
402
949
|
}
|
|
403
950
|
std::string text = std::string{unlv_text};
|
|
404
951
|
delete[] unlv_text;
|
|
@@ -409,12 +956,14 @@ struct CommandGetUNLVText {
|
|
|
409
956
|
struct CommandGetALTOText {
|
|
410
957
|
int page_number;
|
|
411
958
|
std::shared_ptr<MonitorContext> monitor_context;
|
|
412
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
959
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
960
|
+
const std::atomic<bool> &initialized) const {
|
|
961
|
+
RequireInitialized(initialized, "getALTOText");
|
|
413
962
|
MonitorHandle handle{monitor_context};
|
|
414
963
|
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
415
964
|
char *alto_text = api.GetAltoText(monitor, page_number);
|
|
416
965
|
if (!alto_text) {
|
|
417
|
-
throw_runtime("
|
|
966
|
+
throw_runtime("getALTOText: TessBaseAPI::GetAltoText returned null");
|
|
418
967
|
}
|
|
419
968
|
std::string text = std::string{alto_text};
|
|
420
969
|
delete[] alto_text;
|
|
@@ -424,12 +973,15 @@ struct CommandGetALTOText {
|
|
|
424
973
|
};
|
|
425
974
|
|
|
426
975
|
struct CommandGetInitLanguages {
|
|
427
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
976
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
977
|
+
const std::atomic<bool> &initialized) const {
|
|
978
|
+
RequireInitialized(initialized, "getInitLanguages");
|
|
428
979
|
const char *p_init_languages = api.GetInitLanguagesAsString();
|
|
429
980
|
|
|
430
981
|
if (p_init_languages == nullptr) {
|
|
431
|
-
|
|
432
|
-
|
|
982
|
+
throw_runtime("getInitLanguages: TessBaseAPI::GetInitLanguagesAsString "
|
|
983
|
+
"returned null; call init(...) first with at least one "
|
|
984
|
+
"valid language");
|
|
433
985
|
}
|
|
434
986
|
|
|
435
987
|
std::string init_languages = std::string{p_init_languages};
|
|
@@ -439,7 +991,9 @@ struct CommandGetInitLanguages {
|
|
|
439
991
|
};
|
|
440
992
|
|
|
441
993
|
struct CommandGetLoadedLanguages {
|
|
442
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
994
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
995
|
+
const std::atomic<bool> &initialized) const {
|
|
996
|
+
RequireInitialized(initialized, "getLoadedLanguages");
|
|
443
997
|
std::vector<std::string> langs;
|
|
444
998
|
api.GetLoadedLanguagesAsVector(&langs);
|
|
445
999
|
return ResultArray{langs};
|
|
@@ -455,29 +1009,42 @@ struct CommandGetAvailableLanguages {
|
|
|
455
1009
|
};
|
|
456
1010
|
|
|
457
1011
|
struct CommandClear {
|
|
458
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
1012
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
1013
|
+
const std::atomic<bool> &initialized) const {
|
|
1014
|
+
RequireInitialized(initialized, "clear");
|
|
459
1015
|
api.Clear();
|
|
460
1016
|
return ResultVoid{};
|
|
461
1017
|
}
|
|
462
1018
|
};
|
|
463
1019
|
|
|
464
1020
|
struct CommandEnd {
|
|
465
|
-
Result invoke(tesseract::TessBaseAPI &api
|
|
1021
|
+
Result invoke(tesseract::TessBaseAPI &api,
|
|
1022
|
+
std::atomic<bool> &initialized) const {
|
|
466
1023
|
api.End();
|
|
1024
|
+
initialized.store(false, std::memory_order_release);
|
|
467
1025
|
return ResultVoid{};
|
|
468
1026
|
}
|
|
469
1027
|
};
|
|
470
1028
|
|
|
471
1029
|
using Command = std::variant<
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
1030
|
+
CommandVersion, CommandIsInitialized, CommandInit,
|
|
1031
|
+
CommandInitForAnalysePage, CommandSetVariable, CommandSetDebugVariable,
|
|
1032
|
+
CommandGetIntVariable, CommandGetBoolVariable, CommandGetDoubleVariable,
|
|
1033
|
+
CommandGetStringVariable, CommandSetInputName, CommandGetInputName,
|
|
1034
|
+
CommandSetOutputName, CommandGetDataPath, CommandSetInputImage,
|
|
1035
|
+
CommandGetInputImage, CommandSetPageMode, CommandSetRectangle,
|
|
1036
|
+
CommandSetSourceResolution, CommandGetSourceYResolution, CommandSetImage,
|
|
1037
|
+
CommandGetThresholdedImage, CommandGetThresholdedImageScaleFactor,
|
|
1038
|
+
CommandRecognize, CommandAnalyseLayout, CommandDetectOrientationScript,
|
|
1039
|
+
CommandMeanTextConf, CommandAllWordConfidences, CommandGetUTF8Text,
|
|
1040
|
+
CommandGetHOCRText, CommandGetTSVText, CommandGetUNLVText,
|
|
1041
|
+
CommandGetALTOText, CommandGetPAGEText, CommandGetLSTMBoxText,
|
|
1042
|
+
CommandGetBoxText, CommandGetWordStrBoxText, CommandGetOSDText,
|
|
1043
|
+
CommandBeginProcessPages, CommandAddProcessPage, CommandFinishProcessPages,
|
|
1044
|
+
CommandAbortProcessPages, CommandGetProcessPagesStatus,
|
|
1045
|
+
CommandGetInitLanguages, CommandGetLoadedLanguages,
|
|
1046
|
+
CommandGetAvailableLanguages, CommandClearPersistentCache,
|
|
1047
|
+
CommandClearAdaptiveClassifier, CommandClear, CommandEnd>;
|
|
481
1048
|
|
|
482
1049
|
struct Job {
|
|
483
1050
|
Command command;
|
|
@@ -485,4 +1052,6 @@ struct Job {
|
|
|
485
1052
|
|
|
486
1053
|
std::optional<Result> result;
|
|
487
1054
|
std::optional<std::string> error;
|
|
1055
|
+
std::optional<std::string> error_code;
|
|
1056
|
+
std::optional<std::string> error_method;
|
|
488
1057
|
};
|