@luii/node-tesseract-ocr 2.3.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/types.d.ts +18 -7
- package/dist/esm/types.d.ts +18 -7
- package/package.json +1 -1
- package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node +0 -0
- package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node +0 -0
- package/src/commands.hpp +38 -7
- package/src/tesseract_wrapper.cpp +34 -12
package/dist/cjs/types.d.ts
CHANGED
|
@@ -735,6 +735,11 @@ export interface TesseractBeginProcessPagesOptions {
|
|
|
735
735
|
timeout: number;
|
|
736
736
|
textonly: boolean;
|
|
737
737
|
}
|
|
738
|
+
export interface TesseractAddProcessPageOptions {
|
|
739
|
+
buffer: Buffer<ArrayBuffer>;
|
|
740
|
+
filename?: string;
|
|
741
|
+
progressCallback?: (info: ProgressChangedInfo) => void;
|
|
742
|
+
}
|
|
738
743
|
export interface TesseractProcessPagesStatus {
|
|
739
744
|
active: boolean;
|
|
740
745
|
healthy: boolean;
|
|
@@ -844,12 +849,15 @@ export interface TesseractDocumentApi {
|
|
|
844
849
|
/**
|
|
845
850
|
* Adds one encoded page to the active multipage session.
|
|
846
851
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
847
|
-
* @
|
|
848
|
-
* @throws {TesseractArgumentError} If `
|
|
852
|
+
* @param {TesseractAddProcessPageOptions} options Page options.
|
|
853
|
+
* @throws {TesseractArgumentError} If `options` is missing/invalid.
|
|
854
|
+
* @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
|
|
855
|
+
* @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
|
|
856
|
+
* @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
|
|
849
857
|
* @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
|
|
850
858
|
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
851
859
|
*/
|
|
852
|
-
addPage(
|
|
860
|
+
addPage(options: TesseractAddProcessPageOptions): Promise<void>;
|
|
853
861
|
/**
|
|
854
862
|
* Finalizes the active multipage session and returns output PDF path.
|
|
855
863
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
@@ -996,15 +1004,18 @@ export interface TesseractInstance {
|
|
|
996
1004
|
*/
|
|
997
1005
|
beginProcessPages(options: TesseractBeginProcessPagesOptions): Promise<void>;
|
|
998
1006
|
/**
|
|
999
|
-
* Adds one encoded page to the
|
|
1007
|
+
* Adds one encoded page to the active multipage session.
|
|
1000
1008
|
* @deprecated use `document.addPage()`
|
|
1001
|
-
* @
|
|
1002
|
-
* @throws {TesseractArgumentError} If `
|
|
1009
|
+
* @param {TesseractAddProcessPageOptions} options Page options.
|
|
1010
|
+
* @throws {TesseractArgumentError} If `options` is missing/invalid.
|
|
1011
|
+
* @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
|
|
1012
|
+
* @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
|
|
1013
|
+
* @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
|
|
1003
1014
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1004
1015
|
* @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
|
|
1005
1016
|
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1006
1017
|
*/
|
|
1007
|
-
addProcessPage(
|
|
1018
|
+
addProcessPage(options: TesseractAddProcessPageOptions): Promise<void>;
|
|
1008
1019
|
/**
|
|
1009
1020
|
* Finalizes the current multipage session and returns the output PDF path.
|
|
1010
1021
|
* @deprecated use `document.finish()`
|
package/dist/esm/types.d.ts
CHANGED
|
@@ -735,6 +735,11 @@ export interface TesseractBeginProcessPagesOptions {
|
|
|
735
735
|
timeout: number;
|
|
736
736
|
textonly: boolean;
|
|
737
737
|
}
|
|
738
|
+
export interface TesseractAddProcessPageOptions {
|
|
739
|
+
buffer: Buffer<ArrayBuffer>;
|
|
740
|
+
filename?: string;
|
|
741
|
+
progressCallback?: (info: ProgressChangedInfo) => void;
|
|
742
|
+
}
|
|
738
743
|
export interface TesseractProcessPagesStatus {
|
|
739
744
|
active: boolean;
|
|
740
745
|
healthy: boolean;
|
|
@@ -844,12 +849,15 @@ export interface TesseractDocumentApi {
|
|
|
844
849
|
/**
|
|
845
850
|
* Adds one encoded page to the active multipage session.
|
|
846
851
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
847
|
-
* @
|
|
848
|
-
* @throws {TesseractArgumentError} If `
|
|
852
|
+
* @param {TesseractAddProcessPageOptions} options Page options.
|
|
853
|
+
* @throws {TesseractArgumentError} If `options` is missing/invalid.
|
|
854
|
+
* @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
|
|
855
|
+
* @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
|
|
856
|
+
* @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
|
|
849
857
|
* @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
|
|
850
858
|
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
851
859
|
*/
|
|
852
|
-
addPage(
|
|
860
|
+
addPage(options: TesseractAddProcessPageOptions): Promise<void>;
|
|
853
861
|
/**
|
|
854
862
|
* Finalizes the active multipage session and returns output PDF path.
|
|
855
863
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
@@ -996,15 +1004,18 @@ export interface TesseractInstance {
|
|
|
996
1004
|
*/
|
|
997
1005
|
beginProcessPages(options: TesseractBeginProcessPagesOptions): Promise<void>;
|
|
998
1006
|
/**
|
|
999
|
-
* Adds one encoded page to the
|
|
1007
|
+
* Adds one encoded page to the active multipage session.
|
|
1000
1008
|
* @deprecated use `document.addPage()`
|
|
1001
|
-
* @
|
|
1002
|
-
* @throws {TesseractArgumentError} If `
|
|
1009
|
+
* @param {TesseractAddProcessPageOptions} options Page options.
|
|
1010
|
+
* @throws {TesseractArgumentError} If `options` is missing/invalid.
|
|
1011
|
+
* @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
|
|
1012
|
+
* @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
|
|
1013
|
+
* @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
|
|
1003
1014
|
* @throws {TesseractRuntimeError} If called before `init(...)`.
|
|
1004
1015
|
* @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
|
|
1005
1016
|
* @throws {TesseractWorkerError} If the worker is closing/stopped.
|
|
1006
1017
|
*/
|
|
1007
|
-
addProcessPage(
|
|
1018
|
+
addProcessPage(options: TesseractAddProcessPageOptions): Promise<void>;
|
|
1008
1019
|
/**
|
|
1009
1020
|
* Finalizes the current multipage session and returns the output PDF path.
|
|
1010
1021
|
* @deprecated use `document.finish()`
|
package/package.json
CHANGED
|
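package/prebuilds/node-tesseract-ocr-darwin-arm64/node-napi-v10.node
CHANGED
|
Binary file
|
package/prebuilds/node-tesseract-ocr-linux-x64/node-napi-v10.node
CHANGED
|
Binary file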
|
package/src/commands.hpp
CHANGED
|
@@ -458,6 +458,7 @@ struct CommandBeginProcessPages {
|
|
|
458
458
|
struct CommandAddProcessPage {
|
|
459
459
|
EncodedImageBuffer page;
|
|
460
460
|
std::string filename;
|
|
461
|
+
std::shared_ptr<MonitorContext> monitor_context;
|
|
461
462
|
Result invoke(tesseract::TessBaseAPI &api,
|
|
462
463
|
std::optional<ProcessPagesSession> &session,
|
|
463
464
|
const std::atomic<bool> &initialized) const {
|
|
@@ -523,18 +524,48 @@ struct CommandAddProcessPage {
|
|
|
523
524
|
|
|
524
525
|
const char *effective_filename =
|
|
525
526
|
filename.empty() ? nullptr : filename.c_str();
|
|
527
|
+
api.SetInputName(effective_filename);
|
|
528
|
+
api.SetImage(pix);
|
|
526
529
|
|
|
527
|
-
bool
|
|
528
|
-
|
|
529
|
-
|
|
530
|
+
bool failed = false;
|
|
531
|
+
MonitorHandle handle{monitor_context};
|
|
532
|
+
auto *monitor = monitor_context ? &handle.monitor : nullptr;
|
|
533
|
+
|
|
534
|
+
if (session->timeout_millisec > 0) {
|
|
535
|
+
tesseract::ETEXT_DESC timeout_only_monitor{};
|
|
536
|
+
if (monitor != nullptr) {
|
|
537
|
+
monitor->set_deadline_msecs(session->timeout_millisec);
|
|
538
|
+
} else {
|
|
539
|
+
timeout_only_monitor.cancel = nullptr;
|
|
540
|
+
timeout_only_monitor.cancel_this = nullptr;
|
|
541
|
+
timeout_only_monitor.set_deadline_msecs(session->timeout_millisec);
|
|
542
|
+
monitor = &timeout_only_monitor;
|
|
543
|
+
}
|
|
544
|
+
failed = api.Recognize(monitor) < 0;
|
|
545
|
+
} else if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY ||
|
|
546
|
+
api.GetPageSegMode() == tesseract::PSM_AUTO_ONLY) {
|
|
547
|
+
tesseract::PageIterator *it = api.AnalyseLayout();
|
|
548
|
+
if (it == nullptr) {
|
|
549
|
+
failed = true;
|
|
550
|
+
} else {
|
|
551
|
+
delete it;
|
|
552
|
+
}
|
|
553
|
+
} else {
|
|
554
|
+
failed = api.Recognize(monitor) < 0;
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
if (session->renderer && !failed) {
|
|
558
|
+
failed = !session->renderer->AddImage(&api);
|
|
559
|
+
}
|
|
530
560
|
pixDestroy(&pix);
|
|
531
561
|
|
|
532
|
-
if (!
|
|
533
|
-
|
|
534
|
-
|
|
562
|
+
if (!failed) {
|
|
563
|
+
session->next_page_index++;
|
|
564
|
+
return ResultVoid{};
|
|
535
565
|
}
|
|
536
566
|
|
|
537
|
-
|
|
567
|
+
throw_runtime("addProcessPage: ProcessPage failed at page {}",
|
|
568
|
+
session->next_page_index);
|
|
538
569
|
return ResultVoid{};
|
|
539
570
|
}
|
|
540
571
|
};
|
|
package/src/tesseract_wrapper.cpp
CHANGED
|
@@ -553,33 +553,55 @@ Napi::Value TesseractWrapper::AddProcessPage(const Napi::CallbackInfo &info) {
|
|
|
553
553
|
Napi::Env env = info.Env();
|
|
554
554
|
CommandAddProcessPage command{};
|
|
555
555
|
|
|
556
|
-
if (info.Length()
|
|
557
|
-
return RejectTypeError(
|
|
558
|
-
|
|
559
|
-
|
|
556
|
+
if (info.Length() != 1 || !info[0].IsObject()) {
|
|
557
|
+
return RejectTypeError(env,
|
|
558
|
+
"addProcessPage(options): options must be an object",
|
|
559
|
+
"addProcessPage");
|
|
560
560
|
}
|
|
561
561
|
|
|
562
|
-
|
|
562
|
+
Napi::Object options = info[0].As<Napi::Object>();
|
|
563
|
+
|
|
564
|
+
Napi::Value buffer_value = options.Get("buffer");
|
|
565
|
+
if (!buffer_value.IsBuffer()) {
|
|
563
566
|
return RejectTypeError(
|
|
564
|
-
env, "addProcessPage(
|
|
567
|
+
env, "addProcessPage(options): options.buffer must be a Buffer",
|
|
565
568
|
"addProcessPage");
|
|
566
569
|
}
|
|
567
570
|
|
|
568
|
-
Napi::Buffer<uint8_t> page_buffer =
|
|
571
|
+
Napi::Buffer<uint8_t> page_buffer = buffer_value.As<Napi::Buffer<uint8_t>>();
|
|
569
572
|
const size_t length = page_buffer.Length();
|
|
570
573
|
if (length == 0) {
|
|
571
574
|
return RejectTypeError(env,
|
|
572
|
-
"addProcessPage(
|
|
575
|
+
"addProcessPage(options): options.buffer is empty",
|
|
573
576
|
"addProcessPage");
|
|
574
577
|
}
|
|
575
578
|
|
|
576
|
-
|
|
577
|
-
|
|
579
|
+
Napi::Value filename_value = options.Get("filename");
|
|
580
|
+
if (!filename_value.IsUndefined() && !filename_value.IsNull()) {
|
|
581
|
+
if (!filename_value.IsString()) {
|
|
578
582
|
return RejectTypeError(
|
|
579
|
-
env, "addProcessPage(
|
|
583
|
+
env, "addProcessPage(options): options.filename must be a string",
|
|
580
584
|
"addProcessPage");
|
|
581
585
|
}
|
|
582
|
-
command.filename =
|
|
586
|
+
command.filename = filename_value.As<Napi::String>().Utf8Value();
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
Napi::Value progress_callback_value = options.Get("progressCallback");
|
|
590
|
+
if (!progress_callback_value.IsUndefined() &&
|
|
591
|
+
!progress_callback_value.IsNull()) {
|
|
592
|
+
if (!progress_callback_value.IsFunction()) {
|
|
593
|
+
return RejectTypeError(env,
|
|
594
|
+
"addProcessPage(options): "
|
|
595
|
+
"options.progressCallback must be a function",
|
|
596
|
+
"addProcessPage");
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
Napi::Function progress_callback =
|
|
600
|
+
progress_callback_value.As<Napi::Function>();
|
|
601
|
+
Napi::ThreadSafeFunction progress_tsfn = Napi::ThreadSafeFunction::New(
|
|
602
|
+
env, progress_callback, "tesseract_progress_callback", 0, 1);
|
|
603
|
+
command.monitor_context =
|
|
604
|
+
std::make_shared<MonitorContext>(std::move(progress_tsfn));
|
|
583
605
|
}
|
|
584
606
|
|
|
585
607
|
command.page.bytes.resize(length);
|