@luii/node-tesseract-ocr 2.3.2 → 2.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
@@ -735,6 +735,11 @@ export interface TesseractBeginProcessPagesOptions {
735
735
  timeout: number;
736
736
  textonly: boolean;
737
737
  }
738
+ export interface TesseractAddProcessPageOptions {
739
+ buffer: Buffer<ArrayBuffer>;
740
+ filename?: string;
741
+ progressCallback?: (info: ProgressChangedInfo) => void;
742
+ }
738
743
  export interface TesseractProcessPagesStatus {
739
744
  active: boolean;
740
745
  healthy: boolean;
@@ -844,12 +849,15 @@ export interface TesseractDocumentApi {
844
849
  /**
845
850
  * Adds one encoded page to the active multipage session.
846
851
  * @throws {TesseractRuntimeError} If called before `init(...)`.
847
- * @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
848
- * @throws {TesseractArgumentError} If `filename` is provided but is not a string.
852
+ * @param {TesseractAddProcessPageOptions} options Page options.
853
+ * @throws {TesseractArgumentError} If `options` is missing/invalid.
854
+ * @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
855
+ * @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
856
+ * @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
849
857
  * @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
850
858
  * @throws {TesseractWorkerError} If the worker is closing/stopped.
851
859
  */
852
- addPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
860
+ addPage(options: TesseractAddProcessPageOptions): Promise<void>;
853
861
  /**
854
862
  * Finalizes the active multipage session and returns output PDF path.
855
863
  * @throws {TesseractRuntimeError} If called before `init(...)`.
@@ -996,15 +1004,18 @@ export interface TesseractInstance {
996
1004
  */
997
1005
  beginProcessPages(options: TesseractBeginProcessPagesOptions): Promise<void>;
998
1006
  /**
999
- * Adds one encoded page to the current multipage session.
1007
+ * Adds one encoded page to the active multipage session.
1000
1008
  * @deprecated use `document.addPage()`
1001
- * @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
1002
- * @throws {TesseractArgumentError} If `filename` is provided but is not a string.
1009
+ * @param {TesseractAddProcessPageOptions} options Page options.
1010
+ * @throws {TesseractArgumentError} If `options` is missing/invalid.
1011
+ * @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
1012
+ * @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
1013
+ * @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
1003
1014
  * @throws {TesseractRuntimeError} If called before `init(...)`.
1004
1015
  * @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
1005
1016
  * @throws {TesseractWorkerError} If the worker is closing/stopped.
1006
1017
  */
1007
- addProcessPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
1018
+ addProcessPage(options: TesseractAddProcessPageOptions): Promise<void>;
1008
1019
  /**
1009
1020
  * Finalizes the current multipage session and returns the output PDF path.
1010
1021
  * @deprecated use `document.finish()`
@@ -735,6 +735,11 @@ export interface TesseractBeginProcessPagesOptions {
735
735
  timeout: number;
736
736
  textonly: boolean;
737
737
  }
738
+ export interface TesseractAddProcessPageOptions {
739
+ buffer: Buffer<ArrayBuffer>;
740
+ filename?: string;
741
+ progressCallback?: (info: ProgressChangedInfo) => void;
742
+ }
738
743
  export interface TesseractProcessPagesStatus {
739
744
  active: boolean;
740
745
  healthy: boolean;
@@ -844,12 +849,15 @@ export interface TesseractDocumentApi {
844
849
  /**
845
850
  * Adds one encoded page to the active multipage session.
846
851
  * @throws {TesseractRuntimeError} If called before `init(...)`.
847
- * @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
848
- * @throws {TesseractArgumentError} If `filename` is provided but is not a string.
852
+ * @param {TesseractAddProcessPageOptions} options Page options.
853
+ * @throws {TesseractArgumentError} If `options` is missing/invalid.
854
+ * @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
855
+ * @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
856
+ * @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
849
857
  * @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
850
858
  * @throws {TesseractWorkerError} If the worker is closing/stopped.
851
859
  */
852
- addPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
860
+ addPage(options: TesseractAddProcessPageOptions): Promise<void>;
853
861
  /**
854
862
  * Finalizes the active multipage session and returns output PDF path.
855
863
  * @throws {TesseractRuntimeError} If called before `init(...)`.
@@ -996,15 +1004,18 @@ export interface TesseractInstance {
996
1004
  */
997
1005
  beginProcessPages(options: TesseractBeginProcessPagesOptions): Promise<void>;
998
1006
  /**
999
- * Adds one encoded page to the current multipage session.
1007
+ * Adds one encoded page to the active multipage session.
1000
1008
  * @deprecated use `document.addPage()`
1001
- * @throws {TesseractArgumentError} If `buffer` is not a non-empty Buffer.
1002
- * @throws {TesseractArgumentError} If `filename` is provided but is not a string.
1009
+ * @param {TesseractAddProcessPageOptions} options Page options.
1010
+ * @throws {TesseractArgumentError} If `options` is missing/invalid.
1011
+ * @throws {TesseractArgumentError} If `options.buffer` is not a non-empty Buffer.
1012
+ * @throws {TesseractArgumentError} If `options.filename` is provided but is not a string.
1013
+ * @throws {TesseractArgumentError} If `options.progressCallback` is provided but is not a function.
1003
1014
  * @throws {TesseractRuntimeError} If called before `init(...)`.
1004
1015
  * @throws {TesseractRuntimeError} If no session is active, decode fails, or page processing fails.
1005
1016
  * @throws {TesseractWorkerError} If the worker is closing/stopped.
1006
1017
  */
1007
- addProcessPage(buffer: Buffer<ArrayBuffer>, filename?: string): Promise<void>;
1018
+ addProcessPage(options: TesseractAddProcessPageOptions): Promise<void>;
1008
1019
  /**
1009
1020
  * Finalizes the current multipage session and returns the output PDF path.
1010
1021
  * @deprecated use `document.finish()`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@luii/node-tesseract-ocr",
3
- "version": "2.3.2",
3
+ "version": "2.4.0",
4
4
  "private": false,
5
5
  "binary": {
6
6
  "napi_versions": [
package/src/commands.hpp CHANGED
@@ -458,6 +458,7 @@ struct CommandBeginProcessPages {
458
458
  struct CommandAddProcessPage {
459
459
  EncodedImageBuffer page;
460
460
  std::string filename;
461
+ std::shared_ptr<MonitorContext> monitor_context;
461
462
  Result invoke(tesseract::TessBaseAPI &api,
462
463
  std::optional<ProcessPagesSession> &session,
463
464
  const std::atomic<bool> &initialized) const {
@@ -523,18 +524,48 @@ struct CommandAddProcessPage {
523
524
 
524
525
  const char *effective_filename =
525
526
  filename.empty() ? nullptr : filename.c_str();
527
+ api.SetInputName(effective_filename);
528
+ api.SetImage(pix);
526
529
 
527
- bool success = api.ProcessPage(
528
- pix, session->next_page_index, effective_filename, nullptr,
529
- session->timeout_millisec, session->renderer.get());
530
+ bool failed = false;
531
+ MonitorHandle handle{monitor_context};
532
+ auto *monitor = monitor_context ? &handle.monitor : nullptr;
533
+
534
+ if (session->timeout_millisec > 0) {
535
+ tesseract::ETEXT_DESC timeout_only_monitor{};
536
+ if (monitor != nullptr) {
537
+ monitor->set_deadline_msecs(session->timeout_millisec);
538
+ } else {
539
+ timeout_only_monitor.cancel = nullptr;
540
+ timeout_only_monitor.cancel_this = nullptr;
541
+ timeout_only_monitor.set_deadline_msecs(session->timeout_millisec);
542
+ monitor = &timeout_only_monitor;
543
+ }
544
+ failed = api.Recognize(monitor) < 0;
545
+ } else if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY ||
546
+ api.GetPageSegMode() == tesseract::PSM_AUTO_ONLY) {
547
+ tesseract::PageIterator *it = api.AnalyseLayout();
548
+ if (it == nullptr) {
549
+ failed = true;
550
+ } else {
551
+ delete it;
552
+ }
553
+ } else {
554
+ failed = api.Recognize(monitor) < 0;
555
+ }
556
+
557
+ if (session->renderer && !failed) {
558
+ failed = !session->renderer->AddImage(&api);
559
+ }
530
560
  pixDestroy(&pix);
531
561
 
532
- if (!success) {
533
- throw_runtime("addProcessPage: ProcessPage failed at page {}",
534
- session->next_page_index);
562
+ if (!failed) {
563
+ session->next_page_index++;
564
+ return ResultVoid{};
535
565
  }
536
566
 
537
- session->next_page_index++;
567
+ throw_runtime("addProcessPage: ProcessPage failed at page {}",
568
+ session->next_page_index);
538
569
  return ResultVoid{};
539
570
  }
540
571
  };
@@ -553,33 +553,55 @@ Napi::Value TesseractWrapper::AddProcessPage(const Napi::CallbackInfo &info) {
553
553
  Napi::Env env = info.Env();
554
554
  CommandAddProcessPage command{};
555
555
 
556
- if (info.Length() < 1 || info.Length() > 2) {
557
- return RejectTypeError(
558
- env, "addProcessPage(buffer, filename?): expected 1 or 2 arguments",
559
- "addProcessPage");
556
+ if (info.Length() != 1 || !info[0].IsObject()) {
557
+ return RejectTypeError(env,
558
+ "addProcessPage(options): options must be an object",
559
+ "addProcessPage");
560
560
  }
561
561
 
562
- if (!info[0].IsBuffer()) {
562
+ Napi::Object options = info[0].As<Napi::Object>();
563
+
564
+ Napi::Value buffer_value = options.Get("buffer");
565
+ if (!buffer_value.IsBuffer()) {
563
566
  return RejectTypeError(
564
- env, "addProcessPage(buffer, filename?): buffer must be a Buffer",
567
+ env, "addProcessPage(options): options.buffer must be a Buffer",
565
568
  "addProcessPage");
566
569
  }
567
570
 
568
- Napi::Buffer<uint8_t> page_buffer = info[0].As<Napi::Buffer<uint8_t>>();
571
+ Napi::Buffer<uint8_t> page_buffer = buffer_value.As<Napi::Buffer<uint8_t>>();
569
572
  const size_t length = page_buffer.Length();
570
573
  if (length == 0) {
571
574
  return RejectTypeError(env,
572
- "addProcessPage(buffer, filename?): buffer is empty",
575
+ "addProcessPage(options): options.buffer is empty",
573
576
  "addProcessPage");
574
577
  }
575
578
 
576
- if (HasArg(info, 1)) {
577
- if (!info[1].IsString()) {
579
+ Napi::Value filename_value = options.Get("filename");
580
+ if (!filename_value.IsUndefined() && !filename_value.IsNull()) {
581
+ if (!filename_value.IsString()) {
578
582
  return RejectTypeError(
579
- env, "addProcessPage(buffer, filename?): filename must be a string",
583
+ env, "addProcessPage(options): options.filename must be a string",
580
584
  "addProcessPage");
581
585
  }
582
- command.filename = info[1].As<Napi::String>().Utf8Value();
586
+ command.filename = filename_value.As<Napi::String>().Utf8Value();
587
+ }
588
+
589
+ Napi::Value progress_callback_value = options.Get("progressCallback");
590
+ if (!progress_callback_value.IsUndefined() &&
591
+ !progress_callback_value.IsNull()) {
592
+ if (!progress_callback_value.IsFunction()) {
593
+ return RejectTypeError(env,
594
+ "addProcessPage(options): "
595
+ "options.progressCallback must be a function",
596
+ "addProcessPage");
597
+ }
598
+
599
+ Napi::Function progress_callback =
600
+ progress_callback_value.As<Napi::Function>();
601
+ Napi::ThreadSafeFunction progress_tsfn = Napi::ThreadSafeFunction::New(
602
+ env, progress_callback, "tesseract_progress_callback", 0, 1);
603
+ command.monitor_context =
604
+ std::make_shared<MonitorContext>(std::move(progress_tsfn));
583
605
  }
584
606
 
585
607
  command.page.bytes.resize(length);