pdf-oxide-wasm 0.3.51 → 0.3.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -644,7 +644,9 @@ export class WasmHeader {
644
644
  }
645
645
 
646
646
  /**
647
- * OCR configuration for WebAssembly.
647
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
648
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
649
+ * matures, #524.)
648
650
  */
649
651
  export class WasmOcrConfig {
650
652
  free(): void;
@@ -656,15 +658,27 @@ export class WasmOcrConfig {
656
658
  }
657
659
 
658
660
  /**
659
- * OCR engine for WebAssembly.
661
+ * OCR engine for WebAssembly (#524).
662
+ *
663
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
664
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
665
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
666
+ * files and the char dictionary (see `modelManifest()` for the URLs)
667
+ * — typically `fetch()` + the Cache API / IndexedDB for the
668
+ * tens-of-MB models — then hands the bytes to the constructor. This
669
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
670
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
671
+ * explaining this).
660
672
  */
661
673
  export class WasmOcrEngine {
662
674
  free(): void;
663
675
  [Symbol.dispose](): void;
664
676
  /**
665
- * Create a new OCR engine.
677
+ * Not available in this build. OCR needs the `wasm-ocr` build of
678
+ * `pdf-oxide` (the pure-Rust tract backend); the default
679
+ * `pdf-oxide-wasm` ships without it.
666
680
  */
667
- constructor(_det_model_path: string, _rec_model_path: string, _dict_path: string, _config?: WasmOcrConfig | null);
681
+ constructor(_det_model: Uint8Array, _rec_model: Uint8Array, _dict: string, _config?: WasmOcrConfig | null);
668
682
  }
669
683
 
670
684
  /**
@@ -1017,12 +1031,10 @@ export class WasmPdfDocument {
1017
1031
  */
1018
1032
  extractTextLines(page_index: number, region?: Float32Array | null): any;
1019
1033
  /**
1020
- * Extract text using OCR (optical character recognition).
1021
- *
1022
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
1023
- * ONNX Runtime support for the web backend in the current implementation.
1034
+ * Extract text using OCR. Not available in this build — OCR needs
1035
+ * the `wasm-ocr` build of `pdf-oxide`.
1024
1036
  */
1025
- extractTextOcr(_page_index: number, _engine?: WasmOcrEngine | null): string;
1037
+ extractTextOcr(_page_index: number, _engine: WasmOcrEngine): string;
1026
1038
  /**
1027
1039
  * Extract word-level data from a page.
1028
1040
  *
@@ -1445,6 +1457,10 @@ export class WasmPdfPageRegion {
1445
1457
  extractTextLines(): any;
1446
1458
  /**
1447
1459
  * Extract text using OCR from this region.
1460
+ *
1461
+ * Region-scoped OCR is not wired yet; use the page-level
1462
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
1463
+ * (#524 follow-up).
1448
1464
  */
1449
1465
  extractTextOcr(_engine?: WasmOcrEngine | null): string;
1450
1466
  /**
@@ -2544,7 +2544,9 @@ export class WasmHeader {
2544
2544
  if (Symbol.dispose) WasmHeader.prototype[Symbol.dispose] = WasmHeader.prototype.free;
2545
2545
 
2546
2546
  /**
2547
- * OCR configuration for WebAssembly.
2547
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
2548
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
2549
+ * matures, #524.)
2548
2550
  */
2549
2551
  export class WasmOcrConfig {
2550
2552
  __destroy_into_raw() {
@@ -2570,7 +2572,17 @@ export class WasmOcrConfig {
2570
2572
  if (Symbol.dispose) WasmOcrConfig.prototype[Symbol.dispose] = WasmOcrConfig.prototype.free;
2571
2573
 
2572
2574
  /**
2573
- * OCR engine for WebAssembly.
2575
+ * OCR engine for WebAssembly (#524).
2576
+ *
2577
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
2578
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
2579
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
2580
+ * files and the char dictionary (see `modelManifest()` for the URLs)
2581
+ * — typically `fetch()` + the Cache API / IndexedDB for the
2582
+ * tens-of-MB models — then hands the bytes to the constructor. This
2583
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
2584
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
2585
+ * explaining this).
2574
2586
  */
2575
2587
  export class WasmOcrEngine {
2576
2588
  __destroy_into_raw() {
@@ -2584,20 +2596,22 @@ export class WasmOcrEngine {
2584
2596
  wasm.__wbg_wasmocrengine_free(ptr, 0);
2585
2597
  }
2586
2598
  /**
2587
- * Create a new OCR engine.
2588
- * @param {string} _det_model_path
2589
- * @param {string} _rec_model_path
2590
- * @param {string} _dict_path
2599
+ * Not available in this build. OCR needs the `wasm-ocr` build of
2600
+ * `pdf-oxide` (the pure-Rust tract backend); the default
2601
+ * `pdf-oxide-wasm` ships without it.
2602
+ * @param {Uint8Array} _det_model
2603
+ * @param {Uint8Array} _rec_model
2604
+ * @param {string} _dict
2591
2605
  * @param {WasmOcrConfig | null} [_config]
2592
2606
  */
2593
- constructor(_det_model_path, _rec_model_path, _dict_path, _config) {
2607
+ constructor(_det_model, _rec_model, _dict, _config) {
2594
2608
  try {
2595
2609
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
2596
- const ptr0 = passStringToWasm0(_det_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2610
+ const ptr0 = passArray8ToWasm0(_det_model, wasm.__wbindgen_export);
2597
2611
  const len0 = WASM_VECTOR_LEN;
2598
- const ptr1 = passStringToWasm0(_rec_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2612
+ const ptr1 = passArray8ToWasm0(_rec_model, wasm.__wbindgen_export);
2599
2613
  const len1 = WASM_VECTOR_LEN;
2600
- const ptr2 = passStringToWasm0(_dict_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2614
+ const ptr2 = passStringToWasm0(_dict, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2601
2615
  const len2 = WASM_VECTOR_LEN;
2602
2616
  let ptr3 = 0;
2603
2617
  if (!isLikeNone(_config)) {
@@ -3902,41 +3916,35 @@ export class WasmPdfDocument {
3902
3916
  }
3903
3917
  }
3904
3918
  /**
3905
- * Extract text using OCR (optical character recognition).
3906
- *
3907
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
3908
- * ONNX Runtime support for the web backend in the current implementation.
3919
+ * Extract text using OCR. Not available in this build — OCR needs
3920
+ * the `wasm-ocr` build of `pdf-oxide`.
3909
3921
  * @param {number} _page_index
3910
- * @param {WasmOcrEngine | null} [_engine]
3922
+ * @param {WasmOcrEngine} _engine
3911
3923
  * @returns {string}
3912
3924
  */
3913
3925
  extractTextOcr(_page_index, _engine) {
3914
- let deferred3_0;
3915
- let deferred3_1;
3926
+ let deferred2_0;
3927
+ let deferred2_1;
3916
3928
  try {
3917
3929
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3918
- let ptr0 = 0;
3919
- if (!isLikeNone(_engine)) {
3920
- _assertClass(_engine, WasmOcrEngine);
3921
- ptr0 = _engine.__destroy_into_raw();
3922
- }
3923
- wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, ptr0);
3930
+ _assertClass(_engine, WasmOcrEngine);
3931
+ wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, _engine.__wbg_ptr);
3924
3932
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3925
3933
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3926
3934
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3927
3935
  var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3928
- var ptr2 = r0;
3929
- var len2 = r1;
3936
+ var ptr1 = r0;
3937
+ var len1 = r1;
3930
3938
  if (r3) {
3931
- ptr2 = 0; len2 = 0;
3939
+ ptr1 = 0; len1 = 0;
3932
3940
  throw takeObject(r2);
3933
3941
  }
3934
- deferred3_0 = ptr2;
3935
- deferred3_1 = len2;
3936
- return getStringFromWasm0(ptr2, len2);
3942
+ deferred2_0 = ptr1;
3943
+ deferred2_1 = len1;
3944
+ return getStringFromWasm0(ptr1, len1);
3937
3945
  } finally {
3938
3946
  wasm.__wbindgen_add_to_stack_pointer(16);
3939
- wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3947
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3940
3948
  }
3941
3949
  }
3942
3950
  /**
@@ -5695,6 +5703,10 @@ export class WasmPdfPageRegion {
5695
5703
  }
5696
5704
  /**
5697
5705
  * Extract text using OCR from this region.
5706
+ *
5707
+ * Region-scoped OCR is not wired yet; use the page-level
5708
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
5709
+ * (#524 follow-up).
5698
5710
  * @param {WasmOcrEngine | null} [_engine]
5699
5711
  * @returns {string}
5700
5712
  */
Binary file
@@ -644,7 +644,9 @@ export class WasmHeader {
644
644
  }
645
645
 
646
646
  /**
647
- * OCR configuration for WebAssembly.
647
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
648
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
649
+ * matures, #524.)
648
650
  */
649
651
  export class WasmOcrConfig {
650
652
  free(): void;
@@ -656,15 +658,27 @@ export class WasmOcrConfig {
656
658
  }
657
659
 
658
660
  /**
659
- * OCR engine for WebAssembly.
661
+ * OCR engine for WebAssembly (#524).
662
+ *
663
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
664
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
665
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
666
+ * files and the char dictionary (see `modelManifest()` for the URLs)
667
+ * — typically `fetch()` + the Cache API / IndexedDB for the
668
+ * tens-of-MB models — then hands the bytes to the constructor. This
669
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
670
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
671
+ * explaining this).
660
672
  */
661
673
  export class WasmOcrEngine {
662
674
  free(): void;
663
675
  [Symbol.dispose](): void;
664
676
  /**
665
- * Create a new OCR engine.
677
+ * Not available in this build. OCR needs the `wasm-ocr` build of
678
+ * `pdf-oxide` (the pure-Rust tract backend); the default
679
+ * `pdf-oxide-wasm` ships without it.
666
680
  */
667
- constructor(_det_model_path: string, _rec_model_path: string, _dict_path: string, _config?: WasmOcrConfig | null);
681
+ constructor(_det_model: Uint8Array, _rec_model: Uint8Array, _dict: string, _config?: WasmOcrConfig | null);
668
682
  }
669
683
 
670
684
  /**
@@ -1017,12 +1031,10 @@ export class WasmPdfDocument {
1017
1031
  */
1018
1032
  extractTextLines(page_index: number, region?: Float32Array | null): any;
1019
1033
  /**
1020
- * Extract text using OCR (optical character recognition).
1021
- *
1022
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
1023
- * ONNX Runtime support for the web backend in the current implementation.
1034
+ * Extract text using OCR. Not available in this build — OCR needs
1035
+ * the `wasm-ocr` build of `pdf-oxide`.
1024
1036
  */
1025
- extractTextOcr(_page_index: number, _engine?: WasmOcrEngine | null): string;
1037
+ extractTextOcr(_page_index: number, _engine: WasmOcrEngine): string;
1026
1038
  /**
1027
1039
  * Extract word-level data from a page.
1028
1040
  *
@@ -1445,6 +1457,10 @@ export class WasmPdfPageRegion {
1445
1457
  extractTextLines(): any;
1446
1458
  /**
1447
1459
  * Extract text using OCR from this region.
1460
+ *
1461
+ * Region-scoped OCR is not wired yet; use the page-level
1462
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
1463
+ * (#524 follow-up).
1448
1464
  */
1449
1465
  extractTextOcr(_engine?: WasmOcrEngine | null): string;
1450
1466
  /**
@@ -2559,7 +2559,9 @@ if (Symbol.dispose) WasmHeader.prototype[Symbol.dispose] = WasmHeader.prototype.
2559
2559
  exports.WasmHeader = WasmHeader;
2560
2560
 
2561
2561
  /**
2562
- * OCR configuration for WebAssembly.
2562
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
2563
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
2564
+ * matures, #524.)
2563
2565
  */
2564
2566
  class WasmOcrConfig {
2565
2567
  __destroy_into_raw() {
@@ -2586,7 +2588,17 @@ if (Symbol.dispose) WasmOcrConfig.prototype[Symbol.dispose] = WasmOcrConfig.prot
2586
2588
  exports.WasmOcrConfig = WasmOcrConfig;
2587
2589
 
2588
2590
  /**
2589
- * OCR engine for WebAssembly.
2591
+ * OCR engine for WebAssembly (#524).
2592
+ *
2593
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
2594
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
2595
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
2596
+ * files and the char dictionary (see `modelManifest()` for the URLs)
2597
+ * — typically `fetch()` + the Cache API / IndexedDB for the
2598
+ * tens-of-MB models — then hands the bytes to the constructor. This
2599
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
2600
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
2601
+ * explaining this).
2590
2602
  */
2591
2603
  class WasmOcrEngine {
2592
2604
  __destroy_into_raw() {
@@ -2600,20 +2612,22 @@ class WasmOcrEngine {
2600
2612
  wasm.__wbg_wasmocrengine_free(ptr, 0);
2601
2613
  }
2602
2614
  /**
2603
- * Create a new OCR engine.
2604
- * @param {string} _det_model_path
2605
- * @param {string} _rec_model_path
2606
- * @param {string} _dict_path
2615
+ * Not available in this build. OCR needs the `wasm-ocr` build of
2616
+ * `pdf-oxide` (the pure-Rust tract backend); the default
2617
+ * `pdf-oxide-wasm` ships without it.
2618
+ * @param {Uint8Array} _det_model
2619
+ * @param {Uint8Array} _rec_model
2620
+ * @param {string} _dict
2607
2621
  * @param {WasmOcrConfig | null} [_config]
2608
2622
  */
2609
- constructor(_det_model_path, _rec_model_path, _dict_path, _config) {
2623
+ constructor(_det_model, _rec_model, _dict, _config) {
2610
2624
  try {
2611
2625
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
2612
- const ptr0 = passStringToWasm0(_det_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2626
+ const ptr0 = passArray8ToWasm0(_det_model, wasm.__wbindgen_export);
2613
2627
  const len0 = WASM_VECTOR_LEN;
2614
- const ptr1 = passStringToWasm0(_rec_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2628
+ const ptr1 = passArray8ToWasm0(_rec_model, wasm.__wbindgen_export);
2615
2629
  const len1 = WASM_VECTOR_LEN;
2616
- const ptr2 = passStringToWasm0(_dict_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2630
+ const ptr2 = passStringToWasm0(_dict, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2617
2631
  const len2 = WASM_VECTOR_LEN;
2618
2632
  let ptr3 = 0;
2619
2633
  if (!isLikeNone(_config)) {
@@ -3921,41 +3935,35 @@ class WasmPdfDocument {
3921
3935
  }
3922
3936
  }
3923
3937
  /**
3924
- * Extract text using OCR (optical character recognition).
3925
- *
3926
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
3927
- * ONNX Runtime support for the web backend in the current implementation.
3938
+ * Extract text using OCR. Not available in this build — OCR needs
3939
+ * the `wasm-ocr` build of `pdf-oxide`.
3928
3940
  * @param {number} _page_index
3929
- * @param {WasmOcrEngine | null} [_engine]
3941
+ * @param {WasmOcrEngine} _engine
3930
3942
  * @returns {string}
3931
3943
  */
3932
3944
  extractTextOcr(_page_index, _engine) {
3933
- let deferred3_0;
3934
- let deferred3_1;
3945
+ let deferred2_0;
3946
+ let deferred2_1;
3935
3947
  try {
3936
3948
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3937
- let ptr0 = 0;
3938
- if (!isLikeNone(_engine)) {
3939
- _assertClass(_engine, WasmOcrEngine);
3940
- ptr0 = _engine.__destroy_into_raw();
3941
- }
3942
- wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, ptr0);
3949
+ _assertClass(_engine, WasmOcrEngine);
3950
+ wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, _engine.__wbg_ptr);
3943
3951
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3944
3952
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3945
3953
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3946
3954
  var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3947
- var ptr2 = r0;
3948
- var len2 = r1;
3955
+ var ptr1 = r0;
3956
+ var len1 = r1;
3949
3957
  if (r3) {
3950
- ptr2 = 0; len2 = 0;
3958
+ ptr1 = 0; len1 = 0;
3951
3959
  throw takeObject(r2);
3952
3960
  }
3953
- deferred3_0 = ptr2;
3954
- deferred3_1 = len2;
3955
- return getStringFromWasm0(ptr2, len2);
3961
+ deferred2_0 = ptr1;
3962
+ deferred2_1 = len1;
3963
+ return getStringFromWasm0(ptr1, len1);
3956
3964
  } finally {
3957
3965
  wasm.__wbindgen_add_to_stack_pointer(16);
3958
- wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3966
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3959
3967
  }
3960
3968
  }
3961
3969
  /**
@@ -5715,6 +5723,10 @@ class WasmPdfPageRegion {
5715
5723
  }
5716
5724
  /**
5717
5725
  * Extract text using OCR from this region.
5726
+ *
5727
+ * Region-scoped OCR is not wired yet; use the page-level
5728
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
5729
+ * (#524 follow-up).
5718
5730
  * @param {WasmOcrEngine | null} [_engine]
5719
5731
  * @returns {string}
5720
5732
  */
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-oxide-wasm",
3
- "version": "0.3.51",
3
+ "version": "0.3.52",
4
4
  "description": "Fast, zero-dependency PDF toolkit for Node.js, browsers, and edge runtimes — text extraction, markdown/HTML conversion, search, form filling, creation, and editing. Rust core compiled to WebAssembly.",
5
5
  "license": "MIT OR Apache-2.0",
6
6
  "repository": {
@@ -644,7 +644,9 @@ export class WasmHeader {
644
644
  }
645
645
 
646
646
  /**
647
- * OCR configuration for WebAssembly.
647
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
648
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
649
+ * matures, #524.)
648
650
  */
649
651
  export class WasmOcrConfig {
650
652
  free(): void;
@@ -656,15 +658,27 @@ export class WasmOcrConfig {
656
658
  }
657
659
 
658
660
  /**
659
- * OCR engine for WebAssembly.
661
+ * OCR engine for WebAssembly (#524).
662
+ *
663
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
664
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
665
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
666
+ * files and the char dictionary (see `modelManifest()` for the URLs)
667
+ * — typically `fetch()` + the Cache API / IndexedDB for the
668
+ * tens-of-MB models — then hands the bytes to the constructor. This
669
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
670
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
671
+ * explaining this).
660
672
  */
661
673
  export class WasmOcrEngine {
662
674
  free(): void;
663
675
  [Symbol.dispose](): void;
664
676
  /**
665
- * Create a new OCR engine.
677
+ * Not available in this build. OCR needs the `wasm-ocr` build of
678
+ * `pdf-oxide` (the pure-Rust tract backend); the default
679
+ * `pdf-oxide-wasm` ships without it.
666
680
  */
667
- constructor(_det_model_path: string, _rec_model_path: string, _dict_path: string, _config?: WasmOcrConfig | null);
681
+ constructor(_det_model: Uint8Array, _rec_model: Uint8Array, _dict: string, _config?: WasmOcrConfig | null);
668
682
  }
669
683
 
670
684
  /**
@@ -1017,12 +1031,10 @@ export class WasmPdfDocument {
1017
1031
  */
1018
1032
  extractTextLines(page_index: number, region?: Float32Array | null): any;
1019
1033
  /**
1020
- * Extract text using OCR (optical character recognition).
1021
- *
1022
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
1023
- * ONNX Runtime support for the web backend in the current implementation.
1034
+ * Extract text using OCR. Not available in this build — OCR needs
1035
+ * the `wasm-ocr` build of `pdf-oxide`.
1024
1036
  */
1025
- extractTextOcr(_page_index: number, _engine?: WasmOcrEngine | null): string;
1037
+ extractTextOcr(_page_index: number, _engine: WasmOcrEngine): string;
1026
1038
  /**
1027
1039
  * Extract word-level data from a page.
1028
1040
  *
@@ -1445,6 +1457,10 @@ export class WasmPdfPageRegion {
1445
1457
  extractTextLines(): any;
1446
1458
  /**
1447
1459
  * Extract text using OCR from this region.
1460
+ *
1461
+ * Region-scoped OCR is not wired yet; use the page-level
1462
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
1463
+ * (#524 follow-up).
1448
1464
  */
1449
1465
  extractTextOcr(_engine?: WasmOcrEngine | null): string;
1450
1466
  /**
package/web/pdf_oxide.js CHANGED
@@ -2546,7 +2546,9 @@ export class WasmHeader {
2546
2546
  if (Symbol.dispose) WasmHeader.prototype[Symbol.dispose] = WasmHeader.prototype.free;
2547
2547
 
2548
2548
  /**
2549
- * OCR configuration for WebAssembly.
2549
+ * OCR configuration for WebAssembly. (Currently a marker — the engine
2550
+ * uses tuned defaults; knobs are exposed as the WASM OCR surface
2551
+ * matures, #524.)
2550
2552
  */
2551
2553
  export class WasmOcrConfig {
2552
2554
  __destroy_into_raw() {
@@ -2572,7 +2574,17 @@ export class WasmOcrConfig {
2572
2574
  if (Symbol.dispose) WasmOcrConfig.prototype[Symbol.dispose] = WasmOcrConfig.prototype.free;
2573
2575
 
2574
2576
  /**
2575
- * OCR engine for WebAssembly.
2577
+ * OCR engine for WebAssembly (#524).
2578
+ *
2579
+ * OCR runs entirely in-WASM via the pure-Rust `tract` backend — no
2580
+ * native ONNX Runtime, no JS bridge. Model **delivery is host-side**:
2581
+ * the browser/Deno/edge host fetches the detector + recognizer ONNX
2582
+ * files and the char dictionary (see `modelManifest()` for the URLs)
2583
+ * — typically `fetch()` + the Cache API / IndexedDB for the
2584
+ * tens-of-MB models — then hands the bytes to the constructor. This
2585
+ * only works in the `wasm-ocr` build of `pdf-oxide`; the default
2586
+ * `pdf-oxide-wasm` has no OCR (the constructor returns an error
2587
+ * explaining this).
2576
2588
  */
2577
2589
  export class WasmOcrEngine {
2578
2590
  __destroy_into_raw() {
@@ -2586,20 +2598,22 @@ export class WasmOcrEngine {
2586
2598
  wasm.__wbg_wasmocrengine_free(ptr, 0);
2587
2599
  }
2588
2600
  /**
2589
- * Create a new OCR engine.
2590
- * @param {string} _det_model_path
2591
- * @param {string} _rec_model_path
2592
- * @param {string} _dict_path
2601
+ * Not available in this build. OCR needs the `wasm-ocr` build of
2602
+ * `pdf-oxide` (the pure-Rust tract backend); the default
2603
+ * `pdf-oxide-wasm` ships without it.
2604
+ * @param {Uint8Array} _det_model
2605
+ * @param {Uint8Array} _rec_model
2606
+ * @param {string} _dict
2593
2607
  * @param {WasmOcrConfig | null} [_config]
2594
2608
  */
2595
- constructor(_det_model_path, _rec_model_path, _dict_path, _config) {
2609
+ constructor(_det_model, _rec_model, _dict, _config) {
2596
2610
  try {
2597
2611
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
2598
- const ptr0 = passStringToWasm0(_det_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2612
+ const ptr0 = passArray8ToWasm0(_det_model, wasm.__wbindgen_export);
2599
2613
  const len0 = WASM_VECTOR_LEN;
2600
- const ptr1 = passStringToWasm0(_rec_model_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2614
+ const ptr1 = passArray8ToWasm0(_rec_model, wasm.__wbindgen_export);
2601
2615
  const len1 = WASM_VECTOR_LEN;
2602
- const ptr2 = passStringToWasm0(_dict_path, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2616
+ const ptr2 = passStringToWasm0(_dict, wasm.__wbindgen_export, wasm.__wbindgen_export2);
2603
2617
  const len2 = WASM_VECTOR_LEN;
2604
2618
  let ptr3 = 0;
2605
2619
  if (!isLikeNone(_config)) {
@@ -3904,41 +3918,35 @@ export class WasmPdfDocument {
3904
3918
  }
3905
3919
  }
3906
3920
  /**
3907
- * Extract text using OCR (optical character recognition).
3908
- *
3909
- * NOTE: OCR is not yet supported in the WebAssembly build due to missing
3910
- * ONNX Runtime support for the web backend in the current implementation.
3921
+ * Extract text using OCR. Not available in this build — OCR needs
3922
+ * the `wasm-ocr` build of `pdf-oxide`.
3911
3923
  * @param {number} _page_index
3912
- * @param {WasmOcrEngine | null} [_engine]
3924
+ * @param {WasmOcrEngine} _engine
3913
3925
  * @returns {string}
3914
3926
  */
3915
3927
  extractTextOcr(_page_index, _engine) {
3916
- let deferred3_0;
3917
- let deferred3_1;
3928
+ let deferred2_0;
3929
+ let deferred2_1;
3918
3930
  try {
3919
3931
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3920
- let ptr0 = 0;
3921
- if (!isLikeNone(_engine)) {
3922
- _assertClass(_engine, WasmOcrEngine);
3923
- ptr0 = _engine.__destroy_into_raw();
3924
- }
3925
- wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, ptr0);
3932
+ _assertClass(_engine, WasmOcrEngine);
3933
+ wasm.wasmpdfdocument_extractTextOcr(retptr, this.__wbg_ptr, _page_index, _engine.__wbg_ptr);
3926
3934
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3927
3935
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3928
3936
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3929
3937
  var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3930
- var ptr2 = r0;
3931
- var len2 = r1;
3938
+ var ptr1 = r0;
3939
+ var len1 = r1;
3932
3940
  if (r3) {
3933
- ptr2 = 0; len2 = 0;
3941
+ ptr1 = 0; len1 = 0;
3934
3942
  throw takeObject(r2);
3935
3943
  }
3936
- deferred3_0 = ptr2;
3937
- deferred3_1 = len2;
3938
- return getStringFromWasm0(ptr2, len2);
3944
+ deferred2_0 = ptr1;
3945
+ deferred2_1 = len1;
3946
+ return getStringFromWasm0(ptr1, len1);
3939
3947
  } finally {
3940
3948
  wasm.__wbindgen_add_to_stack_pointer(16);
3941
- wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3949
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3942
3950
  }
3943
3951
  }
3944
3952
  /**
@@ -5697,6 +5705,10 @@ export class WasmPdfPageRegion {
5697
5705
  }
5698
5706
  /**
5699
5707
  * Extract text using OCR from this region.
5708
+ *
5709
+ * Region-scoped OCR is not wired yet; use the page-level
5710
+ * `WasmPdfDocument.extractTextOcr(pageIndex, engine)` for now
5711
+ * (#524 follow-up).
5700
5712
  * @param {WasmOcrEngine | null} [_engine]
5701
5713
  * @returns {string}
5702
5714
  */
Binary file