pdf-oxide-wasm 0.3.50 → 0.3.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
815
815
  * @returns true if authentication succeeded
816
816
  */
817
817
  authenticate(password: string): boolean;
818
+ /**
819
+ * Cheap per-page text-vs-OCR classification → JSON
820
+ * `DocumentClassification`.
821
+ */
822
+ classifyDocument(): string;
823
+ /**
824
+ * Cheap per-page classification → JSON `PageClassification`.
825
+ */
826
+ classifyPage(page_index: number): string;
818
827
  /**
819
828
  * Clear all pending erase operations for a page.
820
829
  */
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
934
943
  * @returns Array of path objects
935
944
  */
936
945
  extractLines(page_index: number, region?: Float32Array | null): any;
946
+ /**
947
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
948
+ * bbox + typed reason). `optionsJson` is `{}`-tolerant
949
+ * `AutoExtractOptions`; undefined/empty → defaults.
950
+ */
951
+ extractPageAuto(page_index: number, options_json?: string | null): string;
937
952
  /**
938
953
  * Extract complete page text data in a single call.
939
954
  *
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
990
1005
  * @param region - Optional [x, y, width, height] to filter by
991
1006
  */
992
1007
  extractText(page_index: number, region: any): string;
1008
+ /**
1009
+ * One-shot auto text extraction — graceful native fallback (never
1010
+ * the opaque OCR error #513).
1011
+ */
1012
+ extractTextAuto(page_index: number): string;
993
1013
  /**
994
1014
  * Extract text lines from a page.
995
1015
  *
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
1600
1620
  */
1601
1621
  export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1602
1622
 
1623
+ /**
1624
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
1625
+ * supported language's cache filenames and source URLs).
1626
+ *
1627
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
1628
+ * or network-to-disk, so a download-to-cache prefetch cannot run
1629
+ * here. This manifest is informational — it lets the JS host learn
1630
+ * which model files/URLs to fetch and bundle (or ship out of band)
1631
+ * before driving OCR. There is intentionally no `prefetchModels` in
1632
+ * the WASM surface (see `prefetchAvailable`, which always returns
1633
+ * `false`).
1634
+ */
1635
+ export function modelManifest(): string;
1636
+
1603
1637
  /**
1604
1638
  * Plan a bookmark split without producing PDFs. Returns a JSON array
1605
1639
  * of segment objects (`index, startPage…` shape from
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1607
1641
  */
1608
1642
  export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
1609
1643
 
1644
+ /**
1645
+ * #519: Whether this build can download OCR models to a local cache.
1646
+ * Always `false` in WASM — provisioning is host-side (see
1647
+ * `modelManifest`).
1648
+ */
1649
+ export function prefetchAvailable(): boolean;
1650
+
1610
1651
  /**
1611
1652
  * Install the process-wide runtime crypto policy from its grammar
1612
1653
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
@@ -5,5 +5,5 @@ import { __wbg_set_wasm } from "./pdf_oxide_bg.js";
5
5
  __wbg_set_wasm(wasm);
6
6
 
7
7
  export {
8
- Align, ArtifactStyle, Dss, PadesLevel, RevocationMaterial, StreamingTable, WasmArtifact, WasmCertificate, WasmDocumentBuilder, WasmEmbeddedFont, WasmFluentPageBuilder, WasmFooter, WasmHeader, WasmOcrConfig, WasmOcrEngine, WasmPageTemplate, WasmPdf, WasmPdfDocument, WasmPdfPageRegion, WasmSignature, WasmTimestamp, cryptoCbom, cryptoInventory, cryptoPolicy, disableLogging, generateBarcodeSvg, generateQrSvg, hasDocumentTimestamp, planSplitByBookmarks, setCryptoPolicy, setLogLevel, signPdfBytes, signPdfBytesPades, splitByBookmarks
8
+ Align, ArtifactStyle, Dss, PadesLevel, RevocationMaterial, StreamingTable, WasmArtifact, WasmCertificate, WasmDocumentBuilder, WasmEmbeddedFont, WasmFluentPageBuilder, WasmFooter, WasmHeader, WasmOcrConfig, WasmOcrEngine, WasmPageTemplate, WasmPdf, WasmPdfDocument, WasmPdfPageRegion, WasmSignature, WasmTimestamp, cryptoCbom, cryptoInventory, cryptoPolicy, disableLogging, generateBarcodeSvg, generateQrSvg, hasDocumentTimestamp, modelManifest, planSplitByBookmarks, prefetchAvailable, setCryptoPolicy, setLogLevel, signPdfBytes, signPdfBytesPades, splitByBookmarks
9
9
  } from "./pdf_oxide_bg.js";
@@ -3125,6 +3125,64 @@ export class WasmPdfDocument {
3125
3125
  wasm.__wbindgen_add_to_stack_pointer(16);
3126
3126
  }
3127
3127
  }
3128
+ /**
3129
+ * Cheap per-page text-vs-OCR classification → JSON
3130
+ * `DocumentClassification`.
3131
+ * @returns {string}
3132
+ */
3133
+ classifyDocument() {
3134
+ let deferred2_0;
3135
+ let deferred2_1;
3136
+ try {
3137
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3138
+ wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
3139
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3140
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3141
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3142
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3143
+ var ptr1 = r0;
3144
+ var len1 = r1;
3145
+ if (r3) {
3146
+ ptr1 = 0; len1 = 0;
3147
+ throw takeObject(r2);
3148
+ }
3149
+ deferred2_0 = ptr1;
3150
+ deferred2_1 = len1;
3151
+ return getStringFromWasm0(ptr1, len1);
3152
+ } finally {
3153
+ wasm.__wbindgen_add_to_stack_pointer(16);
3154
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3155
+ }
3156
+ }
3157
+ /**
3158
+ * Cheap per-page classification → JSON `PageClassification`.
3159
+ * @param {number} page_index
3160
+ * @returns {string}
3161
+ */
3162
+ classifyPage(page_index) {
3163
+ let deferred2_0;
3164
+ let deferred2_1;
3165
+ try {
3166
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3167
+ wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
3168
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3169
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3170
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3171
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3172
+ var ptr1 = r0;
3173
+ var len1 = r1;
3174
+ if (r3) {
3175
+ ptr1 = 0; len1 = 0;
3176
+ throw takeObject(r2);
3177
+ }
3178
+ deferred2_0 = ptr1;
3179
+ deferred2_1 = len1;
3180
+ return getStringFromWasm0(ptr1, len1);
3181
+ } finally {
3182
+ wasm.__wbindgen_add_to_stack_pointer(16);
3183
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3184
+ }
3185
+ }
3128
3186
  /**
3129
3187
  * Clear all pending erase operations for a page.
3130
3188
  * @param {number} page_index
@@ -3553,6 +3611,40 @@ export class WasmPdfDocument {
3553
3611
  wasm.__wbindgen_add_to_stack_pointer(16);
3554
3612
  }
3555
3613
  }
3614
+ /**
3615
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
3616
+ * bbox + typed reason). `optionsJson` is `{}`-tolerant
3617
+ * `AutoExtractOptions`; undefined/empty → defaults.
3618
+ * @param {number} page_index
3619
+ * @param {string | null} [options_json]
3620
+ * @returns {string}
3621
+ */
3622
+ extractPageAuto(page_index, options_json) {
3623
+ let deferred3_0;
3624
+ let deferred3_1;
3625
+ try {
3626
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3627
+ var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
3628
+ var len0 = WASM_VECTOR_LEN;
3629
+ wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
3630
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3631
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3632
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3633
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3634
+ var ptr2 = r0;
3635
+ var len2 = r1;
3636
+ if (r3) {
3637
+ ptr2 = 0; len2 = 0;
3638
+ throw takeObject(r2);
3639
+ }
3640
+ deferred3_0 = ptr2;
3641
+ deferred3_1 = len2;
3642
+ return getStringFromWasm0(ptr2, len2);
3643
+ } finally {
3644
+ wasm.__wbindgen_add_to_stack_pointer(16);
3645
+ wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3646
+ }
3647
+ }
3556
3648
  /**
3557
3649
  * Extract complete page text data in a single call.
3558
3650
  *
@@ -3754,6 +3846,36 @@ export class WasmPdfDocument {
3754
3846
  wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3755
3847
  }
3756
3848
  }
3849
+ /**
3850
+ * One-shot auto text extraction — graceful native fallback (never
3851
+ * the opaque OCR error #513).
3852
+ * @param {number} page_index
3853
+ * @returns {string}
3854
+ */
3855
+ extractTextAuto(page_index) {
3856
+ let deferred2_0;
3857
+ let deferred2_1;
3858
+ try {
3859
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3860
+ wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
3861
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3862
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3863
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3864
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3865
+ var ptr1 = r0;
3866
+ var len1 = r1;
3867
+ if (r3) {
3868
+ ptr1 = 0; len1 = 0;
3869
+ throw takeObject(r2);
3870
+ }
3871
+ deferred2_0 = ptr1;
3872
+ deferred2_1 = len1;
3873
+ return getStringFromWasm0(ptr1, len1);
3874
+ } finally {
3875
+ wasm.__wbindgen_add_to_stack_pointer(16);
3876
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3877
+ }
3878
+ }
3757
3879
  /**
3758
3880
  * Extract text lines from a page.
3759
3881
  *
@@ -6153,6 +6275,36 @@ export function hasDocumentTimestamp(pdf_data) {
6153
6275
  return ret !== 0;
6154
6276
  }
6155
6277
 
6278
+ /**
6279
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
6280
+ * supported language's cache filenames and source URLs).
6281
+ *
6282
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
6283
+ * or network-to-disk, so a download-to-cache prefetch cannot run
6284
+ * here. This manifest is informational — it lets the JS host learn
6285
+ * which model files/URLs to fetch and bundle (or ship out of band)
6286
+ * before driving OCR. There is intentionally no `prefetchModels` in
6287
+ * the WASM surface (see `prefetchAvailable`, which always returns
6288
+ * `false`).
6289
+ * @returns {string}
6290
+ */
6291
+ export function modelManifest() {
6292
+ let deferred1_0;
6293
+ let deferred1_1;
6294
+ try {
6295
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
6296
+ wasm.modelManifest(retptr);
6297
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
6298
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
6299
+ deferred1_0 = r0;
6300
+ deferred1_1 = r1;
6301
+ return getStringFromWasm0(r0, r1);
6302
+ } finally {
6303
+ wasm.__wbindgen_add_to_stack_pointer(16);
6304
+ wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
6305
+ }
6306
+ }
6307
+
6156
6308
  /**
6157
6309
  * Plan a bookmark split without producing PDFs. Returns a JSON array
6158
6310
  * of segment objects (`index, startPage…` shape from
@@ -6184,6 +6336,17 @@ export function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level
6184
6336
  }
6185
6337
  }
6186
6338
 
6339
+ /**
6340
+ * #519: Whether this build can download OCR models to a local cache.
6341
+ * Always `false` in WASM — provisioning is host-side (see
6342
+ * `modelManifest`).
6343
+ * @returns {boolean}
6344
+ */
6345
+ export function prefetchAvailable() {
6346
+ const ret = wasm.prefetchAvailable();
6347
+ return ret !== 0;
6348
+ }
6349
+
6187
6350
  /**
6188
6351
  * Install the process-wide runtime crypto policy from its grammar
6189
6352
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
Binary file
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
23
23
  export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
24
24
  export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
25
25
  export const hasDocumentTimestamp: (a: number, b: number) => number;
26
+ export const modelManifest: (a: number) => void;
26
27
  export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
28
+ export const prefetchAvailable: () => number;
27
29
  export const setCryptoPolicy: (a: number, b: number, c: number) => void;
28
30
  export const setLogLevel: (a: number, b: number, c: number) => void;
29
31
  export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
158
160
  export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
159
161
  export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
160
162
  export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
163
+ export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
164
+ export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
161
165
  export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
162
166
  export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
163
167
  export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
177
181
  export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
178
182
  export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
179
183
  export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
184
+ export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
180
185
  export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
181
186
  export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
182
187
  export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
184
189
  export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
185
190
  export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
186
191
  export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
192
+ export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
187
193
  export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
188
194
  export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
189
195
  export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
815
815
  * @returns true if authentication succeeded
816
816
  */
817
817
  authenticate(password: string): boolean;
818
+ /**
819
+ * Cheap per-page text-vs-OCR classification → JSON
820
+ * `DocumentClassification`.
821
+ */
822
+ classifyDocument(): string;
823
+ /**
824
+ * Cheap per-page classification → JSON `PageClassification`.
825
+ */
826
+ classifyPage(page_index: number): string;
818
827
  /**
819
828
  * Clear all pending erase operations for a page.
820
829
  */
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
934
943
  * @returns Array of path objects
935
944
  */
936
945
  extractLines(page_index: number, region?: Float32Array | null): any;
946
+ /**
947
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
948
+ * bbox + typed reason). `optionsJson` is `{}`-tolerant
949
+ * `AutoExtractOptions`; undefined/empty → defaults.
950
+ */
951
+ extractPageAuto(page_index: number, options_json?: string | null): string;
937
952
  /**
938
953
  * Extract complete page text data in a single call.
939
954
  *
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
990
1005
  * @param region - Optional [x, y, width, height] to filter by
991
1006
  */
992
1007
  extractText(page_index: number, region: any): string;
1008
+ /**
1009
+ * One-shot auto text extraction — graceful native fallback (never
1010
+ * the opaque OCR error #513).
1011
+ */
1012
+ extractTextAuto(page_index: number): string;
993
1013
  /**
994
1014
  * Extract text lines from a page.
995
1015
  *
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
1600
1620
  */
1601
1621
  export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1602
1622
 
1623
+ /**
1624
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
1625
+ * supported language's cache filenames and source URLs).
1626
+ *
1627
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
1628
+ * or network-to-disk, so a download-to-cache prefetch cannot run
1629
+ * here. This manifest is informational — it lets the JS host learn
1630
+ * which model files/URLs to fetch and bundle (or ship out of band)
1631
+ * before driving OCR. There is intentionally no `prefetchModels` in
1632
+ * the WASM surface (see `prefetchAvailable`, which always returns
1633
+ * `false`).
1634
+ */
1635
+ export function modelManifest(): string;
1636
+
1603
1637
  /**
1604
1638
  * Plan a bookmark split without producing PDFs. Returns a JSON array
1605
1639
  * of segment objects (`index, startPage…` shape from
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1607
1641
  */
1608
1642
  export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
1609
1643
 
1644
+ /**
1645
+ * #519: Whether this build can download OCR models to a local cache.
1646
+ * Always `false` in WASM — provisioning is host-side (see
1647
+ * `modelManifest`).
1648
+ */
1649
+ export function prefetchAvailable(): boolean;
1650
+
1610
1651
  /**
1611
1652
  * Install the process-wide runtime crypto policy from its grammar
1612
1653
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
@@ -3144,6 +3144,64 @@ class WasmPdfDocument {
3144
3144
  wasm.__wbindgen_add_to_stack_pointer(16);
3145
3145
  }
3146
3146
  }
3147
+ /**
3148
+ * Cheap per-page text-vs-OCR classification → JSON
3149
+ * `DocumentClassification`.
3150
+ * @returns {string}
3151
+ */
3152
+ classifyDocument() {
3153
+ let deferred2_0;
3154
+ let deferred2_1;
3155
+ try {
3156
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3157
+ wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
3158
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3159
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3160
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3161
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3162
+ var ptr1 = r0;
3163
+ var len1 = r1;
3164
+ if (r3) {
3165
+ ptr1 = 0; len1 = 0;
3166
+ throw takeObject(r2);
3167
+ }
3168
+ deferred2_0 = ptr1;
3169
+ deferred2_1 = len1;
3170
+ return getStringFromWasm0(ptr1, len1);
3171
+ } finally {
3172
+ wasm.__wbindgen_add_to_stack_pointer(16);
3173
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3174
+ }
3175
+ }
3176
+ /**
3177
+ * Cheap per-page classification → JSON `PageClassification`.
3178
+ * @param {number} page_index
3179
+ * @returns {string}
3180
+ */
3181
+ classifyPage(page_index) {
3182
+ let deferred2_0;
3183
+ let deferred2_1;
3184
+ try {
3185
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3186
+ wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
3187
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3188
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3189
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3190
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3191
+ var ptr1 = r0;
3192
+ var len1 = r1;
3193
+ if (r3) {
3194
+ ptr1 = 0; len1 = 0;
3195
+ throw takeObject(r2);
3196
+ }
3197
+ deferred2_0 = ptr1;
3198
+ deferred2_1 = len1;
3199
+ return getStringFromWasm0(ptr1, len1);
3200
+ } finally {
3201
+ wasm.__wbindgen_add_to_stack_pointer(16);
3202
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3203
+ }
3204
+ }
3147
3205
  /**
3148
3206
  * Clear all pending erase operations for a page.
3149
3207
  * @param {number} page_index
@@ -3572,6 +3630,40 @@ class WasmPdfDocument {
3572
3630
  wasm.__wbindgen_add_to_stack_pointer(16);
3573
3631
  }
3574
3632
  }
3633
+ /**
3634
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
3635
+ * bbox + typed reason). `optionsJson` is `{}`-tolerant
3636
+ * `AutoExtractOptions`; undefined/empty → defaults.
3637
+ * @param {number} page_index
3638
+ * @param {string | null} [options_json]
3639
+ * @returns {string}
3640
+ */
3641
+ extractPageAuto(page_index, options_json) {
3642
+ let deferred3_0;
3643
+ let deferred3_1;
3644
+ try {
3645
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3646
+ var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
3647
+ var len0 = WASM_VECTOR_LEN;
3648
+ wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
3649
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3650
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3651
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3652
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3653
+ var ptr2 = r0;
3654
+ var len2 = r1;
3655
+ if (r3) {
3656
+ ptr2 = 0; len2 = 0;
3657
+ throw takeObject(r2);
3658
+ }
3659
+ deferred3_0 = ptr2;
3660
+ deferred3_1 = len2;
3661
+ return getStringFromWasm0(ptr2, len2);
3662
+ } finally {
3663
+ wasm.__wbindgen_add_to_stack_pointer(16);
3664
+ wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3665
+ }
3666
+ }
3575
3667
  /**
3576
3668
  * Extract complete page text data in a single call.
3577
3669
  *
@@ -3773,6 +3865,36 @@ class WasmPdfDocument {
3773
3865
  wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3774
3866
  }
3775
3867
  }
3868
+ /**
3869
+ * One-shot auto text extraction — graceful native fallback (never
3870
+ * the opaque OCR error #513).
3871
+ * @param {number} page_index
3872
+ * @returns {string}
3873
+ */
3874
+ extractTextAuto(page_index) {
3875
+ let deferred2_0;
3876
+ let deferred2_1;
3877
+ try {
3878
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3879
+ wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
3880
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3881
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3882
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3883
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3884
+ var ptr1 = r0;
3885
+ var len1 = r1;
3886
+ if (r3) {
3887
+ ptr1 = 0; len1 = 0;
3888
+ throw takeObject(r2);
3889
+ }
3890
+ deferred2_0 = ptr1;
3891
+ deferred2_1 = len1;
3892
+ return getStringFromWasm0(ptr1, len1);
3893
+ } finally {
3894
+ wasm.__wbindgen_add_to_stack_pointer(16);
3895
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3896
+ }
3897
+ }
3776
3898
  /**
3777
3899
  * Extract text lines from a page.
3778
3900
  *
@@ -6183,6 +6305,37 @@ function hasDocumentTimestamp(pdf_data) {
6183
6305
  }
6184
6306
  exports.hasDocumentTimestamp = hasDocumentTimestamp;
6185
6307
 
6308
+ /**
6309
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
6310
+ * supported language's cache filenames and source URLs).
6311
+ *
6312
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
6313
+ * or network-to-disk, so a download-to-cache prefetch cannot run
6314
+ * here. This manifest is informational — it lets the JS host learn
6315
+ * which model files/URLs to fetch and bundle (or ship out of band)
6316
+ * before driving OCR. There is intentionally no `prefetchModels` in
6317
+ * the WASM surface (see `prefetchAvailable`, which always returns
6318
+ * `false`).
6319
+ * @returns {string}
6320
+ */
6321
+ function modelManifest() {
6322
+ let deferred1_0;
6323
+ let deferred1_1;
6324
+ try {
6325
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
6326
+ wasm.modelManifest(retptr);
6327
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
6328
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
6329
+ deferred1_0 = r0;
6330
+ deferred1_1 = r1;
6331
+ return getStringFromWasm0(r0, r1);
6332
+ } finally {
6333
+ wasm.__wbindgen_add_to_stack_pointer(16);
6334
+ wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
6335
+ }
6336
+ }
6337
+ exports.modelManifest = modelManifest;
6338
+
6186
6339
  /**
6187
6340
  * Plan a bookmark split without producing PDFs. Returns a JSON array
6188
6341
  * of segment objects (`index, startPage…` shape from
@@ -6215,6 +6368,18 @@ function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level, inclu
6215
6368
  }
6216
6369
  exports.planSplitByBookmarks = planSplitByBookmarks;
6217
6370
 
6371
+ /**
6372
+ * #519: Whether this build can download OCR models to a local cache.
6373
+ * Always `false` in WASM — provisioning is host-side (see
6374
+ * `modelManifest`).
6375
+ * @returns {boolean}
6376
+ */
6377
+ function prefetchAvailable() {
6378
+ const ret = wasm.prefetchAvailable();
6379
+ return ret !== 0;
6380
+ }
6381
+ exports.prefetchAvailable = prefetchAvailable;
6382
+
6218
6383
  /**
6219
6384
  * Install the process-wide runtime crypto policy from its grammar
6220
6385
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
Binary file
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
23
23
  export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
24
24
  export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
25
25
  export const hasDocumentTimestamp: (a: number, b: number) => number;
26
+ export const modelManifest: (a: number) => void;
26
27
  export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
28
+ export const prefetchAvailable: () => number;
27
29
  export const setCryptoPolicy: (a: number, b: number, c: number) => void;
28
30
  export const setLogLevel: (a: number, b: number, c: number) => void;
29
31
  export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
158
160
  export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
159
161
  export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
160
162
  export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
163
+ export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
164
+ export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
161
165
  export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
162
166
  export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
163
167
  export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
177
181
  export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
178
182
  export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
179
183
  export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
184
+ export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
180
185
  export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
181
186
  export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
182
187
  export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
184
189
  export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
185
190
  export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
186
191
  export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
192
+ export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
187
193
  export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
188
194
  export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
189
195
  export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-oxide-wasm",
3
- "version": "0.3.50",
3
+ "version": "0.3.51",
4
4
  "description": "Fast, zero-dependency PDF toolkit for Node.js, browsers, and edge runtimes — text extraction, markdown/HTML conversion, search, form filling, creation, and editing. Rust core compiled to WebAssembly.",
5
5
  "license": "MIT OR Apache-2.0",
6
6
  "repository": {
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
815
815
  * @returns true if authentication succeeded
816
816
  */
817
817
  authenticate(password: string): boolean;
818
+ /**
819
+ * Cheap per-page text-vs-OCR classification → JSON
820
+ * `DocumentClassification`.
821
+ */
822
+ classifyDocument(): string;
823
+ /**
824
+ * Cheap per-page classification → JSON `PageClassification`.
825
+ */
826
+ classifyPage(page_index: number): string;
818
827
  /**
819
828
  * Clear all pending erase operations for a page.
820
829
  */
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
934
943
  * @returns Array of path objects
935
944
  */
936
945
  extractLines(page_index: number, region?: Float32Array | null): any;
946
+ /**
947
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
948
+ * bbox + typed reason). `optionsJson` is `{}`-tolerant
949
+ * `AutoExtractOptions`; undefined/empty → defaults.
950
+ */
951
+ extractPageAuto(page_index: number, options_json?: string | null): string;
937
952
  /**
938
953
  * Extract complete page text data in a single call.
939
954
  *
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
990
1005
  * @param region - Optional [x, y, width, height] to filter by
991
1006
  */
992
1007
  extractText(page_index: number, region: any): string;
1008
+ /**
1009
+ * One-shot auto text extraction — graceful native fallback (never
1010
+ * the opaque OCR error #513).
1011
+ */
1012
+ extractTextAuto(page_index: number): string;
993
1013
  /**
994
1014
  * Extract text lines from a page.
995
1015
  *
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
1600
1620
  */
1601
1621
  export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1602
1622
 
1623
+ /**
1624
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
1625
+ * supported language's cache filenames and source URLs).
1626
+ *
1627
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
1628
+ * or network-to-disk, so a download-to-cache prefetch cannot run
1629
+ * here. This manifest is informational — it lets the JS host learn
1630
+ * which model files/URLs to fetch and bundle (or ship out of band)
1631
+ * before driving OCR. There is intentionally no `prefetchModels` in
1632
+ * the WASM surface (see `prefetchAvailable`, which always returns
1633
+ * `false`).
1634
+ */
1635
+ export function modelManifest(): string;
1636
+
1603
1637
  /**
1604
1638
  * Plan a bookmark split without producing PDFs. Returns a JSON array
1605
1639
  * of segment objects (`index, startPage…` shape from
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
1607
1641
  */
1608
1642
  export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
1609
1643
 
1644
+ /**
1645
+ * #519: Whether this build can download OCR models to a local cache.
1646
+ * Always `false` in WASM — provisioning is host-side (see
1647
+ * `modelManifest`).
1648
+ */
1649
+ export function prefetchAvailable(): boolean;
1650
+
1610
1651
  /**
1611
1652
  * Install the process-wide runtime crypto policy from its grammar
1612
1653
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
@@ -1687,7 +1728,9 @@ export interface InitOutput {
1687
1728
  readonly generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
1688
1729
  readonly generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
1689
1730
  readonly hasDocumentTimestamp: (a: number, b: number) => number;
1731
+ readonly modelManifest: (a: number) => void;
1690
1732
  readonly planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
1733
+ readonly prefetchAvailable: () => number;
1691
1734
  readonly setCryptoPolicy: (a: number, b: number, c: number) => void;
1692
1735
  readonly setLogLevel: (a: number, b: number, c: number) => void;
1693
1736
  readonly signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
@@ -1822,6 +1865,8 @@ export interface InitOutput {
1822
1865
  readonly wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
1823
1866
  readonly wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
1824
1867
  readonly wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
1868
+ readonly wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
1869
+ readonly wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
1825
1870
  readonly wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
1826
1871
  readonly wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
1827
1872
  readonly wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
@@ -1841,6 +1886,7 @@ export interface InitOutput {
1841
1886
  readonly wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
1842
1887
  readonly wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
1843
1888
  readonly wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
1889
+ readonly wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
1844
1890
  readonly wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
1845
1891
  readonly wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
1846
1892
  readonly wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
@@ -1848,6 +1894,7 @@ export interface InitOutput {
1848
1894
  readonly wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
1849
1895
  readonly wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
1850
1896
  readonly wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
1897
+ readonly wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
1851
1898
  readonly wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
1852
1899
  readonly wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
1853
1900
  readonly wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
package/web/pdf_oxide.js CHANGED
@@ -3127,6 +3127,64 @@ export class WasmPdfDocument {
3127
3127
  wasm.__wbindgen_add_to_stack_pointer(16);
3128
3128
  }
3129
3129
  }
3130
+ /**
3131
+ * Cheap per-page text-vs-OCR classification → JSON
3132
+ * `DocumentClassification`.
3133
+ * @returns {string}
3134
+ */
3135
+ classifyDocument() {
3136
+ let deferred2_0;
3137
+ let deferred2_1;
3138
+ try {
3139
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3140
+ wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
3141
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3142
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3143
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3144
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3145
+ var ptr1 = r0;
3146
+ var len1 = r1;
3147
+ if (r3) {
3148
+ ptr1 = 0; len1 = 0;
3149
+ throw takeObject(r2);
3150
+ }
3151
+ deferred2_0 = ptr1;
3152
+ deferred2_1 = len1;
3153
+ return getStringFromWasm0(ptr1, len1);
3154
+ } finally {
3155
+ wasm.__wbindgen_add_to_stack_pointer(16);
3156
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3157
+ }
3158
+ }
3159
+ /**
3160
+ * Cheap per-page classification → JSON `PageClassification`.
3161
+ * @param {number} page_index
3162
+ * @returns {string}
3163
+ */
3164
+ classifyPage(page_index) {
3165
+ let deferred2_0;
3166
+ let deferred2_1;
3167
+ try {
3168
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3169
+ wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
3170
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3171
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3172
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3173
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3174
+ var ptr1 = r0;
3175
+ var len1 = r1;
3176
+ if (r3) {
3177
+ ptr1 = 0; len1 = 0;
3178
+ throw takeObject(r2);
3179
+ }
3180
+ deferred2_0 = ptr1;
3181
+ deferred2_1 = len1;
3182
+ return getStringFromWasm0(ptr1, len1);
3183
+ } finally {
3184
+ wasm.__wbindgen_add_to_stack_pointer(16);
3185
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3186
+ }
3187
+ }
3130
3188
  /**
3131
3189
  * Clear all pending erase operations for a page.
3132
3190
  * @param {number} page_index
@@ -3555,6 +3613,40 @@ export class WasmPdfDocument {
3555
3613
  wasm.__wbindgen_add_to_stack_pointer(16);
3556
3614
  }
3557
3615
  }
3616
+ /**
3617
+ * Rich per-page extraction → JSON `PageExtraction` (per-region
3618
+ * bbox + typed reason). `options_json` is `{}`-tolerant
3619
+ * `AutoExtractOptions`; undefined/empty → defaults.
3620
+ * @param {number} page_index
3621
+ * @param {string | null} [options_json]
3622
+ * @returns {string}
3623
+ */
3624
+ extractPageAuto(page_index, options_json) {
3625
+ let deferred3_0;
3626
+ let deferred3_1;
3627
+ try {
3628
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3629
+ var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
3630
+ var len0 = WASM_VECTOR_LEN;
3631
+ wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
3632
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3633
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3634
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3635
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3636
+ var ptr2 = r0;
3637
+ var len2 = r1;
3638
+ if (r3) {
3639
+ ptr2 = 0; len2 = 0;
3640
+ throw takeObject(r2);
3641
+ }
3642
+ deferred3_0 = ptr2;
3643
+ deferred3_1 = len2;
3644
+ return getStringFromWasm0(ptr2, len2);
3645
+ } finally {
3646
+ wasm.__wbindgen_add_to_stack_pointer(16);
3647
+ wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
3648
+ }
3649
+ }
3558
3650
  /**
3559
3651
  * Extract complete page text data in a single call.
3560
3652
  *
@@ -3756,6 +3848,36 @@ export class WasmPdfDocument {
3756
3848
  wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3757
3849
  }
3758
3850
  }
3851
+ /**
3852
+ * One-shot auto text extraction — graceful native fallback (never
3853
+ * the opaque OCR error #513).
3854
+ * @param {number} page_index
3855
+ * @returns {string}
3856
+ */
3857
+ extractTextAuto(page_index) {
3858
+ let deferred2_0;
3859
+ let deferred2_1;
3860
+ try {
3861
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
3862
+ wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
3863
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
3864
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
3865
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
3866
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
3867
+ var ptr1 = r0;
3868
+ var len1 = r1;
3869
+ if (r3) {
3870
+ ptr1 = 0; len1 = 0;
3871
+ throw takeObject(r2);
3872
+ }
3873
+ deferred2_0 = ptr1;
3874
+ deferred2_1 = len1;
3875
+ return getStringFromWasm0(ptr1, len1);
3876
+ } finally {
3877
+ wasm.__wbindgen_add_to_stack_pointer(16);
3878
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
3879
+ }
3880
+ }
3759
3881
  /**
3760
3882
  * Extract text lines from a page.
3761
3883
  *
@@ -6155,6 +6277,36 @@ export function hasDocumentTimestamp(pdf_data) {
6155
6277
  return ret !== 0;
6156
6278
  }
6157
6279
 
6280
+ /**
6281
+ * #519: Air-gapped OCR model manifest — JSON (detector + every
6282
+ * supported language's cache filenames and source URLs).
6283
+ *
6284
+ * WASM provisioning is **host-side**: browser/WASM has no filesystem
6285
+ * or network-to-disk, so a download-to-cache prefetch cannot run
6286
+ * here. This manifest is informational — it lets the JS host learn
6287
+ * which model files/URLs to fetch and bundle (or ship out of band)
6288
+ * before driving OCR. There is intentionally no `prefetchModels` in
6289
+ * the WASM surface (see `prefetchAvailable`, which always returns
6290
+ * `false`).
6291
+ * @returns {string}
6292
+ */
6293
+ export function modelManifest() {
6294
+ let deferred1_0;
6295
+ let deferred1_1;
6296
+ try {
6297
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
6298
+ wasm.modelManifest(retptr);
6299
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
6300
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
6301
+ deferred1_0 = r0;
6302
+ deferred1_1 = r1;
6303
+ return getStringFromWasm0(r0, r1);
6304
+ } finally {
6305
+ wasm.__wbindgen_add_to_stack_pointer(16);
6306
+ wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
6307
+ }
6308
+ }
6309
+
6158
6310
  /**
6159
6311
  * Plan a bookmark split without producing PDFs. Returns a JSON array
6160
6312
  * of segment objects (`index, startPage…` shape from
@@ -6186,6 +6338,17 @@ export function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level
6186
6338
  }
6187
6339
  }
6188
6340
 
6341
+ /**
6342
+ * #519: Whether this build can download OCR models to a local cache.
6343
+ * Always `false` in WASM — provisioning is host-side (see
6344
+ * `modelManifest`).
6345
+ * @returns {boolean}
6346
+ */
6347
+ export function prefetchAvailable() {
6348
+ const ret = wasm.prefetchAvailable();
6349
+ return ret !== 0;
6350
+ }
6351
+
6189
6352
  /**
6190
6353
  * Install the process-wide runtime crypto policy from its grammar
6191
6354
  * string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
Binary file
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
23
23
  export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
24
24
  export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
25
25
  export const hasDocumentTimestamp: (a: number, b: number) => number;
26
+ export const modelManifest: (a: number) => void;
26
27
  export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
28
+ export const prefetchAvailable: () => number;
27
29
  export const setCryptoPolicy: (a: number, b: number, c: number) => void;
28
30
  export const setLogLevel: (a: number, b: number, c: number) => void;
29
31
  export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
158
160
  export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
159
161
  export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
160
162
  export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
163
+ export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
164
+ export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
161
165
  export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
162
166
  export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
163
167
  export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
177
181
  export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
178
182
  export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
179
183
  export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
184
+ export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
180
185
  export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
181
186
  export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
182
187
  export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
184
189
  export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
185
190
  export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
186
191
  export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
192
+ export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
187
193
  export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
188
194
  export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
189
195
  export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;