pdf-oxide-wasm 0.3.50 → 0.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundler/pdf_oxide.d.ts +41 -0
- package/bundler/pdf_oxide.js +1 -1
- package/bundler/pdf_oxide_bg.js +163 -0
- package/bundler/pdf_oxide_bg.wasm +0 -0
- package/bundler/pdf_oxide_bg.wasm.d.ts +6 -0
- package/nodejs/pdf_oxide.d.ts +41 -0
- package/nodejs/pdf_oxide.js +165 -0
- package/nodejs/pdf_oxide_bg.wasm +0 -0
- package/nodejs/pdf_oxide_bg.wasm.d.ts +6 -0
- package/package.json +1 -1
- package/web/pdf_oxide.d.ts +47 -0
- package/web/pdf_oxide.js +163 -0
- package/web/pdf_oxide_bg.wasm +0 -0
- package/web/pdf_oxide_bg.wasm.d.ts +6 -0
package/bundler/pdf_oxide.d.ts
CHANGED
|
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
|
|
|
815
815
|
* @returns true if authentication succeeded
|
|
816
816
|
*/
|
|
817
817
|
authenticate(password: string): boolean;
|
|
818
|
+
/**
|
|
819
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
820
|
+
* `DocumentClassification`.
|
|
821
|
+
*/
|
|
822
|
+
classifyDocument(): string;
|
|
823
|
+
/**
|
|
824
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
825
|
+
*/
|
|
826
|
+
classifyPage(page_index: number): string;
|
|
818
827
|
/**
|
|
819
828
|
* Clear all pending erase operations for a page.
|
|
820
829
|
*/
|
|
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
|
|
|
934
943
|
* @returns Array of path objects
|
|
935
944
|
*/
|
|
936
945
|
extractLines(page_index: number, region?: Float32Array | null): any;
|
|
946
|
+
/**
|
|
947
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
948
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
949
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
950
|
+
*/
|
|
951
|
+
extractPageAuto(page_index: number, options_json?: string | null): string;
|
|
937
952
|
/**
|
|
938
953
|
* Extract complete page text data in a single call.
|
|
939
954
|
*
|
|
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
|
|
|
990
1005
|
* @param region - Optional [x, y, width, height] to filter by
|
|
991
1006
|
*/
|
|
992
1007
|
extractText(page_index: number, region: any): string;
|
|
1008
|
+
/**
|
|
1009
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
1010
|
+
* the opaque OCR error #513).
|
|
1011
|
+
*/
|
|
1012
|
+
extractTextAuto(page_index: number): string;
|
|
993
1013
|
/**
|
|
994
1014
|
* Extract text lines from a page.
|
|
995
1015
|
*
|
|
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
|
|
|
1600
1620
|
*/
|
|
1601
1621
|
export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
1602
1622
|
|
|
1623
|
+
/**
|
|
1624
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
1625
|
+
* supported language's cache filenames and source URLs).
|
|
1626
|
+
*
|
|
1627
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
1628
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
1629
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
1630
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
1631
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
1632
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
1633
|
+
* `false`).
|
|
1634
|
+
*/
|
|
1635
|
+
export function modelManifest(): string;
|
|
1636
|
+
|
|
1603
1637
|
/**
|
|
1604
1638
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
1605
1639
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
|
1607
1641
|
*/
|
|
1608
1642
|
export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
|
|
1609
1643
|
|
|
1644
|
+
/**
|
|
1645
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
1646
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
1647
|
+
* `modelManifest`).
|
|
1648
|
+
*/
|
|
1649
|
+
export function prefetchAvailable(): boolean;
|
|
1650
|
+
|
|
1610
1651
|
/**
|
|
1611
1652
|
* Install the process-wide runtime crypto policy from its grammar
|
|
1612
1653
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
package/bundler/pdf_oxide.js
CHANGED
|
@@ -5,5 +5,5 @@ import { __wbg_set_wasm } from "./pdf_oxide_bg.js";
|
|
|
5
5
|
__wbg_set_wasm(wasm);
|
|
6
6
|
|
|
7
7
|
export {
|
|
8
|
-
Align, ArtifactStyle, Dss, PadesLevel, RevocationMaterial, StreamingTable, WasmArtifact, WasmCertificate, WasmDocumentBuilder, WasmEmbeddedFont, WasmFluentPageBuilder, WasmFooter, WasmHeader, WasmOcrConfig, WasmOcrEngine, WasmPageTemplate, WasmPdf, WasmPdfDocument, WasmPdfPageRegion, WasmSignature, WasmTimestamp, cryptoCbom, cryptoInventory, cryptoPolicy, disableLogging, generateBarcodeSvg, generateQrSvg, hasDocumentTimestamp, planSplitByBookmarks, setCryptoPolicy, setLogLevel, signPdfBytes, signPdfBytesPades, splitByBookmarks
|
|
8
|
+
Align, ArtifactStyle, Dss, PadesLevel, RevocationMaterial, StreamingTable, WasmArtifact, WasmCertificate, WasmDocumentBuilder, WasmEmbeddedFont, WasmFluentPageBuilder, WasmFooter, WasmHeader, WasmOcrConfig, WasmOcrEngine, WasmPageTemplate, WasmPdf, WasmPdfDocument, WasmPdfPageRegion, WasmSignature, WasmTimestamp, cryptoCbom, cryptoInventory, cryptoPolicy, disableLogging, generateBarcodeSvg, generateQrSvg, hasDocumentTimestamp, modelManifest, planSplitByBookmarks, prefetchAvailable, setCryptoPolicy, setLogLevel, signPdfBytes, signPdfBytesPades, splitByBookmarks
|
|
9
9
|
} from "./pdf_oxide_bg.js";
|
package/bundler/pdf_oxide_bg.js
CHANGED
|
@@ -3125,6 +3125,64 @@ export class WasmPdfDocument {
|
|
|
3125
3125
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3126
3126
|
}
|
|
3127
3127
|
}
|
|
3128
|
+
/**
|
|
3129
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
3130
|
+
* `DocumentClassification`.
|
|
3131
|
+
* @returns {string}
|
|
3132
|
+
*/
|
|
3133
|
+
classifyDocument() {
|
|
3134
|
+
let deferred2_0;
|
|
3135
|
+
let deferred2_1;
|
|
3136
|
+
try {
|
|
3137
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3138
|
+
wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
|
|
3139
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3140
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3141
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3142
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3143
|
+
var ptr1 = r0;
|
|
3144
|
+
var len1 = r1;
|
|
3145
|
+
if (r3) {
|
|
3146
|
+
ptr1 = 0; len1 = 0;
|
|
3147
|
+
throw takeObject(r2);
|
|
3148
|
+
}
|
|
3149
|
+
deferred2_0 = ptr1;
|
|
3150
|
+
deferred2_1 = len1;
|
|
3151
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3152
|
+
} finally {
|
|
3153
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3154
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3155
|
+
}
|
|
3156
|
+
}
|
|
3157
|
+
/**
|
|
3158
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
3159
|
+
* @param {number} page_index
|
|
3160
|
+
* @returns {string}
|
|
3161
|
+
*/
|
|
3162
|
+
classifyPage(page_index) {
|
|
3163
|
+
let deferred2_0;
|
|
3164
|
+
let deferred2_1;
|
|
3165
|
+
try {
|
|
3166
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3167
|
+
wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
|
|
3168
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3169
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3170
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3171
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3172
|
+
var ptr1 = r0;
|
|
3173
|
+
var len1 = r1;
|
|
3174
|
+
if (r3) {
|
|
3175
|
+
ptr1 = 0; len1 = 0;
|
|
3176
|
+
throw takeObject(r2);
|
|
3177
|
+
}
|
|
3178
|
+
deferred2_0 = ptr1;
|
|
3179
|
+
deferred2_1 = len1;
|
|
3180
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3181
|
+
} finally {
|
|
3182
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3183
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3184
|
+
}
|
|
3185
|
+
}
|
|
3128
3186
|
/**
|
|
3129
3187
|
* Clear all pending erase operations for a page.
|
|
3130
3188
|
* @param {number} page_index
|
|
@@ -3553,6 +3611,40 @@ export class WasmPdfDocument {
|
|
|
3553
3611
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3554
3612
|
}
|
|
3555
3613
|
}
|
|
3614
|
+
/**
|
|
3615
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
3616
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
3617
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
3618
|
+
* @param {number} page_index
|
|
3619
|
+
* @param {string | null} [options_json]
|
|
3620
|
+
* @returns {string}
|
|
3621
|
+
*/
|
|
3622
|
+
extractPageAuto(page_index, options_json) {
|
|
3623
|
+
let deferred3_0;
|
|
3624
|
+
let deferred3_1;
|
|
3625
|
+
try {
|
|
3626
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3627
|
+
var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
3628
|
+
var len0 = WASM_VECTOR_LEN;
|
|
3629
|
+
wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
|
|
3630
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3631
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3632
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3633
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3634
|
+
var ptr2 = r0;
|
|
3635
|
+
var len2 = r1;
|
|
3636
|
+
if (r3) {
|
|
3637
|
+
ptr2 = 0; len2 = 0;
|
|
3638
|
+
throw takeObject(r2);
|
|
3639
|
+
}
|
|
3640
|
+
deferred3_0 = ptr2;
|
|
3641
|
+
deferred3_1 = len2;
|
|
3642
|
+
return getStringFromWasm0(ptr2, len2);
|
|
3643
|
+
} finally {
|
|
3644
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3645
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
3646
|
+
}
|
|
3647
|
+
}
|
|
3556
3648
|
/**
|
|
3557
3649
|
* Extract complete page text data in a single call.
|
|
3558
3650
|
*
|
|
@@ -3754,6 +3846,36 @@ export class WasmPdfDocument {
|
|
|
3754
3846
|
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3755
3847
|
}
|
|
3756
3848
|
}
|
|
3849
|
+
/**
|
|
3850
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
3851
|
+
* the opaque OCR error #513).
|
|
3852
|
+
* @param {number} page_index
|
|
3853
|
+
* @returns {string}
|
|
3854
|
+
*/
|
|
3855
|
+
extractTextAuto(page_index) {
|
|
3856
|
+
let deferred2_0;
|
|
3857
|
+
let deferred2_1;
|
|
3858
|
+
try {
|
|
3859
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3860
|
+
wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
|
|
3861
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3862
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3863
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3864
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3865
|
+
var ptr1 = r0;
|
|
3866
|
+
var len1 = r1;
|
|
3867
|
+
if (r3) {
|
|
3868
|
+
ptr1 = 0; len1 = 0;
|
|
3869
|
+
throw takeObject(r2);
|
|
3870
|
+
}
|
|
3871
|
+
deferred2_0 = ptr1;
|
|
3872
|
+
deferred2_1 = len1;
|
|
3873
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3874
|
+
} finally {
|
|
3875
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3876
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3877
|
+
}
|
|
3878
|
+
}
|
|
3757
3879
|
/**
|
|
3758
3880
|
* Extract text lines from a page.
|
|
3759
3881
|
*
|
|
@@ -6153,6 +6275,36 @@ export function hasDocumentTimestamp(pdf_data) {
|
|
|
6153
6275
|
return ret !== 0;
|
|
6154
6276
|
}
|
|
6155
6277
|
|
|
6278
|
+
/**
|
|
6279
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
6280
|
+
* supported language's cache filenames and source URLs).
|
|
6281
|
+
*
|
|
6282
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
6283
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
6284
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
6285
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
6286
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
6287
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
6288
|
+
* `false`).
|
|
6289
|
+
* @returns {string}
|
|
6290
|
+
*/
|
|
6291
|
+
export function modelManifest() {
|
|
6292
|
+
let deferred1_0;
|
|
6293
|
+
let deferred1_1;
|
|
6294
|
+
try {
|
|
6295
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
6296
|
+
wasm.modelManifest(retptr);
|
|
6297
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
6298
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
6299
|
+
deferred1_0 = r0;
|
|
6300
|
+
deferred1_1 = r1;
|
|
6301
|
+
return getStringFromWasm0(r0, r1);
|
|
6302
|
+
} finally {
|
|
6303
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
6304
|
+
wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
|
|
6305
|
+
}
|
|
6306
|
+
}
|
|
6307
|
+
|
|
6156
6308
|
/**
|
|
6157
6309
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
6158
6310
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -6184,6 +6336,17 @@ export function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level
|
|
|
6184
6336
|
}
|
|
6185
6337
|
}
|
|
6186
6338
|
|
|
6339
|
+
/**
|
|
6340
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
6341
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
6342
|
+
* `modelManifest`).
|
|
6343
|
+
* @returns {boolean}
|
|
6344
|
+
*/
|
|
6345
|
+
export function prefetchAvailable() {
|
|
6346
|
+
const ret = wasm.prefetchAvailable();
|
|
6347
|
+
return ret !== 0;
|
|
6348
|
+
}
|
|
6349
|
+
|
|
6187
6350
|
/**
|
|
6188
6351
|
* Install the process-wide runtime crypto policy from its grammar
|
|
6189
6352
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
package/bundler/pdf_oxide_bg.wasm
CHANGED
|
Binary file
|
package/bundler/pdf_oxide_bg.wasm.d.ts
CHANGED
|
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
|
|
|
23
23
|
export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
|
|
24
24
|
export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
25
25
|
export const hasDocumentTimestamp: (a: number, b: number) => number;
|
|
26
|
+
export const modelManifest: (a: number) => void;
|
|
26
27
|
export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
28
|
+
export const prefetchAvailable: () => number;
|
|
27
29
|
export const setCryptoPolicy: (a: number, b: number, c: number) => void;
|
|
28
30
|
export const setLogLevel: (a: number, b: number, c: number) => void;
|
|
29
31
|
export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
|
|
|
158
160
|
export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
|
|
159
161
|
export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
|
|
160
162
|
export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
|
|
163
|
+
export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
|
|
164
|
+
export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
|
|
161
165
|
export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
|
|
162
166
|
export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
|
|
163
167
|
export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
|
|
|
177
181
|
export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
|
|
178
182
|
export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
179
183
|
export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
184
|
+
export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
180
185
|
export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
181
186
|
export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
|
|
182
187
|
export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
|
|
|
184
189
|
export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
|
|
185
190
|
export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
186
191
|
export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
|
|
192
|
+
export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
|
|
187
193
|
export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
188
194
|
export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
|
|
189
195
|
export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
|
package/nodejs/pdf_oxide.d.ts
CHANGED
|
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
|
|
|
815
815
|
* @returns true if authentication succeeded
|
|
816
816
|
*/
|
|
817
817
|
authenticate(password: string): boolean;
|
|
818
|
+
/**
|
|
819
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
820
|
+
* `DocumentClassification`.
|
|
821
|
+
*/
|
|
822
|
+
classifyDocument(): string;
|
|
823
|
+
/**
|
|
824
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
825
|
+
*/
|
|
826
|
+
classifyPage(page_index: number): string;
|
|
818
827
|
/**
|
|
819
828
|
* Clear all pending erase operations for a page.
|
|
820
829
|
*/
|
|
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
|
|
|
934
943
|
* @returns Array of path objects
|
|
935
944
|
*/
|
|
936
945
|
extractLines(page_index: number, region?: Float32Array | null): any;
|
|
946
|
+
/**
|
|
947
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
948
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
949
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
950
|
+
*/
|
|
951
|
+
extractPageAuto(page_index: number, options_json?: string | null): string;
|
|
937
952
|
/**
|
|
938
953
|
* Extract complete page text data in a single call.
|
|
939
954
|
*
|
|
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
|
|
|
990
1005
|
* @param region - Optional [x, y, width, height] to filter by
|
|
991
1006
|
*/
|
|
992
1007
|
extractText(page_index: number, region: any): string;
|
|
1008
|
+
/**
|
|
1009
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
1010
|
+
* the opaque OCR error #513).
|
|
1011
|
+
*/
|
|
1012
|
+
extractTextAuto(page_index: number): string;
|
|
993
1013
|
/**
|
|
994
1014
|
* Extract text lines from a page.
|
|
995
1015
|
*
|
|
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
|
|
|
1600
1620
|
*/
|
|
1601
1621
|
export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
1602
1622
|
|
|
1623
|
+
/**
|
|
1624
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
1625
|
+
* supported language's cache filenames and source URLs).
|
|
1626
|
+
*
|
|
1627
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
1628
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
1629
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
1630
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
1631
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
1632
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
1633
|
+
* `false`).
|
|
1634
|
+
*/
|
|
1635
|
+
export function modelManifest(): string;
|
|
1636
|
+
|
|
1603
1637
|
/**
|
|
1604
1638
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
1605
1639
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
|
1607
1641
|
*/
|
|
1608
1642
|
export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
|
|
1609
1643
|
|
|
1644
|
+
/**
|
|
1645
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
1646
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
1647
|
+
* `modelManifest`).
|
|
1648
|
+
*/
|
|
1649
|
+
export function prefetchAvailable(): boolean;
|
|
1650
|
+
|
|
1610
1651
|
/**
|
|
1611
1652
|
* Install the process-wide runtime crypto policy from its grammar
|
|
1612
1653
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
package/nodejs/pdf_oxide.js
CHANGED
|
@@ -3144,6 +3144,64 @@ class WasmPdfDocument {
|
|
|
3144
3144
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3145
3145
|
}
|
|
3146
3146
|
}
|
|
3147
|
+
/**
|
|
3148
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
3149
|
+
* `DocumentClassification`.
|
|
3150
|
+
* @returns {string}
|
|
3151
|
+
*/
|
|
3152
|
+
classifyDocument() {
|
|
3153
|
+
let deferred2_0;
|
|
3154
|
+
let deferred2_1;
|
|
3155
|
+
try {
|
|
3156
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3157
|
+
wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
|
|
3158
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3159
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3160
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3161
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3162
|
+
var ptr1 = r0;
|
|
3163
|
+
var len1 = r1;
|
|
3164
|
+
if (r3) {
|
|
3165
|
+
ptr1 = 0; len1 = 0;
|
|
3166
|
+
throw takeObject(r2);
|
|
3167
|
+
}
|
|
3168
|
+
deferred2_0 = ptr1;
|
|
3169
|
+
deferred2_1 = len1;
|
|
3170
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3171
|
+
} finally {
|
|
3172
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3173
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3174
|
+
}
|
|
3175
|
+
}
|
|
3176
|
+
/**
|
|
3177
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
3178
|
+
* @param {number} page_index
|
|
3179
|
+
* @returns {string}
|
|
3180
|
+
*/
|
|
3181
|
+
classifyPage(page_index) {
|
|
3182
|
+
let deferred2_0;
|
|
3183
|
+
let deferred2_1;
|
|
3184
|
+
try {
|
|
3185
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3186
|
+
wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
|
|
3187
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3188
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3189
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3190
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3191
|
+
var ptr1 = r0;
|
|
3192
|
+
var len1 = r1;
|
|
3193
|
+
if (r3) {
|
|
3194
|
+
ptr1 = 0; len1 = 0;
|
|
3195
|
+
throw takeObject(r2);
|
|
3196
|
+
}
|
|
3197
|
+
deferred2_0 = ptr1;
|
|
3198
|
+
deferred2_1 = len1;
|
|
3199
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3200
|
+
} finally {
|
|
3201
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3202
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3203
|
+
}
|
|
3204
|
+
}
|
|
3147
3205
|
/**
|
|
3148
3206
|
* Clear all pending erase operations for a page.
|
|
3149
3207
|
* @param {number} page_index
|
|
@@ -3572,6 +3630,40 @@ class WasmPdfDocument {
|
|
|
3572
3630
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3573
3631
|
}
|
|
3574
3632
|
}
|
|
3633
|
+
/**
|
|
3634
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
3635
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
3636
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
3637
|
+
* @param {number} page_index
|
|
3638
|
+
* @param {string | null} [options_json]
|
|
3639
|
+
* @returns {string}
|
|
3640
|
+
*/
|
|
3641
|
+
extractPageAuto(page_index, options_json) {
|
|
3642
|
+
let deferred3_0;
|
|
3643
|
+
let deferred3_1;
|
|
3644
|
+
try {
|
|
3645
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3646
|
+
var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
3647
|
+
var len0 = WASM_VECTOR_LEN;
|
|
3648
|
+
wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
|
|
3649
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3650
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3651
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3652
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3653
|
+
var ptr2 = r0;
|
|
3654
|
+
var len2 = r1;
|
|
3655
|
+
if (r3) {
|
|
3656
|
+
ptr2 = 0; len2 = 0;
|
|
3657
|
+
throw takeObject(r2);
|
|
3658
|
+
}
|
|
3659
|
+
deferred3_0 = ptr2;
|
|
3660
|
+
deferred3_1 = len2;
|
|
3661
|
+
return getStringFromWasm0(ptr2, len2);
|
|
3662
|
+
} finally {
|
|
3663
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3664
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
3665
|
+
}
|
|
3666
|
+
}
|
|
3575
3667
|
/**
|
|
3576
3668
|
* Extract complete page text data in a single call.
|
|
3577
3669
|
*
|
|
@@ -3773,6 +3865,36 @@ class WasmPdfDocument {
|
|
|
3773
3865
|
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3774
3866
|
}
|
|
3775
3867
|
}
|
|
3868
|
+
/**
|
|
3869
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
3870
|
+
* the opaque OCR error #513).
|
|
3871
|
+
* @param {number} page_index
|
|
3872
|
+
* @returns {string}
|
|
3873
|
+
*/
|
|
3874
|
+
extractTextAuto(page_index) {
|
|
3875
|
+
let deferred2_0;
|
|
3876
|
+
let deferred2_1;
|
|
3877
|
+
try {
|
|
3878
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3879
|
+
wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
|
|
3880
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3881
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3882
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3883
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3884
|
+
var ptr1 = r0;
|
|
3885
|
+
var len1 = r1;
|
|
3886
|
+
if (r3) {
|
|
3887
|
+
ptr1 = 0; len1 = 0;
|
|
3888
|
+
throw takeObject(r2);
|
|
3889
|
+
}
|
|
3890
|
+
deferred2_0 = ptr1;
|
|
3891
|
+
deferred2_1 = len1;
|
|
3892
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3893
|
+
} finally {
|
|
3894
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3895
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3896
|
+
}
|
|
3897
|
+
}
|
|
3776
3898
|
/**
|
|
3777
3899
|
* Extract text lines from a page.
|
|
3778
3900
|
*
|
|
@@ -6183,6 +6305,37 @@ function hasDocumentTimestamp(pdf_data) {
|
|
|
6183
6305
|
}
|
|
6184
6306
|
exports.hasDocumentTimestamp = hasDocumentTimestamp;
|
|
6185
6307
|
|
|
6308
|
+
/**
|
|
6309
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
6310
|
+
* supported language's cache filenames and source URLs).
|
|
6311
|
+
*
|
|
6312
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
6313
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
6314
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
6315
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
6316
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
6317
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
6318
|
+
* `false`).
|
|
6319
|
+
* @returns {string}
|
|
6320
|
+
*/
|
|
6321
|
+
function modelManifest() {
|
|
6322
|
+
let deferred1_0;
|
|
6323
|
+
let deferred1_1;
|
|
6324
|
+
try {
|
|
6325
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
6326
|
+
wasm.modelManifest(retptr);
|
|
6327
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
6328
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
6329
|
+
deferred1_0 = r0;
|
|
6330
|
+
deferred1_1 = r1;
|
|
6331
|
+
return getStringFromWasm0(r0, r1);
|
|
6332
|
+
} finally {
|
|
6333
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
6334
|
+
wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
|
|
6335
|
+
}
|
|
6336
|
+
}
|
|
6337
|
+
exports.modelManifest = modelManifest;
|
|
6338
|
+
|
|
6186
6339
|
/**
|
|
6187
6340
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
6188
6341
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -6215,6 +6368,18 @@ function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level, inclu
|
|
|
6215
6368
|
}
|
|
6216
6369
|
exports.planSplitByBookmarks = planSplitByBookmarks;
|
|
6217
6370
|
|
|
6371
|
+
/**
|
|
6372
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
6373
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
6374
|
+
* `modelManifest`).
|
|
6375
|
+
* @returns {boolean}
|
|
6376
|
+
*/
|
|
6377
|
+
function prefetchAvailable() {
|
|
6378
|
+
const ret = wasm.prefetchAvailable();
|
|
6379
|
+
return ret !== 0;
|
|
6380
|
+
}
|
|
6381
|
+
exports.prefetchAvailable = prefetchAvailable;
|
|
6382
|
+
|
|
6218
6383
|
/**
|
|
6219
6384
|
* Install the process-wide runtime crypto policy from its grammar
|
|
6220
6385
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
package/nodejs/pdf_oxide_bg.wasm
CHANGED
|
Binary file
|
package/nodejs/pdf_oxide_bg.wasm.d.ts
CHANGED
|
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
|
|
|
23
23
|
export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
|
|
24
24
|
export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
25
25
|
export const hasDocumentTimestamp: (a: number, b: number) => number;
|
|
26
|
+
export const modelManifest: (a: number) => void;
|
|
26
27
|
export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
28
|
+
export const prefetchAvailable: () => number;
|
|
27
29
|
export const setCryptoPolicy: (a: number, b: number, c: number) => void;
|
|
28
30
|
export const setLogLevel: (a: number, b: number, c: number) => void;
|
|
29
31
|
export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
|
|
|
158
160
|
export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
|
|
159
161
|
export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
|
|
160
162
|
export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
|
|
163
|
+
export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
|
|
164
|
+
export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
|
|
161
165
|
export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
|
|
162
166
|
export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
|
|
163
167
|
export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
|
|
|
177
181
|
export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
|
|
178
182
|
export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
179
183
|
export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
184
|
+
export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
180
185
|
export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
181
186
|
export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
|
|
182
187
|
export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
|
|
|
184
189
|
export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
|
|
185
190
|
export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
186
191
|
export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
|
|
192
|
+
export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
|
|
187
193
|
export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
188
194
|
export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
|
|
189
195
|
export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pdf-oxide-wasm",
|
|
3
|
-
"version": "0.3.50",
|
|
3
|
+
"version": "0.3.51",
|
|
4
4
|
"description": "Fast, zero-dependency PDF toolkit for Node.js, browsers, and edge runtimes — text extraction, markdown/HTML conversion, search, form filling, creation, and editing. Rust core compiled to WebAssembly.",
|
|
5
5
|
"license": "MIT OR Apache-2.0",
|
|
6
6
|
"repository": {
|
package/web/pdf_oxide.d.ts
CHANGED
|
@@ -815,6 +815,15 @@ export class WasmPdfDocument {
|
|
|
815
815
|
* @returns true if authentication succeeded
|
|
816
816
|
*/
|
|
817
817
|
authenticate(password: string): boolean;
|
|
818
|
+
/**
|
|
819
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
820
|
+
* `DocumentClassification`.
|
|
821
|
+
*/
|
|
822
|
+
classifyDocument(): string;
|
|
823
|
+
/**
|
|
824
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
825
|
+
*/
|
|
826
|
+
classifyPage(page_index: number): string;
|
|
818
827
|
/**
|
|
819
828
|
* Clear all pending erase operations for a page.
|
|
820
829
|
*/
|
|
@@ -934,6 +943,12 @@ export class WasmPdfDocument {
|
|
|
934
943
|
* @returns Array of path objects
|
|
935
944
|
*/
|
|
936
945
|
extractLines(page_index: number, region?: Float32Array | null): any;
|
|
946
|
+
/**
|
|
947
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
948
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
949
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
950
|
+
*/
|
|
951
|
+
extractPageAuto(page_index: number, options_json?: string | null): string;
|
|
937
952
|
/**
|
|
938
953
|
* Extract complete page text data in a single call.
|
|
939
954
|
*
|
|
@@ -990,6 +1005,11 @@ export class WasmPdfDocument {
|
|
|
990
1005
|
* @param region - Optional [x, y, width, height] to filter by
|
|
991
1006
|
*/
|
|
992
1007
|
extractText(page_index: number, region: any): string;
|
|
1008
|
+
/**
|
|
1009
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
1010
|
+
* the opaque OCR error #513).
|
|
1011
|
+
*/
|
|
1012
|
+
extractTextAuto(page_index: number): string;
|
|
993
1013
|
/**
|
|
994
1014
|
* Extract text lines from a page.
|
|
995
1015
|
*
|
|
@@ -1600,6 +1620,20 @@ export function generateQrSvg(data: string, error_correction: number, size: numb
|
|
|
1600
1620
|
*/
|
|
1601
1621
|
export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
1602
1622
|
|
|
1623
|
+
/**
|
|
1624
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
1625
|
+
* supported language's cache filenames and source URLs).
|
|
1626
|
+
*
|
|
1627
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
1628
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
1629
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
1630
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
1631
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
1632
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
1633
|
+
* `false`).
|
|
1634
|
+
*/
|
|
1635
|
+
export function modelManifest(): string;
|
|
1636
|
+
|
|
1603
1637
|
/**
|
|
1604
1638
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
1605
1639
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -1607,6 +1641,13 @@ export function hasDocumentTimestamp(pdf_data: Uint8Array): boolean;
|
|
|
1607
1641
|
*/
|
|
1608
1642
|
export function planSplitByBookmarks(src_bytes: Uint8Array, title_prefix: string | null | undefined, ignore_case: boolean, level: number, include_front_matter: boolean): any;
|
|
1609
1643
|
|
|
1644
|
+
/**
|
|
1645
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
1646
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
1647
|
+
* `modelManifest`).
|
|
1648
|
+
*/
|
|
1649
|
+
export function prefetchAvailable(): boolean;
|
|
1650
|
+
|
|
1610
1651
|
/**
|
|
1611
1652
|
* Install the process-wide runtime crypto policy from its grammar
|
|
1612
1653
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
|
@@ -1687,7 +1728,9 @@ export interface InitOutput {
|
|
|
1687
1728
|
readonly generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
|
|
1688
1729
|
readonly generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1689
1730
|
readonly hasDocumentTimestamp: (a: number, b: number) => number;
|
|
1731
|
+
readonly modelManifest: (a: number) => void;
|
|
1690
1732
|
readonly planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
1733
|
+
readonly prefetchAvailable: () => number;
|
|
1691
1734
|
readonly setCryptoPolicy: (a: number, b: number, c: number) => void;
|
|
1692
1735
|
readonly setLogLevel: (a: number, b: number, c: number) => void;
|
|
1693
1736
|
readonly signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
@@ -1822,6 +1865,8 @@ export interface InitOutput {
|
|
|
1822
1865
|
readonly wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
|
|
1823
1866
|
readonly wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
|
|
1824
1867
|
readonly wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
|
|
1868
|
+
readonly wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
|
|
1869
|
+
readonly wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
|
|
1825
1870
|
readonly wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
|
|
1826
1871
|
readonly wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
|
|
1827
1872
|
readonly wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
@@ -1841,6 +1886,7 @@ export interface InitOutput {
|
|
|
1841
1886
|
readonly wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
|
|
1842
1887
|
readonly wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1843
1888
|
readonly wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1889
|
+
readonly wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1844
1890
|
readonly wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1845
1891
|
readonly wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
|
|
1846
1892
|
readonly wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
@@ -1848,6 +1894,7 @@ export interface InitOutput {
|
|
|
1848
1894
|
readonly wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
|
|
1849
1895
|
readonly wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1850
1896
|
readonly wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
|
|
1897
|
+
readonly wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
|
|
1851
1898
|
readonly wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
1852
1899
|
readonly wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
|
|
1853
1900
|
readonly wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
|
package/web/pdf_oxide.js
CHANGED
|
@@ -3127,6 +3127,64 @@ export class WasmPdfDocument {
|
|
|
3127
3127
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3128
3128
|
}
|
|
3129
3129
|
}
|
|
3130
|
+
/**
|
|
3131
|
+
* Cheap per-page text-vs-OCR classification → JSON
|
|
3132
|
+
* `DocumentClassification`.
|
|
3133
|
+
* @returns {string}
|
|
3134
|
+
*/
|
|
3135
|
+
classifyDocument() {
|
|
3136
|
+
let deferred2_0;
|
|
3137
|
+
let deferred2_1;
|
|
3138
|
+
try {
|
|
3139
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3140
|
+
wasm.wasmpdfdocument_classifyDocument(retptr, this.__wbg_ptr);
|
|
3141
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3142
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3143
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3144
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3145
|
+
var ptr1 = r0;
|
|
3146
|
+
var len1 = r1;
|
|
3147
|
+
if (r3) {
|
|
3148
|
+
ptr1 = 0; len1 = 0;
|
|
3149
|
+
throw takeObject(r2);
|
|
3150
|
+
}
|
|
3151
|
+
deferred2_0 = ptr1;
|
|
3152
|
+
deferred2_1 = len1;
|
|
3153
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3154
|
+
} finally {
|
|
3155
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3156
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3157
|
+
}
|
|
3158
|
+
}
|
|
3159
|
+
/**
|
|
3160
|
+
* Cheap per-page classification → JSON `PageClassification`.
|
|
3161
|
+
* @param {number} page_index
|
|
3162
|
+
* @returns {string}
|
|
3163
|
+
*/
|
|
3164
|
+
classifyPage(page_index) {
|
|
3165
|
+
let deferred2_0;
|
|
3166
|
+
let deferred2_1;
|
|
3167
|
+
try {
|
|
3168
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3169
|
+
wasm.wasmpdfdocument_classifyPage(retptr, this.__wbg_ptr, page_index);
|
|
3170
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3171
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3172
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3173
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3174
|
+
var ptr1 = r0;
|
|
3175
|
+
var len1 = r1;
|
|
3176
|
+
if (r3) {
|
|
3177
|
+
ptr1 = 0; len1 = 0;
|
|
3178
|
+
throw takeObject(r2);
|
|
3179
|
+
}
|
|
3180
|
+
deferred2_0 = ptr1;
|
|
3181
|
+
deferred2_1 = len1;
|
|
3182
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3183
|
+
} finally {
|
|
3184
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3185
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3186
|
+
}
|
|
3187
|
+
}
|
|
3130
3188
|
/**
|
|
3131
3189
|
* Clear all pending erase operations for a page.
|
|
3132
3190
|
* @param {number} page_index
|
|
@@ -3555,6 +3613,40 @@ export class WasmPdfDocument {
|
|
|
3555
3613
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3556
3614
|
}
|
|
3557
3615
|
}
|
|
3616
|
+
/**
|
|
3617
|
+
* Rich per-page extraction → JSON `PageExtraction` (per-region
|
|
3618
|
+
* bbox + typed reason). `optionsJson` is `{}`-tolerant
|
|
3619
|
+
* `AutoExtractOptions`; undefined/empty → defaults.
|
|
3620
|
+
* @param {number} page_index
|
|
3621
|
+
* @param {string | null} [options_json]
|
|
3622
|
+
* @returns {string}
|
|
3623
|
+
*/
|
|
3624
|
+
extractPageAuto(page_index, options_json) {
|
|
3625
|
+
let deferred3_0;
|
|
3626
|
+
let deferred3_1;
|
|
3627
|
+
try {
|
|
3628
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3629
|
+
var ptr0 = isLikeNone(options_json) ? 0 : passStringToWasm0(options_json, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
3630
|
+
var len0 = WASM_VECTOR_LEN;
|
|
3631
|
+
wasm.wasmpdfdocument_extractPageAuto(retptr, this.__wbg_ptr, page_index, ptr0, len0);
|
|
3632
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3633
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3634
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3635
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3636
|
+
var ptr2 = r0;
|
|
3637
|
+
var len2 = r1;
|
|
3638
|
+
if (r3) {
|
|
3639
|
+
ptr2 = 0; len2 = 0;
|
|
3640
|
+
throw takeObject(r2);
|
|
3641
|
+
}
|
|
3642
|
+
deferred3_0 = ptr2;
|
|
3643
|
+
deferred3_1 = len2;
|
|
3644
|
+
return getStringFromWasm0(ptr2, len2);
|
|
3645
|
+
} finally {
|
|
3646
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3647
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
3648
|
+
}
|
|
3649
|
+
}
|
|
3558
3650
|
/**
|
|
3559
3651
|
* Extract complete page text data in a single call.
|
|
3560
3652
|
*
|
|
@@ -3756,6 +3848,36 @@ export class WasmPdfDocument {
|
|
|
3756
3848
|
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3757
3849
|
}
|
|
3758
3850
|
}
|
|
3851
|
+
/**
|
|
3852
|
+
* One-shot auto text extraction — graceful native fallback (never
|
|
3853
|
+
* the opaque OCR error #513).
|
|
3854
|
+
* @param {number} page_index
|
|
3855
|
+
* @returns {string}
|
|
3856
|
+
*/
|
|
3857
|
+
extractTextAuto(page_index) {
|
|
3858
|
+
let deferred2_0;
|
|
3859
|
+
let deferred2_1;
|
|
3860
|
+
try {
|
|
3861
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
3862
|
+
wasm.wasmpdfdocument_extractTextAuto(retptr, this.__wbg_ptr, page_index);
|
|
3863
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
3864
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
3865
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
3866
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
3867
|
+
var ptr1 = r0;
|
|
3868
|
+
var len1 = r1;
|
|
3869
|
+
if (r3) {
|
|
3870
|
+
ptr1 = 0; len1 = 0;
|
|
3871
|
+
throw takeObject(r2);
|
|
3872
|
+
}
|
|
3873
|
+
deferred2_0 = ptr1;
|
|
3874
|
+
deferred2_1 = len1;
|
|
3875
|
+
return getStringFromWasm0(ptr1, len1);
|
|
3876
|
+
} finally {
|
|
3877
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
3878
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
3879
|
+
}
|
|
3880
|
+
}
|
|
3759
3881
|
/**
|
|
3760
3882
|
* Extract text lines from a page.
|
|
3761
3883
|
*
|
|
@@ -6155,6 +6277,36 @@ export function hasDocumentTimestamp(pdf_data) {
|
|
|
6155
6277
|
return ret !== 0;
|
|
6156
6278
|
}
|
|
6157
6279
|
|
|
6280
|
+
/**
|
|
6281
|
+
* #519: Air-gapped OCR model manifest — JSON (detector + every
|
|
6282
|
+
* supported language's cache filenames and source URLs).
|
|
6283
|
+
*
|
|
6284
|
+
* WASM provisioning is **host-side**: browser/WASM has no filesystem
|
|
6285
|
+
* or network-to-disk, so a download-to-cache prefetch cannot run
|
|
6286
|
+
* here. This manifest is informational — it lets the JS host learn
|
|
6287
|
+
* which model files/URLs to fetch and bundle (or ship out of band)
|
|
6288
|
+
* before driving OCR. There is intentionally no `prefetchModels` in
|
|
6289
|
+
* the WASM surface (see `prefetchAvailable`, which always returns
|
|
6290
|
+
* `false`).
|
|
6291
|
+
* @returns {string}
|
|
6292
|
+
*/
|
|
6293
|
+
export function modelManifest() {
|
|
6294
|
+
let deferred1_0;
|
|
6295
|
+
let deferred1_1;
|
|
6296
|
+
try {
|
|
6297
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
6298
|
+
wasm.modelManifest(retptr);
|
|
6299
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
6300
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
6301
|
+
deferred1_0 = r0;
|
|
6302
|
+
deferred1_1 = r1;
|
|
6303
|
+
return getStringFromWasm0(r0, r1);
|
|
6304
|
+
} finally {
|
|
6305
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
6306
|
+
wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
|
|
6307
|
+
}
|
|
6308
|
+
}
|
|
6309
|
+
|
|
6158
6310
|
/**
|
|
6159
6311
|
* Plan a bookmark split without producing PDFs. Returns a JSON array
|
|
6160
6312
|
* of segment objects (`index, startPage…` shape from
|
|
@@ -6186,6 +6338,17 @@ export function planSplitByBookmarks(src_bytes, title_prefix, ignore_case, level
|
|
|
6186
6338
|
}
|
|
6187
6339
|
}
|
|
6188
6340
|
|
|
6341
|
+
/**
|
|
6342
|
+
* #519: Whether this build can download OCR models to a local cache.
|
|
6343
|
+
* Always `false` in WASM — provisioning is host-side (see
|
|
6344
|
+
* `modelManifest`).
|
|
6345
|
+
* @returns {boolean}
|
|
6346
|
+
*/
|
|
6347
|
+
export function prefetchAvailable() {
|
|
6348
|
+
const ret = wasm.prefetchAvailable();
|
|
6349
|
+
return ret !== 0;
|
|
6350
|
+
}
|
|
6351
|
+
|
|
6189
6352
|
/**
|
|
6190
6353
|
* Install the process-wide runtime crypto policy from its grammar
|
|
6191
6354
|
* string (`"compat"|"strict"|"fips-strict"[;…]`). Fail-closed:
|
package/web/pdf_oxide_bg.wasm
CHANGED
|
Binary file
|
|
@@ -23,7 +23,9 @@ export const cryptoPolicy: (a: number) => void;
|
|
|
23
23
|
export const generateBarcodeSvg: (a: number, b: number, c: number, d: number) => void;
|
|
24
24
|
export const generateQrSvg: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
25
25
|
export const hasDocumentTimestamp: (a: number, b: number) => number;
|
|
26
|
+
export const modelManifest: (a: number) => void;
|
|
26
27
|
export const planSplitByBookmarks: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
28
|
+
export const prefetchAvailable: () => number;
|
|
27
29
|
export const setCryptoPolicy: (a: number, b: number, c: number) => void;
|
|
28
30
|
export const setLogLevel: (a: number, b: number, c: number) => void;
|
|
29
31
|
export const signPdfBytes: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
|
|
@@ -158,6 +160,8 @@ export const wasmpdfdocument_applyAllRedactions: (a: number, b: number) => void;
|
|
|
158
160
|
export const wasmpdfdocument_applyPageRedactions: (a: number, b: number, c: number) => void;
|
|
159
161
|
export const wasmpdfdocument_applyRedactionsDestructive: (a: number, b: number, c: number) => void;
|
|
160
162
|
export const wasmpdfdocument_authenticate: (a: number, b: number, c: number, d: number) => void;
|
|
163
|
+
export const wasmpdfdocument_classifyDocument: (a: number, b: number) => void;
|
|
164
|
+
export const wasmpdfdocument_classifyPage: (a: number, b: number, c: number) => void;
|
|
161
165
|
export const wasmpdfdocument_clearEraseRegions: (a: number, b: number, c: number) => void;
|
|
162
166
|
export const wasmpdfdocument_convertToPdfA: (a: number, b: number, c: number, d: number) => void;
|
|
163
167
|
export const wasmpdfdocument_cropMargins: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
@@ -177,6 +181,7 @@ export const wasmpdfdocument_extractChars: (a: number, b: number, c: number, d:
|
|
|
177
181
|
export const wasmpdfdocument_extractImageBytes: (a: number, b: number, c: number) => void;
|
|
178
182
|
export const wasmpdfdocument_extractImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
179
183
|
export const wasmpdfdocument_extractLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
184
|
+
export const wasmpdfdocument_extractPageAuto: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
180
185
|
export const wasmpdfdocument_extractPageText: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
181
186
|
export const wasmpdfdocument_extractPages: (a: number, b: number, c: number, d: number) => void;
|
|
182
187
|
export const wasmpdfdocument_extractPaths: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
@@ -184,6 +189,7 @@ export const wasmpdfdocument_extractRects: (a: number, b: number, c: number, d:
|
|
|
184
189
|
export const wasmpdfdocument_extractSpans: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
|
|
185
190
|
export const wasmpdfdocument_extractTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
186
191
|
export const wasmpdfdocument_extractText: (a: number, b: number, c: number, d: number) => void;
|
|
192
|
+
export const wasmpdfdocument_extractTextAuto: (a: number, b: number, c: number) => void;
|
|
187
193
|
export const wasmpdfdocument_extractTextLines: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
188
194
|
export const wasmpdfdocument_extractTextOcr: (a: number, b: number, c: number, d: number) => void;
|
|
189
195
|
export const wasmpdfdocument_extractWords: (a: number, b: number, c: number, d: number, e: number) => void;
|