@kreuzberg/wasm 4.7.4 → 4.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +90 -4
- package/README.md +3 -3
- package/dist/adapters/wasm-adapter.d.ts.map +1 -1
- package/dist/adapters/wasm-adapter.js +2 -2
- package/dist/adapters/wasm-adapter.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/pkg/LICENSE +90 -4
- package/dist/pkg/README.md +3 -3
- package/dist/pkg/kreuzberg_wasm.js +4 -4
- package/dist/pkg/kreuzberg_wasm_bg.js +4 -4
- package/dist/pkg/kreuzberg_wasm_bg.wasm +0 -0
- package/dist/pkg/kreuzberg_wasm_bg.wasm.d.ts +1 -1
- package/dist/types.d.ts +18 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +7 -7
package/LICENSE
CHANGED
|
@@ -1,7 +1,93 @@
|
|
|
1
|
-
|
|
1
|
+
Elastic License 2.0 (ELv2)
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Copyright 2025-2026 Kreuzberg, Inc.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Acceptance
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
By using the software, you agree to all of the terms and conditions below.
|
|
8
|
+
|
|
9
|
+
Copyright License
|
|
10
|
+
|
|
11
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
|
12
|
+
non-sublicensable, non-transferable license to use, copy, distribute, make
|
|
13
|
+
available, and prepare derivative works of the software, in each case subject to
|
|
14
|
+
the limitations and conditions below.
|
|
15
|
+
|
|
16
|
+
Limitations
|
|
17
|
+
|
|
18
|
+
You may not provide the software to third parties as a hosted or managed
|
|
19
|
+
service, where the service provides users with access to any substantial set of
|
|
20
|
+
the features or functionality of the software.
|
|
21
|
+
|
|
22
|
+
You may not move, change, disable, or circumvent the license key functionality
|
|
23
|
+
in the software, and you may not remove or obscure any functionality in the
|
|
24
|
+
software that is protected by the license key.
|
|
25
|
+
|
|
26
|
+
You may not alter, remove, or obscure any licensing, copyright, or other notices
|
|
27
|
+
of the licensor in the software. Any use of the licensor's trademarks is subject
|
|
28
|
+
to applicable law.
|
|
29
|
+
|
|
30
|
+
Patents
|
|
31
|
+
|
|
32
|
+
The licensor grants you a license, under any patent claims the licensor can
|
|
33
|
+
license, or becomes able to license, to make, have made, use, sell, offer for
|
|
34
|
+
sale, import and have imported the software, in each case subject to the
|
|
35
|
+
limitations and conditions in this license. This license does not cover any
|
|
36
|
+
patent claims that you cause to be infringed by modifications or additions to the
|
|
37
|
+
software. If you or your company make any written claim that the software
|
|
38
|
+
infringes or contributes to infringement of any patent, your patent license for
|
|
39
|
+
the software granted under these terms ends immediately. If your company makes
|
|
40
|
+
such a claim, your patent license ends immediately for work on behalf of your
|
|
41
|
+
company.
|
|
42
|
+
|
|
43
|
+
Notices
|
|
44
|
+
|
|
45
|
+
You must ensure that anyone who gets a copy of any part of the software from you
|
|
46
|
+
also gets a copy of these terms.
|
|
47
|
+
|
|
48
|
+
If you modify the software, you must include in any modified copies of the
|
|
49
|
+
software prominent notices stating that you have modified the software.
|
|
50
|
+
|
|
51
|
+
No Other Rights
|
|
52
|
+
|
|
53
|
+
These terms do not imply any licenses other than those expressly granted in
|
|
54
|
+
these terms.
|
|
55
|
+
|
|
56
|
+
Termination
|
|
57
|
+
|
|
58
|
+
If you use the software in violation of these terms, such use is not licensed,
|
|
59
|
+
and your licenses will automatically terminate. If the licensor provides you with
|
|
60
|
+
a notice of your violation, and you cease all violation of this license no later
|
|
61
|
+
than 30 days after you receive that notice, your licenses will be reinstated
|
|
62
|
+
retroactively. However, if you violate these terms after such reinstatement, any
|
|
63
|
+
additional violation of these terms will cause your licenses to terminate
|
|
64
|
+
automatically and permanently.
|
|
65
|
+
|
|
66
|
+
No Liability
|
|
67
|
+
|
|
68
|
+
As far as the law allows, the software comes as is, without any warranty or
|
|
69
|
+
condition, and the licensor will not be liable to you for any damages arising out
|
|
70
|
+
of these terms or the use or nature of the software, under any kind of legal
|
|
71
|
+
claim.
|
|
72
|
+
|
|
73
|
+
Definitions
|
|
74
|
+
|
|
75
|
+
The licensor is the entity offering these terms, and the software is the
|
|
76
|
+
software the licensor makes available under these terms, including any portion
|
|
77
|
+
of it.
|
|
78
|
+
|
|
79
|
+
you refers to the individual or entity agreeing to these terms.
|
|
80
|
+
|
|
81
|
+
your company is any legal entity, sole proprietorship, or other kind of
|
|
82
|
+
organization that you work for, plus all organizations that have control over,
|
|
83
|
+
are under the control of, or are under common control with that organization.
|
|
84
|
+
control means ownership of substantially all the assets of an entity, or the
|
|
85
|
+
power to direct its management and policies by vote, contract, or otherwise.
|
|
86
|
+
Control can be direct or indirect.
|
|
87
|
+
|
|
88
|
+
your licenses are all the licenses granted to you for the software under these
|
|
89
|
+
terms.
|
|
90
|
+
|
|
91
|
+
use means anything you do with the software requiring one of your licenses.
|
|
92
|
+
|
|
93
|
+
trademark means trademarks, service marks, and similar rights.
|
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
<!-- Project Info -->
|
|
44
44
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
|
45
|
-
<img src="https://img.shields.io/badge/License-
|
|
45
|
+
<img src="https://img.shields.io/badge/License-Elastic--2.0-blue.svg" alt="License">
|
|
46
46
|
</a>
|
|
47
47
|
<a href="https://docs.kreuzberg.dev">
|
|
48
48
|
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-007ec6" alt="Documentation">
|
|
@@ -522,7 +522,7 @@ Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg
|
|
|
522
522
|
|
|
523
523
|
## License
|
|
524
524
|
|
|
525
|
-
|
|
525
|
+
Elastic License 2.0 (ELv2) - see [LICENSE](../../LICENSE) for details.
|
|
526
526
|
|
|
527
527
|
## Support
|
|
528
528
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wasm-adapter.d.ts","sourceRoot":"","sources":["../../typescript/adapters/wasm-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAMX,gBAAgB,EAChB,gBAAgB,EAOhB,MAAM,aAAa,CAAC;AAoCrB;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAa7E;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAsCnF;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,GAAG,gBAAgB,
|
|
1
|
+
{"version":3,"file":"wasm-adapter.d.ts","sourceRoot":"","sources":["../../typescript/adapters/wasm-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAMX,gBAAgB,EAChB,gBAAgB,EAOhB,MAAM,aAAa,CAAC;AAoCrB;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAa7E;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAsCnF;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,GAAG,gBAAgB,CA4QvE;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,KAAK,CASpE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,gBAAgB,CAajF"}
|
|
@@ -76,8 +76,8 @@ function jsToExtractionResult(jsValue) {
|
|
|
76
76
|
for (const table of result.tables) {
|
|
77
77
|
if (table && typeof table === "object") {
|
|
78
78
|
const t = table;
|
|
79
|
-
const pageNumber = typeof t.pageNumber === "number" ? t.pageNumber : typeof t.page_number === "number" ? t.page_number :
|
|
80
|
-
if (Array.isArray(t.cells) && t.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string")) && typeof t.markdown === "string"
|
|
79
|
+
const pageNumber = typeof t.pageNumber === "number" ? t.pageNumber : typeof t.page_number === "number" ? t.page_number : 0;
|
|
80
|
+
if (Array.isArray(t.cells) && t.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string")) && typeof t.markdown === "string") {
|
|
81
81
|
tables.push({
|
|
82
82
|
cells: t.cells,
|
|
83
83
|
markdown: t.markdown,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type {\n\tChunk,\n\tDocumentStructure,\n\tElement,\n\tExtractedImage,\n\tExtractedKeyword,\n\tExtractionConfig,\n\tExtractionResult,\n\tMetadata,\n\tOcrElement,\n\tPageContent,\n\tPdfAnnotation,\n\tProcessingWarning,\n\tTable,\n} from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null || value === undefined;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null || value === undefined;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\" || value === undefined;\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\t// Convert camelCase key to snake_case to match Rust serde field names.\n\tconst toSnakeCase = (str: string): string => str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[toSnakeCase(key)] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tconst normalized: Record<string, unknown> = {};\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[toSnakeCase(key)] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tconst pageNumber =\n\t\t\t\t\ttypeof t.pageNumber === \"number\" ? t.pageNumber : typeof t.page_number === \"number\" ? t.page_number : null;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\tpageNumber !== null\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Coerce numeric values - handle BigInt, strings, and numbers\n\t\t\t\tconst coerceToNumber = (value: unknown, fieldName: string): number => {\n\t\t\t\t\tif (typeof value === \"number\") {\n\t\t\t\t\t\treturn value;\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"bigint\") {\n\t\t\t\t\t\treturn Number(value);\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"string\") {\n\t\t\t\t\t\tconst parsed = parseInt(value, 10);\n\t\t\t\t\t\tif (Number.isNaN(parsed)) {\n\t\t\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got \"${value}\"`);\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn parsed;\n\t\t\t\t\t}\n\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);\n\t\t\t\t};\n\n\t\t\t\t// The Rust code uses snake_case field names (byte_start, byte_end, etc)\n\t\t\t\t// but TypeScript expects camelCase (charStart, charEnd, etc)\n\t\t\t\t// For now, treat byte offsets as character offsets since the content is UTF-8\n\t\t\t\tconst charStart = coerceToNumber(\n\t\t\t\t\tmetadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,\n\t\t\t\t\t\"charStart\",\n\t\t\t\t);\n\t\t\t\tconst charEnd = coerceToNumber(\n\t\t\t\t\tmetadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,\n\t\t\t\t\t\"charEnd\",\n\t\t\t\t);\n\t\t\t\tconst chunkIndex = coerceToNumber(metadata.chunkIndex ?? metadata.chunk_index, \"chunkIndex\");\n\t\t\t\tconst totalChunks = coerceToNumber(metadata.totalChunks ?? metadata.total_chunks, \"totalChunks\");\n\n\t\t\t\tlet tokenCount: number | null = null;\n\t\t\t\tconst tokenCountValue = metadata.tokenCount ?? metadata.token_count;\n\t\t\t\tif (tokenCountValue !== null && tokenCountValue !== undefined) {\n\t\t\t\t\ttokenCount = coerceToNumber(tokenCountValue, \"tokenCount\");\n\t\t\t\t}\n\n\t\t\t\tlet firstPage: number | null = null;\n\t\t\t\tconst firstPageValue = metadata.firstPage ?? metadata.first_page;\n\t\t\t\tif (firstPageValue !== null && firstPageValue !== undefined) {\n\t\t\t\t\tfirstPage = coerceToNumber(firstPageValue, \"firstPage\");\n\t\t\t\t}\n\n\t\t\t\tlet lastPage: number | null = null;\n\t\t\t\tconst lastPageValue = metadata.lastPage ?? metadata.last_page;\n\t\t\t\tif (lastPageValue !== null && lastPageValue !== undefined) {\n\t\t\t\t\tlastPage = coerceToNumber(lastPageValue, \"lastPage\");\n\t\t\t\t}\n\n\t\t\t\tconst rawHc = (metadata[\"heading_context\"] ?? metadata[\"headingContext\"]) as\n\t\t\t\t\t| Record<string, unknown>\n\t\t\t\t\t| null\n\t\t\t\t\t| undefined;\n\t\t\t\tlet headingContext: import(\"../types.js\").HeadingContext | null = null;\n\t\t\t\tif (rawHc && typeof rawHc === \"object\") {\n\t\t\t\t\tconst rawHeadings = rawHc[\"headings\"];\n\t\t\t\t\tif (Array.isArray(rawHeadings)) {\n\t\t\t\t\t\theadingContext = {\n\t\t\t\t\t\t\theadings: rawHeadings.map((h: unknown) => {\n\t\t\t\t\t\t\t\tconst heading = h as Record<string, unknown>;\n\t\t\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\t\t\tlevel: (heading[\"level\"] as number) ?? 0,\n\t\t\t\t\t\t\t\t\ttext: (heading[\"text\"] as string) ?? \"\",\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tbyteStart: charStart,\n\t\t\t\t\t\tbyteEnd: charEnd,\n\t\t\t\t\t\tcharStart,\n\t\t\t\t\t\tcharEnd,\n\t\t\t\t\t\ttokenCount,\n\t\t\t\t\t\tchunkIndex,\n\t\t\t\t\t\ttotalChunks,\n\t\t\t\t\t\tfirstPage,\n\t\t\t\t\t\tlastPage,\n\t\t\t\t\t\theadingContext,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tlet imageData: Uint8Array;\n\t\t\t\tif (img.data instanceof Uint8Array) {\n\t\t\t\t\timageData = img.data;\n\t\t\t\t} else if (Array.isArray(img.data)) {\n\t\t\t\t\timageData = new Uint8Array(img.data as number[]);\n\t\t\t\t} else {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array or array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\t// Support both camelCase and snake_case field names (Rust serde uses snake_case)\n\t\t\t\tconst imageIndex = img.imageIndex ?? img.image_index;\n\t\t\t\tconst pageNumber = img.pageNumber ?? img.page_number;\n\t\t\t\tconst bitsPerComponent = img.bitsPerComponent ?? img.bits_per_component;\n\t\t\t\tconst isMask = img.isMask ?? img.is_mask;\n\t\t\t\tconst ocrResult = img.ocrResult ?? img.ocr_result;\n\n\t\t\t\tif (typeof imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: imageData,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: imageIndex,\n\t\t\t\t\tpageNumber: pageNumber ?? null,\n\t\t\t\t\twidth: (img.width as number) ?? null,\n\t\t\t\t\theight: (img.height as number) ?? null,\n\t\t\t\t\tcolorspace: (img.colorspace as string) ?? null,\n\t\t\t\t\tbitsPerComponent: bitsPerComponent ?? null,\n\t\t\t\t\tisMask: isMask ?? false,\n\t\t\t\t\tdescription: (img.description as string) ?? null,\n\t\t\t\t\tocrResult: ocrResult ? jsToExtractionResult(ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\tconst extractedKeywords = (result.extractedKeywords ?? result.extracted_keywords ?? null) as\n\t\t| ExtractedKeyword[]\n\t\t| null;\n\tconst qualityScore =\n\t\ttypeof (result.qualityScore ?? result.quality_score) === \"number\"\n\t\t\t? ((result.qualityScore ?? result.quality_score) as number)\n\t\t\t: null;\n\tconst processingWarnings = (result.processingWarnings ?? result.processing_warnings ?? null) as\n\t\t| ProcessingWarning[]\n\t\t| null;\n\tconst elements = (result.elements ?? null) as Element[] | null;\n\tconst ocrElements = (result.ocrElements ?? result.ocr_elements ?? null) as OcrElement[] | null;\n\tconst document = (result.document ?? null) as DocumentStructure | null;\n\tconst pages = (result.pages ?? null) as PageContent[] | null;\n\tconst annotations = (result.annotations ?? null) as PdfAnnotation[] | null;\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t\tpages,\n\t\textractedKeywords,\n\t\tqualityScore,\n\t\tprocessingWarnings,\n\t\telements,\n\t\tocrElements,\n\t\tdocument,\n\t\tannotations,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAiDA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,UAAU;AACjE;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,UAAU;AACjE;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU,aAAa,UAAU;AAChD;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAGA,QAAM,cAAc,CAAC,QAAwB,IAAI,QAAQ,UAAU,CAAC,WAAW,IAAI,OAAO,YAAY,CAAC,EAAE;AAEzG,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,YAAY,GAAG,CAAC,IAAI;AAAA,QAChC;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAsC,CAAC;AAC7C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,YAAY,GAAG,CAAC,IAAI;AAAA,IAChC;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,cAAM,aACL,OAAO,EAAE,eAAe,WAAW,EAAE,aAAa,OAAO,EAAE,gBAAgB,WAAW,EAAE,cAAc;AACvG,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,eAAe,MACd;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ;AAAA,UACD,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,UAAM,iBAAiB,CAAC,OAAgB,cAA8B;AACrE,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO;AAAA,MACR;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO,OAAO,KAAK;AAAA,MACpB;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,cAAM,SAAS,SAAS,OAAO,EAAE;AACjC,YAAI,OAAO,MAAM,MAAM,GAAG;AACzB,gBAAM,IAAI,MAAM,2BAA2B,SAAS,iCAAiC,KAAK,GAAG;AAAA,QAC9F;AACA,eAAO;AAAA,MACR;AACA,YAAM,IAAI,MAAM,2BAA2B,SAAS,0BAA0B,OAAO,KAAK,EAAE;AAAA,IAC7F;AAKA,UAAM,YAAY;AAAA,MACjB,SAAS,aAAa,SAAS,cAAc,SAAS,aAAa,SAAS;AAAA,MAC5E;AAAA,IACD;AACA,UAAM,UAAU;AAAA,MACf,SAAS,WAAW,SAAS,YAAY,SAAS,WAAW,SAAS;AAAA,MACtE;AAAA,IACD;AACA,UAAM,aAAa,eAAe,SAAS,cAAc,SAAS,aAAa,YAAY;AAC3F,UAAM,cAAc,eAAe,SAAS,eAAe,SAAS,cAAc,aAAa;AAE/F,QAAI,aAA4B;AAChC,UAAM,kBAAkB,SAAS,cAAc,SAAS;AACxD,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,mBAAa,eAAe,iBAAiB,YAAY;AAAA,IAC1D;AAEA,QAAI,YAA2B;AAC/B,UAAM,iBAAiB,SAAS,aAAa,SAAS;AACtD,QAAI,mBAAmB,QAAQ,mBAAmB,QAAW;AAC5D,kBAAY,eAAe,gBAAgB,WAAW;AAAA,IACvD;AAEA,QAAI,WAA0B;AAC9B,UAAM,gBAAgB,SAAS,YAAY,SAAS;AACpD,QAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,iBAAW,eAAe,eAAe,UAAU;AAAA,IACpD;AAEA,UAAM,QAAS,SAAS,iBAAiB,KAAK,SAAS,gBAAgB;AAIvE,QAAI,iBAA8D;AAClE,QAAI,SAAS,OAAO,UAAU,UAAU;AACvC,YAAM,cAAc,MAAM,UAAU;AACpC,UAAI,MAAM,QAAQ,WAAW,GAAG;AAC/B,yBAAiB;AAAA,UAChB,UAAU,YAAY,IAAI,CAAC,MAAe;AACzC,kBAAM,UAAU;AAChB,mBAAO;AAAA,cACN,OAAQ,QAAQ,OAAO,KAAgB;AAAA,cACvC,MAAO,QAAQ,MAAM,KAAgB;AAAA,YACtC;AAAA,UACD,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW;AAAA,QACX,SAAS;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI;AACJ,QAAI,IAAI,gBAAgB,YAAY;AACnC,kBAAY,IAAI;AAAA,IACjB,WAAW,MAAM,QAAQ,IAAI,IAAI,GAAG;AACnC,kBAAY,IAAI,WAAW,IAAI,IAAgB;AAAA,IAChD,OAAO;AACN,YAAM,IAAI,MAAM,iDAAiD;AAAA,IAClE;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAGA,UAAM,aAAa,IAAI,cAAc,IAAI;AACzC,UAAM,aAAa,IAAI,cAAc,IAAI;AACzC,UAAM,mBAAmB,IAAI,oBAAoB,IAAI;AACrD,UAAM,SAAS,IAAI,UAAU,IAAI;AACjC,UAAM,YAAY,IAAI,aAAa,IAAI;AAEvC,QAAI,OAAO,eAAe,UAAU;AACnC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,UAAU,GAAG;AAChC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,gBAAgB,GAAG;AACtC,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,MAAM,GAAG;AACvB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM;AAAA,MACN,QAAQ,IAAI;AAAA,MACZ;AAAA,MACA,YAAY,cAAc;AAAA,MAC1B,OAAQ,IAAI,SAAoB;AAAA,MAChC,QAAS,IAAI,UAAqB;AAAA,MAClC,YAAa,IAAI,cAAyB;AAAA,MAC1C,kBAAkB,oBAAoB;AAAA,MACtC,QAAQ,UAAU;AAAA,MAClB,aAAc,IAAI,eAA0B;AAAA,MAC5C,WAAW,YAAY,qBAAqB,SAAS,IAAI;AAAA,IAC1D;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,QAAM,oBAAqB,OAAO,qBAAqB,OAAO,sBAAsB;AAGpF,QAAM,eACL,QAAQ,OAAO,gBAAgB,OAAO,mBAAmB,WACpD,OAAO,gBAAgB,OAAO,gBAChC;AACJ,QAAM,qBAAsB,OAAO,sBAAsB,OAAO,uBAAuB;AAGvF,QAAM,WAAY,OAAO,YAAY;AACrC,QAAM,cAAe,OAAO,eAAe,OAAO,gBAAgB;AAClE,QAAM,WAAY,OAAO,YAAY;AACrC,QAAM,QAAS,OAAO,SAAS;AAC/B,QAAM,cAAe,OAAO,eAAe;AAE3C,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
|
|
1
|
+
{"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type {\n\tChunk,\n\tDocumentStructure,\n\tElement,\n\tExtractedImage,\n\tExtractedKeyword,\n\tExtractionConfig,\n\tExtractionResult,\n\tMetadata,\n\tOcrElement,\n\tPageContent,\n\tPdfAnnotation,\n\tProcessingWarning,\n\tTable,\n} from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null || value === undefined;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null || value === undefined;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\" || value === undefined;\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\t// Convert camelCase key to snake_case to match Rust serde field names.\n\tconst toSnakeCase = (str: string): string => str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[toSnakeCase(key)] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tconst normalized: Record<string, unknown> = {};\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[toSnakeCase(key)] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tconst pageNumber =\n\t\t\t\t\ttypeof t.pageNumber === \"number\" ? t.pageNumber : typeof t.page_number === \"number\" ? t.page_number : 0;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\t// Coerce numeric values - handle BigInt, strings, and numbers\n\t\t\t\tconst coerceToNumber = (value: unknown, fieldName: string): number => {\n\t\t\t\t\tif (typeof value === \"number\") {\n\t\t\t\t\t\treturn value;\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"bigint\") {\n\t\t\t\t\t\treturn Number(value);\n\t\t\t\t\t}\n\t\t\t\t\tif (typeof value === \"string\") {\n\t\t\t\t\t\tconst parsed = parseInt(value, 10);\n\t\t\t\t\t\tif (Number.isNaN(parsed)) {\n\t\t\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a valid number, got \"${value}\"`);\n\t\t\t\t\t\t}\n\t\t\t\t\t\treturn parsed;\n\t\t\t\t\t}\n\t\t\t\t\tthrow new Error(`Invalid chunk metadata: ${fieldName} must be a number, got ${typeof value}`);\n\t\t\t\t};\n\n\t\t\t\t// The Rust code uses snake_case field names (byte_start, byte_end, etc)\n\t\t\t\t// but TypeScript expects camelCase (charStart, charEnd, etc)\n\t\t\t\t// For now, treat byte offsets as character offsets since the content is UTF-8\n\t\t\t\tconst charStart = coerceToNumber(\n\t\t\t\t\tmetadata.charStart ?? metadata.char_start ?? metadata.byteStart ?? metadata.byte_start,\n\t\t\t\t\t\"charStart\",\n\t\t\t\t);\n\t\t\t\tconst charEnd = coerceToNumber(\n\t\t\t\t\tmetadata.charEnd ?? metadata.char_end ?? metadata.byteEnd ?? metadata.byte_end,\n\t\t\t\t\t\"charEnd\",\n\t\t\t\t);\n\t\t\t\tconst chunkIndex = coerceToNumber(metadata.chunkIndex ?? metadata.chunk_index, \"chunkIndex\");\n\t\t\t\tconst totalChunks = coerceToNumber(metadata.totalChunks ?? metadata.total_chunks, \"totalChunks\");\n\n\t\t\t\tlet tokenCount: number | null = null;\n\t\t\t\tconst tokenCountValue = metadata.tokenCount ?? metadata.token_count;\n\t\t\t\tif (tokenCountValue !== null && tokenCountValue !== undefined) {\n\t\t\t\t\ttokenCount = coerceToNumber(tokenCountValue, \"tokenCount\");\n\t\t\t\t}\n\n\t\t\t\tlet firstPage: number | null = null;\n\t\t\t\tconst firstPageValue = metadata.firstPage ?? metadata.first_page;\n\t\t\t\tif (firstPageValue !== null && firstPageValue !== undefined) {\n\t\t\t\t\tfirstPage = coerceToNumber(firstPageValue, \"firstPage\");\n\t\t\t\t}\n\n\t\t\t\tlet lastPage: number | null = null;\n\t\t\t\tconst lastPageValue = metadata.lastPage ?? metadata.last_page;\n\t\t\t\tif (lastPageValue !== null && lastPageValue !== undefined) {\n\t\t\t\t\tlastPage = coerceToNumber(lastPageValue, \"lastPage\");\n\t\t\t\t}\n\n\t\t\t\tconst rawHc = (metadata[\"heading_context\"] ?? metadata[\"headingContext\"]) as\n\t\t\t\t\t| Record<string, unknown>\n\t\t\t\t\t| null\n\t\t\t\t\t| undefined;\n\t\t\t\tlet headingContext: import(\"../types.js\").HeadingContext | null = null;\n\t\t\t\tif (rawHc && typeof rawHc === \"object\") {\n\t\t\t\t\tconst rawHeadings = rawHc[\"headings\"];\n\t\t\t\t\tif (Array.isArray(rawHeadings)) {\n\t\t\t\t\t\theadingContext = {\n\t\t\t\t\t\t\theadings: rawHeadings.map((h: unknown) => {\n\t\t\t\t\t\t\t\tconst heading = h as Record<string, unknown>;\n\t\t\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\t\t\tlevel: (heading[\"level\"] as number) ?? 0,\n\t\t\t\t\t\t\t\t\ttext: (heading[\"text\"] as string) ?? \"\",\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t}),\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tbyteStart: charStart,\n\t\t\t\t\t\tbyteEnd: charEnd,\n\t\t\t\t\t\tcharStart,\n\t\t\t\t\t\tcharEnd,\n\t\t\t\t\t\ttokenCount,\n\t\t\t\t\t\tchunkIndex,\n\t\t\t\t\t\ttotalChunks,\n\t\t\t\t\t\tfirstPage,\n\t\t\t\t\t\tlastPage,\n\t\t\t\t\t\theadingContext,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tlet imageData: Uint8Array;\n\t\t\t\tif (img.data instanceof Uint8Array) {\n\t\t\t\t\timageData = img.data;\n\t\t\t\t} else if (Array.isArray(img.data)) {\n\t\t\t\t\timageData = new Uint8Array(img.data as number[]);\n\t\t\t\t} else {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array or array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\t// Support both camelCase and snake_case field names (Rust serde uses snake_case)\n\t\t\t\tconst imageIndex = img.imageIndex ?? img.image_index;\n\t\t\t\tconst pageNumber = img.pageNumber ?? img.page_number;\n\t\t\t\tconst bitsPerComponent = img.bitsPerComponent ?? img.bits_per_component;\n\t\t\t\tconst isMask = img.isMask ?? img.is_mask;\n\t\t\t\tconst ocrResult = img.ocrResult ?? img.ocr_result;\n\n\t\t\t\tif (typeof imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: imageData,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: imageIndex,\n\t\t\t\t\tpageNumber: pageNumber ?? null,\n\t\t\t\t\twidth: (img.width as number) ?? null,\n\t\t\t\t\theight: (img.height as number) ?? null,\n\t\t\t\t\tcolorspace: (img.colorspace as string) ?? null,\n\t\t\t\t\tbitsPerComponent: bitsPerComponent ?? null,\n\t\t\t\t\tisMask: isMask ?? false,\n\t\t\t\t\tdescription: (img.description as string) ?? null,\n\t\t\t\t\tocrResult: ocrResult ? jsToExtractionResult(ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\tconst extractedKeywords = (result.extractedKeywords ?? result.extracted_keywords ?? null) as\n\t\t| ExtractedKeyword[]\n\t\t| null;\n\tconst qualityScore =\n\t\ttypeof (result.qualityScore ?? result.quality_score) === \"number\"\n\t\t\t? ((result.qualityScore ?? result.quality_score) as number)\n\t\t\t: null;\n\tconst processingWarnings = (result.processingWarnings ?? result.processing_warnings ?? null) as\n\t\t| ProcessingWarning[]\n\t\t| null;\n\tconst elements = (result.elements ?? null) as Element[] | null;\n\tconst ocrElements = (result.ocrElements ?? result.ocr_elements ?? null) as OcrElement[] | null;\n\tconst document = (result.document ?? null) as DocumentStructure | null;\n\tconst pages = (result.pages ?? null) as PageContent[] | null;\n\tconst annotations = (result.annotations ?? null) as PdfAnnotation[] | null;\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t\tpages,\n\t\textractedKeywords,\n\t\tqualityScore,\n\t\tprocessingWarnings,\n\t\telements,\n\t\tocrElements,\n\t\tdocument,\n\t\tannotations,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";AAiDA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,UAAU;AACjE;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,UAAU;AACjE;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU,aAAa,UAAU;AAChD;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAGA,QAAM,cAAc,CAAC,QAAwB,IAAI,QAAQ,UAAU,CAAC,WAAW,IAAI,OAAO,YAAY,CAAC,EAAE;AAEzG,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,YAAY,GAAG,CAAC,IAAI;AAAA,QAChC;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAsC,CAAC;AAC7C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,YAAY,GAAG,CAAC,IAAI;AAAA,IAChC;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,cAAM,aACL,OAAO,EAAE,eAAe,WAAW,EAAE,aAAa,OAAO,EAAE,gBAAgB,WAAW,EAAE,cAAc;AACvG,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,UACrB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ;AAAA,UACD,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAGA,UAAM,iBAAiB,CAAC,OAAgB,cAA8B;AACrE,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO;AAAA,MACR;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,eAAO,OAAO,KAAK;AAAA,MACpB;AACA,UAAI,OAAO,UAAU,UAAU;AAC9B,cAAM,SAAS,SAAS,OAAO,EAAE;AACjC,YAAI,OAAO,MAAM,MAAM,GAAG;AACzB,gBAAM,IAAI,MAAM,2BAA2B,SAAS,iCAAiC,KAAK,GAAG;AAAA,QAC9F;AACA,eAAO;AAAA,MACR;AACA,YAAM,IAAI,MAAM,2BAA2B,SAAS,0BAA0B,OAAO,KAAK,EAAE;AAAA,IAC7F;AAKA,UAAM,YAAY;AAAA,MACjB,SAAS,aAAa,SAAS,cAAc,SAAS,aAAa,SAAS;AAAA,MAC5E;AAAA,IACD;AACA,UAAM,UAAU;AAAA,MACf,SAAS,WAAW,SAAS,YAAY,SAAS,WAAW,SAAS;AAAA,MACtE;AAAA,IACD;AACA,UAAM,aAAa,eAAe,SAAS,cAAc,SAAS,aAAa,YAAY;AAC3F,UAAM,cAAc,eAAe,SAAS,eAAe,SAAS,cAAc,aAAa;AAE/F,QAAI,aAA4B;AAChC,UAAM,kBAAkB,SAAS,cAAc,SAAS;AACxD,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,mBAAa,eAAe,iBAAiB,YAAY;AAAA,IAC1D;AAEA,QAAI,YAA2B;AAC/B,UAAM,iBAAiB,SAAS,aAAa,SAAS;AACtD,QAAI,mBAAmB,QAAQ,mBAAmB,QAAW;AAC5D,kBAAY,eAAe,gBAAgB,WAAW;AAAA,IACvD;AAEA,QAAI,WAA0B;AAC9B,UAAM,gBAAgB,SAAS,YAAY,SAAS;AACpD,QAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,iBAAW,eAAe,eAAe,UAAU;AAAA,IACpD;AAEA,UAAM,QAAS,SAAS,iBAAiB,KAAK,SAAS,gBAAgB;AAIvE,QAAI,iBAA8D;AAClE,QAAI,SAAS,OAAO,UAAU,UAAU;AACvC,YAAM,cAAc,MAAM,UAAU;AACpC,UAAI,MAAM,QAAQ,WAAW,GAAG;AAC/B,yBAAiB;AAAA,UAChB,UAAU,YAAY,IAAI,CAAC,MAAe;AACzC,kBAAM,UAAU;AAChB,mBAAO;AAAA,cACN,OAAQ,QAAQ,OAAO,KAAgB;AAAA,cACvC,MAAO,QAAQ,MAAM,KAAgB;AAAA,YACtC;AAAA,UACD,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW;AAAA,QACX,SAAS;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACD;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI;AACJ,QAAI,IAAI,gBAAgB,YAAY;AACnC,kBAAY,IAAI;AAAA,IACjB,WAAW,MAAM,QAAQ,IAAI,IAAI,GAAG;AACnC,kBAAY,IAAI,WAAW,IAAI,IAAgB;AAAA,IAChD,OAAO;AACN,YAAM,IAAI,MAAM,iDAAiD;AAAA,IAClE;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAGA,UAAM,aAAa,IAAI,cAAc,IAAI;AACzC,UAAM,aAAa,IAAI,cAAc,IAAI;AACzC,UAAM,mBAAmB,IAAI,oBAAoB,IAAI;AACrD,UAAM,SAAS,IAAI,UAAU,IAAI;AACjC,UAAM,YAAY,IAAI,aAAa,IAAI;AAEvC,QAAI,OAAO,eAAe,UAAU;AACnC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,UAAU,GAAG;AAChC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,gBAAgB,GAAG;AACtC,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,MAAM,GAAG;AACvB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM;AAAA,MACN,QAAQ,IAAI;AAAA,MACZ;AAAA,MACA,YAAY,cAAc;AAAA,MAC1B,OAAQ,IAAI,SAAoB;AAAA,MAChC,QAAS,IAAI,UAAqB;AAAA,MAClC,YAAa,IAAI,cAAyB;AAAA,MAC1C,kBAAkB,oBAAoB;AAAA,MACtC,QAAQ,UAAU;AAAA,MAClB,aAAc,IAAI,eAA0B;AAAA,MAC5C,WAAW,YAAY,qBAAqB,SAAS,IAAI;AAAA,IAC1D;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,QAAM,oBAAqB,OAAO,qBAAqB,OAAO,sBAAsB;AAGpF,QAAM,eACL,QAAQ,OAAO,gBAAgB,OAAO,mBAAmB,WACpD,OAAO,gBAAgB,OAAO,gBAChC;AACJ,QAAM,qBAAsB,OAAO,sBAAsB,OAAO,uBAAuB;AAGvF,QAAM,WAAY,OAAO,YAAY;AACrC,QAAM,cAAe,OAAO,eAAe,OAAO,gBAAgB;AAClE,QAAM,WAAY,OAAO,YAAY;AACrC,QAAM,QAAS,OAAO,SAAS;AAC/B,QAAM,cAAe,OAAO,eAAe;AAE3C,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}
|
package/dist/index.js
CHANGED
|
@@ -302,8 +302,8 @@ function jsToExtractionResult(jsValue) {
|
|
|
302
302
|
for (const table of result.tables) {
|
|
303
303
|
if (table && typeof table === "object") {
|
|
304
304
|
const t = table;
|
|
305
|
-
const pageNumber = typeof t.pageNumber === "number" ? t.pageNumber : typeof t.page_number === "number" ? t.page_number :
|
|
306
|
-
if (Array.isArray(t.cells) && t.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string")) && typeof t.markdown === "string"
|
|
305
|
+
const pageNumber = typeof t.pageNumber === "number" ? t.pageNumber : typeof t.page_number === "number" ? t.page_number : 0;
|
|
306
|
+
if (Array.isArray(t.cells) && t.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string")) && typeof t.markdown === "string") {
|
|
307
307
|
tables.push({
|
|
308
308
|
cells: t.cells,
|
|
309
309
|
markdown: t.markdown,
|