@kreuzberg/wasm 4.0.0-rc.23 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@
7
7
  /**
8
8
  * Token reduction configuration
9
9
  */
10
- interface TokenReductionConfig {
10
+ export interface TokenReductionConfig {
11
11
  /** Token reduction mode */
12
12
  mode?: string;
13
13
  /** Preserve important words during reduction */
@@ -16,7 +16,7 @@ interface TokenReductionConfig {
16
16
  /**
17
17
  * Post-processor configuration
18
18
  */
19
- interface PostProcessorConfig {
19
+ export interface PostProcessorConfig {
20
20
  /** Whether post-processing is enabled */
21
21
  enabled?: boolean;
22
22
  /** List of enabled processors */
@@ -31,18 +31,18 @@ interface PostProcessorConfig {
31
31
  * - "yake": YAKE (Yet Another Keyword Extractor) - statistical approach
32
32
  * - "rake": RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
33
33
  */
34
- type KeywordAlgorithm = "yake" | "rake";
34
+ export type KeywordAlgorithm = "yake" | "rake";
35
35
  /**
36
36
  * YAKE algorithm-specific parameters
37
37
  */
38
- interface YakeParams {
38
+ export interface YakeParams {
39
39
  /** Window size for co-occurrence analysis (default: 2) */
40
40
  windowSize?: number;
41
41
  }
42
42
  /**
43
43
  * RAKE algorithm-specific parameters
44
44
  */
45
- interface RakeParams {
45
+ export interface RakeParams {
46
46
  /** Minimum word length to consider (default: 1) */
47
47
  minWordLength?: number;
48
48
  /** Maximum words in a keyword phrase (default: 3) */
@@ -54,7 +54,7 @@ interface RakeParams {
54
54
  * Controls how keywords are extracted from text, including algorithm selection,
55
55
  * scoring thresholds, n-gram ranges, and language-specific settings.
56
56
  */
57
- interface KeywordConfig {
57
+ export interface KeywordConfig {
58
58
  /** Algorithm to use for extraction (default: "yake") */
59
59
  algorithm?: KeywordAlgorithm;
60
60
  /** Maximum number of keywords to extract (default: 10) */
@@ -76,7 +76,7 @@ interface KeywordConfig {
76
76
  * Represents a single keyword extracted from text along with its relevance score,
77
77
  * the algorithm that extracted it, and optional position information.
78
78
  */
79
- interface ExtractedKeyword {
79
+ export interface ExtractedKeyword {
80
80
  /** The keyword text */
81
81
  text: string;
82
82
  /** Relevance score (higher is better, algorithm-specific range) */
@@ -89,7 +89,7 @@ interface ExtractedKeyword {
89
89
  /**
90
90
  * Configuration for document extraction
91
91
  */
92
- interface ExtractionConfig {
92
+ export interface ExtractionConfig {
93
93
  /** OCR configuration */
94
94
  ocr?: OcrConfig;
95
95
  /** Chunking configuration */
@@ -120,7 +120,7 @@ interface ExtractionConfig {
120
120
  /**
121
121
  * Tesseract OCR configuration
122
122
  */
123
- interface TesseractConfig {
123
+ export interface TesseractConfig {
124
124
  /** Tesseract page segmentation mode */
125
125
  psm?: number;
126
126
  /** Enable table detection */
@@ -131,7 +131,7 @@ interface TesseractConfig {
131
131
  /**
132
132
  * OCR configuration
133
133
  */
134
- interface OcrConfig {
134
+ export interface OcrConfig {
135
135
  /** OCR backend to use */
136
136
  backend?: string;
137
137
  /** Language codes (ISO 639) */
@@ -146,7 +146,7 @@ interface OcrConfig {
146
146
  /**
147
147
  * Chunking configuration
148
148
  */
149
- interface ChunkingConfig {
149
+ export interface ChunkingConfig {
150
150
  /** Maximum characters per chunk */
151
151
  maxChars?: number;
152
152
  /** Overlap between chunks */
@@ -155,7 +155,7 @@ interface ChunkingConfig {
155
155
  /**
156
156
  * Image extraction configuration
157
157
  */
158
- interface ImageExtractionConfig {
158
+ export interface ImageExtractionConfig {
159
159
  /** Whether to extract images */
160
160
  enabled?: boolean;
161
161
  /** Target DPI for image extraction */
@@ -172,7 +172,7 @@ interface ImageExtractionConfig {
172
172
  /**
173
173
  * PDF extraction configuration
174
174
  */
175
- interface PdfConfig {
175
+ export interface PdfConfig {
176
176
  /** Whether to extract images from PDF */
177
177
  extractImages?: boolean;
178
178
  /** Passwords for encrypted PDFs */
@@ -183,7 +183,7 @@ interface PdfConfig {
183
183
  /**
184
184
  * Page extraction configuration
185
185
  */
186
- interface PageExtractionConfig {
186
+ export interface PageExtractionConfig {
187
187
  /** Extract pages as separate array (ExtractionResult.pages) */
188
188
  extractPages?: boolean;
189
189
  /** Insert page markers in main content string */
@@ -194,14 +194,14 @@ interface PageExtractionConfig {
194
194
  /**
195
195
  * Language detection configuration
196
196
  */
197
- interface LanguageDetectionConfig {
197
+ export interface LanguageDetectionConfig {
198
198
  /** Whether to detect languages */
199
199
  enabled?: boolean;
200
200
  }
201
201
  /**
202
202
  * Result of document extraction
203
203
  */
204
- interface ExtractionResult {
204
+ export interface ExtractionResult {
205
205
  /** Extracted text content */
206
206
  content: string;
207
207
  /** MIME type of the document */
@@ -224,7 +224,7 @@ interface ExtractionResult {
224
224
  /**
225
225
  * Document metadata
226
226
  */
227
- interface Metadata {
227
+ export interface Metadata {
228
228
  /** Document title */
229
229
  title?: string;
230
230
  /** Document subject or description */
@@ -256,7 +256,7 @@ interface Metadata {
256
256
  /**
257
257
  * Extracted table
258
258
  */
259
- interface Table {
259
+ export interface Table {
260
260
  /** Table cells/rows */
261
261
  cells?: string[][];
262
262
  /** Table markdown representation */
@@ -271,7 +271,7 @@ interface Table {
271
271
  /**
272
272
  * Chunk metadata
273
273
  */
274
- interface ChunkMetadata {
274
+ export interface ChunkMetadata {
275
275
  /** Character start position in original content */
276
276
  charStart: number;
277
277
  /** Character end position in original content */
@@ -286,7 +286,7 @@ interface ChunkMetadata {
286
286
  /**
287
287
  * Text chunk from chunked content
288
288
  */
289
- interface Chunk {
289
+ export interface Chunk {
290
290
  /** Chunk text content */
291
291
  content: string;
292
292
  /** Chunk metadata */
@@ -301,7 +301,7 @@ interface Chunk {
301
301
  /**
302
302
  * Extracted image from document
303
303
  */
304
- interface ExtractedImage {
304
+ export interface ExtractedImage {
305
305
  /** Image data as Uint8Array or base64 string */
306
306
  data: Uint8Array | string;
307
307
  /** Image format/MIME type */
@@ -330,7 +330,7 @@ interface ExtractedImage {
330
330
  /**
331
331
  * Per-page content
332
332
  */
333
- interface PageContent {
333
+ export interface PageContent {
334
334
  /** Page number (1-indexed) */
335
335
  pageNumber: number;
336
336
  /** Text content of the page */
@@ -343,7 +343,7 @@ interface PageContent {
343
343
  /**
344
344
  * OCR backend protocol/interface
345
345
  */
346
- interface OcrBackendProtocol {
346
+ export interface OcrBackendProtocol {
347
347
  /** Get the backend name */
348
348
  name(): string;
349
349
  /** Get supported language codes */
@@ -360,5 +360,4 @@ interface OcrBackendProtocol {
360
360
  tables?: unknown[];
361
361
  } | string>;
362
362
  }
363
-
364
- export type { Chunk as C, ExtractionResult as E, ImageExtractionConfig as I, KeywordAlgorithm as K, LanguageDetectionConfig as L, Metadata as M, OcrBackendProtocol as O, PageContent as P, RakeParams as R, Table as T, YakeParams as Y, ExtractionConfig as a, ChunkingConfig as b, ChunkMetadata as c, ExtractedImage as d, OcrConfig as e, PageExtractionConfig as f, PdfConfig as g, PostProcessorConfig as h, TesseractConfig as i, TokenReductionConfig as j, KeywordConfig as k, ExtractedKeyword as l };
363
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../typescript/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,2BAA2B;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,sBAAsB,CAAC,EAAE,OAAO,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,yCAAyC;IACzC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,iCAAiC;IACjC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,kCAAkC;IAClC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED;;;;;;GAMG;AACH,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,MAAM,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,0DAA0D;IAC1D,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,mDAAmD;IACnD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qDAAqD;IACrD,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,0DAA0D;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,oEAAoE;IACpE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;CACxB;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,mEAAmE;IACnE,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,SAAS,EAAE,gBAAgB,CAAC;IAC5B,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,wBAAwB;IACxB,GAAG,CAAC,EAAE,SAAS,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,qCAAqC;IACrC,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,oCAAoC;IACpC,KAAK,CAAC,EAAE,oBAAoB,CAAC;IAC7B,uCAAuC;IACvC,iBAAiB,CAAC,EAAE,uBAAuB,CAAC;IAC5C,6BAA6B;IAC7B,UAAU,CAAC,EAAE,SAAS,CAAC;IACvB,oCAAoC;IACpC,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,mCAAmC;IACnC,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,uCAAuC;IACvC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,gCAAgC;IAChC,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAClC,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,wBAAwB,CAAC,EAAE,MAAM,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,uCAAuC;IACvC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,0CAA0C;IAC1C,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,6BAA6B;IAC7B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,uCAAuC;IACvC,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,4BAA4B;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACrC,gCAAgC;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,+BAA+B;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yCAAyC;IACzC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,kCAAkC;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,+DAA+D;IAC/D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,iDAAiD;IACjD,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,kCAAkC;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,uBAAuB;IACvB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,yCAAyC;IACzC,iBAAiB,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACpC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IACxB,uBAAuB;IACvB,MAAM,CAAC,EAAE,cAAc,EAAE,GAAG,IAAI,CAAC;IACjC,uBAAuB;IACvB,KAAK,CAAC,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC;IAC7B,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACxB,qBAAqB;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,oBAAoB;IACpB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB;;;OAGG;IACH,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB;IACpB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,iBAAiB;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,0BAA0B;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,qBAAqB;IACrB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,gDAAgD;IAChD,IAAI,EAAE,UAAU,GAAG,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,4BAA4B;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,+BAA+B;IAC/B,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,yCAAyC;IACzC,SAAS,CAAC,EAAE,gBAAgB,GAAG,MAAM,GAAG,IAAI,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,2BAA2B;IAC3B,IAAI,IAAI,MAAM,CAAC;IACf,mCAAmC;IACnC,kBAAkB,CAAC,IAAI,MAAM,EAAE,CAAC;IAChC,6BAA6B;IAC7B,UAAU,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,2BAA2B;IAC3B,QAAQ,CAAC,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClC,gCAAgC;IAChC,YAAY,CACX,SAAS,EAAE,UAAU,GAAG,MAAM,EAC9B,QAAQ,CAAC,EAAE,MAAM,GACf,OAAO,CACP;QACA,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;KAClB,GACD,MAAM,CACR,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kreuzberg/wasm",
3
- "version": "4.0.0-rc.23",
3
+ "version": "4.0.0-rc.24",
4
4
  "type": "module",
5
5
  "packageManager": "pnpm@10.17.0",
6
6
  "description": "Kreuzberg document intelligence - WebAssembly bindings",
@@ -13,81 +13,36 @@
13
13
  "bugs": {
14
14
  "url": "https://github.com/kreuzberg-dev/kreuzberg/issues"
15
15
  },
16
- "main": "dist/index.cjs",
16
+ "main": "dist/index.js",
17
17
  "module": "dist/index.js",
18
18
  "types": "dist/index.d.ts",
19
19
  "exports": {
20
20
  ".": {
21
- "browser": {
22
- "import": {
23
- "types": "./dist/index.d.ts",
24
- "default": "./dist/index.js"
25
- },
26
- "require": {
27
- "types": "./dist/index.d.cts",
28
- "default": "./dist/index.cjs"
29
- }
30
- },
31
- "node": {
32
- "import": {
33
- "types": "./dist/index.d.ts",
34
- "default": "./dist/index.js"
35
- },
36
- "require": {
37
- "types": "./dist/index.d.cts",
38
- "default": "./dist/index.cjs"
39
- }
40
- },
41
- "import": {
42
- "types": "./dist/index.d.ts",
43
- "default": "./dist/index.js"
44
- },
45
- "require": {
46
- "types": "./dist/index.d.cts",
47
- "default": "./dist/index.cjs"
48
- }
21
+ "types": "./dist/index.d.ts",
22
+ "import": "./dist/index.js",
23
+ "default": "./dist/index.js"
49
24
  },
50
25
  "./dist/pkg/*": "./dist/pkg/*",
51
26
  "./dist/kreuzberg_wasm.js": "./dist/kreuzberg_wasm.js",
52
27
  "./runtime": {
53
- "import": {
54
- "types": "./dist/runtime.d.ts",
55
- "default": "./dist/runtime.js"
56
- },
57
- "require": {
58
- "types": "./dist/runtime.d.cts",
59
- "default": "./dist/runtime.cjs"
60
- }
28
+ "types": "./dist/runtime.d.ts",
29
+ "import": "./dist/runtime.js",
30
+ "default": "./dist/runtime.js"
61
31
  },
62
32
  "./adapters/wasm-adapter": {
63
- "import": {
64
- "types": "./dist/adapters/wasm-adapter.d.ts",
65
- "default": "./dist/adapters/wasm-adapter.js"
66
- },
67
- "require": {
68
- "types": "./dist/adapters/wasm-adapter.d.cts",
69
- "default": "./dist/adapters/wasm-adapter.cjs"
70
- }
33
+ "types": "./dist/adapters/wasm-adapter.d.ts",
34
+ "import": "./dist/adapters/wasm-adapter.js",
35
+ "default": "./dist/adapters/wasm-adapter.js"
71
36
  },
72
37
  "./ocr/registry": {
73
- "import": {
74
- "types": "./dist/ocr/registry.d.ts",
75
- "default": "./dist/ocr/registry.js"
76
- },
77
- "require": {
78
- "types": "./dist/ocr/registry.d.cts",
79
- "default": "./dist/ocr/registry.cjs"
80
- }
38
+ "types": "./dist/ocr/registry.d.ts",
39
+ "import": "./dist/ocr/registry.js",
40
+ "default": "./dist/ocr/registry.js"
81
41
  },
82
42
  "./ocr/tesseract-wasm-backend": {
83
- "import": {
84
- "types": "./dist/ocr/tesseract-wasm-backend.d.ts",
85
- "default": "./dist/ocr/tesseract-wasm-backend.js"
86
- },
87
- "require": {
88
- "types": "./dist/ocr/tesseract-wasm-backend.d.cts",
89
- "default": "./dist/ocr/tesseract-wasm-backend.cjs"
90
- }
43
+ "types": "./dist/ocr/tesseract-wasm-backend.d.ts",
44
+ "import": "./dist/ocr/tesseract-wasm-backend.js",
45
+ "default": "./dist/ocr/tesseract-wasm-backend.js"
91
46
  }
92
47
  },
93
48
  "repository": {
@@ -134,9 +89,11 @@
134
89
  "build:wasm:bundler": "wasm-pack build --target bundler --out-dir pkg --release",
135
90
  "build:wasm:nodejs": "wasm-pack build --target nodejs --out-dir pkg --release",
136
91
  "build:wasm:deno": "wasm-pack build --target deno --out-dir pkg --release",
137
- "build:ts": "tsup && node scripts/fix-type-exports.js",
92
+ "build:ts": "tsup && tsc --emitDeclarationOnly && node scripts/fix-type-exports.js",
93
+ "build:ts:minify": "tsup --minify && tsc --emitDeclarationOnly && node scripts/fix-type-exports.js",
138
94
  "copy:pkg": "node scripts/copy-pkg.js",
139
95
  "build": "npm run build:wasm:nodejs && npm run build:ts && npm run copy:pkg",
96
+ "build:ci": "npm run build:wasm:nodejs && npm run build:ts:minify && npm run copy:pkg",
140
97
  "build:all": "npm run build:wasm:web && npm run build:wasm:bundler && npm run build:wasm:nodejs && npm run build:wasm:deno && npm run build:ts && npm run copy:pkg",
141
98
  "typecheck": "tsc --noEmit",
142
99
  "lint": "biome check typescript && oxlint typescript",
@@ -1,245 +0,0 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
17
- };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
-
20
- // typescript/adapters/wasm-adapter.ts
21
- var wasm_adapter_exports = {};
22
- __export(wasm_adapter_exports, {
23
- configToJS: () => configToJS,
24
- fileToUint8Array: () => fileToUint8Array,
25
- isValidExtractionResult: () => isValidExtractionResult,
26
- jsToExtractionResult: () => jsToExtractionResult,
27
- wrapWasmError: () => wrapWasmError
28
- });
29
- module.exports = __toCommonJS(wasm_adapter_exports);
30
- var MAX_FILE_SIZE = 512 * 1024 * 1024;
31
- function isNumberOrNull(value) {
32
- return typeof value === "number" || value === null;
33
- }
34
- function isStringOrNull(value) {
35
- return typeof value === "string" || value === null;
36
- }
37
- function isBoolean(value) {
38
- return typeof value === "boolean";
39
- }
40
- async function fileToUint8Array(file) {
41
- try {
42
- if (file.size > MAX_FILE_SIZE) {
43
- throw new Error(
44
- `File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`
45
- );
46
- }
47
- const arrayBuffer = await file.arrayBuffer();
48
- return new Uint8Array(arrayBuffer);
49
- } catch (error) {
50
- throw new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);
51
- }
52
- }
53
- function configToJS(config) {
54
- if (!config) {
55
- return {};
56
- }
57
- const normalized = {};
58
- const normalizeValue = (value) => {
59
- if (value === null || value === void 0) {
60
- return null;
61
- }
62
- if (typeof value === "object") {
63
- if (Array.isArray(value)) {
64
- return value.map(normalizeValue);
65
- }
66
- const obj = value;
67
- const normalized2 = {};
68
- for (const [key, val] of Object.entries(obj)) {
69
- const normalizedVal = normalizeValue(val);
70
- if (normalizedVal !== null && normalizedVal !== void 0) {
71
- normalized2[key] = normalizedVal;
72
- }
73
- }
74
- return Object.keys(normalized2).length > 0 ? normalized2 : null;
75
- }
76
- return value;
77
- };
78
- for (const [key, value] of Object.entries(config)) {
79
- const normalizedValue = normalizeValue(value);
80
- if (normalizedValue !== null && normalizedValue !== void 0) {
81
- normalized[key] = normalizedValue;
82
- }
83
- }
84
- return normalized;
85
- }
86
- function jsToExtractionResult(jsValue) {
87
- if (!jsValue || typeof jsValue !== "object") {
88
- throw new Error("Invalid extraction result: value is not an object");
89
- }
90
- const result = jsValue;
91
- const mimeType = typeof result.mimeType === "string" ? result.mimeType : typeof result.mime_type === "string" ? result.mime_type : null;
92
- if (typeof result.content !== "string") {
93
- throw new Error("Invalid extraction result: missing or invalid content");
94
- }
95
- if (typeof mimeType !== "string") {
96
- throw new Error("Invalid extraction result: missing or invalid mimeType");
97
- }
98
- if (!result.metadata || typeof result.metadata !== "object") {
99
- throw new Error("Invalid extraction result: missing or invalid metadata");
100
- }
101
- const tables = [];
102
- if (Array.isArray(result.tables)) {
103
- for (const table of result.tables) {
104
- if (table && typeof table === "object") {
105
- const t = table;
106
- if (Array.isArray(t.cells) && t.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === "string")) && typeof t.markdown === "string" && typeof t.pageNumber === "number") {
107
- tables.push({
108
- cells: t.cells,
109
- markdown: t.markdown,
110
- pageNumber: t.pageNumber
111
- });
112
- }
113
- }
114
- }
115
- }
116
- const chunks = Array.isArray(result.chunks) ? result.chunks.map((chunk) => {
117
- if (!chunk || typeof chunk !== "object") {
118
- throw new Error("Invalid chunk structure");
119
- }
120
- const c = chunk;
121
- if (typeof c.content !== "string") {
122
- throw new Error("Invalid chunk: missing content");
123
- }
124
- if (!c.metadata || typeof c.metadata !== "object") {
125
- throw new Error("Invalid chunk: missing metadata");
126
- }
127
- const metadata = c.metadata;
128
- let embedding = null;
129
- if (Array.isArray(c.embedding)) {
130
- if (!c.embedding.every((item) => typeof item === "number")) {
131
- throw new Error("Invalid chunk: embedding must contain only numbers");
132
- }
133
- embedding = c.embedding;
134
- }
135
- if (typeof metadata.charStart !== "number") {
136
- throw new Error("Invalid chunk metadata: charStart must be a number");
137
- }
138
- if (typeof metadata.charEnd !== "number") {
139
- throw new Error("Invalid chunk metadata: charEnd must be a number");
140
- }
141
- if (!isNumberOrNull(metadata.tokenCount)) {
142
- throw new Error("Invalid chunk metadata: tokenCount must be a number or null");
143
- }
144
- if (typeof metadata.chunkIndex !== "number") {
145
- throw new Error("Invalid chunk metadata: chunkIndex must be a number");
146
- }
147
- if (typeof metadata.totalChunks !== "number") {
148
- throw new Error("Invalid chunk metadata: totalChunks must be a number");
149
- }
150
- return {
151
- content: c.content,
152
- embedding,
153
- metadata: {
154
- charStart: metadata.charStart,
155
- charEnd: metadata.charEnd,
156
- tokenCount: metadata.tokenCount,
157
- chunkIndex: metadata.chunkIndex,
158
- totalChunks: metadata.totalChunks
159
- }
160
- };
161
- }) : null;
162
- const images = Array.isArray(result.images) ? result.images.map((image) => {
163
- if (!image || typeof image !== "object") {
164
- throw new Error("Invalid image structure");
165
- }
166
- const img = image;
167
- if (!(img.data instanceof Uint8Array)) {
168
- throw new Error("Invalid image: data must be Uint8Array");
169
- }
170
- if (typeof img.format !== "string") {
171
- throw new Error("Invalid image: missing format");
172
- }
173
- if (typeof img.imageIndex !== "number") {
174
- throw new Error("Invalid image: imageIndex must be a number");
175
- }
176
- if (!isNumberOrNull(img.pageNumber)) {
177
- throw new Error("Invalid image: pageNumber must be a number or null");
178
- }
179
- if (!isNumberOrNull(img.width)) {
180
- throw new Error("Invalid image: width must be a number or null");
181
- }
182
- if (!isNumberOrNull(img.height)) {
183
- throw new Error("Invalid image: height must be a number or null");
184
- }
185
- if (!isNumberOrNull(img.bitsPerComponent)) {
186
- throw new Error("Invalid image: bitsPerComponent must be a number or null");
187
- }
188
- if (!isBoolean(img.isMask)) {
189
- throw new Error("Invalid image: isMask must be a boolean");
190
- }
191
- if (!isStringOrNull(img.colorspace)) {
192
- throw new Error("Invalid image: colorspace must be a string or null");
193
- }
194
- if (!isStringOrNull(img.description)) {
195
- throw new Error("Invalid image: description must be a string or null");
196
- }
197
- return {
198
- data: img.data,
199
- format: img.format,
200
- imageIndex: img.imageIndex,
201
- pageNumber: img.pageNumber,
202
- width: img.width,
203
- height: img.height,
204
- colorspace: img.colorspace,
205
- bitsPerComponent: img.bitsPerComponent,
206
- isMask: img.isMask,
207
- description: img.description,
208
- ocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null
209
- };
210
- }) : null;
211
- let detectedLanguages = null;
212
- const detectedLanguagesRaw = Array.isArray(result.detectedLanguages) ? result.detectedLanguages : result.detected_languages;
213
- if (Array.isArray(detectedLanguagesRaw)) {
214
- if (!detectedLanguagesRaw.every((lang) => typeof lang === "string")) {
215
- throw new Error("Invalid result: detectedLanguages must contain only strings");
216
- }
217
- detectedLanguages = detectedLanguagesRaw;
218
- }
219
- return {
220
- content: result.content,
221
- mimeType,
222
- metadata: result.metadata ?? {},
223
- tables,
224
- detectedLanguages,
225
- chunks,
226
- images
227
- };
228
- }
229
- function wrapWasmError(error, context) {
230
- if (error instanceof Error) {
231
- return new Error(`Error ${context}: ${error.message}`, {
232
- cause: error
233
- });
234
- }
235
- const message = String(error);
236
- return new Error(`Error ${context}: ${message}`);
237
- }
238
- function isValidExtractionResult(value) {
239
- if (!value || typeof value !== "object") {
240
- return false;
241
- }
242
- const obj = value;
243
- return typeof obj.content === "string" && (typeof obj.mimeType === "string" || typeof obj.mime_type === "string") && obj.metadata !== null && typeof obj.metadata === "object" && Array.isArray(obj.tables);
244
- }
245
- //# sourceMappingURL=wasm-adapter.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../typescript/adapters/wasm-adapter.ts"],"sourcesContent":["/**\n * WASM Type Adapter\n *\n * This module provides type adapters for converting between JavaScript/TypeScript\n * types and WASM-compatible types, handling File/Blob conversions, config normalization,\n * and result parsing.\n *\n * @example File Conversion\n * ```typescript\n * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const file = event.target.files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, file.type);\n * ```\n *\n * @example Config Normalization\n * ```typescript\n * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';\n *\n * const config = {\n * ocr: { backend: 'tesseract', language: 'eng' },\n * chunking: { maxChars: 1000 }\n * };\n * const normalized = configToJS(config);\n * ```\n */\n\nimport type { Chunk, ExtractedImage, ExtractionConfig, ExtractionResult, Metadata, Table } from \"../types.js\";\n\n/**\n * Maximum file size for processing (512 MB)\n *\n * @internal\n */\nconst MAX_FILE_SIZE = 512 * 1024 * 1024;\n\n/**\n * Type predicate to validate numeric value or null\n *\n * @internal\n */\nfunction isNumberOrNull(value: unknown): value is number | null {\n\treturn typeof value === \"number\" || value === null;\n}\n\n/**\n * Type predicate to validate string value or null\n *\n * @internal\n */\nfunction isStringOrNull(value: unknown): value is string | null {\n\treturn typeof value === \"string\" || value === null;\n}\n\n/**\n * Type predicate to validate boolean value\n *\n * @internal\n */\nfunction isBoolean(value: unknown): value is boolean {\n\treturn typeof value === \"boolean\";\n}\n\n/**\n * Convert a File or Blob to Uint8Array\n *\n * Handles both browser File API and server-side Blob-like objects,\n * providing a unified interface for reading binary data.\n *\n * @param file - The File or Blob to convert\n * @returns Promise resolving to the byte array\n * @throws {Error} If the file cannot be read or exceeds size limit\n *\n * @example\n * ```typescript\n * const file = document.getElementById('input').files[0];\n * const bytes = await fileToUint8Array(file);\n * const result = await extractBytes(bytes, 'application/pdf');\n * ```\n */\nexport async function fileToUint8Array(file: File | Blob): Promise<Uint8Array> {\n\ttry {\n\t\tif (file.size > MAX_FILE_SIZE) {\n\t\t\tthrow new Error(\n\t\t\t\t`File size (${file.size} bytes) exceeds maximum (${MAX_FILE_SIZE} bytes). Maximum file size is 512 MB.`,\n\t\t\t);\n\t\t}\n\n\t\tconst arrayBuffer = await file.arrayBuffer();\n\t\treturn new Uint8Array(arrayBuffer);\n\t} catch (error) {\n\t\tthrow new Error(`Failed to read file: ${error instanceof Error ? error.message : String(error)}`);\n\t}\n}\n\n/**\n * Normalize ExtractionConfig for WASM processing\n *\n * Converts TypeScript configuration objects to a WASM-compatible format,\n * handling null values, undefined properties, and nested structures.\n *\n * @param config - The extraction configuration or null\n * @returns Normalized configuration object suitable for WASM\n *\n * @example\n * ```typescript\n * const config: ExtractionConfig = {\n * ocr: { backend: 'tesseract' },\n * chunking: { maxChars: 1000 }\n * };\n * const wasmConfig = configToJS(config);\n * ```\n */\nexport function configToJS(config: ExtractionConfig | null): Record<string, unknown> {\n\tif (!config) {\n\t\treturn {};\n\t}\n\n\tconst normalized: Record<string, unknown> = {};\n\n\tconst normalizeValue = (value: unknown): unknown => {\n\t\tif (value === null || value === undefined) {\n\t\t\treturn null;\n\t\t}\n\t\tif (typeof value === \"object\") {\n\t\t\tif (Array.isArray(value)) {\n\t\t\t\treturn value.map(normalizeValue);\n\t\t\t}\n\t\t\tconst obj = value as Record<string, unknown>;\n\t\t\tconst normalized: Record<string, unknown> = {};\n\t\t\tfor (const [key, val] of Object.entries(obj)) {\n\t\t\t\tconst normalizedVal = normalizeValue(val);\n\t\t\t\tif (normalizedVal !== null && normalizedVal !== undefined) {\n\t\t\t\t\tnormalized[key] = normalizedVal;\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn Object.keys(normalized).length > 0 ? normalized : null;\n\t\t}\n\t\treturn value;\n\t};\n\n\tfor (const [key, value] of Object.entries(config)) {\n\t\tconst normalizedValue = normalizeValue(value);\n\t\tif (normalizedValue !== null && normalizedValue !== undefined) {\n\t\t\tnormalized[key] = normalizedValue;\n\t\t}\n\t}\n\n\treturn normalized;\n}\n\n/**\n * Parse WASM extraction result and convert to TypeScript type\n *\n * Handles conversion of WASM-returned objects to proper ExtractionResult types,\n * including proper array conversions and type assertions for tables, chunks, and images.\n *\n * @param jsValue - The raw WASM result value\n * @returns Properly typed ExtractionResult\n * @throws {Error} If the result structure is invalid\n *\n * @example\n * ```typescript\n * const wasmResult = await wasmExtract(bytes, mimeType, config);\n * const result = jsToExtractionResult(wasmResult);\n * console.log(result.content);\n * ```\n */\nexport function jsToExtractionResult(jsValue: unknown): ExtractionResult {\n\tif (!jsValue || typeof jsValue !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: value is not an object\");\n\t}\n\n\tconst result = jsValue as Record<string, unknown>;\n\tconst mimeType =\n\t\ttypeof result.mimeType === \"string\"\n\t\t\t? result.mimeType\n\t\t\t: typeof result.mime_type === \"string\"\n\t\t\t\t? result.mime_type\n\t\t\t\t: null;\n\n\tif (typeof result.content !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid content\");\n\t}\n\tif (typeof mimeType !== \"string\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid mimeType\");\n\t}\n\tif (!result.metadata || typeof result.metadata !== \"object\") {\n\t\tthrow new Error(\"Invalid extraction result: missing or invalid metadata\");\n\t}\n\n\tconst tables: Table[] = [];\n\tif (Array.isArray(result.tables)) {\n\t\tfor (const table of result.tables) {\n\t\t\tif (table && typeof table === \"object\") {\n\t\t\t\tconst t = table as Record<string, unknown>;\n\t\t\t\tif (\n\t\t\t\t\tArray.isArray(t.cells) &&\n\t\t\t\t\tt.cells.every((row) => Array.isArray(row) && row.every((cell) => typeof cell === \"string\")) &&\n\t\t\t\t\ttypeof t.markdown === \"string\" &&\n\t\t\t\t\ttypeof t.pageNumber === \"number\"\n\t\t\t\t) {\n\t\t\t\t\ttables.push({\n\t\t\t\t\t\tcells: t.cells as string[][],\n\t\t\t\t\t\tmarkdown: t.markdown,\n\t\t\t\t\t\tpageNumber: t.pageNumber,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tconst chunks: Chunk[] | null = Array.isArray(result.chunks)\n\t\t? result.chunks.map((chunk) => {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk structure\");\n\t\t\t\t}\n\t\t\t\tconst c = chunk as Record<string, unknown>;\n\t\t\t\tif (typeof c.content !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing content\");\n\t\t\t\t}\n\t\t\t\tif (!c.metadata || typeof c.metadata !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk: missing metadata\");\n\t\t\t\t}\n\t\t\t\tconst metadata = c.metadata as Record<string, unknown>;\n\n\t\t\t\tlet embedding: number[] | null = null;\n\t\t\t\tif (Array.isArray(c.embedding)) {\n\t\t\t\t\tif (!c.embedding.every((item) => typeof item === \"number\")) {\n\t\t\t\t\t\tthrow new Error(\"Invalid chunk: embedding must contain only numbers\");\n\t\t\t\t\t}\n\t\t\t\t\tembedding = c.embedding;\n\t\t\t\t}\n\n\t\t\t\tif (typeof metadata.charStart !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charStart must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.charEnd !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: charEnd must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(metadata.tokenCount)) {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: tokenCount must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.chunkIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: chunkIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (typeof metadata.totalChunks !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid chunk metadata: totalChunks must be a number\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tcontent: c.content,\n\t\t\t\t\tembedding,\n\t\t\t\t\tmetadata: {\n\t\t\t\t\t\tcharStart: metadata.charStart,\n\t\t\t\t\t\tcharEnd: metadata.charEnd,\n\t\t\t\t\t\ttokenCount: metadata.tokenCount,\n\t\t\t\t\t\tchunkIndex: metadata.chunkIndex,\n\t\t\t\t\t\ttotalChunks: metadata.totalChunks,\n\t\t\t\t\t},\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tconst images: ExtractedImage[] | null = Array.isArray(result.images)\n\t\t? result.images.map((image) => {\n\t\t\t\tif (!image || typeof image !== \"object\") {\n\t\t\t\t\tthrow new Error(\"Invalid image structure\");\n\t\t\t\t}\n\t\t\t\tconst img = image as Record<string, unknown>;\n\t\t\t\tif (!(img.data instanceof Uint8Array)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: data must be Uint8Array\");\n\t\t\t\t}\n\t\t\t\tif (typeof img.format !== \"string\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: missing format\");\n\t\t\t\t}\n\n\t\t\t\tif (typeof img.imageIndex !== \"number\") {\n\t\t\t\t\tthrow new Error(\"Invalid image: imageIndex must be a number\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.pageNumber)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: pageNumber must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.width)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: width must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.height)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: height must be a number or null\");\n\t\t\t\t}\n\t\t\t\tif (!isNumberOrNull(img.bitsPerComponent)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: bitsPerComponent must be a number or null\");\n\t\t\t\t}\n\n\t\t\t\tif (!isBoolean(img.isMask)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: isMask must be a boolean\");\n\t\t\t\t}\n\n\t\t\t\tif (!isStringOrNull(img.colorspace)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: colorspace must be a string or null\");\n\t\t\t\t}\n\t\t\t\tif (!isStringOrNull(img.description)) {\n\t\t\t\t\tthrow new Error(\"Invalid image: description must be a string or null\");\n\t\t\t\t}\n\n\t\t\t\treturn {\n\t\t\t\t\tdata: img.data,\n\t\t\t\t\tformat: img.format,\n\t\t\t\t\timageIndex: img.imageIndex,\n\t\t\t\t\tpageNumber: img.pageNumber,\n\t\t\t\t\twidth: img.width,\n\t\t\t\t\theight: img.height,\n\t\t\t\t\tcolorspace: img.colorspace,\n\t\t\t\t\tbitsPerComponent: img.bitsPerComponent,\n\t\t\t\t\tisMask: img.isMask,\n\t\t\t\t\tdescription: img.description,\n\t\t\t\t\tocrResult: img.ocrResult ? jsToExtractionResult(img.ocrResult) : null,\n\t\t\t\t};\n\t\t\t})\n\t\t: null;\n\n\tlet detectedLanguages: string[] | null = null;\n\tconst detectedLanguagesRaw = Array.isArray(result.detectedLanguages)\n\t\t? result.detectedLanguages\n\t\t: result.detected_languages;\n\tif (Array.isArray(detectedLanguagesRaw)) {\n\t\tif (!detectedLanguagesRaw.every((lang) => typeof lang === \"string\")) {\n\t\t\tthrow new Error(\"Invalid result: detectedLanguages must contain only strings\");\n\t\t}\n\t\tdetectedLanguages = detectedLanguagesRaw;\n\t}\n\n\treturn {\n\t\tcontent: result.content,\n\t\tmimeType,\n\t\tmetadata: (result.metadata ?? {}) as Metadata,\n\t\ttables,\n\t\tdetectedLanguages,\n\t\tchunks,\n\t\timages,\n\t};\n}\n\n/**\n * Wrap and format WASM errors with context\n *\n * Converts WASM error messages to JavaScript Error objects with proper context\n * and stack trace information when available.\n *\n * @param error - The error from WASM\n * @param context - Additional context about what operation failed\n * @returns A formatted Error object\n *\n * @internal\n *\n * @example\n * ```typescript\n * try {\n * await wasmExtract(bytes, mimeType);\n * } catch (error) {\n * throw wrapWasmError(error, 'extracting document');\n * }\n * ```\n */\nexport function wrapWasmError(error: unknown, context: string): Error {\n\tif (error instanceof Error) {\n\t\treturn new Error(`Error ${context}: ${error.message}`, {\n\t\t\tcause: error,\n\t\t});\n\t}\n\n\tconst message = String(error);\n\treturn new Error(`Error ${context}: ${message}`);\n}\n\n/**\n * Validate that a WASM-returned value conforms to ExtractionResult structure\n *\n * Performs structural validation without full type checking,\n * useful for runtime validation of WASM output.\n *\n * @param value - The value to validate\n * @returns True if value appears to be a valid ExtractionResult\n *\n * @internal\n */\nexport function isValidExtractionResult(value: unknown): value is ExtractionResult {\n\tif (!value || typeof value !== \"object\") {\n\t\treturn false;\n\t}\n\n\tconst obj = value as Record<string, unknown>;\n\treturn (\n\t\ttypeof obj.content === \"string\" &&\n\t\t(typeof obj.mimeType === \"string\" || typeof obj.mime_type === \"string\") &&\n\t\tobj.metadata !== null &&\n\t\ttypeof obj.metadata === \"object\" &&\n\t\tArray.isArray(obj.tables)\n\t);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAmCA,IAAM,gBAAgB,MAAM,OAAO;AAOnC,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,eAAe,OAAwC;AAC/D,SAAO,OAAO,UAAU,YAAY,UAAU;AAC/C;AAOA,SAAS,UAAU,OAAkC;AACpD,SAAO,OAAO,UAAU;AACzB;AAmBA,eAAsB,iBAAiB,MAAwC;AAC9E,MAAI;AACH,QAAI,KAAK,OAAO,eAAe;AAC9B,YAAM,IAAI;AAAA,QACT,cAAc,KAAK,IAAI,4BAA4B,aAAa;AAAA,MACjE;AAAA,IACD;AAEA,UAAM,cAAc,MAAM,KAAK,YAAY;AAC3C,WAAO,IAAI,WAAW,WAAW;AAAA,EAClC,SAAS,OAAO;AACf,UAAM,IAAI,MAAM,wBAAwB,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAAA,EACjG;AACD;AAoBO,SAAS,WAAW,QAA0D;AACpF,MAAI,CAAC,QAAQ;AACZ,WAAO,CAAC;AAAA,EACT;AAEA,QAAM,aAAsC,CAAC;AAE7C,QAAM,iBAAiB,CAAC,UAA4B;AACnD,QAAI,UAAU,QAAQ,UAAU,QAAW;AAC1C,aAAO;AAAA,IACR;AACA,QAAI,OAAO,UAAU,UAAU;AAC9B,UAAI,MAAM,QAAQ,KAAK,GAAG;AACzB,eAAO,MAAM,IAAI,cAAc;AAAA,MAChC;AACA,YAAM,MAAM;AACZ,YAAMA,cAAsC,CAAC;AAC7C,iBAAW,CAAC,KAAK,GAAG,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC7C,cAAM,gBAAgB,eAAe,GAAG;AACxC,YAAI,kBAAkB,QAAQ,kBAAkB,QAAW;AAC1D,UAAAA,YAAW,GAAG,IAAI;AAAA,QACnB;AAAA,MACD;AACA,aAAO,OAAO,KAAKA,WAAU,EAAE,SAAS,IAAIA,cAAa;AAAA,IAC1D;AACA,WAAO;AAAA,EACR;AAEA,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AAClD,UAAM,kBAAkB,eAAe,KAAK;AAC5C,QAAI,oBAAoB,QAAQ,oBAAoB,QAAW;AAC9D,iBAAW,GAAG,IAAI;AAAA,IACnB;AAAA,EACD;AAEA,SAAO;AACR;AAmBO,SAAS,qBAAqB,SAAoC;AACxE,MAAI,CAAC,WAAW,OAAO,YAAY,UAAU;AAC5C,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACpE;AAEA,QAAM,SAAS;AACf,QAAM,WACL,OAAO,OAAO,aAAa,WACxB,OAAO,WACP,OAAO,OAAO,cAAc,WAC3B,OAAO,YACP;AAEL,MAAI,OAAO,OAAO,YAAY,UAAU;AACvC,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACxE;AACA,MAAI,OAAO,aAAa,UAAU;AACjC,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AACA,MAAI,CAAC,OAAO,YAAY,OAAO,OAAO,aAAa,UAAU;AAC5D,UAAM,IAAI,MAAM,wDAAwD;AAAA,EACzE;AAEA,QAAM,SAAkB,CAAC;AACzB,MAAI,MAAM,QAAQ,OAAO,MAAM,GAAG;AACjC,eAAW,SAAS,OAAO,QAAQ;AAClC,UAAI,SAAS,OAAO,UAAU,UAAU;AACvC,cAAM,IAAI;AACV,YACC,MAAM,QAAQ,EAAE,KAAK,KACrB,EAAE,MAAM,MAAM,CAAC,QAAQ,MAAM,QAAQ,GAAG,KAAK,IAAI,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,CAAC,KAC1F,OAAO,EAAE,aAAa,YACtB,OAAO,EAAE,eAAe,UACvB;AACD,iBAAO,KAAK;AAAA,YACX,OAAO,EAAE;AAAA,YACT,UAAU,EAAE;AAAA,YACZ,YAAY,EAAE;AAAA,UACf,CAAC;AAAA,QACF;AAAA,MACD;AAAA,IACD;AAAA,EACD;AAEA,QAAM,SAAyB,MAAM,QAAQ,OAAO,MAAM,IACvD,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,YAAY,UAAU;AAClC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IACjD;AACA,QAAI,CAAC,EAAE,YAAY,OAAO,EAAE,aAAa,UAAU;AAClD,YAAM,IAAI,MAAM,iCAAiC;AAAA,IAClD;AACA,UAAM,WAAW,EAAE;AAEnB,QAAI,YAA6B;AACjC,QAAI,MAAM,QAAQ,EAAE,SAAS,GAAG;AAC/B,UAAI,CAAC,EAAE,UAAU,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC3D,cAAM,IAAI,MAAM,oDAAoD;AAAA,MACrE;AACA,kBAAY,EAAE;AAAA,IACf;AAEA,QAAI,OAAO,SAAS,cAAc,UAAU;AAC3C,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,OAAO,SAAS,YAAY,UAAU;AACzC,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACnE;AACA,QAAI,CAAC,eAAe,SAAS,UAAU,GAAG;AACzC,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,QAAI,OAAO,SAAS,eAAe,UAAU;AAC5C,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AACA,QAAI,OAAO,SAAS,gBAAgB,UAAU;AAC7C,YAAM,IAAI,MAAM,sDAAsD;AAAA,IACvE;AAEA,WAAO;AAAA,MACN,SAAS,EAAE;AAAA,MACX;AAAA,MACA,UAAU;AAAA,QACT,WAAW,SAAS;AAAA,QACpB,SAAS,SAAS;AAAA,QAClB,YAAY,SAAS;AAAA,QACrB,YAAY,SAAS;AAAA,QACrB,aAAa,SAAS;AAAA,MACvB;AAAA,IACD;AAAA,EACD,CAAC,IACA;AAEH,QAAM,SAAkC,MAAM,QAAQ,OAAO,MAAM,IAChE,OAAO,OAAO,IAAI,CAAC,UAAU;AAC7B,QAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,YAAM,IAAI,MAAM,yBAAyB;AAAA,IAC1C;AACA,UAAM,MAAM;AACZ,QAAI,EAAE,IAAI,gBAAgB,aAAa;AACtC,YAAM,IAAI,MAAM,wCAAwC;AAAA,IACzD;AACA,QAAI,OAAO,IAAI,WAAW,UAAU;AACnC,YAAM,IAAI,MAAM,+BAA+B;AAAA,IAChD;AAEA,QAAI,OAAO,IAAI,eAAe,UAAU;AACvC,YAAM,IAAI,MAAM,4CAA4C;AAAA,IAC7D;AACA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,KAAK,GAAG;AAC/B,YAAM,IAAI,MAAM,+CAA+C;AAAA,IAChE;AACA,QAAI,CAAC,eAAe,IAAI,MAAM,GAAG;AAChC,YAAM,IAAI,MAAM,gDAAgD;AAAA,IACjE;AACA,QAAI,CAAC,eAAe,IAAI,gBAAgB,GAAG;AAC1C,YAAM,IAAI,MAAM,0DAA0D;AAAA,IAC3E;AAEA,QAAI,CAAC,UAAU,IAAI,MAAM,GAAG;AAC3B,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC1D;AAEA,QAAI,CAAC,eAAe,IAAI,UAAU,GAAG;AACpC,YAAM,IAAI,MAAM,oDAAoD;AAAA,IACrE;AACA,QAAI,CAAC,eAAe,IAAI,WAAW,GAAG;AACrC,YAAM,IAAI,MAAM,qDAAqD;AAAA,IACtE;AAEA,WAAO;AAAA,MACN,MAAM,IAAI;AAAA,MACV,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,YAAY,IAAI;AAAA,MAChB,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,YAAY,IAAI;AAAA,MAChB,kBAAkB,IAAI;AAAA,MACtB,QAAQ,IAAI;AAAA,MACZ,aAAa,IAAI;AAAA,MACjB,WAAW,IAAI,YAAY,qBAAqB,IAAI,SAAS,IAAI;AAAA,IAClE;AAAA,EACD,CAAC,IACA;AAEH,MAAI,oBAAqC;AACzC,QAAM,uBAAuB,MAAM,QAAQ,OAAO,iBAAiB,IAChE,OAAO,oBACP,OAAO;AACV,MAAI,MAAM,QAAQ,oBAAoB,GAAG;AACxC,QAAI,CAAC,qBAAqB,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AACpE,YAAM,IAAI,MAAM,6DAA6D;AAAA,IAC9E;AACA,wBAAoB;AAAA,EACrB;AAEA,SAAO;AAAA,IACN,SAAS,OAAO;AAAA,IAChB;AAAA,IACA,UAAW,OAAO,YAAY,CAAC;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD;AACD;AAuBO,SAAS,cAAc,OAAgB,SAAwB;AACrE,MAAI,iBAAiB,OAAO;AAC3B,WAAO,IAAI,MAAM,SAAS,OAAO,KAAK,MAAM,OAAO,IAAI;AAAA,MACtD,OAAO;AAAA,IACR,CAAC;AAAA,EACF;AAEA,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,IAAI,MAAM,SAAS,OAAO,KAAK,OAAO,EAAE;AAChD;AAaO,SAAS,wBAAwB,OAA2C;AAClF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACxC,WAAO;AAAA,EACR;AAEA,QAAM,MAAM;AACZ,SACC,OAAO,IAAI,YAAY,aACtB,OAAO,IAAI,aAAa,YAAY,OAAO,IAAI,cAAc,aAC9D,IAAI,aAAa,QACjB,OAAO,IAAI,aAAa,YACxB,MAAM,QAAQ,IAAI,MAAM;AAE1B;","names":["normalized"]}