@kreuzberg/wasm 4.0.0-rc.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +321 -800
  3. package/dist/adapters/wasm-adapter.d.ts +7 -10
  4. package/dist/adapters/wasm-adapter.d.ts.map +1 -0
  5. package/dist/adapters/wasm-adapter.js +53 -54
  6. package/dist/adapters/wasm-adapter.js.map +1 -1
  7. package/dist/index.d.ts +23 -67
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +1102 -104
  10. package/dist/index.js.map +1 -1
  11. package/dist/ocr/registry.d.ts +7 -10
  12. package/dist/ocr/registry.d.ts.map +1 -0
  13. package/dist/ocr/registry.js +9 -28
  14. package/dist/ocr/registry.js.map +1 -1
  15. package/dist/ocr/tesseract-wasm-backend.d.ts +3 -6
  16. package/dist/ocr/tesseract-wasm-backend.d.ts.map +1 -0
  17. package/dist/ocr/tesseract-wasm-backend.js +8 -83
  18. package/dist/ocr/tesseract-wasm-backend.js.map +1 -1
  19. package/dist/pdfium.js +77 -0
  20. package/dist/pkg/LICENSE +7 -0
  21. package/dist/pkg/README.md +503 -0
  22. package/dist/{kreuzberg_wasm.d.ts → pkg/kreuzberg_wasm.d.ts} +24 -12
  23. package/dist/{kreuzberg_wasm.js → pkg/kreuzberg_wasm.js} +224 -233
  24. package/dist/pkg/kreuzberg_wasm_bg.js +1871 -0
  25. package/dist/{kreuzberg_wasm_bg.wasm → pkg/kreuzberg_wasm_bg.wasm} +0 -0
  26. package/dist/{kreuzberg_wasm_bg.wasm.d.ts → pkg/kreuzberg_wasm_bg.wasm.d.ts} +10 -13
  27. package/dist/pkg/package.json +27 -0
  28. package/dist/plugin-registry.d.ts +246 -0
  29. package/dist/plugin-registry.d.ts.map +1 -0
  30. package/dist/runtime.d.ts +21 -22
  31. package/dist/runtime.d.ts.map +1 -0
  32. package/dist/runtime.js +21 -41
  33. package/dist/runtime.js.map +1 -1
  34. package/dist/types.d.ts +363 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/package.json +34 -51
  37. package/dist/adapters/wasm-adapter.d.mts +0 -121
  38. package/dist/adapters/wasm-adapter.mjs +0 -221
  39. package/dist/adapters/wasm-adapter.mjs.map +0 -1
  40. package/dist/index.d.mts +0 -466
  41. package/dist/index.mjs +0 -384
  42. package/dist/index.mjs.map +0 -1
  43. package/dist/kreuzberg_wasm.d.mts +0 -758
  44. package/dist/kreuzberg_wasm.mjs +0 -48
  45. package/dist/ocr/registry.d.mts +0 -102
  46. package/dist/ocr/registry.mjs +0 -70
  47. package/dist/ocr/registry.mjs.map +0 -1
  48. package/dist/ocr/tesseract-wasm-backend.d.mts +0 -257
  49. package/dist/ocr/tesseract-wasm-backend.mjs +0 -424
  50. package/dist/ocr/tesseract-wasm-backend.mjs.map +0 -1
  51. package/dist/runtime.d.mts +0 -256
  52. package/dist/runtime.mjs +0 -152
  53. package/dist/runtime.mjs.map +0 -1
  54. package/dist/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +0 -107
  55. package/dist/types-GJVIvbPy.d.mts +0 -221
  56. package/dist/types-GJVIvbPy.d.ts +0 -221
@@ -0,0 +1,363 @@
1
+ /**
2
+ * Type definitions for Kreuzberg WASM bindings
3
+ *
4
+ * These types are generated from the Rust core library and define
5
+ * the interface for extraction, configuration, and results.
6
+ */
7
+ /**
8
+ * Token reduction configuration
9
+ */
10
+ export interface TokenReductionConfig {
11
+ /** Token reduction mode */
12
+ mode?: string;
13
+ /** Preserve important words during reduction */
14
+ preserveImportantWords?: boolean;
15
+ }
16
+ /**
17
+ * Post-processor configuration
18
+ */
19
+ export interface PostProcessorConfig {
20
+ /** Whether post-processing is enabled */
21
+ enabled?: boolean;
22
+ /** List of enabled processors */
23
+ enabledProcessors?: string[];
24
+ /** List of disabled processors */
25
+ disabledProcessors?: string[];
26
+ }
27
+ /**
28
+ * Keyword extraction algorithm type
29
+ *
30
+ * Supported algorithms:
31
+ * - "yake": YAKE (Yet Another Keyword Extractor) - statistical approach
32
+ * - "rake": RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
33
+ */
34
+ export type KeywordAlgorithm = "yake" | "rake";
35
+ /**
36
+ * YAKE algorithm-specific parameters
37
+ */
38
+ export interface YakeParams {
39
+ /** Window size for co-occurrence analysis (default: 2) */
40
+ windowSize?: number;
41
+ }
42
+ /**
43
+ * RAKE algorithm-specific parameters
44
+ */
45
+ export interface RakeParams {
46
+ /** Minimum word length to consider (default: 1) */
47
+ minWordLength?: number;
48
+ /** Maximum words in a keyword phrase (default: 3) */
49
+ maxWordsPerPhrase?: number;
50
+ }
51
+ /**
52
+ * Keyword extraction configuration
53
+ *
54
+ * Controls how keywords are extracted from text, including algorithm selection,
55
+ * scoring thresholds, n-gram ranges, and language-specific settings.
56
+ */
57
+ export interface KeywordConfig {
58
+ /** Algorithm to use for extraction (default: "yake") */
59
+ algorithm?: KeywordAlgorithm;
60
+ /** Maximum number of keywords to extract (default: 10) */
61
+ maxKeywords?: number;
62
+ /** Minimum score threshold 0.0-1.0 (default: 0.0) */
63
+ minScore?: number;
64
+ /** N-gram range [min, max] for keyword extraction (default: [1, 3]) */
65
+ ngramRange?: [number, number];
66
+ /** Language code for stopword filtering (e.g., "en", "de", "fr") */
67
+ language?: string;
68
+ /** YAKE-specific tuning parameters */
69
+ yakeParams?: YakeParams;
70
+ /** RAKE-specific tuning parameters */
71
+ rakeParams?: RakeParams;
72
+ }
73
+ /**
74
+ * Extracted keyword with relevance metadata
75
+ *
76
+ * Represents a single keyword extracted from text along with its relevance score,
77
+ * the algorithm that extracted it, and optional position information.
78
+ */
79
+ export interface ExtractedKeyword {
80
+ /** The keyword text */
81
+ text: string;
82
+ /** Relevance score (higher is better, algorithm-specific range) */
83
+ score: number;
84
+ /** Algorithm that extracted this keyword */
85
+ algorithm: KeywordAlgorithm;
86
+ /** Optional positions where keyword appears in text (character offsets) */
87
+ positions?: number[];
88
+ }
89
+ /**
90
+ * Configuration for document extraction
91
+ */
92
+ export interface ExtractionConfig {
93
+ /** OCR configuration */
94
+ ocr?: OcrConfig;
95
+ /** Chunking configuration */
96
+ chunking?: ChunkingConfig;
97
+ /** Image extraction configuration */
98
+ images?: ImageExtractionConfig;
99
+ /** Page extraction configuration */
100
+ pages?: PageExtractionConfig;
101
+ /** Language detection configuration */
102
+ languageDetection?: LanguageDetectionConfig;
103
+ /** PDF extraction options */
104
+ pdfOptions?: PdfConfig;
105
+ /** Token reduction configuration */
106
+ tokenReduction?: TokenReductionConfig;
107
+ /** Post-processor configuration */
108
+ postprocessor?: PostProcessorConfig;
109
+ /** Keyword extraction configuration */
110
+ keywords?: KeywordConfig;
111
+ /** Whether to use caching */
112
+ useCache?: boolean;
113
+ /** Enable quality processing */
114
+ enableQualityProcessing?: boolean;
115
+ /** Force OCR even if text is available */
116
+ forceOcr?: boolean;
117
+ /** Maximum concurrent extractions */
118
+ maxConcurrentExtractions?: number;
119
+ }
120
+ /**
121
+ * Tesseract OCR configuration
122
+ */
123
+ export interface TesseractConfig {
124
+ /** Tesseract page segmentation mode */
125
+ psm?: number;
126
+ /** Enable table detection */
127
+ enableTableDetection?: boolean;
128
+ /** Character whitelist for recognition */
129
+ tesseditCharWhitelist?: string;
130
+ }
131
+ /**
132
+ * OCR configuration
133
+ */
134
+ export interface OcrConfig {
135
+ /** OCR backend to use */
136
+ backend?: string;
137
+ /** Language codes (ISO 639) */
138
+ languages?: string[];
139
+ /** Whether to perform OCR */
140
+ enabled?: boolean;
141
+ /** Tesseract-specific configuration */
142
+ tesseractConfig?: TesseractConfig;
143
+ /** Language code for OCR */
144
+ language?: string;
145
+ }
146
+ /**
147
+ * Chunking configuration
148
+ */
149
+ export interface ChunkingConfig {
150
+ /** Maximum characters per chunk */
151
+ maxChars?: number;
152
+ /** Overlap between chunks */
153
+ maxOverlap?: number;
154
+ }
155
+ /**
156
+ * Image extraction configuration
157
+ */
158
+ export interface ImageExtractionConfig {
159
+ /** Whether to extract images */
160
+ enabled?: boolean;
161
+ /** Target DPI for image extraction */
162
+ targetDpi?: number;
163
+ /** Maximum image dimension in pixels */
164
+ maxImageDimension?: number;
165
+ /** Automatically adjust DPI */
166
+ autoAdjustDpi?: boolean;
167
+ /** Minimum DPI threshold */
168
+ minDpi?: number;
169
+ /** Maximum DPI threshold */
170
+ maxDpi?: number;
171
+ }
172
+ /**
173
+ * PDF extraction configuration
174
+ */
175
+ export interface PdfConfig {
176
+ /** Whether to extract images from PDF */
177
+ extractImages?: boolean;
178
+ /** Passwords for encrypted PDFs */
179
+ passwords?: string[];
180
+ /** Whether to extract metadata */
181
+ extractMetadata?: boolean;
182
+ }
183
+ /**
184
+ * Page extraction configuration
185
+ */
186
+ export interface PageExtractionConfig {
187
+ /** Extract pages as separate array (ExtractionResult.pages) */
188
+ extractPages?: boolean;
189
+ /** Insert page markers in main content string */
190
+ insertPageMarkers?: boolean;
191
+ /** Page marker format (use {page_num} placeholder) */
192
+ markerFormat?: string;
193
+ }
194
+ /**
195
+ * Language detection configuration
196
+ */
197
+ export interface LanguageDetectionConfig {
198
+ /** Whether to detect languages */
199
+ enabled?: boolean;
200
+ }
201
+ /**
202
+ * Result of document extraction
203
+ */
204
+ export interface ExtractionResult {
205
+ /** Extracted text content */
206
+ content: string;
207
+ /** MIME type of the document */
208
+ mimeType: string;
209
+ /** Document metadata */
210
+ metadata: Metadata;
211
+ /** Extracted tables */
212
+ tables: Table[];
213
+ /** Detected languages (ISO 639 codes) */
214
+ detectedLanguages?: string[] | null;
215
+ /** Text chunks when chunking is enabled */
216
+ chunks?: Chunk[] | null;
217
+ /** Extracted images */
218
+ images?: ExtractedImage[] | null;
219
+ /** Per-page content */
220
+ pages?: PageContent[] | null;
221
+ /** Extracted keywords when keyword extraction is enabled */
222
+ keywords?: ExtractedKeyword[] | null;
223
+ }
224
+ /**
225
+ * Document metadata
226
+ */
227
+ export interface Metadata {
228
+ /** Document title */
229
+ title?: string;
230
+ /** Document subject or description */
231
+ subject?: string;
232
+ /** Document author(s) */
233
+ authors?: string[];
234
+ /** Keywords/tags */
235
+ keywords?: string[];
236
+ /** Primary language (ISO 639 code) */
237
+ language?: string;
238
+ /** Creation timestamp (ISO 8601 format) */
239
+ createdAt?: string;
240
+ /** Last modification timestamp (ISO 8601 format) */
241
+ modifiedAt?: string;
242
+ /** User who created the document */
243
+ creator?: string;
244
+ /** User who last modified the document */
245
+ lastModifiedBy?: string;
246
+ /** Number of pages/slides */
247
+ pageCount?: number;
248
+ /** Format-specific metadata */
249
+ formatMetadata?: unknown;
250
+ /**
251
+ * Additional fields may be added at runtime by postprocessors.
252
+ * Use bracket notation to safely access unexpected properties.
253
+ */
254
+ [key: string]: unknown;
255
+ }
256
+ /**
257
+ * Extracted table
258
+ */
259
+ export interface Table {
260
+ /** Table cells/rows */
261
+ cells?: string[][];
262
+ /** Table markdown representation */
263
+ markdown?: string;
264
+ /** Page number if available */
265
+ pageNumber?: number;
266
+ /** Table headers */
267
+ headers?: string[];
268
+ /** Table rows */
269
+ rows?: string[][];
270
+ }
271
+ /**
272
+ * Chunk metadata
273
+ */
274
+ export interface ChunkMetadata {
275
+ /** Character start position in original content */
276
+ charStart: number;
277
+ /** Character end position in original content */
278
+ charEnd: number;
279
+ /** Token count if available */
280
+ tokenCount: number | null;
281
+ /** Index of this chunk */
282
+ chunkIndex: number;
283
+ /** Total number of chunks */
284
+ totalChunks: number;
285
+ }
286
+ /**
287
+ * Text chunk from chunked content
288
+ */
289
+ export interface Chunk {
290
+ /** Chunk text content */
291
+ content: string;
292
+ /** Chunk metadata */
293
+ metadata?: ChunkMetadata;
294
+ /** Character position in original content (legacy) */
295
+ charIndex?: number;
296
+ /** Token count if available (legacy) */
297
+ tokenCount?: number;
298
+ /** Embedding vector if computed */
299
+ embedding?: number[] | null;
300
+ }
301
+ /**
302
+ * Extracted image from document
303
+ */
304
+ export interface ExtractedImage {
305
+ /** Image data as Uint8Array or base64 string */
306
+ data: Uint8Array | string;
307
+ /** Image format/MIME type */
308
+ format?: string;
309
+ /** MIME type of the image */
310
+ mimeType?: string;
311
+ /** Image index in document */
312
+ imageIndex?: number;
313
+ /** Page number if available */
314
+ pageNumber?: number | null;
315
+ /** Image width in pixels */
316
+ width?: number | null;
317
+ /** Image height in pixels */
318
+ height?: number | null;
319
+ /** Color space of the image */
320
+ colorspace?: string | null;
321
+ /** Bits per color component */
322
+ bitsPerComponent?: number | null;
323
+ /** Whether this is a mask image */
324
+ isMask?: boolean;
325
+ /** Image description */
326
+ description?: string | null;
327
+ /** Optional OCR result from the image */
328
+ ocrResult?: ExtractionResult | string | null;
329
+ }
330
+ /**
331
+ * Per-page content
332
+ */
333
+ export interface PageContent {
334
+ /** Page number (1-indexed) */
335
+ pageNumber: number;
336
+ /** Text content of the page */
337
+ content: string;
338
+ /** Tables on this page */
339
+ tables?: Table[];
340
+ /** Images on this page */
341
+ images?: ExtractedImage[];
342
+ }
343
+ /**
344
+ * OCR backend protocol/interface
345
+ */
346
+ export interface OcrBackendProtocol {
347
+ /** Get the backend name */
348
+ name(): string;
349
+ /** Get supported language codes */
350
+ supportedLanguages?(): string[];
351
+ /** Initialize the backend */
352
+ initialize(options?: Record<string, unknown>): void | Promise<void>;
353
+ /** Shutdown the backend */
354
+ shutdown?(): void | Promise<void>;
355
+ /** Process an image with OCR */
356
+ processImage(imageData: Uint8Array | string, language?: string): Promise<{
357
+ content: string;
358
+ mime_type: string;
359
+ metadata?: Record<string, unknown>;
360
+ tables?: unknown[];
361
+ } | string>;
362
+ }
363
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../typescript/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,2BAA2B;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,sBAAsB,CAAC,EAAE,OAAO,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,yCAAyC;IACzC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,iCAAiC;IACjC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,kCAAkC;IAClC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED;;;;;;GAMG;AACH,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,MAAM,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,0DAA0D;IAC1D,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,mDAAmD;IACnD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qDAAqD;IACrD,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,0DAA0D;IAC1D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,oEAAoE;IACpE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,sCAAsC;IACtC,UAAU,CAAC,EAAE,UAAU,CAAC;CACxB;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,mEAAmE;IACnE,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,SAAS,EAAE,gBAAgB,CAAC;IAC5B,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,wBAAwB;IACxB,GAAG,CAAC,EAAE,SAAS,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,qCAAqC;IACrC,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,oCAAoC;IACpC,KAAK,CAAC,EAAE,oBAAoB,CAAC;IAC7B,uCAAuC;IACvC,iBAAiB,CAAC,EAAE,uBAAuB,CAAC;IAC5C,6BAA6B;IAC7B,UAAU,CAAC,EAAE,SAAS,CAAC;IACvB,oCAAoC;IACpC,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,mCAAmC;IACnC,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,uCAAuC;IACvC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,gCAAgC;IAChC,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAClC,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,qCAAqC;IACrC,wBAAwB,CAAC,EAAE,MAAM,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,uCAAuC;IACvC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,0CAA0C;IAC1C,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,6BAA6B;IAC7B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,uCAAuC;IACvC,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,4BAA4B;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACrC,gCAAgC;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,+BAA+B;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yCAAyC;IACzC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,kCAAkC;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,+DAA+D;IAC/D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,iDAAiD;IACjD,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,kCAAkC;IAClC,OAAO,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,uBAAuB;IACvB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,yCAAyC;IACzC,iBAAiB,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACpC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IACxB,uBAAuB;IACvB,MAAM,CAAC,EAAE,cAAc,EAAE,GAAG,IAAI,CAAC;IACjC,uBAAuB;IACvB,KAAK,CAAC,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC;IAC7B,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACxB,qBAAqB;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,oBAAoB;IACpB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oCAAoC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB;;;OAGG;IACH,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACnB,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,oBAAoB;IACpB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,iBAAiB;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,0BAA0B;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,qBAAqB;IACrB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,gDAAgD;IAChD,IAAI,EAAE,UAAU,GAAG,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,4BAA4B;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,+BAA+B;IAC/B,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,mCAAmC;IACnC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,yCAAyC;IACzC,SAAS,CAAC,EAAE,gBAAgB,GAAG,MAAM,GAAG,IAAI,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,0BAA0B;IAC1B,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,2BAA2B;IAC3B,IAAI,IAAI,MAAM,CAAC;IACf,mCAAmC;IACnC,kBAAkB,CAAC,IAAI,MAAM,EAAE,CAAC;IAChC,6BAA6B;IAC7B,UAAU,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,2BAA2B;IAC3B,QAAQ,CAAC,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClC,gCAAgC;IAChC,YAAY,CACX,SAAS,EAAE,UAAU,GAAG,MAAM,EAC9B,QAAQ,CAAC,EAAE,MAAM,GACf,OAAO,CACP;QACA,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;KAClB,GACD,MAAM,CACR,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,8 @@
1
1
  {
2
2
  "name": "@kreuzberg/wasm",
3
- "version": "4.0.0-rc.6",
3
+ "version": "4.0.0",
4
+ "type": "module",
5
+ "packageManager": "pnpm@10.17.0",
4
6
  "description": "Kreuzberg document intelligence - WebAssembly bindings",
5
7
  "author": {
6
8
  "name": "Na'aman Hirschfeld",
@@ -12,58 +14,35 @@
12
14
  "url": "https://github.com/kreuzberg-dev/kreuzberg/issues"
13
15
  },
14
16
  "main": "dist/index.js",
15
- "module": "dist/index.mjs",
17
+ "module": "dist/index.js",
16
18
  "types": "dist/index.d.ts",
17
19
  "exports": {
18
20
  ".": {
19
- "import": {
20
- "types": "./dist/index.d.mts",
21
- "default": "./dist/index.mjs"
22
- },
23
- "require": {
24
- "types": "./dist/index.d.ts",
25
- "default": "./dist/index.js"
26
- }
21
+ "types": "./dist/index.d.ts",
22
+ "import": "./dist/index.js",
23
+ "default": "./dist/index.js"
27
24
  },
25
+ "./dist/pkg/*": "./dist/pkg/*",
26
+ "./dist/kreuzberg_wasm.js": "./dist/kreuzberg_wasm.js",
28
27
  "./runtime": {
29
- "import": {
30
- "types": "./dist/runtime.d.mts",
31
- "default": "./dist/runtime.mjs"
32
- },
33
- "require": {
34
- "types": "./dist/runtime.d.ts",
35
- "default": "./dist/runtime.js"
36
- }
28
+ "types": "./dist/runtime.d.ts",
29
+ "import": "./dist/runtime.js",
30
+ "default": "./dist/runtime.js"
37
31
  },
38
32
  "./adapters/wasm-adapter": {
39
- "import": {
40
- "types": "./dist/adapters/wasm-adapter.d.mts",
41
- "default": "./dist/adapters/wasm-adapter.mjs"
42
- },
43
- "require": {
44
- "types": "./dist/adapters/wasm-adapter.d.ts",
45
- "default": "./dist/adapters/wasm-adapter.js"
46
- }
33
+ "types": "./dist/adapters/wasm-adapter.d.ts",
34
+ "import": "./dist/adapters/wasm-adapter.js",
35
+ "default": "./dist/adapters/wasm-adapter.js"
47
36
  },
48
37
  "./ocr/registry": {
49
- "import": {
50
- "types": "./dist/ocr/registry.d.mts",
51
- "default": "./dist/ocr/registry.mjs"
52
- },
53
- "require": {
54
- "types": "./dist/ocr/registry.d.ts",
55
- "default": "./dist/ocr/registry.js"
56
- }
38
+ "types": "./dist/ocr/registry.d.ts",
39
+ "import": "./dist/ocr/registry.js",
40
+ "default": "./dist/ocr/registry.js"
57
41
  },
58
42
  "./ocr/tesseract-wasm-backend": {
59
- "import": {
60
- "types": "./dist/ocr/tesseract-wasm-backend.d.mts",
61
- "default": "./dist/ocr/tesseract-wasm-backend.mjs"
62
- },
63
- "require": {
64
- "types": "./dist/ocr/tesseract-wasm-backend.d.ts",
65
- "default": "./dist/ocr/tesseract-wasm-backend.js"
66
- }
43
+ "types": "./dist/ocr/tesseract-wasm-backend.d.ts",
44
+ "import": "./dist/ocr/tesseract-wasm-backend.js",
45
+ "default": "./dist/ocr/tesseract-wasm-backend.js"
67
46
  }
68
47
  },
69
48
  "repository": {
@@ -95,6 +74,7 @@
95
74
  "pkg",
96
75
  "*.wasm",
97
76
  "*.d.ts",
77
+ "pdfium.js",
98
78
  "README.md"
99
79
  ],
100
80
  "engines": {
@@ -109,9 +89,12 @@
109
89
  "build:wasm:bundler": "wasm-pack build --target bundler --out-dir pkg --release",
110
90
  "build:wasm:nodejs": "wasm-pack build --target nodejs --out-dir pkg --release",
111
91
  "build:wasm:deno": "wasm-pack build --target deno --out-dir pkg --release",
112
- "build:ts": "tsup",
113
- "build": "npm run build:wasm:nodejs && npm run build:ts",
114
- "build:all": "npm run build:wasm:web && npm run build:wasm:bundler && npm run build:wasm:nodejs && npm run build:wasm:deno && npm run build:ts",
92
+ "build:ts": "tsup && tsc --emitDeclarationOnly && node scripts/fix-type-exports.js",
93
+ "build:ts:minify": "tsup --minify && tsc --emitDeclarationOnly && node scripts/fix-type-exports.js",
94
+ "copy:pkg": "node scripts/copy-pkg.js",
95
+ "build": "npm run build:wasm:nodejs && npm run build:ts && npm run copy:pkg",
96
+ "build:ci": "npm run build:wasm:nodejs && npm run build:ts:minify && npm run copy:pkg",
97
+ "build:all": "npm run build:wasm:web && npm run build:wasm:bundler && npm run build:wasm:nodejs && npm run build:wasm:deno && npm run build:ts && npm run copy:pkg",
115
98
  "typecheck": "tsc --noEmit",
116
99
  "lint": "biome check typescript && oxlint typescript",
117
100
  "lint:fix": "biome check --write typescript",
@@ -123,14 +106,14 @@
123
106
  "prepublishOnly": "npm run build"
124
107
  },
125
108
  "devDependencies": {
126
- "@types/node": "^24.10.1",
127
- "@vitest/coverage-v8": "^1.4.0",
128
- "@vitest/ui": "^1.4.0",
129
- "jsdom": "^24.0.0",
130
- "oxlint": "^1.31.0",
109
+ "@types/node": "^25.0.6",
110
+ "@vitest/coverage-v8": "^4.0.16",
111
+ "@vitest/ui": "^4.0.16",
112
+ "jsdom": "^27.4.0",
113
+ "oxlint": "^1.38.0",
131
114
  "tsup": "^8.5.1",
132
115
  "typescript": "^5.9.3",
133
- "vitest": "^1.4.0"
116
+ "vitest": "^4.0.16"
134
117
  },
135
118
  "optionalDependencies": {
136
119
  "tesseract-wasm": "^0.11.0"
@@ -1,121 +0,0 @@
1
- import { E as ExtractionConfig, a as ExtractionResult } from '../types-GJVIvbPy.mjs';
2
-
3
- /**
4
- * WASM Type Adapter
5
- *
6
- * This module provides type adapters for converting between JavaScript/TypeScript
7
- * types and WASM-compatible types, handling File/Blob conversions, config normalization,
8
- * and result parsing.
9
- *
10
- * @example File Conversion
11
- * ```typescript
12
- * import { fileToUint8Array } from '@kreuzberg/wasm/adapters/wasm-adapter';
13
- *
14
- * const file = event.target.files[0];
15
- * const bytes = await fileToUint8Array(file);
16
- * const result = await extractBytes(bytes, file.type);
17
- * ```
18
- *
19
- * @example Config Normalization
20
- * ```typescript
21
- * import { configToJS } from '@kreuzberg/wasm/adapters/wasm-adapter';
22
- *
23
- * const config = {
24
- * ocr: { backend: 'tesseract', language: 'eng' },
25
- * chunking: { maxChars: 1000 }
26
- * };
27
- * const normalized = configToJS(config);
28
- * ```
29
- */
30
-
31
- /**
32
- * Convert a File or Blob to Uint8Array
33
- *
34
- * Handles both browser File API and server-side Blob-like objects,
35
- * providing a unified interface for reading binary data.
36
- *
37
- * @param file - The File or Blob to convert
38
- * @returns Promise resolving to the byte array
39
- * @throws {Error} If the file cannot be read or exceeds size limit
40
- *
41
- * @example
42
- * ```typescript
43
- * const file = document.getElementById('input').files[0];
44
- * const bytes = await fileToUint8Array(file);
45
- * const result = await extractBytes(bytes, 'application/pdf');
46
- * ```
47
- */
48
- declare function fileToUint8Array(file: File | Blob): Promise<Uint8Array>;
49
- /**
50
- * Normalize ExtractionConfig for WASM processing
51
- *
52
- * Converts TypeScript configuration objects to a WASM-compatible format,
53
- * handling null values, undefined properties, and nested structures.
54
- *
55
- * @param config - The extraction configuration or null
56
- * @returns Normalized configuration object suitable for WASM
57
- *
58
- * @example
59
- * ```typescript
60
- * const config: ExtractionConfig = {
61
- * ocr: { backend: 'tesseract' },
62
- * chunking: { maxChars: 1000 }
63
- * };
64
- * const wasmConfig = configToJS(config);
65
- * ```
66
- */
67
- declare function configToJS(config: ExtractionConfig | null): Record<string, unknown>;
68
- /**
69
- * Parse WASM extraction result and convert to TypeScript type
70
- *
71
- * Handles conversion of WASM-returned objects to proper ExtractionResult types,
72
- * including proper array conversions and type assertions for tables, chunks, and images.
73
- *
74
- * @param jsValue - The raw WASM result value
75
- * @returns Properly typed ExtractionResult
76
- * @throws {Error} If the result structure is invalid
77
- *
78
- * @example
79
- * ```typescript
80
- * const wasmResult = await wasmExtract(bytes, mimeType, config);
81
- * const result = jsToExtractionResult(wasmResult);
82
- * console.log(result.content);
83
- * ```
84
- */
85
- declare function jsToExtractionResult(jsValue: unknown): ExtractionResult;
86
- /**
87
- * Wrap and format WASM errors with context
88
- *
89
- * Converts WASM error messages to JavaScript Error objects with proper context
90
- * and stack trace information when available.
91
- *
92
- * @param error - The error from WASM
93
- * @param context - Additional context about what operation failed
94
- * @returns A formatted Error object
95
- *
96
- * @internal
97
- *
98
- * @example
99
- * ```typescript
100
- * try {
101
- * await wasmExtract(bytes, mimeType);
102
- * } catch (error) {
103
- * throw wrapWasmError(error, 'extracting document');
104
- * }
105
- * ```
106
- */
107
- declare function wrapWasmError(error: unknown, context: string): Error;
108
- /**
109
- * Validate that a WASM-returned value conforms to ExtractionResult structure
110
- *
111
- * Performs structural validation without full type checking,
112
- * useful for runtime validation of WASM output.
113
- *
114
- * @param value - The value to validate
115
- * @returns True if value appears to be a valid ExtractionResult
116
- *
117
- * @internal
118
- */
119
- declare function isValidExtractionResult(value: unknown): value is ExtractionResult;
120
-
121
- export { configToJS, fileToUint8Array, isValidExtractionResult, jsToExtractionResult, wrapWasmError };