@kreuzberg/node 4.0.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +669 -0
- package/index.d.ts +1109 -0
- package/index.js +607 -0
- package/metadata.d.ts +502 -0
- package/package.json +128 -0
package/metadata.d.ts
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata type definitions for Kreuzberg extraction results.
|
|
3
|
+
*
|
|
4
|
+
* These types mirror the Rust metadata structures and are referenced by
|
|
5
|
+
* the auto-generated index.d.ts file.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ===== Enums =====
|
|
9
|
+
|
|
10
|
+
/**
 * Tesseract OCR page segmentation modes (PSM).
 *
 * Every discriminant is written out explicitly because the values map to
 * Rust's `PSMMode` enum.
 */
export enum PSMMode {
  /** Detect orientation and script only (OSD) */
  OsdOnly = 0,
  /** Segment the page automatically, with OSD */
  AutoOsd = 1,
  /** Segment the page automatically, without OSD or OCR */
  AutoOnly = 2,
  /** Segment the page fully automatically, without OSD (default) */
  Auto = 3,
  /** Input is one column of text with variable sizes */
  SingleColumn = 4,
  /** Input is one uniform block of vertically aligned text */
  SingleBlockVertical = 5,
  /** Input is one uniform block of text */
  SingleBlock = 6,
  /** Input image is one line of text */
  SingleLine = 7,
  /** Input image is one word */
  SingleWord = 8,
  /** Input image is one word laid out in a circle */
  CircleWord = 9,
  /** Input image is one character */
  SingleChar = 10,
}
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Output format for OCR extraction.
|
|
41
|
+
*/
|
|
42
|
+
export type OcrOutputFormat = "text" | "markdown" | "hocr" | "tsv";
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Token reduction level.
|
|
46
|
+
*/
|
|
47
|
+
export type TokenReductionLevel = "off" | "light" | "moderate" | "aggressive" | "maximum";
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Image binarization method for preprocessing.
|
|
51
|
+
*/
|
|
52
|
+
export type BinarizationMethod = "otsu" | "adaptive" | "sauvola";
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Processing stage for postprocessors.
|
|
56
|
+
*/
|
|
57
|
+
export type ProcessingStage = "early" | "middle" | "late";
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Common EXIF metadata fields from images.
|
|
61
|
+
* Based on standard EXIF 2.3 specification.
|
|
62
|
+
*/
|
|
63
|
+
export interface ExifMetadata {
|
|
64
|
+
/** Image width in pixels */
|
|
65
|
+
ImageWidth?: number;
|
|
66
|
+
/** Image height in pixels */
|
|
67
|
+
ImageHeight?: number;
|
|
68
|
+
/** Camera make/manufacturer */
|
|
69
|
+
Make?: string;
|
|
70
|
+
/** Camera model */
|
|
71
|
+
Model?: string;
|
|
72
|
+
/** Image orientation (1-8) */
|
|
73
|
+
Orientation?: number;
|
|
74
|
+
/** Horizontal resolution (DPI) */
|
|
75
|
+
XResolution?: number;
|
|
76
|
+
/** Vertical resolution (DPI) */
|
|
77
|
+
YResolution?: number;
|
|
78
|
+
/** Resolution unit (2=inches, 3=cm) */
|
|
79
|
+
ResolutionUnit?: number;
|
|
80
|
+
/** Software used to create/edit image */
|
|
81
|
+
Software?: string;
|
|
82
|
+
/** Date and time of image creation */
|
|
83
|
+
DateTime?: string;
|
|
84
|
+
/** Date and time original image was taken */
|
|
85
|
+
DateTimeOriginal?: string;
|
|
86
|
+
/** Date and time image was digitized */
|
|
87
|
+
DateTimeDigitized?: string;
|
|
88
|
+
/** Copyright information */
|
|
89
|
+
Copyright?: string;
|
|
90
|
+
/** Image description */
|
|
91
|
+
ImageDescription?: string;
|
|
92
|
+
/** Artist/creator */
|
|
93
|
+
Artist?: string;
|
|
94
|
+
/** Color space (1=sRGB, 2=Adobe RGB) */
|
|
95
|
+
ColorSpace?: number;
|
|
96
|
+
/** Bits per sample */
|
|
97
|
+
BitsPerSample?: number;
|
|
98
|
+
/** Compression type */
|
|
99
|
+
Compression?: number;
|
|
100
|
+
/** Photometric interpretation */
|
|
101
|
+
PhotometricInterpretation?: number;
|
|
102
|
+
/** GPS latitude */
|
|
103
|
+
GPSLatitude?: string;
|
|
104
|
+
/** GPS longitude */
|
|
105
|
+
GPSLongitude?: string;
|
|
106
|
+
/** GPS altitude */
|
|
107
|
+
GPSAltitude?: string;
|
|
108
|
+
/** Exposure time (seconds) */
|
|
109
|
+
ExposureTime?: string;
|
|
110
|
+
/** F-number */
|
|
111
|
+
FNumber?: string;
|
|
112
|
+
/** ISO speed rating */
|
|
113
|
+
ISOSpeedRatings?: number;
|
|
114
|
+
/** Focal length (mm) */
|
|
115
|
+
FocalLength?: string;
|
|
116
|
+
/** Flash mode */
|
|
117
|
+
Flash?: number;
|
|
118
|
+
/** White balance mode */
|
|
119
|
+
WhiteBalance?: number;
|
|
120
|
+
/** Metering mode */
|
|
121
|
+
MeteringMode?: number;
|
|
122
|
+
/** Exposure program */
|
|
123
|
+
ExposureProgram?: number;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
export interface PdfMetadata {
|
|
127
|
+
title?: string;
|
|
128
|
+
author?: string;
|
|
129
|
+
subject?: string;
|
|
130
|
+
keywords?: string;
|
|
131
|
+
creator?: string;
|
|
132
|
+
producer?: string;
|
|
133
|
+
creationDate?: string;
|
|
134
|
+
modificationDate?: string;
|
|
135
|
+
pageCount?: number;
|
|
136
|
+
pdfVersion?: string;
|
|
137
|
+
isEncrypted?: boolean;
|
|
138
|
+
width?: number;
|
|
139
|
+
height?: number;
|
|
140
|
+
summary?: string;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export interface ExcelMetadata {
|
|
144
|
+
sheetCount: number;
|
|
145
|
+
sheetNames: string[];
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export interface EmailMetadata {
|
|
149
|
+
fromEmail?: string;
|
|
150
|
+
fromName?: string;
|
|
151
|
+
toEmails: string[];
|
|
152
|
+
ccEmails: string[];
|
|
153
|
+
bccEmails: string[];
|
|
154
|
+
messageId?: string;
|
|
155
|
+
attachments: string[];
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
export interface PptxMetadata {
|
|
159
|
+
title?: string;
|
|
160
|
+
author?: string;
|
|
161
|
+
description?: string;
|
|
162
|
+
summary?: string;
|
|
163
|
+
fonts: string[];
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
export interface ArchiveMetadata {
|
|
167
|
+
format: string;
|
|
168
|
+
fileCount: number;
|
|
169
|
+
fileList: string[];
|
|
170
|
+
totalSize: number;
|
|
171
|
+
compressedSize?: number;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
export interface ImageMetadata {
|
|
175
|
+
width: number;
|
|
176
|
+
height: number;
|
|
177
|
+
format: string;
|
|
178
|
+
exif: ExifMetadata;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
export interface XmlMetadata {
|
|
182
|
+
elementCount: number;
|
|
183
|
+
uniqueElements: string[];
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
export interface TextMetadata {
|
|
187
|
+
lineCount: number;
|
|
188
|
+
wordCount: number;
|
|
189
|
+
characterCount: number;
|
|
190
|
+
headers?: string[];
|
|
191
|
+
links?: Array<[string, string]>;
|
|
192
|
+
codeBlocks?: Array<[string, string]>;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
export interface HtmlMetadata {
|
|
196
|
+
title?: string;
|
|
197
|
+
description?: string;
|
|
198
|
+
keywords?: string;
|
|
199
|
+
author?: string;
|
|
200
|
+
canonical?: string;
|
|
201
|
+
baseHref?: string;
|
|
202
|
+
ogTitle?: string;
|
|
203
|
+
ogDescription?: string;
|
|
204
|
+
ogImage?: string;
|
|
205
|
+
ogUrl?: string;
|
|
206
|
+
ogType?: string;
|
|
207
|
+
ogSiteName?: string;
|
|
208
|
+
twitterCard?: string;
|
|
209
|
+
twitterTitle?: string;
|
|
210
|
+
twitterDescription?: string;
|
|
211
|
+
twitterImage?: string;
|
|
212
|
+
twitterSite?: string;
|
|
213
|
+
twitterCreator?: string;
|
|
214
|
+
linkAuthor?: string;
|
|
215
|
+
linkLicense?: string;
|
|
216
|
+
linkAlternate?: string;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
export interface OcrMetadata {
|
|
220
|
+
language: string;
|
|
221
|
+
psm: PSMMode;
|
|
222
|
+
outputFormat: OcrOutputFormat;
|
|
223
|
+
tableCount: number;
|
|
224
|
+
tableRows?: number;
|
|
225
|
+
tableCols?: number;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
export interface ImagePreprocessingMetadata {
|
|
229
|
+
originalDimensions: [number, number];
|
|
230
|
+
originalDpi: [number, number];
|
|
231
|
+
targetDpi: number;
|
|
232
|
+
scaleFactor: number;
|
|
233
|
+
autoAdjusted: boolean;
|
|
234
|
+
finalDpi: number;
|
|
235
|
+
newDimensions?: [number, number];
|
|
236
|
+
resampleMethod: string;
|
|
237
|
+
dimensionClamped: boolean;
|
|
238
|
+
calculatedDpi?: number;
|
|
239
|
+
skippedResize: boolean;
|
|
240
|
+
resizeError?: string;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
export interface ErrorMetadata {
|
|
244
|
+
errorType: string;
|
|
245
|
+
message: string;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Discriminator tag for format-specific metadata (a union of string literals).
|
|
250
|
+
* The `formatType` field indicates which format-specific fields are present.
|
|
251
|
+
* All format-specific fields are flattened into the metadata object.
|
|
252
|
+
*
|
|
253
|
+
* Example for PDF:
|
|
254
|
+
* ```typescript
|
|
255
|
+
* {
|
|
256
|
+
* "language": "en",
|
|
257
|
+
* "formatType": "pdf",
|
|
258
|
+
* "title": "My Document",
|
|
259
|
+
* "pageCount": 5
|
|
260
|
+
* }
|
|
261
|
+
* ```
|
|
262
|
+
*
|
|
263
|
+
* Example for Excel:
|
|
264
|
+
* ```typescript
|
|
265
|
+
* {
|
|
266
|
+
* "formatType": "excel",
|
|
267
|
+
* "sheetCount": 3,
|
|
268
|
+
* "sheetNames": ["Sheet1", "Sheet2", "Sheet3"]
|
|
269
|
+
* }
|
|
270
|
+
* ```
|
|
271
|
+
*/
|
|
272
|
+
export type FormatType = "pdf" | "excel" | "email" | "pptx" | "archive" | "image" | "xml" | "text" | "html" | "ocr";
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Base metadata interface with common fields.
|
|
276
|
+
* Use the generic `Metadata<T>` type to extend with custom fields.
|
|
277
|
+
*/
|
|
278
|
+
export interface BaseMetadata {
|
|
279
|
+
language?: string;
|
|
280
|
+
date?: string;
|
|
281
|
+
subject?: string;
|
|
282
|
+
|
|
283
|
+
formatType?: FormatType;
|
|
284
|
+
|
|
285
|
+
title?: string;
|
|
286
|
+
author?: string;
|
|
287
|
+
keywords?: string;
|
|
288
|
+
creator?: string;
|
|
289
|
+
producer?: string;
|
|
290
|
+
creationDate?: string;
|
|
291
|
+
modificationDate?: string;
|
|
292
|
+
pageCount?: number;
|
|
293
|
+
pdfVersion?: string;
|
|
294
|
+
isEncrypted?: boolean;
|
|
295
|
+
width?: number;
|
|
296
|
+
height?: number;
|
|
297
|
+
summary?: string;
|
|
298
|
+
|
|
299
|
+
sheetCount?: number;
|
|
300
|
+
sheetNames?: string[];
|
|
301
|
+
|
|
302
|
+
fromEmail?: string;
|
|
303
|
+
fromName?: string;
|
|
304
|
+
toEmails?: string[];
|
|
305
|
+
ccEmails?: string[];
|
|
306
|
+
bccEmails?: string[];
|
|
307
|
+
messageId?: string;
|
|
308
|
+
attachments?: string[];
|
|
309
|
+
|
|
310
|
+
description?: string;
|
|
311
|
+
fonts?: string[];
|
|
312
|
+
|
|
313
|
+
format?: string;
|
|
314
|
+
fileCount?: number;
|
|
315
|
+
fileList?: string[];
|
|
316
|
+
totalSize?: number;
|
|
317
|
+
compressedSize?: number;
|
|
318
|
+
|
|
319
|
+
exif?: ExifMetadata;
|
|
320
|
+
|
|
321
|
+
elementCount?: number;
|
|
322
|
+
uniqueElements?: string[];
|
|
323
|
+
|
|
324
|
+
lineCount?: number;
|
|
325
|
+
wordCount?: number;
|
|
326
|
+
characterCount?: number;
|
|
327
|
+
headers?: string[];
|
|
328
|
+
links?: Array<[string, string]>;
|
|
329
|
+
codeBlocks?: Array<[string, string]>;
|
|
330
|
+
|
|
331
|
+
canonical?: string;
|
|
332
|
+
baseHref?: string;
|
|
333
|
+
ogTitle?: string;
|
|
334
|
+
ogDescription?: string;
|
|
335
|
+
ogImage?: string;
|
|
336
|
+
ogUrl?: string;
|
|
337
|
+
ogType?: string;
|
|
338
|
+
ogSiteName?: string;
|
|
339
|
+
twitterCard?: string;
|
|
340
|
+
twitterTitle?: string;
|
|
341
|
+
twitterDescription?: string;
|
|
342
|
+
twitterImage?: string;
|
|
343
|
+
twitterSite?: string;
|
|
344
|
+
twitterCreator?: string;
|
|
345
|
+
linkAuthor?: string;
|
|
346
|
+
linkLicense?: string;
|
|
347
|
+
linkAlternate?: string;
|
|
348
|
+
|
|
349
|
+
psm?: PSMMode;
|
|
350
|
+
outputFormat?: OcrOutputFormat;
|
|
351
|
+
tableCount?: number;
|
|
352
|
+
tableRows?: number;
|
|
353
|
+
tableCols?: number;
|
|
354
|
+
|
|
355
|
+
imagePreprocessing?: ImagePreprocessingMetadata;
|
|
356
|
+
jsonSchema?: Record<string, unknown>;
|
|
357
|
+
error?: ErrorMetadata;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
/**
 * Generic metadata type: every `BaseMetadata` field plus the custom fields `T`.
 *
 * `T` defaults to `Record<string, unknown>`. A default of
 * `Record<string, never>` must not be used here: intersecting it with
 * `BaseMetadata` collapses every known field (`title`, `pageCount`, ...) to
 * `never`, so the un-parameterised `Metadata` could be neither populated nor
 * read.
 *
 * Declare custom fields with a `type` alias (or an inline object type). An
 * `interface` has no implicit index signature and therefore does not satisfy
 * the `Record<string, unknown>` constraint.
 *
 * @typeParam T - Additional custom fields merged into the metadata object.
 *
 * @example
 * ```typescript
 * // Extend with custom fields
 * type MyCustomMetadata = {
 *   customField1: string;
 *   customField2: number;
 * };
 *
 * type ExtendedMetadata = Metadata<MyCustomMetadata>;
 *
 * const metadata: ExtendedMetadata = {
 *   formatType: 'pdf',
 *   pageCount: 5,
 *   customField1: 'value',
 *   customField2: 42
 * };
 * ```
 */
export type Metadata<T extends Record<string, unknown> = Record<string, unknown>> = BaseMetadata & T;
|
|
382
|
+
|
|
383
|
+
/**
|
|
384
|
+
* Extracted image with metadata and optional nested OCR result.
|
|
385
|
+
*/
|
|
386
|
+
export interface ExtractedImage {
|
|
387
|
+
/** Raw image bytes */
|
|
388
|
+
data: Buffer;
|
|
389
|
+
/** Image format (e.g., "jpeg", "png", "tiff") */
|
|
390
|
+
format: string;
|
|
391
|
+
/** Zero-based image index within the document */
|
|
392
|
+
imageIndex: number;
|
|
393
|
+
/** Page number where image was found (1-indexed) */
|
|
394
|
+
pageNumber?: number;
|
|
395
|
+
/** Image width in pixels */
|
|
396
|
+
width?: number;
|
|
397
|
+
/** Image height in pixels */
|
|
398
|
+
height?: number;
|
|
399
|
+
/** Colorspace (e.g., "DeviceRGB", "DeviceGray") */
|
|
400
|
+
colorspace?: string;
|
|
401
|
+
/** Bits per color component */
|
|
402
|
+
bitsPerComponent?: number;
|
|
403
|
+
/** Whether this is a mask image */
|
|
404
|
+
isMask: boolean;
|
|
405
|
+
/** Optional description/alt text */
|
|
406
|
+
description?: string;
|
|
407
|
+
/** Nested OCR result if image was processed with OCR */
|
|
408
|
+
ocrResult?: ExtractionResult;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
/**
 * Generic extraction result type that allows extension with custom metadata.
 *
 * `T` defaults to `Record<string, unknown>`. A default of
 * `Record<string, never>` must not be used: it is forwarded to `Metadata<T>`,
 * where the intersection with the base metadata fields turns every known
 * field into `never`, leaving `result.metadata` unusable on the
 * un-parameterised type.
 *
 * Declare custom metadata with a `type` alias (or an inline object type). An
 * `interface` has no implicit index signature and therefore does not satisfy
 * the `Record<string, unknown>` constraint.
 *
 * @typeParam T - Additional custom fields carried on `metadata`.
 *
 * @example
 * ```typescript
 * type MyMetadata = {
 *   processingTime: number;
 *   confidence: number;
 * };
 *
 * type MyResult = ExtractionResult<MyMetadata>;
 * ```
 */
export interface ExtractionResult<T extends Record<string, unknown> = Record<string, unknown>> {
  content: string;
  mimeType: string;
  /** Common metadata fields combined with the custom fields `T` */
  metadata: Metadata<T>;
  tables: ExtractedTable[];
  detectedLanguages?: string[];
  chunks?: string[];
  images?: ExtractedImage[];
}
|
|
433
|
+
|
|
434
|
+
/**
|
|
435
|
+
* Extracted table structure.
|
|
436
|
+
*/
|
|
437
|
+
export interface ExtractedTable {
|
|
438
|
+
/** 2D array of cell values */
|
|
439
|
+
cells: string[][];
|
|
440
|
+
/** Markdown representation of the table */
|
|
441
|
+
markdown: string;
|
|
442
|
+
/** Page number where table was found (1-indexed) */
|
|
443
|
+
pageNumber: number;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
/**
|
|
447
|
+
* Type guard to check if metadata is for a specific format type.
|
|
448
|
+
*/
|
|
449
|
+
export declare function isFormatType<T extends FormatType>(
|
|
450
|
+
metadata: BaseMetadata,
|
|
451
|
+
formatType: T,
|
|
452
|
+
): metadata is BaseMetadata & { formatType: T };
|
|
453
|
+
|
|
454
|
+
/**
|
|
455
|
+
* Type guard to check if metadata contains PDF-specific fields.
|
|
456
|
+
*/
|
|
457
|
+
export declare function isPdfMetadata(metadata: BaseMetadata): metadata is BaseMetadata & PdfMetadata;
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Type guard to check if metadata contains Excel-specific fields.
|
|
461
|
+
*/
|
|
462
|
+
export declare function isExcelMetadata(metadata: BaseMetadata): metadata is BaseMetadata & ExcelMetadata;
|
|
463
|
+
|
|
464
|
+
/**
|
|
465
|
+
* Type guard to check if metadata contains Email-specific fields.
|
|
466
|
+
*/
|
|
467
|
+
export declare function isEmailMetadata(metadata: BaseMetadata): metadata is BaseMetadata & EmailMetadata;
|
|
468
|
+
|
|
469
|
+
/**
|
|
470
|
+
* Type guard to check if metadata contains PPTX-specific fields.
|
|
471
|
+
*/
|
|
472
|
+
export declare function isPptxMetadata(metadata: BaseMetadata): metadata is BaseMetadata & PptxMetadata;
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* Type guard to check if metadata contains Archive-specific fields.
|
|
476
|
+
*/
|
|
477
|
+
export declare function isArchiveMetadata(metadata: BaseMetadata): metadata is BaseMetadata & ArchiveMetadata;
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* Type guard to check if metadata contains Image-specific fields.
|
|
481
|
+
*/
|
|
482
|
+
export declare function isImageMetadata(metadata: BaseMetadata): metadata is BaseMetadata & ImageMetadata;
|
|
483
|
+
|
|
484
|
+
/**
|
|
485
|
+
* Type guard to check if metadata contains XML-specific fields.
|
|
486
|
+
*/
|
|
487
|
+
export declare function isXmlMetadata(metadata: BaseMetadata): metadata is BaseMetadata & XmlMetadata;
|
|
488
|
+
|
|
489
|
+
/**
|
|
490
|
+
* Type guard to check if metadata contains Text-specific fields.
|
|
491
|
+
*/
|
|
492
|
+
export declare function isTextMetadata(metadata: BaseMetadata): metadata is BaseMetadata & TextMetadata;
|
|
493
|
+
|
|
494
|
+
/**
|
|
495
|
+
* Type guard to check if metadata contains HTML-specific fields.
|
|
496
|
+
*/
|
|
497
|
+
export declare function isHtmlMetadata(metadata: BaseMetadata): metadata is BaseMetadata & HtmlMetadata;
|
|
498
|
+
|
|
499
|
+
/**
|
|
500
|
+
* Type guard to check if metadata contains OCR-specific fields.
|
|
501
|
+
*/
|
|
502
|
+
export declare function isOcrMetadata(metadata: BaseMetadata): metadata is BaseMetadata & OcrMetadata;
|
package/package.json
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kreuzberg/node",
|
|
3
|
+
"version": "4.0.0-rc.5",
|
|
4
|
+
"description": "Kreuzberg document intelligence - Node.js native bindings",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Na'aman Hirschfeld",
|
|
7
|
+
"email": "nhirschfeld@gmail.com",
|
|
8
|
+
"url": "https://kreuzberg.dev"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://kreuzberg.dev",
|
|
11
|
+
"bugs": {
|
|
12
|
+
"url": "https://github.com/kreuzberg-dev/kreuzberg/issues"
|
|
13
|
+
},
|
|
14
|
+
"main": "dist/index.js",
|
|
15
|
+
"module": "dist/index.mjs",
|
|
16
|
+
"types": "dist/index.d.ts",
|
|
17
|
+
"exports": {
|
|
18
|
+
".": {
|
|
19
|
+
"types": "./dist/index.d.ts",
|
|
20
|
+
"import": "./dist/index.mjs",
|
|
21
|
+
"require": "./dist/index.js"
|
|
22
|
+
},
|
|
23
|
+
"./cli": {
|
|
24
|
+
"types": "./dist/cli.d.ts",
|
|
25
|
+
"import": "./dist/cli.mjs",
|
|
26
|
+
"require": "./dist/cli.js"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"bin": {
|
|
30
|
+
"kreuzberg": "./dist/cli.js"
|
|
31
|
+
},
|
|
32
|
+
"repository": {
|
|
33
|
+
"type": "git",
|
|
34
|
+
"url": "https://github.com/kreuzberg-dev/kreuzberg.git"
|
|
35
|
+
},
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"keywords": [
|
|
38
|
+
"document-intelligence",
|
|
39
|
+
"document-extraction",
|
|
40
|
+
"text-extraction",
|
|
41
|
+
"pdf-extraction",
|
|
42
|
+
"ocr",
|
|
43
|
+
"pdf",
|
|
44
|
+
"docx",
|
|
45
|
+
"xlsx",
|
|
46
|
+
"pptx",
|
|
47
|
+
"office-documents",
|
|
48
|
+
"table-extraction",
|
|
49
|
+
"metadata-extraction",
|
|
50
|
+
"rust",
|
|
51
|
+
"napi",
|
|
52
|
+
"native",
|
|
53
|
+
"nodejs"
|
|
54
|
+
],
|
|
55
|
+
"files": [
|
|
56
|
+
"dist",
|
|
57
|
+
"index.js",
|
|
58
|
+
"*.cjs",
|
|
59
|
+
"*.d.ts",
|
|
60
|
+
"*.node",
|
|
61
|
+
"README.md"
|
|
62
|
+
],
|
|
63
|
+
"napi": {
|
|
64
|
+
"binaryName": "kreuzberg-node",
|
|
65
|
+
"package": {
|
|
66
|
+
"name": "@kreuzberg/node"
|
|
67
|
+
},
|
|
68
|
+
"targets": [
|
|
69
|
+
"x86_64-apple-darwin",
|
|
70
|
+
"aarch64-apple-darwin",
|
|
71
|
+
"x86_64-pc-windows-msvc",
|
|
72
|
+
"aarch64-pc-windows-msvc",
|
|
73
|
+
"x86_64-unknown-linux-gnu",
|
|
74
|
+
"aarch64-unknown-linux-gnu",
|
|
75
|
+
"armv7-unknown-linux-gnueabihf"
|
|
76
|
+
]
|
|
77
|
+
},
|
|
78
|
+
"engines": {
|
|
79
|
+
"node": ">= 22"
|
|
80
|
+
},
|
|
81
|
+
"publishConfig": {
|
|
82
|
+
"registry": "https://registry.npmjs.org/",
|
|
83
|
+
"access": "public"
|
|
84
|
+
},
|
|
85
|
+
"devDependencies": {
|
|
86
|
+
"@napi-rs/cli": "^3.5.0",
|
|
87
|
+
"@types/archiver": "^7.0.0",
|
|
88
|
+
"@types/node": "^24.10.2",
|
|
89
|
+
"@types/which": "^3.0.4",
|
|
90
|
+
"@vitest/coverage-v8": "^4.0.15",
|
|
91
|
+
"archiver": "^7.0.1",
|
|
92
|
+
"oxlint": "^1.32.0",
|
|
93
|
+
"tsup": "^8.5.1",
|
|
94
|
+
"typescript": "^5.9.3",
|
|
95
|
+
"vitest": "^4.0.15"
|
|
96
|
+
},
|
|
97
|
+
"dependencies": {
|
|
98
|
+
"@emnapi/runtime": "1.7.1",
|
|
99
|
+
"which": "^6.0.0"
|
|
100
|
+
},
|
|
101
|
+
"optionalDependencies": {
|
|
102
|
+
"@kreuzberg/node-darwin-x64": "4.0.0-rc.5",
|
|
103
|
+
"@kreuzberg/node-darwin-arm64": "4.0.0-rc.5",
|
|
104
|
+
"@kreuzberg/node-win32-x64-msvc": "4.0.0-rc.5",
|
|
105
|
+
"@kreuzberg/node-win32-arm64-msvc": "4.0.0-rc.5",
|
|
106
|
+
"@kreuzberg/node-linux-x64-gnu": "4.0.0-rc.5",
|
|
107
|
+
"@kreuzberg/node-linux-arm64-gnu": "4.0.0-rc.5",
|
|
108
|
+
"@kreuzberg/node-linux-arm-gnueabihf": "4.0.0-rc.5"
|
|
109
|
+
},
|
|
110
|
+
"scripts": {
|
|
111
|
+
"artifacts": "napi artifacts",
|
|
112
|
+
"build": "napi build --platform --release && tsup",
|
|
113
|
+
"build:debug": "napi build --platform",
|
|
114
|
+
"build:ts": "tsup",
|
|
115
|
+
"test": "pnpm test:smoke && pnpm test:binding",
|
|
116
|
+
"test:smoke": "vitest run tests/smoke/",
|
|
117
|
+
"test:binding": "vitest run tests/binding/",
|
|
118
|
+
"test:cli": "vitest run tests/cli/",
|
|
119
|
+
"test:watch": "vitest",
|
|
120
|
+
"test:coverage": "vitest run --coverage tests/binding/ tests/smoke/",
|
|
121
|
+
"typecheck": "tsc --noEmit",
|
|
122
|
+
"lint": "biome check typescript && oxlint typescript",
|
|
123
|
+
"lint:fix": "biome check --write typescript",
|
|
124
|
+
"format": "biome format --write typescript",
|
|
125
|
+
"universal": "napi universal",
|
|
126
|
+
"version": "napi version"
|
|
127
|
+
}
|
|
128
|
+
}
|