@kreuzberg/node 4.2.14 → 4.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.14" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.15" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -55,7 +55,7 @@
55
55
  </div>
56
56
 
57
57
 
58
- Extract text, tables, images, and metadata from 56 file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
58
+ Extract text, tables, images, and metadata from 62+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
59
59
 
60
60
 
61
61
  ## Installation
@@ -103,7 +103,7 @@ yarn add @kreuzberg/node
103
103
  **Format Support Notes:**
104
104
  - Modern Office formats (DOCX, XLSX, PPTX) work without LibreOffice
105
105
  - Legacy formats (DOC, XLS, PPT) require LibreOffice installation
106
- - WASM binding does NOT support LibreOffice formats (use Node.js for full format support)
106
+ - WASM binding supports DOCX, XLSX, PPTX, and ODT (no LibreOffice required)
107
107
 
108
108
 
109
109
 
@@ -322,9 +322,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
322
322
 
323
323
  ## Features
324
324
 
325
- ### Supported File Formats (56+)
325
+ ### Supported File Formats (62+)
326
326
 
327
- 56 file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
327
+ 62+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
328
328
 
329
329
  #### Office Documents
330
330
 
@@ -341,7 +341,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
341
341
  | Category | Formats | Features |
342
342
  |----------|---------|----------|
343
343
  | **Raster** | `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.tif` | OCR, table detection, EXIF metadata, dimensions, color space |
344
- | **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR, table detection, format-specific metadata |
344
+ | **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.jbig2`, `.jb2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR, JPEG 2000 decoding via hayro-jpeg2000 (pure Rust decoder), JBIG2 support, table detection, format-specific metadata |
345
345
  | **Vector** | `.svg` | DOM parsing, embedded text, graphics metadata |
346
346
 
347
347
  #### Web & Data
@@ -350,7 +350,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
350
350
  |----------|---------|----------|
351
351
  | **Markup** | `.html`, `.htm`, `.xhtml`, `.xml`, `.svg` | DOM parsing, metadata (Open Graph, Twitter Card), link extraction |
352
352
  | **Structured Data** | `.json`, `.yaml`, `.yml`, `.toml`, `.csv`, `.tsv` | Schema detection, nested structures, validation |
353
- | **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, reStructuredText, Org Mode |
353
+ | **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.djot`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, Djot, reStructuredText, Org Mode |
354
354
 
355
355
  #### Email & Archives
356
356
 
@@ -363,7 +363,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
363
363
 
364
364
  | Category | Formats | Features |
365
365
  |----------|---------|----------|
366
- | **Citations** | `.bib`, `.biblatex`, `.ris`, `.enw`, `.csl` | Bibliography parsing, citation extraction |
366
+ | **Citations** | `.bib`, `.biblatex`, `.ris`, `.nbib`, `.enw`, `.csl` | Structured parsing: RIS (structured), PubMed/MEDLINE, EndNote XML (structured), BibTeX, CSL JSON |
367
367
  | **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
368
368
  | **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
369
369
 
package/dist/index.d.mts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.14";
1203
+ declare const __version__ = "4.2.15";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
1200
1200
  * @module @kreuzberg/node
1201
1201
  */
1202
1202
 
1203
- declare const __version__ = "4.2.14";
1203
+ declare const __version__ = "4.2.15";
1204
1204
 
1205
1205
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.js CHANGED
@@ -1372,7 +1372,7 @@ function getEmbeddingPreset(name) {
1372
1372
  }
1373
1373
 
1374
1374
  // typescript/index.ts
1375
- var __version__ = "4.2.14";
1375
+ var __version__ = "4.2.15";
1376
1376
  // Annotate the CommonJS export names for ESM import in node:
1377
1377
  0 && (module.exports = {
1378
1378
  CacheError,