@kreuzberg/node 4.2.14 → 4.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/index.js +52 -52
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.14" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.2.15" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
</div>
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
Extract text, tables, images, and metadata from
|
|
58
|
+
Extract text, tables, images, and metadata from 62+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
## Installation
|
|
@@ -103,7 +103,7 @@ yarn add @kreuzberg/node
|
|
|
103
103
|
**Format Support Notes:**
|
|
104
104
|
- Modern Office formats (DOCX, XLSX, PPTX) work without LibreOffice
|
|
105
105
|
- Legacy formats (DOC, XLS, PPT) require LibreOffice installation
|
|
106
|
-
- WASM binding
|
|
106
|
+
- WASM binding supports DOCX, XLSX, PPTX, and ODT (no LibreOffice required)
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
|
|
@@ -322,9 +322,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
322
322
|
|
|
323
323
|
## Features
|
|
324
324
|
|
|
325
|
-
### Supported File Formats (
|
|
325
|
+
### Supported File Formats (62+)
|
|
326
326
|
|
|
327
|
-
|
|
327
|
+
62+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
|
|
328
328
|
|
|
329
329
|
#### Office Documents
|
|
330
330
|
|
|
@@ -341,7 +341,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
341
341
|
| Category | Formats | Features |
|
|
342
342
|
|----------|---------|----------|
|
|
343
343
|
| **Raster** | `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.tif` | OCR, table detection, EXIF metadata, dimensions, color space |
|
|
344
|
-
| **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR, table detection, format-specific metadata |
|
|
344
|
+
| **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.jbig2`, `.jb2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR via hayro-jpeg2000 (pure Rust decoder), JBIG2 support, table detection, format-specific metadata |
|
|
345
345
|
| **Vector** | `.svg` | DOM parsing, embedded text, graphics metadata |
|
|
346
346
|
|
|
347
347
|
#### Web & Data
|
|
@@ -350,7 +350,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
350
350
|
|----------|---------|----------|
|
|
351
351
|
| **Markup** | `.html`, `.htm`, `.xhtml`, `.xml`, `.svg` | DOM parsing, metadata (Open Graph, Twitter Card), link extraction |
|
|
352
352
|
| **Structured Data** | `.json`, `.yaml`, `.yml`, `.toml`, `.csv`, `.tsv` | Schema detection, nested structures, validation |
|
|
353
|
-
| **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, reStructuredText, Org Mode |
|
|
353
|
+
| **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.djot`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, Djot, reStructuredText, Org Mode |
|
|
354
354
|
|
|
355
355
|
#### Email & Archives
|
|
356
356
|
|
|
@@ -363,7 +363,7 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
363
363
|
|
|
364
364
|
| Category | Formats | Features |
|
|
365
365
|
|----------|---------|----------|
|
|
366
|
-
| **Citations** | `.bib`, `.biblatex`, `.ris`, `.enw`, `.csl` |
|
|
366
|
+
| **Citations** | `.bib`, `.biblatex`, `.ris`, `.nbib`, `.enw`, `.csl` | Structured parsing: RIS (structured), PubMed/MEDLINE, EndNote XML (structured), BibTeX, CSL JSON |
|
|
367
367
|
| **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
|
|
368
368
|
| **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
|
|
369
369
|
|
package/dist/index.d.mts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.14";
|
|
1203
|
+
declare const __version__ = "4.2.15";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -1200,6 +1200,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1200
1200
|
* @module @kreuzberg/node
|
|
1201
1201
|
*/
|
|
1202
1202
|
|
|
1203
|
-
declare const __version__ = "4.2.14";
|
|
1203
|
+
declare const __version__ = "4.2.15";
|
|
1204
1204
|
|
|
1205
1205
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.js
CHANGED
|
@@ -1372,7 +1372,7 @@ function getEmbeddingPreset(name) {
|
|
|
1372
1372
|
}
|
|
1373
1373
|
|
|
1374
1374
|
// typescript/index.ts
|
|
1375
|
-
var __version__ = "4.2.14";
|
|
1375
|
+
var __version__ = "4.2.15";
|
|
1376
1376
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1377
1377
|
0 && (module.exports = {
|
|
1378
1378
|
CacheError,
|