@kreuzberg/node 4.6.3 → 4.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -5
- package/dist/cli.js +2 -2
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +2 -2
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3 -1
- package/dist/index.mjs.map +1 -1
- package/dist/types.d.mts +5 -1
- package/dist/types.d.ts +5 -1
- package/dist/types.js.map +1 -1
- package/index.d.ts +73 -12
- package/index.js +52 -52
- package/package.json +9 -9
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.3" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.7.1" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -42,13 +42,16 @@
|
|
|
42
42
|
|
|
43
43
|
<!-- Project Info -->
|
|
44
44
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
|
45
|
-
<img src="https://img.shields.io/badge/License-MIT-
|
|
45
|
+
<img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License">
|
|
46
46
|
</a>
|
|
47
47
|
<a href="https://docs.kreuzberg.dev">
|
|
48
|
-
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-
|
|
48
|
+
<img src="https://img.shields.io/badge/docs-kreuzberg.dev-007ec6" alt="Documentation">
|
|
49
|
+
</a>
|
|
50
|
+
<a href="https://docs.kreuzberg.dev/demo.html">
|
|
51
|
+
<img src="https://img.shields.io/badge/%E2%96%B6%EF%B8%8F_Live_Demo-007ec6" alt="Live Demo">
|
|
49
52
|
</a>
|
|
50
53
|
<a href="https://huggingface.co/Kreuzberg">
|
|
51
|
-
<img src="https://img.shields.io/badge/%F0%9F%A4%
|
|
54
|
+
<img src="https://img.shields.io/badge/%F0%9F%A4%97_Hugging_Face-007ec6" alt="Hugging Face">
|
|
52
55
|
</a>
|
|
53
56
|
</div>
|
|
54
57
|
|
|
@@ -61,7 +64,7 @@
|
|
|
61
64
|
</div>
|
|
62
65
|
|
|
63
66
|
|
|
64
|
-
Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
|
|
67
|
+
Extract text, tables, images, and metadata from 91+ file formats and 248 programming languages including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
|
|
65
68
|
|
|
66
69
|
|
|
67
70
|
## Installation
|
|
@@ -74,6 +77,7 @@ Install via one of the supported package managers:
|
|
|
74
77
|
|
|
75
78
|
|
|
76
79
|
**npm:**
|
|
80
|
+
|
|
77
81
|
```bash
|
|
78
82
|
npm install @kreuzberg/node
|
|
79
83
|
```
|
|
@@ -82,6 +86,7 @@ npm install @kreuzberg/node
|
|
|
82
86
|
|
|
83
87
|
|
|
84
88
|
**pnpm:**
|
|
89
|
+
|
|
85
90
|
```bash
|
|
86
91
|
pnpm add @kreuzberg/node
|
|
87
92
|
```
|
|
@@ -90,6 +95,7 @@ pnpm add @kreuzberg/node
|
|
|
90
95
|
|
|
91
96
|
|
|
92
97
|
**yarn:**
|
|
98
|
+
|
|
93
99
|
```bash
|
|
94
100
|
yarn add @kreuzberg/node
|
|
95
101
|
```
|
|
@@ -107,6 +113,7 @@ yarn add @kreuzberg/node
|
|
|
107
113
|
### Platform Support
|
|
108
114
|
|
|
109
115
|
Pre-built binaries available for:
|
|
116
|
+
|
|
110
117
|
- macOS (arm64, x64)
|
|
111
118
|
- Linux (x64)
|
|
112
119
|
- Windows (x64)
|
|
@@ -268,12 +275,14 @@ try {
|
|
|
268
275
|
|
|
269
276
|
|
|
270
277
|
**Performance Benefits:**
|
|
278
|
+
|
|
271
279
|
- **Parallel Processing**: Multiple documents extracted simultaneously
|
|
272
280
|
- **CPU Utilization**: Maximizes multi-core CPU usage for large batches
|
|
273
281
|
- **Queue Management**: Automatically distributes work across available workers
|
|
274
282
|
- **Resource Control**: Prevents thread exhaustion with configurable pool size
|
|
275
283
|
|
|
276
284
|
**Best Practices:**
|
|
285
|
+
|
|
277
286
|
- Use worker pools for batches of 10+ documents
|
|
278
287
|
- Set pool size to number of CPU cores (default behavior)
|
|
279
288
|
- Always close pools with `closeWorkerPool()` to prevent resource leaks
|
|
@@ -366,6 +375,19 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
366
375
|
| **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
|
|
367
376
|
| **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
|
|
368
377
|
|
|
378
|
+
#### Code Intelligence (248 Languages)
|
|
379
|
+
|
|
380
|
+
| Feature | Description |
|
|
381
|
+
|---------|-------------|
|
|
382
|
+
| **Structure Extraction** | Functions, classes, methods, structs, interfaces, enums |
|
|
383
|
+
| **Import/Export Analysis** | Module dependencies, re-exports, wildcard imports |
|
|
384
|
+
| **Symbol Extraction** | Variables, constants, type aliases, properties |
|
|
385
|
+
| **Docstring Parsing** | Google, NumPy, Sphinx, JSDoc, RustDoc, and 10+ formats |
|
|
386
|
+
| **Diagnostics** | Parse errors with line/column positions |
|
|
387
|
+
| **Syntax-Aware Chunking** | Split code by semantic boundaries, not arbitrary byte offsets |
|
|
388
|
+
|
|
389
|
+
Powered by [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — [documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev).
|
|
390
|
+
|
|
369
391
|
**[Complete Format Reference](https://kreuzberg.dev/reference/formats/)**
|
|
370
392
|
|
|
371
393
|
### Key Capabilities
|
|
@@ -387,6 +409,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
|
|
|
387
409
|
- **Batch Processing** - Efficiently process multiple documents in parallel
|
|
388
410
|
- **Memory Efficient** - Stream large files without loading entirely into memory
|
|
389
411
|
- **Language Detection** - Detect and support multiple languages in documents
|
|
412
|
+
|
|
413
|
+
- **Code Intelligence** - Extract structure, imports, exports, symbols, and docstrings from [248 programming languages](https://docs.tree-sitter-language-pack.kreuzberg.dev) via tree-sitter
|
|
414
|
+
|
|
390
415
|
- **Configuration** - Fine-grained control over extraction behavior
|
|
391
416
|
|
|
392
417
|
### Performance Characteristics
|
package/dist/cli.js
CHANGED
|
@@ -44,8 +44,8 @@ function getDirectory() {
|
|
|
44
44
|
return (0, import_node_path.dirname)(__filename);
|
|
45
45
|
}
|
|
46
46
|
try {
|
|
47
|
-
const
|
|
48
|
-
return (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(
|
|
47
|
+
const getUrl = new Function("return import.meta.url");
|
|
48
|
+
return (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(getUrl()));
|
|
49
49
|
} catch {
|
|
50
50
|
return process.cwd();
|
|
51
51
|
}
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM
|
|
1
|
+
{"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM: use Function constructor to avoid static analysis warnings\n\ttry {\n\t\tconst getUrl = new Function(\"return import.meta.url\");\n\t\treturn dirname(fileURLToPath(getUrl()));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 
1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQA,gCAA0B;AAC1B,qBAA2B;AAC3B,uBAA8B;AAC9B,sBAA8B;AAC9B,mBAAkB;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,eAAO,0BAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AACH,UAAM,SAAS,IAAI,SAAS,wBAAwB;AACpD,eAAO,8BAAQ,+BAAc,OAAO,CAAC,CAAC;AAAA,EACvC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,aAAAA,QAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,gBAAY,uBAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,YAAI,2BAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAS,qCAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":["which"]}
|
package/dist/cli.mjs
CHANGED
|
@@ -17,8 +17,8 @@ function getDirectory() {
|
|
|
17
17
|
return dirname(__filename);
|
|
18
18
|
}
|
|
19
19
|
try {
|
|
20
|
-
const
|
|
21
|
-
return dirname(fileURLToPath(
|
|
20
|
+
const getUrl = new Function("return import.meta.url");
|
|
21
|
+
return dirname(fileURLToPath(getUrl()));
|
|
22
22
|
} catch {
|
|
23
23
|
return process.cwd();
|
|
24
24
|
}
|
package/dist/cli.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM
|
|
1
|
+
{"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM: use Function constructor to avoid static analysis warnings\n\ttry {\n\t\tconst getUrl = new Function(\"return import.meta.url\");\n\t\treturn dirname(fileURLToPath(getUrl()));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 
1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;AAQA,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,OAAO,WAAW;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,WAAO,QAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AACH,UAAM,SAAS,IAAI,SAAS,wBAAwB;AACpD,WAAO,QAAQ,cAAc,OAAO,CAAC,CAAC;AAAA,EACvC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,MAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,YAAY,KAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,QAAI,WAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,SAAS,UAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,UAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":[]}
|
package/dist/index.d.mts
CHANGED
|
@@ -1282,6 +1282,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1282
1282
|
* @module @kreuzberg/node
|
|
1283
1283
|
*/
|
|
1284
1284
|
|
|
1285
|
-
declare const __version__ = "4.6.3";
|
|
1285
|
+
declare const __version__ = "4.7.1";
|
|
1286
1286
|
|
|
1287
1287
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PdfPageIterator, type PdfPageResult, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, iteratePdfPages, iteratePdfPagesSync, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, pdfPageCount, registerOcrBackend, registerPostProcessor, registerValidator, renderPdfPage, renderPdfPageSync, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.d.ts
CHANGED
|
@@ -1282,6 +1282,6 @@ declare function __resetBindingForTests(): void;
|
|
|
1282
1282
|
* @module @kreuzberg/node
|
|
1283
1283
|
*/
|
|
1284
1284
|
|
|
1285
|
-
declare const __version__ = "4.6.3";
|
|
1285
|
+
declare const __version__ = "4.7.1";
|
|
1286
1286
|
|
|
1287
1287
|
export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PdfPageIterator, type PdfPageResult, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, iteratePdfPages, iteratePdfPagesSync, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, pdfPageCount, registerOcrBackend, registerPostProcessor, registerValidator, renderPdfPage, renderPdfPageSync, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
|
package/dist/index.js
CHANGED
|
@@ -585,6 +585,7 @@ function convertChunk(rawChunk) {
|
|
|
585
585
|
if (!rawChunk || typeof rawChunk !== "object") {
|
|
586
586
|
return {
|
|
587
587
|
content: "",
|
|
588
|
+
chunkType: null,
|
|
588
589
|
metadata: {
|
|
589
590
|
byteStart: 0,
|
|
590
591
|
byteEnd: 0,
|
|
@@ -599,6 +600,7 @@ function convertChunk(rawChunk) {
|
|
|
599
600
|
const metadata = chunk["metadata"] ?? {};
|
|
600
601
|
return {
|
|
601
602
|
content: chunk["content"] ?? "",
|
|
603
|
+
chunkType: chunk["chunk_type"] ?? chunk["chunkType"] ?? null,
|
|
602
604
|
embedding: chunk["embedding"] ?? null,
|
|
603
605
|
metadata: {
|
|
604
606
|
byteStart: metadata["byte_start"] ?? metadata["charStart"] ?? 0,
|
|
@@ -1232,7 +1234,7 @@ function getEmbeddingPreset(name) {
|
|
|
1232
1234
|
}
|
|
1233
1235
|
|
|
1234
1236
|
// typescript/index.ts
|
|
1235
|
-
var __version__ = "4.6.3";
|
|
1237
|
+
var __version__ = "4.7.1";
|
|
1236
1238
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1237
1239
|
0 && (module.exports = {
|
|
1238
1240
|
CacheError,
|