@kreuzberg/node 4.6.3 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.6.3" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.7.0" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -42,13 +42,16 @@
42
42
 
43
43
  <!-- Project Info -->
44
44
  <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
45
- <img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
45
+ <img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License">
46
46
  </a>
47
47
  <a href="https://docs.kreuzberg.dev">
48
- <img src="https://img.shields.io/badge/docs-kreuzberg.dev-blue" alt="Documentation">
48
+ <img src="https://img.shields.io/badge/docs-kreuzberg.dev-007ec6" alt="Documentation">
49
+ </a>
50
+ <a href="https://docs.kreuzberg.dev/demo.html">
51
+ <img src="https://img.shields.io/badge/%E2%96%B6%EF%B8%8F_Live_Demo-007ec6" alt="Live Demo">
49
52
  </a>
50
53
  <a href="https://huggingface.co/Kreuzberg">
51
- <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-yellow" alt="Hugging Face">
54
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97_Hugging_Face-007ec6" alt="Hugging Face">
52
55
  </a>
53
56
  </div>
54
57
 
@@ -61,7 +64,7 @@
61
64
  </div>
62
65
 
63
66
 
64
- Extract text, tables, images, and metadata from 91+ file formats including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
67
+ Extract text, tables, images, and metadata from 91+ file formats (including PDF, Office documents, and images) and 248 programming languages. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.
65
68
 
66
69
 
67
70
  ## Installation
@@ -74,6 +77,7 @@ Install via one of the supported package managers:
74
77
 
75
78
 
76
79
  **npm:**
80
+
77
81
  ```bash
78
82
  npm install @kreuzberg/node
79
83
  ```
@@ -82,6 +86,7 @@ npm install @kreuzberg/node
82
86
 
83
87
 
84
88
  **pnpm:**
89
+
85
90
  ```bash
86
91
  pnpm add @kreuzberg/node
87
92
  ```
@@ -90,6 +95,7 @@ pnpm add @kreuzberg/node
90
95
 
91
96
 
92
97
  **yarn:**
98
+
93
99
  ```bash
94
100
  yarn add @kreuzberg/node
95
101
  ```
@@ -107,6 +113,7 @@ yarn add @kreuzberg/node
107
113
  ### Platform Support
108
114
 
109
115
  Pre-built binaries available for:
116
+
110
117
  - macOS (arm64, x64)
111
118
  - Linux (x64)
112
119
  - Windows (x64)
@@ -268,12 +275,14 @@ try {
268
275
 
269
276
 
270
277
  **Performance Benefits:**
278
+
271
279
  - **Parallel Processing**: Multiple documents extracted simultaneously
272
280
  - **CPU Utilization**: Maximizes multi-core CPU usage for large batches
273
281
  - **Queue Management**: Automatically distributes work across available workers
274
282
  - **Resource Control**: Prevents thread exhaustion with configurable pool size
275
283
 
276
284
  **Best Practices:**
285
+
277
286
  - Use worker pools for batches of 10+ documents
278
287
  - Set pool size to number of CPU cores (default behavior)
279
288
  - Always close pools with `closeWorkerPool()` to prevent resource leaks
@@ -366,6 +375,19 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
366
375
  | **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
367
376
  | **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
368
377
 
378
+ #### Code Intelligence (248 Languages)
379
+
380
+ | Feature | Description |
381
+ |---------|-------------|
382
+ | **Structure Extraction** | Functions, classes, methods, structs, interfaces, enums |
383
+ | **Import/Export Analysis** | Module dependencies, re-exports, wildcard imports |
384
+ | **Symbol Extraction** | Variables, constants, type aliases, properties |
385
+ | **Docstring Parsing** | Google, NumPy, Sphinx, JSDoc, RustDoc, and 10+ formats |
386
+ | **Diagnostics** | Parse errors with line/column positions |
387
+ | **Syntax-Aware Chunking** | Split code by semantic boundaries, not arbitrary byte offsets |
388
+
389
+ Powered by [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — [documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev).
390
+
369
391
  **[Complete Format Reference](https://kreuzberg.dev/reference/formats/)**
370
392
 
371
393
  ### Key Capabilities
@@ -387,6 +409,9 @@ This binding uses NAPI-RS to provide native Node.js bindings with:
387
409
  - **Batch Processing** - Efficiently process multiple documents in parallel
388
410
  - **Memory Efficient** - Stream large files without loading entirely into memory
389
411
  - **Language Detection** - Detect and support multiple languages in documents
412
+
413
+ - **Code Intelligence** - Extract structure, imports, exports, symbols, and docstrings from [248 programming languages](https://docs.tree-sitter-language-pack.kreuzberg.dev) via tree-sitter
414
+
390
415
  - **Configuration** - Fine-grained control over extraction behavior
391
416
 
392
417
  ### Performance Characteristics
package/dist/cli.js CHANGED
@@ -44,8 +44,8 @@ function getDirectory() {
44
44
  return (0, import_node_path.dirname)(__filename);
45
45
  }
46
46
  try {
47
- const url = eval("import.meta.url");
48
- return (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(url));
47
+ const getUrl = new Function("return import.meta.url");
48
+ return (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(getUrl()));
49
49
  } catch {
50
50
  return process.cwd();
51
51
  }
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM\n\ttry {\n\t\t// Use eval to avoid esbuild warnings about import.meta in CJS builds\n\t\t// oxlint-disable-next-line no-eval -- Required for CJS/ESM compat\n\t\t// biome-ignore lint/security/noGlobalEval: CJS/ESM compat\n\t\tconst url = eval(\"import.meta.url\");\n\t\treturn dirname(fileURLToPath(url));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. 
\" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQA,gCAA0B;AAC1B,qBAA2B;AAC3B,uBAA8B;AAC9B,sBAA8B;AAC9B,mBAAkB;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,eAAO,0BAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AAIH,UAAM,MAAM,KAAK,iBAAiB;AAClC,eAAO,8BAAQ,+BAAc,GAAG,CAAC;AAAA,EAClC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,aAAAA,QAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,gBAAY,uBAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,YAAI,2BAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAS,qCAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":["which"]}
1
+ {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM: use Function constructor to avoid static analysis warnings\n\ttry {\n\t\tconst getUrl = new Function(\"return import.meta.url\");\n\t\treturn dirname(fileURLToPath(getUrl()));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 
1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQA,gCAA0B;AAC1B,qBAA2B;AAC3B,uBAA8B;AAC9B,sBAA8B;AAC9B,mBAAkB;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,eAAO,0BAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AACH,UAAM,SAAS,IAAI,SAAS,wBAAwB;AACpD,eAAO,8BAAQ,+BAAc,OAAO,CAAC,CAAC;AAAA,EACvC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,aAAAA,QAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,gBAAY,uBAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,YAAI,2BAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAS,qCAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":["which"]}
package/dist/cli.mjs CHANGED
@@ -17,8 +17,8 @@ function getDirectory() {
17
17
  return dirname(__filename);
18
18
  }
19
19
  try {
20
- const url = eval("import.meta.url");
21
- return dirname(fileURLToPath(url));
20
+ const getUrl = new Function("return import.meta.url");
21
+ return dirname(fileURLToPath(getUrl()));
22
22
  } catch {
23
23
  return process.cwd();
24
24
  }
package/dist/cli.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM\n\ttry {\n\t\t// Use eval to avoid esbuild warnings about import.meta in CJS builds\n\t\t// oxlint-disable-next-line no-eval -- Required for CJS/ESM compat\n\t\t// biome-ignore lint/security/noGlobalEval: CJS/ESM compat\n\t\tconst url = eval(\"import.meta.url\");\n\t\treturn dirname(fileURLToPath(url));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. 
\" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;AAQA,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,OAAO,WAAW;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,WAAO,QAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AAIH,UAAM,MAAM,KAAK,iBAAiB;AAClC,WAAO,QAAQ,cAAc,GAAG,CAAC;AAAA,EAClC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,MAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,YAAY,KAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,QAAI,WAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,SAAS,UAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,UAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":[]}
1
+ {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/**\n * Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n *\n * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string | undefined;\n\tvar __dirname: string | undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM: use Function constructor to avoid static analysis warnings\n\ttry {\n\t\tconst getUrl = new Function(\"return import.meta.url\");\n\t\treturn dirname(fileURLToPath(getUrl()));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string | undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 
1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;AAQA,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,OAAO,WAAW;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,WAAO,QAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AACH,UAAM,SAAS,IAAI,SAAS,wBAAwB;AACpD,WAAO,QAAQ,cAAc,OAAO,CAAC,CAAC;AAAA,EACvC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,MAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,YAAY,KAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,QAAI,WAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,SAAS,UAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,UAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":[]}
package/dist/index.d.mts CHANGED
@@ -1282,6 +1282,6 @@ declare function __resetBindingForTests(): void;
1282
1282
  * @module @kreuzberg/node
1283
1283
  */
1284
1284
 
1285
- declare const __version__ = "4.6.3";
1285
+ declare const __version__ = "4.7.0";
1286
1286
 
1287
1287
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PdfPageIterator, type PdfPageResult, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, iteratePdfPages, iteratePdfPagesSync, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, pdfPageCount, registerOcrBackend, registerPostProcessor, registerValidator, renderPdfPage, renderPdfPageSync, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.d.ts CHANGED
@@ -1282,6 +1282,6 @@ declare function __resetBindingForTests(): void;
1282
1282
  * @module @kreuzberg/node
1283
1283
  */
1284
1284
 
1285
- declare const __version__ = "4.6.3";
1285
+ declare const __version__ = "4.7.0";
1286
1286
 
1287
1287
  export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PdfPageIterator, type PdfPageResult, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, iteratePdfPages, iteratePdfPagesSync, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, loadConfigFile, loadConfigFromPath, pdfPageCount, registerOcrBackend, registerPostProcessor, registerValidator, renderPdfPage, renderPdfPageSync, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
package/dist/index.js CHANGED
@@ -585,6 +585,7 @@ function convertChunk(rawChunk) {
585
585
  if (!rawChunk || typeof rawChunk !== "object") {
586
586
  return {
587
587
  content: "",
588
+ chunkType: null,
588
589
  metadata: {
589
590
  byteStart: 0,
590
591
  byteEnd: 0,
@@ -599,6 +600,7 @@ function convertChunk(rawChunk) {
599
600
  const metadata = chunk["metadata"] ?? {};
600
601
  return {
601
602
  content: chunk["content"] ?? "",
603
+ chunkType: chunk["chunk_type"] ?? chunk["chunkType"] ?? null,
602
604
  embedding: chunk["embedding"] ?? null,
603
605
  metadata: {
604
606
  byteStart: metadata["byte_start"] ?? metadata["charStart"] ?? 0,
@@ -1232,7 +1234,7 @@ function getEmbeddingPreset(name) {
1232
1234
  }
1233
1235
 
1234
1236
  // typescript/index.ts
1235
- var __version__ = "4.6.3";
1237
+ var __version__ = "4.7.0";
1236
1238
  // Annotate the CommonJS export names for ESM import in node:
1237
1239
  0 && (module.exports = {
1238
1240
  CacheError,