npm - @kreuzberg/node - Versions diffs - 4.0.0-rc.21 → 4.0.0-rc.24 - Mend

@kreuzberg/node 4.0.0-rc.21 → 4.0.0-rc.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist/cli.d.mts CHANGED Viewed

@@ -4,6 +4,10 @@
  *
  * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.
  */
+declare global {
+    var __filename: string | undefined;
+    var __dirname: string | undefined;
+}
 declare function main(argv: string[]): number;
 export { main };

package/dist/cli.d.ts CHANGED Viewed

@@ -4,6 +4,10 @@
  *
  * This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.
  */
+declare global {
+    var __filename: string | undefined;
+    var __dirname: string | undefined;
+}
 declare function main(argv: string[]): number;
 export { main };

package/dist/cli.js CHANGED Viewed

@@ -37,7 +37,17 @@ var import_node_fs = require("node:fs");
 var import_node_path = require("node:path");
 var import_node_url = require("node:url");
 var import_which = __toESM(require("which"));
-const import_meta = {};
+function getDirectory() {
+  if (typeof __filename !== "undefined") {
+    return (0, import_node_path.dirname)(__filename);
+  }
+  try {
+    const url = eval("import.meta.url");
+    return (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(url));
+  } catch {
+    return process.cwd();
+  }
+}
 function main(argv) {
   const args = argv.slice(2);
   let cliPath;
@@ -46,7 +56,7 @@ function main(argv) {
   } catch {
   }
   if (!cliPath) {
-    const __dirname = typeof __filename !== "undefined" ? (0, import_node_path.dirname)(__filename) : (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(import_meta.url));
+    const __dirname = getDirectory();
     const devBinary = (0, import_node_path.join)(__dirname, "..", "..", "..", "target", "release", "kreuzberg");
     if ((0, import_node_fs.existsSync)(devBinary)) {
       cliPath = devBinary;

package/dist/cli.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/*\n Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n \n This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string \| undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = ~~typeof __filename !== \"undefined\" ? dirname~~(~~__filename~~) ~~: dirname(fileURLToPath(import.meta.url))~~;\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQA,gCAA0B;AAC1B,qBAA2B;AAC3B,uBAA8B;AAC9B,sBAA8B;AAC9B,mBAAkB;~~AAZlB~~;~~AAcA~~,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,aAAAA,QAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,~~OAAO,eAAe,kBAAc,0BAAQ,UAAU,QAAI,8BAAQ,+BAAc,YAAY,GAAG,CAAC~~;~~AAClH~~,UAAM,gBAAY,uBAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,YAAI,2BAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAS,qCAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":["which"]}
1	+ {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/*\n Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n \n This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string \| undefined;\n\tvar __dirname: string \| undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM\n\ttry {\n\t\t// Use eval to avoid esbuild warnings about import.meta in CJS builds\n\t\t// @ts-ignore - import.meta is only available in ESM\n\t\tconst url = eval(\"import.meta.url\");\n\t\treturn dirname(fileURLToPath(url));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string \| undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQA,gCAA0B;AAC1B,qBAA2B;AAC3B,uBAA8B;AAC9B,sBAA8B;AAC9B,mBAAkB;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,eAAO,0BAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AAGH,UAAM,MAAM,KAAK,iBAAiB;AAClC,eAAO,8BAAQ,+BAAc,GAAG,CAAC;AAAA,EAClC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,aAAAA,QAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,gBAAY,uBAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,YAAI,2BAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,aAAS,qCAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":["which"]}

package/dist/cli.mjs CHANGED Viewed

@@ -4,6 +4,17 @@ import { existsSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import which from "which";
+function getDirectory() {
+  if (typeof __filename !== "undefined") {
+    return dirname(__filename);
+  }
+  try {
+    const url = eval("import.meta.url");
+    return dirname(fileURLToPath(url));
+  } catch {
+    return process.cwd();
+  }
+}
 function main(argv) {
   const args = argv.slice(2);
   let cliPath;
@@ -12,7 +23,7 @@ function main(argv) {
   } catch {
   }
   if (!cliPath) {
-    const __dirname = typeof __filename !== "undefined" ? dirname(__filename) : dirname(fileURLToPath(import.meta.url));
+    const __dirname = getDirectory();
     const devBinary = join(__dirname, "..", "..", "..", "target", "release", "kreuzberg");
     if (existsSync(devBinary)) {
       cliPath = devBinary;

package/dist/cli.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/*\n Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n \n This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string \| undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = ~~typeof __filename !== \"undefined\" ? dirname~~(~~__filename~~) ~~: dirname(fileURLToPath(import.meta.url))~~;\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";AAQA,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,OAAO,WAAW;~~AAElB~~,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,MAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,~~OAAO,eAAe,cAAc,QAAQ,UAAU,IAAI,QAAQ,cAAc,YAAY,GAAG,CAAC~~;~~AAClH~~,UAAM,YAAY,KAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,QAAI,WAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,SAAS,UAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":[]}
1	+ {"version":3,"sources":["../typescript/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/*\n Proxy entry point that forwards to the Rust-based Kreuzberg CLI.\n \n This keeps `npx kreuzberg` working without shipping an additional TypeScript CLI implementation.\n */\n\nimport { spawnSync } from \"node:child_process\";\nimport { existsSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport which from \"which\";\n\ndeclare global {\n\tvar __filename: string \| undefined;\n\tvar __dirname: string \| undefined;\n}\n\nfunction getDirectory(): string {\n\t// In CJS, __filename will be defined\n\tif (typeof __filename !== \"undefined\") {\n\t\treturn dirname(__filename);\n\t}\n\t// Fallback for ESM\n\ttry {\n\t\t// Use eval to avoid esbuild warnings about import.meta in CJS builds\n\t\t// @ts-ignore - import.meta is only available in ESM\n\t\tconst url = eval(\"import.meta.url\");\n\t\treturn dirname(fileURLToPath(url));\n\t} catch {\n\t\treturn process.cwd();\n\t}\n}\n\nfunction main(argv: string[]): number {\n\tconst args = argv.slice(2);\n\n\tlet cliPath: string \| undefined;\n\ttry {\n\t\tcliPath = which.sync(\"kreuzberg-cli\");\n\t} catch {}\n\n\tif (!cliPath) {\n\t\tconst __dirname = getDirectory();\n\t\tconst devBinary = join(__dirname, \"..\", \"..\", \"..\", \"target\", \"release\", \"kreuzberg\");\n\t\tif (existsSync(devBinary)) {\n\t\t\tcliPath = devBinary;\n\t\t}\n\t}\n\n\tif (!cliPath) {\n\t\tconsole.error(\n\t\t\t\"The embedded Kreuzberg CLI binary could not be located. \" +\n\t\t\t\t\"This indicates a packaging issue; please open an issue at \" +\n\t\t\t\t\"https://github.com/kreuzberg-dev/kreuzberg/issues so we can investigate.\",\n\t\t);\n\t\treturn 1;\n\t}\n\n\tconst result = spawnSync(cliPath, args, {\n\t\tstdio: \"inherit\",\n\t\tshell: false,\n\t});\n\n\tif (result.error) {\n\t\tconsole.error(`Failed to execute kreuzberg-cli: ${result.error.message}`);\n\t\treturn 1;\n\t}\n\n\treturn result.status ?? 1;\n}\n\nif (require.main === module) {\n\tprocess.exit(main(process.argv));\n}\n\nexport { main };\n"],"mappings":";AAQA,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,OAAO,WAAW;AAOlB,SAAS,eAAuB;AAE/B,MAAI,OAAO,eAAe,aAAa;AACtC,WAAO,QAAQ,UAAU;AAAA,EAC1B;AAEA,MAAI;AAGH,UAAM,MAAM,KAAK,iBAAiB;AAClC,WAAO,QAAQ,cAAc,GAAG,CAAC;AAAA,EAClC,QAAQ;AACP,WAAO,QAAQ,IAAI;AAAA,EACpB;AACD;AAEA,SAAS,KAAK,MAAwB;AACrC,QAAM,OAAO,KAAK,MAAM,CAAC;AAEzB,MAAI;AACJ,MAAI;AACH,cAAU,MAAM,KAAK,eAAe;AAAA,EACrC,QAAQ;AAAA,EAAC;AAET,MAAI,CAAC,SAAS;AACb,UAAM,YAAY,aAAa;AAC/B,UAAM,YAAY,KAAK,WAAW,MAAM,MAAM,MAAM,UAAU,WAAW,WAAW;AACpF,QAAI,WAAW,SAAS,GAAG;AAC1B,gBAAU;AAAA,IACX;AAAA,EACD;AAEA,MAAI,CAAC,SAAS;AACb,YAAQ;AAAA,MACP;AAAA,IAGD;AACA,WAAO;AAAA,EACR;AAEA,QAAM,SAAS,UAAU,SAAS,MAAM;AAAA,IACvC,OAAO;AAAA,IACP,OAAO;AAAA,EACR,CAAC;AAED,MAAI,OAAO,OAAO;AACjB,YAAQ,MAAM,oCAAoC,OAAO,MAAM,OAAO,EAAE;AACxE,WAAO;AAAA,EACR;AAEA,SAAO,OAAO,UAAU;AACzB;AAEA,IAAI,QAAQ,SAAS,QAAQ;AAC5B,UAAQ,KAAK,KAAK,QAAQ,IAAI,CAAC;AAChC;","names":[]}

package/dist/index.d.mts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { PanicContext } from './errors.mjs';
 export { CacheError, ErrorCode, ImageProcessingError, KreuzbergError, MissingDependencyError, OcrError, ParsingError, PluginError, ValidationError } from './errors.mjs';
-import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, OcrConfig, ChunkingConfig, ImageExtractionConfig, PdfConfig, KeywordConfig, LanguageDetectionConfig, ErrorClassification } from './types.mjs';
-export { ArchiveMetadata, Chunk, ChunkMetadata, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, HtmlConversionOptions, HtmlMetadata, HtmlPreprocessingOptions, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, Metadata, OcrMetadata, PageBoundary, PageConfig, PageContent, PageInfo, PageStructure, PageUnitType, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.mjs';
+import { ExtractionConfig as ExtractionConfig$1, ExtractionResult, PostProcessorProtocol, ValidatorProtocol, OcrBackendProtocol, ErrorClassification, WorkerPool, WorkerPoolStats } from './types.mjs';
+export { ArchiveMetadata, Chunk, ChunkMetadata, ChunkingConfig, EmailMetadata, ErrorMetadata, ExcelMetadata, ExtractedImage, ExtractedKeyword, HeaderMetadata, HierarchyConfig, HtmlConversionOptions, HtmlImageMetadata, HtmlMetadata, HtmlPreprocessingOptions, ImageExtractionConfig, ImageMetadata, ImagePreprocessingMetadata, KeywordAlgorithm, KeywordConfig, LanguageDetectionConfig, LinkMetadata, Metadata, OcrConfig, OcrMetadata, PageBoundary, PageContent, PageExtractionConfig, PageInfo, PageStructure, PageUnitType, PdfConfig, PdfMetadata, PostProcessorConfig, PptxMetadata, ProcessingStage, RakeParams, StructuredData, Table, TesseractConfig, TextMetadata, TokenReductionConfig, XmlMetadata, YakeParams } from './types.mjs';
 export { GutenOcrBackend } from './ocr/guten-ocr.mjs';
 /**
@@ -610,72 +610,12 @@ declare function unregisterDocumentExtractor(name: string): void;
  */
 declare function clearDocumentExtractors(): void;
 /**
- * Builder class for creating ExtractionConfig objects with a fluent API.
- *
- * Provides a convenient way to build extraction configurations using method chaining.
- *
- * @example
- * ```typescript
- * import { ExtractionConfig, extractFile } from '@kreuzberg/node';
- *
- * // Create with builder pattern
- * const config = ExtractionConfig.default()
- *   .withChunking({ maxChars: 2048 })
- *   .withOcr({ backend: 'tesseract', language: 'eng' })
- *   .build();
- *
- * const result = await extractFile('document.pdf', null, config);
- * ```
- */
-declare class ExtractionConfigBuilder {
-    private config;
-    /**
-     * Create a new builder with default configuration.
-     */
-    static default(): ExtractionConfigBuilder;
-    /**
-     * Set OCR configuration.
-     */
-    withOcr(ocr: OcrConfig): ExtractionConfigBuilder;
-    /**
-     * Set chunking configuration.
-     */
-    withChunking(chunking: ChunkingConfig): ExtractionConfigBuilder;
-    /**
-     * Set image extraction configuration.
-     */
-    withImageExtraction(images: ImageExtractionConfig): ExtractionConfigBuilder;
-    /**
-     * Set PDF configuration.
-     */
-    withPdf(pdf: PdfConfig): ExtractionConfigBuilder;
-    /**
-     * Set keyword extraction configuration.
-     */
-    withKeywords(keywords: KeywordConfig): ExtractionConfigBuilder;
-    /**
-     * Set language detection configuration.
-     */
-    withLanguageDetection(languageDetection: LanguageDetectionConfig): ExtractionConfigBuilder;
-    /**
-     * Set whether to enable metadata extraction.
-     */
-    withMetadataExtraction(enabled: boolean): ExtractionConfigBuilder;
-    /**
-     * Set whether to enable quality mode.
-     */
-    withQualityMode(enabled: boolean): ExtractionConfigBuilder;
-    /**
-     * Build and return the final ExtractionConfig object.
-     */
-    build(): ExtractionConfig$1;
-}
-/**
- * ExtractionConfig namespace with static methods for loading configuration from files
- * and creating new configurations with the builder pattern.
+ * ExtractionConfig namespace with static methods for loading configuration from files.
  *
  * Provides factory methods to load extraction configuration from TOML, YAML, or JSON files,
- * or to create configurations using a fluent builder API.
+ * or to discover configuration files in the current directory tree.
+ *
+ * For creating configurations programmatically, use plain TypeScript objects instead:
  *
  * @example
  * ```typescript
@@ -684,35 +624,17 @@ declare class ExtractionConfigBuilder {
  * // Load configuration from file
  * const config1 = ExtractionConfig.fromFile('config.toml');
  *
- * // Create with builder pattern
- * const config2 = ExtractionConfig.default()
- *   .withChunking({ maxChars: 2048 })
- *   .build();
+ * // Or create with plain object
+ * const config2 = {
+ *   chunking: { maxChars: 2048 },
+ *   ocr: { backend: 'tesseract', language: 'eng' }
+ * };
  *
  * // Use with extraction
  * const result = await extractFile('document.pdf', null, config2);
  * ```
  */
 declare const ExtractionConfig: {
-    /**
-     * Create a default extraction configuration using the builder pattern.
-     *
-     * Returns a builder object that allows you to configure extraction settings
-     * using method chaining.
-     *
-     * @returns ExtractionConfigBuilder for chaining configuration calls
-     *
-     * @example
-     * ```typescript
-     * import { ExtractionConfig } from '@kreuzberg/node';
-     *
-     * const config = ExtractionConfig.default()
-     *   .withChunking({ maxChars: 2048 })
-     *   .withOcr({ backend: 'tesseract', language: 'eng' })
-     *   .build();
-     * ```
-     */
-    default(): ExtractionConfigBuilder;
     /**
      * Load extraction configuration from a file.
      *
@@ -1060,6 +982,151 @@ declare function getErrorCodeDescription(code: number): string;
  * ```
  */
 declare function classifyError(errorMessage: string): ErrorClassification;
-declare const __version__ = "4.0.0-rc.21";
+/**
+ * Create a worker pool for concurrent file extraction.
+ *
+ * The worker pool manages a set of background worker threads that can process
+ * extraction requests concurrently, improving throughput when handling multiple files.
+ *
+ * @param size - Optional number of worker threads (defaults to CPU count). Must be > 0
+ * @returns A WorkerPool instance to use with extraction functions
+ *
+ * @throws {Error} If size is invalid or pool creation fails
+ *
+ * @example
+ * ```typescript
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
+ *
+ * // Create pool with 4 workers
+ * const pool = createWorkerPool(4);
+ *
+ * try {
+ *   const result = await extractFileInWorker(pool, 'document.pdf');
+ *   console.log(result.content);
+ * } finally {
+ *   // Always close the pool when done
+ *   await closeWorkerPool(pool);
+ * }
+ * ```
+ */
+declare function createWorkerPool(size?: number): WorkerPool;
+/**
+ * Get statistics about a worker pool.
+ *
+ * Returns information about the pool's current state, including the number of active workers,
+ * queued tasks, and total processed tasks.
+ *
+ * @param pool - The worker pool instance
+ * @returns WorkerPoolStats with pool information
+ *
+ * @example
+ * ```typescript
+ * import { createWorkerPool, getWorkerPoolStats } from '@kreuzberg/node';
+ *
+ * const pool = createWorkerPool(4);
+ * const stats = getWorkerPoolStats(pool);
+ *
+ * console.log(`Pool size: ${stats.size}`);
+ * console.log(`Active workers: ${stats.activeWorkers}`);
+ * console.log(`Queued tasks: ${stats.queuedTasks}`);
+ * ```
+ */
+declare function getWorkerPoolStats(pool: WorkerPool): WorkerPoolStats;
+/**
+ * Extract content from a single file using a worker pool (asynchronous).
+ *
+ * Submits an extraction task to the worker pool. The task is executed by one of the
+ * available workers in the background, allowing other tasks to be processed concurrently.
+ *
+ * @param pool - The worker pool instance
+ * @param filePath - Path to the file to extract
+ * @param mimeTypeOrConfig - Optional MIME type or extraction configuration
+ * @param maybeConfig - Optional extraction configuration (if second param is MIME type)
+ * @returns Promise<ExtractionResult> containing extracted content and metadata
+ *
+ * @throws {Error} If the file cannot be read or extraction fails
+ *
+ * @example
+ * ```typescript
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
+ *
+ * const pool = createWorkerPool(4);
+ *
+ * try {
+ *   const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];
+ *   const results = await Promise.all(
+ *     files.map(f => extractFileInWorker(pool, f))
+ *   );
+ *
+ *   results.forEach((r, i) => {
+ *     console.log(`${files[i]}: ${r.content.substring(0, 100)}...`);
+ *   });
+ * } finally {
+ *   await closeWorkerPool(pool);
+ * }
+ * ```
+ */
+declare function extractFileInWorker(pool: WorkerPool, filePath: string, mimeTypeOrConfig?: string | null | ExtractionConfig$1, maybeConfig?: ExtractionConfig$1 | null): Promise<ExtractionResult>;
+/**
+ * Extract content from multiple files in parallel using a worker pool (asynchronous).
+ *
+ * Submits multiple extraction tasks to the worker pool for concurrent processing.
+ * This is more efficient than using `extractFileInWorker` multiple times sequentially.
+ *
+ * @param pool - The worker pool instance
+ * @param paths - Array of file paths to extract
+ * @param config - Extraction configuration object (applies to all files)
+ * @returns Promise<ExtractionResult[]> array of results (one per file, in same order)
+ *
+ * @throws {Error} If any file cannot be read or extraction fails
+ *
+ * @example
+ * ```typescript
+ * import { createWorkerPool, batchExtractFilesInWorker, closeWorkerPool } from '@kreuzberg/node';
+ *
+ * const pool = createWorkerPool(4);
+ *
+ * try {
+ *   const files = ['invoice1.pdf', 'invoice2.pdf', 'invoice3.pdf'];
+ *   const results = await batchExtractFilesInWorker(pool, files, {
+ *     ocr: { backend: 'tesseract', language: 'eng' }
+ *   });
+ *
+ *   const total = results.reduce((sum, r) => sum + extractAmount(r.content), 0);
+ *   console.log(`Total: $${total}`);
+ * } finally {
+ *   await closeWorkerPool(pool);
+ * }
+ * ```
+ */
+declare function batchExtractFilesInWorker(pool: WorkerPool, paths: string[], config?: ExtractionConfig$1 | null): Promise<ExtractionResult[]>;
+/**
+ * Close a worker pool and shut down all worker threads.
+ *
+ * Should be called when the pool is no longer needed to clean up resources
+ * and gracefully shut down worker threads. Any pending tasks will be cancelled.
+ *
+ * @param pool - The worker pool instance to close
+ * @returns Promise that resolves when the pool is fully closed
+ *
+ * @throws {Error} If pool shutdown fails
+ *
+ * @example
+ * ```typescript
+ * import { createWorkerPool, extractFileInWorker, closeWorkerPool } from '@kreuzberg/node';
+ *
+ * const pool = createWorkerPool(4);
+ *
+ * try {
+ *   const result = await extractFileInWorker(pool, 'document.pdf');
+ *   console.log(result.content);
+ * } finally {
+ *   // Clean up the pool
+ *   await closeWorkerPool(pool);
+ * }
+ * ```
+ */
+declare function closeWorkerPool(pool: WorkerPool): Promise<void>;
+declare const __version__ = "4.0.0-rc.24";
-export { ChunkingConfig, type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, ImageExtractionConfig, KeywordConfig, LanguageDetectionConfig, OcrBackendProtocol, OcrConfig, PanicContext, PdfConfig, PostProcessorProtocol, ValidatorProtocol, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };
+export { type EmbeddingPreset, ErrorClassification, ExtractionConfig, ExtractionResult, OcrBackendProtocol, PanicContext, PostProcessorProtocol, ValidatorProtocol, WorkerPool, WorkerPoolStats, __resetBindingForTests, __setBindingForTests, __version__, batchExtractBytes, batchExtractBytesSync, batchExtractFiles, batchExtractFilesInWorker, batchExtractFilesSync, classifyError, clearDocumentExtractors, clearOcrBackends, clearPostProcessors, clearValidators, closeWorkerPool, createWorkerPool, detectMimeType, detectMimeTypeFromPath, extractBytes, extractBytesSync, extractFile, extractFileInWorker, extractFileSync, getEmbeddingPreset, getErrorCodeDescription, getErrorCodeName, getExtensionsForMime, getLastErrorCode, getLastPanicContext, getWorkerPoolStats, listDocumentExtractors, listEmbeddingPresets, listOcrBackends, listPostProcessors, listValidators, registerOcrBackend, registerPostProcessor, registerValidator, unregisterDocumentExtractor, unregisterOcrBackend, unregisterPostProcessor, unregisterValidator, validateMimeType };