@kreuzberg/node 4.0.0-rc.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,5 +1,10 @@
1
1
  /* auto-generated by NAPI-RS */
2
2
  /* eslint-disable */
3
+ /** Opaque handle to a worker pool */
4
+ export declare class JsWorkerPool {
5
+
6
+ }
7
+
3
8
  /**
4
9
  * Batch extract from multiple byte arrays (asynchronous).
5
10
  *
@@ -96,6 +101,40 @@ export declare function batchExtractBytesSync(dataList: Array<Buffer>, mimeTypes
96
101
  */
97
102
  export declare function batchExtractFiles(paths: Array<string>, config?: JsExtractionConfig | undefined | null): Promise<Array<JsExtractionResult>>
98
103
 
104
+ /**
105
+ * Extract multiple files using worker threads from the pool.
106
+ *
107
+ * Submits multiple file extraction tasks to the worker pool for concurrent
108
+ * processing. Files are processed in parallel up to the pool size limit.
109
+ *
110
+ * # Parameters
111
+ *
112
+ * * `pool` - Worker pool handle
113
+ * * `file_paths` - Array of file paths to extract
114
+ * * `config` - Optional extraction configuration applied to all files
115
+ *
116
+ * # Returns
117
+ *
118
+ * Promise resolving to array of extraction results in the same order as input paths.
119
+ *
120
+ * # Example
121
+ *
122
+ * ```typescript
123
+ * import { createWorkerPool, batchExtractFilesInWorker } from '@kreuzberg/node';
124
+ *
125
+ * const pool = createWorkerPool(4);
126
+ * const files = ['doc1.pdf', 'doc2.docx', 'doc3.xlsx'];
127
+ * const results = await batchExtractFilesInWorker(pool, files, {
128
+ * useCache: true
129
+ * });
130
+ *
131
+ * results.forEach((result, i) => {
132
+ * console.log(`File ${i + 1}: ${result.content.length} chars`);
133
+ * });
134
+ * ```
135
+ */
136
+ export declare function batchExtractFilesInWorker(pool: JsWorkerPool, filePaths: Array<string>, config?: JsExtractionConfig | undefined | null): Promise<Array<JsExtractionResult>>
137
+
99
138
  /**
100
139
  * Batch extract from multiple files (synchronous).
101
140
  *
@@ -125,6 +164,8 @@ export declare function batchExtractFiles(paths: Array<string>, config?: JsExtra
125
164
  */
126
165
  export declare function batchExtractFilesSync(paths: Array<string>, config?: JsExtractionConfig | undefined | null): Array<JsExtractionResult>
127
166
 
167
+ export declare function classifyError(errorMessage: string): ErrorClassification
168
+
128
169
  /**
129
170
  * Clear all registered document extractors.
130
171
  *
@@ -165,6 +206,107 @@ export declare function clearPostProcessors(): void
165
206
  /** Clear all registered validators */
166
207
  export declare function clearValidators(): void
167
208
 
209
+ /**
210
+ * Close and shutdown a worker pool gracefully.
211
+ *
212
+ * Waits for all in-flight extraction tasks to complete before shutting down
213
+ * the pool. After calling this function, the pool handle becomes invalid.
214
+ *
215
+ * # Parameters
216
+ *
217
+ * * `pool` - Worker pool handle
218
+ *
219
+ * # Returns
220
+ *
221
+ * Promise that resolves when all workers have completed and pool is closed.
222
+ *
223
+ * # Example
224
+ *
225
+ * ```typescript
226
+ * import { createWorkerPool, closeWorkerPool } from '@kreuzberg/node';
227
+ *
228
+ * const pool = createWorkerPool(4);
229
+ * // ... use pool for extractions ...
230
+ * await closeWorkerPool(pool); // Wait for completion and cleanup
231
+ * ```
232
+ */
233
+ export declare function closeWorkerPool(pool: JsWorkerPool): Promise<void>
234
+
235
+ /**
236
+ * Get a specific field from config (represented as JSON string) by name via FFI.
237
+ *
238
+ * Retrieves a configuration field by path, supporting nested access with
239
+ * dot notation (e.g., "ocr.backend"). Returns the field value as a JSON string.
240
+ *
241
+ * # Arguments
242
+ *
243
+ * * `json_str` - A JSON string representation of the configuration
244
+ * * `field_name` - The field path to retrieve (e.g., "useCache", "ocr.backend")
245
+ *
246
+ * # Returns
247
+ *
248
+ * The field value as a JSON string, or null if not found
249
+ */
250
+ export declare function configGetFieldInternal(jsonStr: string, fieldName: string): string | null
251
+
252
+ /**
253
+ * Merge two configs (override takes precedence over base) via FFI.
254
+ *
255
+ * Performs a shallow merge where fields from the override config take
256
+ * precedence over fields in the base config.
257
+ *
258
+ * # Arguments
259
+ *
260
+ * * `base_json` - A JSON string representation of the base ExtractionConfig
261
+ * * `override_json` - A JSON string representation of the override ExtractionConfig
262
+ *
263
+ * # Returns
264
+ *
265
+ * The merged configuration as a JSON string, or error
266
+ */
267
+ export declare function configMergeInternal(baseJson: string, overrideJson: string): string
268
+
269
+ /**
270
+ * Validate and normalize an ExtractionConfig JSON string via FFI.
271
+ *
272
+ * This validates the JSON and returns a normalized version, using the shared
273
+ * FFI layer to ensure consistent validation across all language bindings.
274
+ *
275
+ * # Arguments
276
+ *
277
+ * * `json_str` - A JSON string containing the configuration
278
+ *
279
+ * # Returns
280
+ *
281
+ * The normalized JSON string representation of the config, or error
282
+ */
283
+ export declare function configValidateAndNormalize(jsonStr: string): string
284
+
285
+ /**
286
+ * Create a new worker pool for concurrent extraction operations.
287
+ *
288
+ * Creates a pool of worker threads for CPU-bound document extraction.
289
+ * Tasks submitted to the pool will be executed concurrently up to the pool size.
290
+ *
291
+ * # Parameters
292
+ *
293
+ * * `size` - Number of concurrent workers (defaults to CPU count)
294
+ *
295
+ * # Returns
296
+ *
297
+ * Worker pool handle that can be used with extraction functions.
298
+ *
299
+ * # Example
300
+ *
301
+ * ```typescript
302
+ * import { createWorkerPool, getWorkerPoolStats } from '@kreuzberg/node';
303
+ *
304
+ * const pool = createWorkerPool(4); // 4 concurrent workers
305
+ * console.log(`Pool created with ${getWorkerPoolStats(pool).size} workers`);
306
+ * ```
307
+ */
308
+ export declare function createWorkerPool(size?: number | undefined | null): JsWorkerPool
309
+
168
310
  /**
169
311
  * Detect MIME type from raw bytes.
170
312
  *
@@ -198,7 +340,7 @@ export declare function clearValidators(): void
198
340
  * console.log(mimeType); // 'application/pdf'
199
341
  * ```
200
342
  */
201
- export declare function detectMimeType(bytes: Buffer): string
343
+ export declare function detectMimeTypeFromBytes(bytes: Buffer): string
202
344
 
203
345
  /**
204
346
  * Detect MIME type from a file path.
@@ -285,6 +427,53 @@ export interface EmbeddingPreset {
285
427
  description: string
286
428
  }
287
429
 
430
+ /**
431
+ * Classifies an error message string into an error code category.
432
+ *
433
+ * This function analyzes the error message content and returns the most likely
434
+ * error code (0-7) based on keyword patterns. Used to programmatically classify
435
+ * errors for handling purposes.
436
+ *
437
+ * # Arguments
438
+ *
439
+ * * `error_message` - The error message string to classify
440
+ *
441
+ * # Returns
442
+ *
443
+ * An object with:
444
+ * - `code`: The numeric error code (0-7)
445
+ * - `name`: The error code name string
446
+ * - `description`: Brief description of the error type
447
+ * - `confidence`: Confidence score (0.0-1.0) of the classification
448
+ *
449
+ * # Classification Rules
450
+ *
451
+ * - **Validation (0)**: Keywords: invalid, validation, invalid_argument, schema, required, unexpected field
452
+ * - **Parsing (1)**: Keywords: parsing, parse_error, corrupted, malformed, invalid format, decode, encoding
453
+ * - **Ocr (2)**: Keywords: ocr, optical, character, recognition, tesseract, language, model
454
+ * - **MissingDependency (3)**: Keywords: not found, not installed, missing, dependency, require, unavailable
455
+ * - **Io (4)**: Keywords: io, file, disk, read, write, permission, access, path
456
+ * - **Plugin (5)**: Keywords: plugin, register, extension, handler, processor
457
+ * - **UnsupportedFormat (6)**: Keywords: unsupported, format, mime, type, codec
458
+ * - **Internal (7)**: Keywords: internal, bug, panic, unexpected, invariant
459
+ *
460
+ * # Examples
461
+ *
462
+ * ```typescript
463
+ * const result = classifyError("PDF file is corrupted");
464
+ * // Returns: { code: 1, name: "parsing", confidence: 0.95 }
465
+ *
466
+ * const other = classifyError("Tesseract not found");
467
+ * // Returns: { code: 3, name: "missing_dependency", confidence: 0.9 }
468
+ * ```
469
+ */
470
+ export interface ErrorClassification {
471
+ code: number
472
+ name: string
473
+ description: string
474
+ confidence: number
475
+ }
476
+
288
477
  /**
289
478
  * Extract content from bytes (asynchronous).
290
479
  *
@@ -383,6 +572,38 @@ export declare function extractBytesSync(data: Buffer, mimeType: string, config?
383
572
  */
384
573
  export declare function extractFile(filePath: string, mimeType?: string | undefined | null, config?: JsExtractionConfig | undefined | null): Promise<JsExtractionResult>
385
574
 
575
+ /**
576
+ * Extract a file using a worker thread from the pool.
577
+ *
578
+ * Submits a file extraction task to the worker pool. The task will execute
579
+ * when a worker thread becomes available. This is useful for CPU-bound
580
+ * extraction operations that need to be run concurrently.
581
+ *
582
+ * # Parameters
583
+ *
584
+ * * `pool` - Worker pool handle
585
+ * * `file_path` - Path to the file to extract
586
+ * * `password` - Optional password for encrypted files
587
+ * * `config` - Optional extraction configuration
588
+ *
589
+ * # Returns
590
+ *
591
+ * Promise resolving to extraction result.
592
+ *
593
+ * # Example
594
+ *
595
+ * ```typescript
596
+ * import { createWorkerPool, extractFileInWorker } from '@kreuzberg/node';
597
+ *
598
+ * const pool = createWorkerPool(4);
599
+ * const result = await extractFileInWorker(pool, 'document.pdf', null, {
600
+ * useCache: true
601
+ * });
602
+ * console.log(result.content);
603
+ * ```
604
+ */
605
+ export declare function extractFileInWorker(pool: JsWorkerPool, filePath: string, password?: string | undefined | null, config?: JsExtractionConfig | undefined | null): Promise<JsExtractionResult>
606
+
386
607
  /**
387
608
  * Extract content from a file (synchronous).
388
609
  *
@@ -473,6 +694,52 @@ export declare function extractFileSync(filePath: string, mimeType?: string | un
473
694
  */
474
695
  export declare function getEmbeddingPreset(name: string): EmbeddingPreset | null
475
696
 
697
+ /**
698
+ * Returns the description for an error code.
699
+ *
700
+ * Maps to FFI function kreuzberg_error_code_description().
701
+ *
702
+ * # Arguments
703
+ *
704
+ * * `code` - Numeric error code (0-7)
705
+ *
706
+ * # Returns
707
+ *
708
+ * A string containing a brief description of the error
709
+ *
710
+ * # Examples
711
+ *
712
+ * ```typescript
713
+ * const desc = getErrorCodeDescription(0); // returns "Input validation error"
714
+ * const ioDesc = getErrorCodeDescription(4); // returns "File system I/O error"
715
+ * const unknownDesc = getErrorCodeDescription(99); // returns "Unknown error code"
716
+ * ```
717
+ */
718
+ export declare function getErrorCodeDescription(code: number): string
719
+
720
+ /**
721
+ * Returns the human-readable name for an error code.
722
+ *
723
+ * Maps to FFI function kreuzberg_error_code_name().
724
+ *
725
+ * # Arguments
726
+ *
727
+ * * `code` - Numeric error code (0-7)
728
+ *
729
+ * # Returns
730
+ *
731
+ * A string containing the error code name (e.g., "validation", "ocr", "unknown")
732
+ *
733
+ * # Examples
734
+ *
735
+ * ```typescript
736
+ * const name = getErrorCodeName(0); // returns "validation"
737
+ * const ocrName = getErrorCodeName(2); // returns "ocr"
738
+ * const unknownName = getErrorCodeName(99); // returns "unknown"
739
+ * ```
740
+ */
741
+ export declare function getErrorCodeName(code: number): string
742
+
476
743
  /**
477
744
  * Get file extensions for a given MIME type.
478
745
  *
@@ -580,6 +847,112 @@ export declare function getLastErrorCode(): number
580
847
  */
581
848
  export declare function getLastPanicContext(): any | null
582
849
 
850
+ /**
851
+ * Get valid binarization methods.
852
+ *
853
+ * Returns a list of all valid binarization method values.
854
+ *
855
+ * # Returns
856
+ *
857
+ * Array of valid binarization methods: ["otsu", "adaptive", "sauvola"]
858
+ *
859
+ * # Example
860
+ *
861
+ * ```typescript
862
+ * import { getValidBinarizationMethods } from '@kreuzberg/node';
863
+ *
864
+ * const methods = getValidBinarizationMethods();
865
+ * console.log(methods); // ['otsu', 'adaptive', 'sauvola']
866
+ * ```
867
+ */
868
+ export declare function getValidBinarizationMethods(): Array<string>
869
+
870
+ /**
871
+ * Get valid language codes.
872
+ *
873
+ * Returns a list of all valid language codes in ISO 639-1 and 639-3 formats.
874
+ *
875
+ * # Returns
876
+ *
877
+ * Array of valid language codes (both 2-letter and 3-letter codes)
878
+ *
879
+ * # Example
880
+ *
881
+ * ```typescript
882
+ * import { getValidLanguageCodes } from '@kreuzberg/node';
883
+ *
884
+ * const codes = getValidLanguageCodes();
885
+ * console.log(codes); // ['en', 'de', 'fr', ..., 'eng', 'deu', 'fra', ...]
886
+ * ```
887
+ */
888
+ export declare function getValidLanguageCodes(): Array<string>
889
+
890
+ /**
891
+ * Get valid OCR backends.
892
+ *
893
+ * Returns a list of all valid OCR backend values.
894
+ *
895
+ * # Returns
896
+ *
897
+ * Array of valid OCR backends: ["tesseract", "easyocr", "paddleocr"]
898
+ *
899
+ * # Example
900
+ *
901
+ * ```typescript
902
+ * import { getValidOcrBackends } from '@kreuzberg/node';
903
+ *
904
+ * const backends = getValidOcrBackends();
905
+ * console.log(backends); // ['tesseract', 'easyocr', 'paddleocr']
906
+ * ```
907
+ */
908
+ export declare function getValidOcrBackends(): Array<string>
909
+
910
+ /**
911
+ * Get valid token reduction levels.
912
+ *
913
+ * Returns a list of all valid token reduction level values.
914
+ *
915
+ * # Returns
916
+ *
917
+ * Array of valid levels: ["off", "light", "moderate", "aggressive", "maximum"]
918
+ *
919
+ * # Example
920
+ *
921
+ * ```typescript
922
+ * import { getValidTokenReductionLevels } from '@kreuzberg/node';
923
+ *
924
+ * const levels = getValidTokenReductionLevels();
925
+ * console.log(levels); // ['off', 'light', 'moderate', 'aggressive', 'maximum']
926
+ * ```
927
+ */
928
+ export declare function getValidTokenReductionLevels(): Array<string>
929
+
930
+ /**
931
+ * Get worker pool statistics.
932
+ *
933
+ * Returns current statistics about the worker pool including size,
934
+ * active workers, and queued tasks.
935
+ *
936
+ * # Parameters
937
+ *
938
+ * * `pool` - Worker pool handle
939
+ *
940
+ * # Returns
941
+ *
942
+ * Pool statistics object with size, activeWorkers, and queuedTasks fields.
943
+ *
944
+ * # Example
945
+ *
946
+ * ```typescript
947
+ * import { createWorkerPool, getWorkerPoolStats } from '@kreuzberg/node';
948
+ *
949
+ * const pool = createWorkerPool(4);
950
+ * const stats = getWorkerPoolStats(pool);
951
+ * console.log(`Active: ${stats.activeWorkers}/${stats.size}`);
952
+ * ```
953
+ */
954
+ export declare function getWorkerPoolStats(pool: JsWorkerPool): WorkerPoolStats
955
+
583
956
  export interface JsChunk {
584
957
  content: string
585
958
  embedding?: number[] | undefined
@@ -596,11 +969,13 @@ export interface JsChunkingConfig {
596
969
  }
597
970
 
598
971
  export interface JsChunkMetadata {
599
- charStart: number
600
- charEnd: number
972
+ byteStart: number
973
+ byteEnd: number
601
974
  tokenCount?: number
602
975
  chunkIndex: number
603
976
  totalChunks: number
977
+ firstPage?: number
978
+ lastPage?: number
604
979
  }
605
980
 
606
981
  /** Embedding generation configuration for Node.js bindings. */
@@ -662,6 +1037,7 @@ export interface JsExtractionConfig {
662
1037
  keywords?: JsKeywordConfig
663
1038
  htmlOptions?: JsHtmlOptions
664
1039
  maxConcurrentExtractions?: number
1040
+ pages?: JsPageConfig
665
1041
  }
666
1042
 
667
1043
  export interface JsExtractionResult {
@@ -672,6 +1048,21 @@ export interface JsExtractionResult {
672
1048
  detectedLanguages?: Array<string>
673
1049
  chunks?: Array<JsChunk>
674
1050
  images?: Array<JsExtractedImage>
1051
+ pages?: Array<JsPageContent>
1052
+ }
1053
+
1054
+ export interface JsHierarchicalBlock {
1055
+ text: string
1056
+ fontSize: number
1057
+ level: string
1058
+ bbox?: [number, number, number, number] | undefined
1059
+ }
1060
+
1061
+ export interface JsHierarchyConfig {
1062
+ enabled?: boolean
1063
+ kClusters?: number
1064
+ includeBbox?: boolean
1065
+ ocrCoverageThreshold?: number
675
1066
  }
676
1067
 
677
1068
  export interface JsHtmlOptions {
@@ -746,10 +1137,30 @@ export interface JsOcrConfig {
746
1137
  tesseractConfig?: JsTesseractConfig
747
1138
  }
748
1139
 
1140
+ export interface JsPageConfig {
1141
+ extractPages?: boolean
1142
+ insertPageMarkers?: boolean
1143
+ markerFormat?: string
1144
+ }
1145
+
1146
+ export interface JsPageContent {
1147
+ pageNumber: number
1148
+ content: string
1149
+ tables: Array<JsTable>
1150
+ images: Array<JsExtractedImage>
1151
+ hierarchy?: JsPageHierarchy
1152
+ }
1153
+
1154
+ export interface JsPageHierarchy {
1155
+ blockCount: number
1156
+ blocks: Array<JsHierarchicalBlock>
1157
+ }
1158
+
749
1159
  export interface JsPdfConfig {
750
1160
  extractImages?: boolean
751
1161
  passwords?: Array<string>
752
1162
  extractMetadata?: boolean
1163
+ hierarchy?: JsHierarchyConfig
753
1164
  }
754
1165
 
755
1166
  export interface JsPostProcessorConfig {
@@ -1067,6 +1478,134 @@ export declare function unregisterPostProcessor(name: string): void
1067
1478
  /** Unregister a validator by name */
1068
1479
  export declare function unregisterValidator(name: string): void
1069
1480
 
1481
+ /**
1482
+ * Validates a binarization method string.
1483
+ *
1484
+ * Valid methods: "otsu", "adaptive", "sauvola"
1485
+ *
1486
+ * # Arguments
1487
+ *
1488
+ * * `method` - The binarization method to validate
1489
+ *
1490
+ * # Returns
1491
+ *
1492
+ * `true` if valid, `false` if invalid.
1493
+ *
1494
+ * # Example
1495
+ *
1496
+ * ```typescript
1497
+ * import { validateBinarizationMethod } from '@kreuzberg/node';
1498
+ *
1499
+ * if (validateBinarizationMethod('otsu')) {
1500
+ * console.log('Valid method');
1501
+ * } else {
1502
+ * console.log('Invalid method');
1503
+ * }
1504
+ * ```
1505
+ */
1506
+ export declare function validateBinarizationMethod(method: string): boolean
1507
+
1508
+ /**
1509
+ * Validates chunking parameters.
1510
+ *
1511
+ * Checks that `maxChars > 0` and `maxOverlap < maxChars`.
1512
+ *
1513
+ * # Arguments
1514
+ *
1515
+ * * `max_chars` - Maximum characters per chunk
1516
+ * * `max_overlap` - Maximum overlap between chunks
1517
+ *
1518
+ * # Returns
1519
+ *
1520
+ * `true` if valid, `false` if invalid.
1521
+ *
1522
+ * # Example
1523
+ *
1524
+ * ```typescript
1525
+ * import { validateChunkingParams } from '@kreuzberg/node';
1526
+ *
1527
+ * if (validateChunkingParams(1000, 200)) {
1528
+ * console.log('Valid chunking parameters');
1529
+ * }
1530
+ * ```
1531
+ */
1532
+ export declare function validateChunkingParams(maxChars: number, maxOverlap: number): boolean
1533
+
1534
+ /**
1535
+ * Validates a confidence threshold value.
1536
+ *
1537
+ * Valid range: 0.0 to 1.0 (inclusive)
1538
+ *
1539
+ * # Arguments
1540
+ *
1541
+ * * `confidence` - The confidence threshold to validate
1542
+ *
1543
+ * # Returns
1544
+ *
1545
+ * `true` if valid, `false` if invalid.
1546
+ *
1547
+ * # Example
1548
+ *
1549
+ * ```typescript
1550
+ * import { validateConfidence } from '@kreuzberg/node';
1551
+ *
1552
+ * if (validateConfidence(0.75)) {
1553
+ * console.log('Valid confidence threshold');
1554
+ * }
1555
+ * ```
1556
+ */
1557
+ export declare function validateConfidence(confidence: number): boolean
1558
+
1559
+ /**
1560
+ * Validates a DPI (dots per inch) value.
1561
+ *
1562
+ * Valid range: 1-2400
1563
+ *
1564
+ * # Arguments
1565
+ *
1566
+ * * `dpi` - The DPI value to validate
1567
+ *
1568
+ * # Returns
1569
+ *
1570
+ * `true` if valid, `false` if invalid.
1571
+ *
1572
+ * # Example
1573
+ *
1574
+ * ```typescript
1575
+ * import { validateDpi } from '@kreuzberg/node';
1576
+ *
1577
+ * if (validateDpi(300)) {
1578
+ * console.log('Valid DPI');
1579
+ * }
1580
+ * ```
1581
+ */
1582
+ export declare function validateDpi(dpi: number): boolean
1583
+
1584
+ /**
1585
+ * Validates a language code (ISO 639-1 or 639-3 format).
1586
+ *
1587
+ * Accepts both 2-letter codes (e.g., "en", "de") and 3-letter codes (e.g., "eng", "deu").
1588
+ *
1589
+ * # Arguments
1590
+ *
1591
+ * * `code` - The language code to validate
1592
+ *
1593
+ * # Returns
1594
+ *
1595
+ * `true` if valid, `false` if invalid.
1596
+ *
1597
+ * # Example
1598
+ *
1599
+ * ```typescript
1600
+ * import { validateLanguageCode } from '@kreuzberg/node';
1601
+ *
1602
+ * if (validateLanguageCode('en')) {
1603
+ * console.log('Valid language code');
1604
+ * }
1605
+ * ```
1606
+ */
1607
+ export declare function validateLanguageCode(code: string): boolean
1608
+
1070
1609
  /**
1071
1610
  * Validate that a MIME type is supported by Kreuzberg.
1072
1611
  *
@@ -1107,3 +1646,134 @@ export declare function unregisterValidator(name: string): void
1107
1646
  * ```
1108
1647
  */
1109
1648
  export declare function validateMimeType(mimeType: string): string
1649
+
1650
+ /**
1651
+ * Validates an OCR backend string.
1652
+ *
1653
+ * Valid backends: "tesseract", "easyocr", "paddleocr"
1654
+ *
1655
+ * # Arguments
1656
+ *
1657
+ * * `backend` - The OCR backend to validate
1658
+ *
1659
+ * # Returns
1660
+ *
1661
+ * `true` if valid, `false` if invalid.
1662
+ *
1663
+ * # Example
1664
+ *
1665
+ * ```typescript
1666
+ * import { validateOcrBackend } from '@kreuzberg/node';
1667
+ *
1668
+ * if (validateOcrBackend('tesseract')) {
1669
+ * console.log('Valid backend');
1670
+ * }
1671
+ * ```
1672
+ */
1673
+ export declare function validateOcrBackend(backend: string): boolean
1674
+
1675
+ /**
1676
+ * Validates a tesseract output format string.
1677
+ *
1678
+ * Valid formats: "text", "markdown"
1679
+ *
1680
+ * # Arguments
1681
+ *
1682
+ * * `format` - The output format to validate
1683
+ *
1684
+ * # Returns
1685
+ *
1686
+ * `true` if valid, `false` if invalid.
1687
+ *
1688
+ * # Example
1689
+ *
1690
+ * ```typescript
1691
+ * import { validateOutputFormat } from '@kreuzberg/node';
1692
+ *
1693
+ * if (validateOutputFormat('markdown')) {
1694
+ * console.log('Valid output format');
1695
+ * }
1696
+ * ```
1697
+ */
1698
+ export declare function validateOutputFormat(format: string): boolean
1699
+
1700
+ /**
1701
+ * Validates a Tesseract OCR Engine Mode (OEM) value.
1702
+ *
1703
+ * Valid range: 0-3
1704
+ *
1705
+ * # Arguments
1706
+ *
1707
+ * * `oem` - The OEM value to validate
1708
+ *
1709
+ * # Returns
1710
+ *
1711
+ * `true` if valid (0-3), `false` otherwise.
1712
+ *
1713
+ * # Example
1714
+ *
1715
+ * ```typescript
1716
+ * import { validateTesseractOem } from '@kreuzberg/node';
1717
+ *
1718
+ * if (validateTesseractOem(1)) {
1719
+ * console.log('Valid OEM');
1720
+ * }
1721
+ * ```
1722
+ */
1723
+ export declare function validateTesseractOem(oem: number): boolean
1724
+
1725
+ /**
1726
+ * Validates a Tesseract Page Segmentation Mode (PSM) value.
1727
+ *
1728
+ * Valid range: 0-13
1729
+ *
1730
+ * # Arguments
1731
+ *
1732
+ * * `psm` - The PSM value to validate
1733
+ *
1734
+ * # Returns
1735
+ *
1736
+ * `true` if valid (0-13), `false` otherwise.
1737
+ *
1738
+ * # Example
1739
+ *
1740
+ * ```typescript
1741
+ * import { validateTesseractPsm } from '@kreuzberg/node';
1742
+ *
1743
+ * if (validateTesseractPsm(3)) {
1744
+ * console.log('Valid PSM');
1745
+ * }
1746
+ * ```
1747
+ */
1748
+ export declare function validateTesseractPsm(psm: number): boolean
1749
+
1750
+ /**
1751
+ * Validates a token reduction level string.
1752
+ *
1753
+ * Valid levels: "off", "light", "moderate", "aggressive", "maximum"
1754
+ *
1755
+ * # Arguments
1756
+ *
1757
+ * * `level` - The token reduction level to validate
1758
+ *
1759
+ * # Returns
1760
+ *
1761
+ * `true` if valid, `false` if invalid.
1762
+ *
1763
+ * # Example
1764
+ *
1765
+ * ```typescript
1766
+ * import { validateTokenReductionLevel } from '@kreuzberg/node';
1767
+ *
1768
+ * if (validateTokenReductionLevel('moderate')) {
1769
+ * console.log('Valid token reduction level');
1770
+ * }
1771
+ * ```
1772
+ */
1773
+ export declare function validateTokenReductionLevel(level: string): boolean
1774
+
1775
+ export interface WorkerPoolStats {
1776
+ size: number
1777
+ activeWorkers: number
1778
+ queuedTasks: number
1779
+ }