rag-lite-ts 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{cli → cjs/cli}/indexer.js +1 -1
- package/dist/{cli → cjs/cli}/search.js +5 -10
- package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
- package/dist/cjs/core/binary-index-format.js +291 -0
- package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
- package/dist/{core → cjs/core}/ingestion.js +76 -9
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
- package/dist/{core → cjs/core}/search.js +2 -1
- package/dist/{core → cjs/core}/types.d.ts +1 -1
- package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
- package/dist/{core → cjs/core}/vector-index.js +10 -2
- package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
- package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
- package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
- package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +471 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +529 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +291 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +213 -0
- package/dist/esm/core/db.js +895 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +901 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index.d.ts +72 -0
- package/dist/esm/core/vector-index.js +333 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +477 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +344 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +116 -0
- package/dist/esm/index-manager.js +598 -0
- package/dist/esm/index.d.ts +75 -0
- package/dist/esm/index.js +110 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +30 -12
- package/dist/core/binary-index-format.js +0 -122
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{cli.js → cjs/cli.js} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/db.d.ts +0 -0
- /package/dist/{core → cjs/core}/db.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
- /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
- /package/dist/{index.js → cjs/index.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Batch Processing Optimizer
|
|
3
|
+
* Optimizes embedding generation for large multimodal content batches
|
|
4
|
+
* Implements efficient image processing pipelines with progress reporting
|
|
5
|
+
* Creates memory-efficient processing for large image collections
|
|
6
|
+
*/
|
|
7
|
+
import type { EmbeddingResult } from '../types.js';
|
|
8
|
+
import type { EmbeddingBatchItem } from './universal-embedder.js';
|
|
9
|
+
/**
 * Configuration for batch processing optimization.
 *
 * Default values quoted below are those of DEFAULT_BATCH_CONFIG in the
 * accompanying batch-processing-optimizer.js.
 */
export interface BatchProcessingConfig {
    /** Number of text items sliced into one text batch (default 16). */
    textBatchSize: number;
    /** Batch size for image items (default 4, kept small because image processing is memory-intensive). */
    imageBatchSize: number;
    /** Default 2. NOTE(review): the consumer of this value is not visible here; presumably caps concurrent batches. */
    maxConcurrentBatches: number;
    /** Memory threshold in MB used for memory management (default 512). */
    memoryThresholdMB: number;
    /** Default true. NOTE(review): consumer not visible here. */
    enableMemoryMonitoring: boolean;
    /** Default true. NOTE(review): consumer not visible here. */
    enableGarbageCollection: boolean;
    /** When true, processBatch sends a final progress report to the callback (default true). */
    enableProgressReporting: boolean;
    /** Progress is reported every this many batches (default 5). */
    progressReportInterval: number;
    /** Default 3. NOTE(review): retry logic is not visible here. */
    maxRetries: number;
    /** Default 1000. NOTE(review): presumably the delay between retries, in milliseconds. */
    retryDelayMs: number;
    /** When true, a failed text batch is handed to the fallback processing path (default true). */
    enableFallbackProcessing: boolean;
    /** When true, items within a text batch are embedded concurrently; otherwise one at a time (default true). */
    enableParallelProcessing: boolean;
    /** Default true. NOTE(review): consumer not visible here. */
    enableResourcePooling: boolean;
    /** When true, processBatch preloads required models before processing (default false: lazy loading). */
    preloadModels: boolean;
}
|
|
28
|
+
/**
 * Default batch processing configuration optimized for multimodal content.
 * The BatchProcessingOptimizer constructor uses these values as the base and
 * layers caller-supplied overrides on top.
 */
export declare const DEFAULT_BATCH_CONFIG: BatchProcessingConfig;
|
|
32
|
+
/**
 * Statistics for batch processing operations.
 * A fresh object is created by each processBatch call and updated while
 * batches run.
 */
export interface BatchProcessingStats {
    /** Length of the item array passed to processBatch. */
    totalItems: number;
    /** Incremented once per item whose embed function resolved. */
    processedItems: number;
    /** Incremented once per item whose embed function threw or rejected. */
    failedItems: number;
    /** Starts at 0. NOTE(review): no increment is visible in the portion of the implementation shown here. */
    skippedItems: number;
    /** Incremented each time a batch is started. */
    totalBatches: number;
    /** Incremented when a batch finishes without a batch-level error. */
    completedBatches: number;
    /** Incremented when a batch-level error is caught. */
    failedBatches: number;
    /** Wall-clock duration of the processBatch call, in milliseconds. */
    processingTimeMs: number;
    /** processingTimeMs divided by totalBatches, or 0 when no batch ran. */
    averageBatchTimeMs: number;
    /** processedItems per second of processingTimeMs, or 0 when the duration is 0. */
    itemsPerSecond: number;
    /** Heap usage in whole MB, sampled at the start and again at the end of processBatch. */
    memoryUsageMB: number;
    /** Peak heap usage in whole MB as tracked by the memory monitor. */
    peakMemoryUsageMB: number;
    /** Starts at 0. NOTE(review): retry accounting is not visible here. */
    retryCount: number;
    /** Incremented each time a failed batch is re-run through fallback processing. */
    fallbackCount: number;
}
|
|
51
|
+
/**
 * Progress callback for batch processing.
 * Receives the statistics accumulated so far; it is invoked periodically
 * while batches run and once more with the final statistics when progress
 * reporting is enabled.
 */
export type BatchProgressCallback = (stats: BatchProcessingStats) => void;
|
|
55
|
+
/**
 * Result of batch processing operation
 */
export interface BatchProcessingResult {
    /** Embeddings of the items that were processed successfully (text results first, then image results). */
    results: EmbeddingResult[];
    /** Final statistics for the run. */
    stats: BatchProcessingStats;
    /** One entry per item whose embedding failed. */
    errors: Array<{
        /** The item that could not be embedded. */
        item: EmbeddingBatchItem;
        /** Message of the error raised for this item. */
        error: string;
        /** Zero-based index of the batch the item belonged to. */
        batchIndex: number;
        /** Zero-based position of the item inside that batch. */
        itemIndex: number;
    }>;
}
|
|
68
|
+
/**
 * Optimized batch processor for multimodal content
 * Handles large collections of text and image content efficiently
 */
export declare class BatchProcessingOptimizer {
    private config;
    private memoryMonitor;
    private resourcePool;
    private resourceManager;
    /**
     * @param config Optional overrides applied on top of DEFAULT_BATCH_CONFIG.
     */
    constructor(config?: Partial<BatchProcessingConfig>);
    /**
     * Process a large batch of multimodal content with optimization
     *
     * Items are split by `contentType` into text and image groups; text is
     * processed first, then images.
     *
     * @param items Items to embed.
     * @param embedFunction Async function that embeds a single item.
     * @param progressCallback Optional callback receiving progress statistics.
     * @returns The embeddings, the final statistics and the per-item errors.
     */
    processBatch(items: EmbeddingBatchItem[], embedFunction: (item: EmbeddingBatchItem) => Promise<EmbeddingResult>, progressCallback?: BatchProgressCallback): Promise<BatchProcessingResult>;
    /**
     * Process text items in optimized batches
     */
    private processTextBatches;
    /**
     * Process a single text batch with error handling
     */
    private processTextBatch;
    /**
     * Process image items in optimized batches with memory management
     */
    private processImageBatches;
    /**
     * Process a single image batch with memory optimization
     */
    private processImageBatch;
    /**
     * Process batch with fallback to individual item processing
     */
    private processBatchWithFallback;
    /**
     * Preload required models based on content types
     */
    private preloadRequiredModels;
    /**
     * Preload image processing models
     */
    private preloadImageProcessingModels;
    /**
     * Perform memory management operations
     */
    private performMemoryManagement;
    /**
     * Cleanup resources after processing with resource manager integration
     */
    private cleanupResources;
    /**
     * Check if progress should be reported for this batch
     */
    private shouldReportProgress;
    /**
     * Delay execution for specified milliseconds
     */
    private delay;
    /**
     * Get current configuration
     */
    getConfig(): BatchProcessingConfig;
    /**
     * Update configuration
     *
     * @param updates Subset of configuration keys to change.
     */
    updateConfig(updates: Partial<BatchProcessingConfig>): void;
    /**
     * Get current memory statistics
     *
     * @returns Current, peak and initial memory usage, each in MB.
     */
    getMemoryStats(): {
        currentMB: number;
        peakMB: number;
        initialMB: number;
    };
}
|
|
143
|
+
/**
 * Create a batch processing optimizer with default configuration
 *
 * @param config Optional configuration overrides.
 * @returns A new BatchProcessingOptimizer.
 */
export declare function createBatchProcessor(config?: Partial<BatchProcessingConfig>): BatchProcessingOptimizer;
|
|
147
|
+
/**
 * Create a batch processing optimizer optimized for large image collections
 *
 * Takes no arguments; the configuration is chosen by the implementation.
 *
 * @returns A new BatchProcessingOptimizer.
 */
export declare function createImageBatchProcessor(): BatchProcessingOptimizer;
|
|
151
|
+
/**
 * Create a batch processing optimizer optimized for text processing
 *
 * Takes no arguments; the configuration is chosen by the implementation.
 *
 * @returns A new BatchProcessingOptimizer.
 */
export declare function createTextBatchProcessor(): BatchProcessingOptimizer;
|
|
155
|
+
//# sourceMappingURL=batch-processing-optimizer.d.ts.map
|
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Batch Processing Optimizer
|
|
3
|
+
* Optimizes embedding generation for large multimodal content batches
|
|
4
|
+
* Implements efficient image processing pipelines with progress reporting
|
|
5
|
+
* Creates memory-efficient processing for large image collections
|
|
6
|
+
*/
|
|
7
|
+
import { LazyMultimodalLoader } from './lazy-dependency-loader.js';
|
|
8
|
+
import { createError } from './error-handler.js';
|
|
9
|
+
import { getResourceManager } from './resource-manager.js';
|
|
10
|
+
/**
 * Default batch processing configuration optimized for multimodal content.
 *
 * The BatchProcessingOptimizer constructor spreads this object first and then
 * the caller's overrides, so every key listed here acts as a fallback value.
 */
export const DEFAULT_BATCH_CONFIG = {
    // Conservative batch sizes for memory efficiency
    textBatchSize: 16,
    imageBatchSize: 4, // Smaller for memory-intensive image processing
    maxConcurrentBatches: 2,
    // Memory management (512MB threshold for multimodal processing)
    memoryThresholdMB: 512,
    enableMemoryMonitoring: true,
    enableGarbageCollection: true,
    // Progress reporting every 5 batches
    enableProgressReporting: true,
    progressReportInterval: 5,
    // Error handling with retries
    maxRetries: 3,
    retryDelayMs: 1000,
    enableFallbackProcessing: true,
    // Performance optimization
    enableParallelProcessing: true,
    enableResourcePooling: true,
    preloadModels: false // Lazy loading by default
};
|
|
34
|
+
// =============================================================================
|
|
35
|
+
// MEMORY MONITORING
|
|
36
|
+
// =============================================================================
|
|
37
|
+
/**
 * Memory monitoring utilities for batch processing.
 *
 * Tracks V8 heap usage (`process.memoryUsage().heapUsed`) in whole megabytes:
 * the value at construction time, the highest value observed so far, and the
 * current value on demand.
 */
class MemoryMonitor {
    /** Heap usage in MB sampled when the monitor was created. */
    initialMemoryMB;
    /** Highest heap usage in MB observed so far. */
    peakMemoryMB;
    constructor() {
        this.initialMemoryMB = this.getCurrentMemoryUsageMB();
        this.peakMemoryMB = this.initialMemoryMB;
    }
    /**
     * Get current memory usage in MB
     *
     * @returns {number} Heap bytes in use, converted to MB and rounded to the nearest integer.
     */
    getCurrentMemoryUsageMB() {
        const { heapUsed } = process.memoryUsage();
        return Math.round(heapUsed / 1024 / 1024);
    }
    /**
     * Update peak memory usage
     *
     * Samples the current usage and raises the recorded peak if it was exceeded.
     */
    updatePeakMemory() {
        const current = this.getCurrentMemoryUsageMB();
        if (current > this.peakMemoryMB) {
            this.peakMemoryMB = current;
        }
    }
    /**
     * Check if memory usage exceeds threshold
     *
     * @param {number} thresholdMB Limit in MB.
     * @returns {boolean} True when current usage is strictly greater than the limit.
     */
    isMemoryThresholdExceeded(thresholdMB) {
        return this.getCurrentMemoryUsageMB() > thresholdMB;
    }
    /**
     * Force garbage collection if enabled
     *
     * The `gc` hook only exists when Node is started with `--expose-gc`;
     * without it this method does nothing.
     */
    forceGarbageCollection() {
        if (typeof globalThis.gc === 'function') {
            globalThis.gc();
        }
    }
    /**
     * Get memory statistics
     *
     * Takes a single usage sample and folds it into the peak before
     * reporting, so the returned `peakMB` can never be lower than the
     * returned `currentMB` even if updatePeakMemory() has not been called
     * recently.
     *
     * @returns {{currentMB: number, peakMB: number, initialMB: number}}
     */
    getStats() {
        const currentMB = this.getCurrentMemoryUsageMB();
        if (currentMB > this.peakMemoryMB) {
            this.peakMemoryMB = currentMB;
        }
        return {
            currentMB,
            peakMB: this.peakMemoryMB,
            initialMB: this.initialMemoryMB
        };
    }
}
|
|
88
|
+
// =============================================================================
|
|
89
|
+
// BATCH PROCESSING OPTIMIZER
|
|
90
|
+
// =============================================================================
|
|
91
|
+
/**
|
|
92
|
+
* Optimized batch processor for multimodal content
|
|
93
|
+
* Handles large collections of text and image content efficiently
|
|
94
|
+
*/
|
|
95
|
+
export class BatchProcessingOptimizer {
|
|
96
|
+
config;
|
|
97
|
+
memoryMonitor;
|
|
98
|
+
resourcePool = new Map();
|
|
99
|
+
resourceManager = getResourceManager();
|
|
100
|
+
constructor(config = {}) {
|
|
101
|
+
this.config = { ...DEFAULT_BATCH_CONFIG, ...config };
|
|
102
|
+
this.memoryMonitor = new MemoryMonitor();
|
|
103
|
+
}
|
|
104
|
+
// =============================================================================
|
|
105
|
+
// PUBLIC API
|
|
106
|
+
// =============================================================================
|
|
107
|
+
/**
|
|
108
|
+
* Process a large batch of multimodal content with optimization
|
|
109
|
+
*/
|
|
110
|
+
async processBatch(items, embedFunction, progressCallback) {
|
|
111
|
+
const startTime = Date.now();
|
|
112
|
+
// Initialize statistics
|
|
113
|
+
const stats = {
|
|
114
|
+
totalItems: items.length,
|
|
115
|
+
processedItems: 0,
|
|
116
|
+
failedItems: 0,
|
|
117
|
+
skippedItems: 0,
|
|
118
|
+
totalBatches: 0,
|
|
119
|
+
completedBatches: 0,
|
|
120
|
+
failedBatches: 0,
|
|
121
|
+
processingTimeMs: 0,
|
|
122
|
+
averageBatchTimeMs: 0,
|
|
123
|
+
itemsPerSecond: 0,
|
|
124
|
+
memoryUsageMB: this.memoryMonitor.getCurrentMemoryUsageMB(),
|
|
125
|
+
peakMemoryUsageMB: this.memoryMonitor.getCurrentMemoryUsageMB(),
|
|
126
|
+
retryCount: 0,
|
|
127
|
+
fallbackCount: 0
|
|
128
|
+
};
|
|
129
|
+
const results = [];
|
|
130
|
+
const errors = [];
|
|
131
|
+
try {
|
|
132
|
+
// Preload models if configured
|
|
133
|
+
if (this.config.preloadModels) {
|
|
134
|
+
await this.preloadRequiredModels(items);
|
|
135
|
+
}
|
|
136
|
+
// Separate items by content type for optimized processing
|
|
137
|
+
const textItems = items.filter(item => item.contentType === 'text');
|
|
138
|
+
const imageItems = items.filter(item => item.contentType === 'image');
|
|
139
|
+
// Process text items in optimized batches
|
|
140
|
+
if (textItems.length > 0) {
|
|
141
|
+
const textResults = await this.processTextBatches(textItems, embedFunction, stats, errors, progressCallback);
|
|
142
|
+
results.push(...textResults);
|
|
143
|
+
}
|
|
144
|
+
// Process image items in optimized batches
|
|
145
|
+
if (imageItems.length > 0) {
|
|
146
|
+
const imageResults = await this.processImageBatches(imageItems, embedFunction, stats, errors, progressCallback);
|
|
147
|
+
results.push(...imageResults);
|
|
148
|
+
}
|
|
149
|
+
// Calculate final statistics
|
|
150
|
+
const endTime = Date.now();
|
|
151
|
+
stats.processingTimeMs = endTime - startTime;
|
|
152
|
+
stats.averageBatchTimeMs = stats.totalBatches > 0 ? stats.processingTimeMs / stats.totalBatches : 0;
|
|
153
|
+
stats.itemsPerSecond = stats.processingTimeMs > 0 ? (stats.processedItems / stats.processingTimeMs) * 1000 : 0;
|
|
154
|
+
const memoryStats = this.memoryMonitor.getStats();
|
|
155
|
+
stats.memoryUsageMB = memoryStats.currentMB;
|
|
156
|
+
stats.peakMemoryUsageMB = memoryStats.peakMB;
|
|
157
|
+
// Final progress report
|
|
158
|
+
if (progressCallback && this.config.enableProgressReporting) {
|
|
159
|
+
progressCallback(stats);
|
|
160
|
+
}
|
|
161
|
+
return { results, stats, errors };
|
|
162
|
+
}
|
|
163
|
+
catch (error) {
|
|
164
|
+
throw createError.model(`Batch processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
165
|
+
}
|
|
166
|
+
finally {
|
|
167
|
+
// Cleanup resources
|
|
168
|
+
await this.cleanupResources();
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
// =============================================================================
|
|
172
|
+
// TEXT BATCH PROCESSING
|
|
173
|
+
// =============================================================================
|
|
174
|
+
/**
|
|
175
|
+
* Process text items in optimized batches
|
|
176
|
+
*/
|
|
177
|
+
async processTextBatches(textItems, embedFunction, stats, errors, progressCallback) {
|
|
178
|
+
const results = [];
|
|
179
|
+
const batchSize = this.config.textBatchSize;
|
|
180
|
+
const totalBatches = Math.ceil(textItems.length / batchSize);
|
|
181
|
+
console.log(`Processing ${textItems.length} text items in ${totalBatches} batches (batch size: ${batchSize})`);
|
|
182
|
+
for (let i = 0; i < textItems.length; i += batchSize) {
|
|
183
|
+
const batch = textItems.slice(i, i + batchSize);
|
|
184
|
+
const batchIndex = Math.floor(i / batchSize);
|
|
185
|
+
stats.totalBatches++;
|
|
186
|
+
try {
|
|
187
|
+
const batchResults = await this.processTextBatch(batch, embedFunction, batchIndex, stats, errors);
|
|
188
|
+
results.push(...batchResults);
|
|
189
|
+
stats.completedBatches++;
|
|
190
|
+
// Memory management
|
|
191
|
+
await this.performMemoryManagement();
|
|
192
|
+
// Progress reporting
|
|
193
|
+
if (progressCallback && this.shouldReportProgress(batchIndex)) {
|
|
194
|
+
progressCallback({ ...stats });
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
catch (error) {
|
|
198
|
+
stats.failedBatches++;
|
|
199
|
+
console.warn(`Text batch ${batchIndex + 1}/${totalBatches} failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
200
|
+
// Try fallback processing if enabled
|
|
201
|
+
if (this.config.enableFallbackProcessing) {
|
|
202
|
+
const fallbackResults = await this.processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors);
|
|
203
|
+
results.push(...fallbackResults);
|
|
204
|
+
stats.fallbackCount++;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
return results;
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Process a single text batch with error handling
|
|
212
|
+
*/
|
|
213
|
+
async processTextBatch(batch, embedFunction, batchIndex, stats, errors) {
|
|
214
|
+
const batchStartTime = Date.now();
|
|
215
|
+
try {
|
|
216
|
+
// Process batch items in parallel if enabled
|
|
217
|
+
if (this.config.enableParallelProcessing) {
|
|
218
|
+
const promises = batch.map(async (item, itemIndex) => {
|
|
219
|
+
try {
|
|
220
|
+
const result = await embedFunction(item);
|
|
221
|
+
stats.processedItems++;
|
|
222
|
+
return result;
|
|
223
|
+
}
|
|
224
|
+
catch (error) {
|
|
225
|
+
stats.failedItems++;
|
|
226
|
+
errors.push({
|
|
227
|
+
item,
|
|
228
|
+
error: error instanceof Error ? error.message : String(error),
|
|
229
|
+
batchIndex,
|
|
230
|
+
itemIndex
|
|
231
|
+
});
|
|
232
|
+
return null;
|
|
233
|
+
}
|
|
234
|
+
});
|
|
235
|
+
const results = await Promise.all(promises);
|
|
236
|
+
return results.filter((result) => result !== null);
|
|
237
|
+
}
|
|
238
|
+
else {
|
|
239
|
+
// Sequential processing
|
|
240
|
+
const results = [];
|
|
241
|
+
for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
|
|
242
|
+
const item = batch[itemIndex];
|
|
243
|
+
try {
|
|
244
|
+
const result = await embedFunction(item);
|
|
245
|
+
results.push(result);
|
|
246
|
+
stats.processedItems++;
|
|
247
|
+
}
|
|
248
|
+
catch (error) {
|
|
249
|
+
stats.failedItems++;
|
|
250
|
+
errors.push({
|
|
251
|
+
item,
|
|
252
|
+
error: error instanceof Error ? error.message : String(error),
|
|
253
|
+
batchIndex,
|
|
254
|
+
itemIndex
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
return results;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
finally {
|
|
262
|
+
// Update batch timing
|
|
263
|
+
const batchTime = Date.now() - batchStartTime;
|
|
264
|
+
stats.averageBatchTimeMs = ((stats.averageBatchTimeMs * (stats.completedBatches + stats.failedBatches)) + batchTime) / (stats.completedBatches + stats.failedBatches + 1);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
// =============================================================================
|
|
268
|
+
// IMAGE BATCH PROCESSING
|
|
269
|
+
// =============================================================================
|
|
270
|
+
/**
|
|
271
|
+
* Process image items in optimized batches with memory management
|
|
272
|
+
*/
|
|
273
|
+
async processImageBatches(imageItems, embedFunction, stats, errors, progressCallback) {
|
|
274
|
+
const results = [];
|
|
275
|
+
const batchSize = this.config.imageBatchSize;
|
|
276
|
+
const totalBatches = Math.ceil(imageItems.length / batchSize);
|
|
277
|
+
console.log(`Processing ${imageItems.length} image items in ${totalBatches} batches (batch size: ${batchSize})`);
|
|
278
|
+
// Preload image processing models
|
|
279
|
+
await this.preloadImageProcessingModels();
|
|
280
|
+
for (let i = 0; i < imageItems.length; i += batchSize) {
|
|
281
|
+
const batch = imageItems.slice(i, i + batchSize);
|
|
282
|
+
const batchIndex = Math.floor(i / batchSize) + Math.ceil(stats.totalBatches);
|
|
283
|
+
stats.totalBatches++;
|
|
284
|
+
try {
|
|
285
|
+
const batchResults = await this.processImageBatch(batch, embedFunction, batchIndex, stats, errors);
|
|
286
|
+
results.push(...batchResults);
|
|
287
|
+
stats.completedBatches++;
|
|
288
|
+
// Aggressive memory management for images
|
|
289
|
+
await this.performMemoryManagement(true);
|
|
290
|
+
// Progress reporting
|
|
291
|
+
if (progressCallback && this.shouldReportProgress(batchIndex)) {
|
|
292
|
+
progressCallback({ ...stats });
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
catch (error) {
|
|
296
|
+
stats.failedBatches++;
|
|
297
|
+
console.warn(`Image batch ${batchIndex + 1} failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
298
|
+
// Try fallback processing if enabled
|
|
299
|
+
if (this.config.enableFallbackProcessing) {
|
|
300
|
+
const fallbackResults = await this.processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors);
|
|
301
|
+
results.push(...fallbackResults);
|
|
302
|
+
stats.fallbackCount++;
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
return results;
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Process a single image batch with memory optimization
|
|
310
|
+
*/
|
|
311
|
+
async processImageBatch(batch, embedFunction, batchIndex, stats, errors) {
|
|
312
|
+
const batchStartTime = Date.now();
|
|
313
|
+
try {
|
|
314
|
+
// For images, use sequential processing to manage memory better
|
|
315
|
+
const results = [];
|
|
316
|
+
for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
|
|
317
|
+
const item = batch[itemIndex];
|
|
318
|
+
try {
|
|
319
|
+
// Check memory before processing each image
|
|
320
|
+
if (this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
|
|
321
|
+
console.warn(`Memory threshold exceeded (${this.memoryMonitor.getCurrentMemoryUsageMB()}MB), forcing garbage collection`);
|
|
322
|
+
this.memoryMonitor.forceGarbageCollection();
|
|
323
|
+
}
|
|
324
|
+
const result = await embedFunction(item);
|
|
325
|
+
results.push(result);
|
|
326
|
+
stats.processedItems++;
|
|
327
|
+
// Update memory tracking
|
|
328
|
+
this.memoryMonitor.updatePeakMemory();
|
|
329
|
+
}
|
|
330
|
+
catch (error) {
|
|
331
|
+
stats.failedItems++;
|
|
332
|
+
errors.push({
|
|
333
|
+
item,
|
|
334
|
+
error: error instanceof Error ? error.message : String(error),
|
|
335
|
+
batchIndex,
|
|
336
|
+
itemIndex
|
|
337
|
+
});
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
return results;
|
|
341
|
+
}
|
|
342
|
+
finally {
|
|
343
|
+
// Update batch timing
|
|
344
|
+
const batchTime = Date.now() - batchStartTime;
|
|
345
|
+
stats.averageBatchTimeMs = ((stats.averageBatchTimeMs * (stats.completedBatches + stats.failedBatches)) + batchTime) / (stats.completedBatches + stats.failedBatches + 1);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
// =============================================================================
|
|
349
|
+
// FALLBACK PROCESSING
|
|
350
|
+
// =============================================================================
|
|
351
|
+
/**
|
|
352
|
+
* Process batch with fallback to individual item processing
|
|
353
|
+
*/
|
|
354
|
+
async processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors) {
|
|
355
|
+
console.log(`Attempting fallback processing for batch ${batchIndex} (${batch.length} items)`);
|
|
356
|
+
const results = [];
|
|
357
|
+
for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
|
|
358
|
+
const item = batch[itemIndex];
|
|
359
|
+
let retryCount = 0;
|
|
360
|
+
while (retryCount <= this.config.maxRetries) {
|
|
361
|
+
try {
|
|
362
|
+
const result = await embedFunction(item);
|
|
363
|
+
results.push(result);
|
|
364
|
+
stats.processedItems++;
|
|
365
|
+
break;
|
|
366
|
+
}
|
|
367
|
+
catch (error) {
|
|
368
|
+
retryCount++;
|
|
369
|
+
stats.retryCount++;
|
|
370
|
+
if (retryCount <= this.config.maxRetries) {
|
|
371
|
+
console.warn(`Retry ${retryCount}/${this.config.maxRetries} for item ${itemIndex} in batch ${batchIndex}`);
|
|
372
|
+
await this.delay(this.config.retryDelayMs);
|
|
373
|
+
}
|
|
374
|
+
else {
|
|
375
|
+
stats.failedItems++;
|
|
376
|
+
errors.push({
|
|
377
|
+
item,
|
|
378
|
+
error: error instanceof Error ? error.message : String(error),
|
|
379
|
+
batchIndex,
|
|
380
|
+
itemIndex
|
|
381
|
+
});
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
return results;
|
|
387
|
+
}
|
|
388
|
+
// =============================================================================
|
|
389
|
+
// RESOURCE MANAGEMENT
|
|
390
|
+
// =============================================================================
|
|
391
|
+
/**
|
|
392
|
+
* Preload required models based on content types
|
|
393
|
+
*/
|
|
394
|
+
async preloadRequiredModels(items) {
|
|
395
|
+
const hasImages = items.some(item => item.contentType === 'image');
|
|
396
|
+
if (hasImages) {
|
|
397
|
+
await this.preloadImageProcessingModels();
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
/**
|
|
401
|
+
* Preload image processing models
|
|
402
|
+
*/
|
|
403
|
+
async preloadImageProcessingModels() {
|
|
404
|
+
try {
|
|
405
|
+
// Note: Image-to-text processor is loaded on-demand by file-processor.ts
|
|
406
|
+
// to avoid conflicts with different pipeline configurations
|
|
407
|
+
if (!this.resourcePool.has('metadataExtractor')) {
|
|
408
|
+
console.log('Preloading image metadata extractor...');
|
|
409
|
+
const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
|
|
410
|
+
this.resourcePool.set('metadataExtractor', extractor);
|
|
411
|
+
// Register with resource manager
|
|
412
|
+
this.resourceManager.registerImageProcessor(extractor, 'metadata-extractor');
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
catch (error) {
|
|
416
|
+
console.warn(`Failed to preload image processing models: ${error instanceof Error ? error.message : String(error)}`);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Perform memory management operations
|
|
421
|
+
*/
|
|
422
|
+
async performMemoryManagement(aggressive = false) {
|
|
423
|
+
if (!this.config.enableMemoryMonitoring) {
|
|
424
|
+
return;
|
|
425
|
+
}
|
|
426
|
+
const currentMemory = this.memoryMonitor.getCurrentMemoryUsageMB();
|
|
427
|
+
// Force garbage collection if memory threshold exceeded or aggressive mode
|
|
428
|
+
if (aggressive || this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
|
|
429
|
+
if (this.config.enableGarbageCollection) {
|
|
430
|
+
this.memoryMonitor.forceGarbageCollection();
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
// Update peak memory tracking
|
|
434
|
+
this.memoryMonitor.updatePeakMemory();
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* Cleanup resources after processing with resource manager integration
|
|
438
|
+
*/
|
|
439
|
+
async cleanupResources() {
|
|
440
|
+
try {
|
|
441
|
+
// Clear resource pool if not using resource pooling
|
|
442
|
+
if (!this.config.enableResourcePooling) {
|
|
443
|
+
// Clean up registered processors
|
|
444
|
+
for (const [key, processor] of this.resourcePool) {
|
|
445
|
+
try {
|
|
446
|
+
// The resource manager will handle proper cleanup
|
|
447
|
+
if (processor && typeof processor.cleanup === 'function') {
|
|
448
|
+
await processor.cleanup();
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
catch (error) {
|
|
452
|
+
console.warn(`Failed to cleanup processor ${key}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
this.resourcePool.clear();
|
|
456
|
+
}
|
|
457
|
+
// Use resource manager for memory optimization
|
|
458
|
+
if (this.config.enableGarbageCollection) {
|
|
459
|
+
await this.resourceManager.optimizeMemory();
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
catch (error) {
|
|
463
|
+
console.warn(`Error during batch processing cleanup: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
// =============================================================================
|
|
467
|
+
// UTILITY METHODS
|
|
468
|
+
// =============================================================================
|
|
469
|
+
/**
|
|
470
|
+
* Check if progress should be reported for this batch
|
|
471
|
+
*/
|
|
472
|
+
shouldReportProgress(batchIndex) {
|
|
473
|
+
return this.config.enableProgressReporting &&
|
|
474
|
+
(batchIndex + 1) % this.config.progressReportInterval === 0;
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Delay execution for specified milliseconds
|
|
478
|
+
*/
|
|
479
|
+
delay(ms) {
|
|
480
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
481
|
+
}
|
|
482
|
+
/**
|
|
483
|
+
* Get current configuration
|
|
484
|
+
*/
|
|
485
|
+
getConfig() {
|
|
486
|
+
return { ...this.config };
|
|
487
|
+
}
|
|
488
|
+
/**
|
|
489
|
+
* Update configuration
|
|
490
|
+
*/
|
|
491
|
+
updateConfig(updates) {
|
|
492
|
+
this.config = { ...this.config, ...updates };
|
|
493
|
+
}
|
|
494
|
+
/**
|
|
495
|
+
* Get current memory statistics
|
|
496
|
+
*/
|
|
497
|
+
getMemoryStats() {
|
|
498
|
+
return this.memoryMonitor.getStats();
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
// =============================================================================
|
|
502
|
+
// FACTORY FUNCTIONS
|
|
503
|
+
// =============================================================================
|
|
504
|
+
/**
 * Create a batch processing optimizer.
 *
 * @param {object} [config] - Passed unchanged to the `BatchProcessingOptimizer` constructor.
 * @returns {BatchProcessingOptimizer} The new optimizer instance.
 */
export function createBatchProcessor(config) {
    const optimizer = new BatchProcessingOptimizer(config);
    return optimizer;
}
|
|
510
|
+
/**
 * Create a batch processing optimizer optimized for large image collections.
 *
 * Called without arguments it produces exactly the built-in image preset.
 * Optional `overrides` are merged on top of the preset so callers can tune
 * individual settings without rebuilding the whole configuration.
 *
 * @param {object} [overrides={}] - Configuration keys that replace preset values.
 * @returns {BatchProcessingOptimizer} The new optimizer instance.
 */
export function createImageBatchProcessor(overrides = {}) {
    return new BatchProcessingOptimizer({
        imageBatchSize: 2, // Very small batches for memory efficiency
        textBatchSize: 8,
        memoryThresholdMB: 512, // Higher threshold for memory-intensive image processing
        enableMemoryMonitoring: true,
        enableGarbageCollection: true,
        enableParallelProcessing: false, // Sequential for better memory control
        progressReportInterval: 2, // More frequent progress reports
        // Caller-supplied values win over the preset.
        ...overrides
    });
}
|
|
524
|
+
/**
 * Create a batch processing optimizer optimized for text processing.
 *
 * Called without arguments it produces exactly the built-in text preset.
 * Optional `overrides` are merged on top of the preset so callers can tune
 * individual settings without rebuilding the whole configuration.
 *
 * @param {object} [overrides={}] - Configuration keys that replace preset values.
 * @returns {BatchProcessingOptimizer} The new optimizer instance.
 */
export function createTextBatchProcessor(overrides = {}) {
    return new BatchProcessingOptimizer({
        textBatchSize: 32, // Larger batches for text
        imageBatchSize: 4,
        enableParallelProcessing: true, // Parallel processing for text
        memoryThresholdMB: 256, // Lower threshold sufficient for text processing
        progressReportInterval: 10,
        // Caller-supplied values win over the preset.
        ...overrides
    });
}
|
|
536
|
+
//# sourceMappingURL=batch-processing-optimizer.js.map
|