rag-lite-ts 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +606 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.js +7 -8
- package/dist/factories/index.d.ts +1 -1
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +9 -0
- package/dist/index.js +11 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +26 -0
- package/dist/search.js +54 -1
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +15 -3
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Batch Processing Optimizer
|
|
3
|
+
* Optimizes embedding generation for large multimodal content batches
|
|
4
|
+
* Implements efficient image processing pipelines with progress reporting
|
|
5
|
+
* Creates memory-efficient processing for large image collections
|
|
6
|
+
*/
|
|
7
|
+
import { LazyMultimodalLoader } from './lazy-dependency-loader.js';
|
|
8
|
+
import { createError } from './error-handler.js';
|
|
9
|
+
import { getResourceManager } from './resource-manager.js';
|
|
10
|
+
/**
 * Baseline settings for batch processing of multimodal content.
 * The BatchProcessingOptimizer constructor spreads this object first and then
 * lays any caller-supplied overrides on top of it.
 */
export const DEFAULT_BATCH_CONFIG = {
    // --- Batch sizing: kept small to limit memory use ---
    textBatchSize: 16,
    imageBatchSize: 4, // images are more memory-hungry, so fewer per batch
    maxConcurrentBatches: 2,

    // --- Memory management: threshold is expressed in megabytes ---
    memoryThresholdMB: 256,
    enableMemoryMonitoring: true,
    enableGarbageCollection: true,

    // --- Progress reporting: a report is due every N-th batch ---
    enableProgressReporting: true,
    progressReportInterval: 5,

    // --- Failure handling: retry budget and pause between attempts ---
    maxRetries: 3,
    retryDelayMs: 1000,
    enableFallbackProcessing: true,

    // --- Throughput options ---
    enableParallelProcessing: true,
    enableResourcePooling: true,
    preloadModels: false // models are loaded on demand unless this is switched on
};
|
|
34
|
+
// =============================================================================
|
|
35
|
+
// MEMORY MONITORING
|
|
36
|
+
// =============================================================================
|
|
37
|
+
/**
 * Tracks heap usage for a batch-processing run.
 *
 * All figures are `process.memoryUsage().heapUsed` converted to whole megabytes.
 * The monitor remembers the usage seen at construction time and the highest
 * usage it has sampled since.
 */
class MemoryMonitor {
    /** Heap usage (MB) sampled when the monitor was constructed. */
    initialMemoryMB;
    /** Highest heap usage (MB) this monitor has sampled so far. */
    peakMemoryMB;
    constructor() {
        this.initialMemoryMB = this.getCurrentMemoryUsageMB();
        this.peakMemoryMB = this.initialMemoryMB;
    }
    /**
     * Sample the current heap usage.
     * @returns heapUsed rounded to the nearest megabyte
     */
    getCurrentMemoryUsageMB() {
        const { heapUsed } = process.memoryUsage();
        return Math.round(heapUsed / 1024 / 1024);
    }
    /**
     * Take a fresh sample and raise the recorded peak if the sample is higher.
     */
    updatePeakMemory() {
        this.recordSample(this.getCurrentMemoryUsageMB());
    }
    /**
     * Report whether current heap usage is strictly above the given limit.
     * @param thresholdMB limit in megabytes
     * @returns true when the current sample is greater than thresholdMB
     */
    isMemoryThresholdExceeded(thresholdMB) {
        return this.getCurrentMemoryUsageMB() > thresholdMB;
    }
    /**
     * Trigger a garbage collection when the runtime exposes `global.gc`
     * (Node only provides it when started with --expose-gc); otherwise do nothing.
     */
    forceGarbageCollection() {
        if (typeof global.gc === 'function') {
            global.gc();
        }
    }
    /**
     * Snapshot of the tracked figures.
     *
     * The peak is refreshed from the same sample that is reported as current, so
     * the returned peakMB is never lower than the returned currentMB even if
     * updatePeakMemory() has not been called recently.
     * @returns object with currentMB, peakMB and initialMB
     */
    getStats() {
        const currentMB = this.getCurrentMemoryUsageMB();
        this.recordSample(currentMB);
        return {
            currentMB,
            peakMB: this.peakMemoryMB,
            initialMB: this.initialMemoryMB
        };
    }
    /**
     * Fold one usage sample into the recorded peak.
     * @param sampleMB usage sample in megabytes
     */
    recordSample(sampleMB) {
        if (sampleMB > this.peakMemoryMB) {
            this.peakMemoryMB = sampleMB;
        }
    }
}
|
|
88
|
+
// =============================================================================
|
|
89
|
+
// BATCH PROCESSING OPTIMIZER
|
|
90
|
+
// =============================================================================
|
|
91
|
+
/**
 * Batch processor for mixed text/image embedding workloads.
 *
 * Items are partitioned by `contentType` ('text' or 'image'), sliced into batches
 * sized by `config.textBatchSize` / `config.imageBatchSize`, and handed one item at
 * a time to a caller-supplied `embedFunction`. Per-item failures are collected in an
 * `errors` list instead of being thrown, and running totals are kept in a shared
 * `stats` object that is also passed to the optional progress callback.
 */
export class BatchProcessingOptimizer {
    /** Effective configuration: DEFAULT_BATCH_CONFIG overlaid with caller overrides. */
    config;
    /** Heap-usage tracker used for threshold checks and peak reporting. */
    memoryMonitor;
    /** Preloaded image helpers, keyed by 'imageToText' and 'metadataExtractor'. */
    resourcePool = new Map();
    /** Resource manager obtained from getResourceManager(). */
    resourceManager = getResourceManager();
    /**
     * @param config partial configuration; missing keys fall back to DEFAULT_BATCH_CONFIG
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_BATCH_CONFIG, ...config };
        this.memoryMonitor = new MemoryMonitor();
    }
    // =============================================================================
    // PUBLIC API
    // =============================================================================
    /**
     * Embed a collection of items, text first and then images.
     *
     * Items whose `contentType` is neither 'text' nor 'image' are not embedded; they
     * are counted in `stats.skippedItems`.
     *
     * @param items objects carrying a `contentType` property
     * @param embedFunction async function invoked once per item; its resolved value is collected
     * @param progressCallback optional function receiving a stats snapshot; exceptions it
     *        throws are logged and ignored so reporting cannot disturb processing
     * @returns `{ results, stats, errors }`
     * @throws the error produced by `createError.model` when processing fails as a whole
     */
    async processBatch(items, embedFunction, progressCallback) {
        const startTime = Date.now();
        const startingMemoryMB = this.memoryMonitor.getCurrentMemoryUsageMB();
        const stats = {
            totalItems: items.length,
            processedItems: 0,
            failedItems: 0,
            skippedItems: 0,
            totalBatches: 0,
            completedBatches: 0,
            failedBatches: 0,
            processingTimeMs: 0,
            averageBatchTimeMs: 0,
            itemsPerSecond: 0,
            memoryUsageMB: startingMemoryMB,
            peakMemoryUsageMB: startingMemoryMB,
            retryCount: 0,
            fallbackCount: 0
        };
        const results = [];
        const errors = [];
        try {
            if (this.config.preloadModels) {
                await this.preloadRequiredModels(items);
            }
            // Single pass: route each item by content type and count what cannot be handled.
            const textItems = [];
            const imageItems = [];
            for (const item of items) {
                if (item.contentType === 'text') {
                    textItems.push(item);
                }
                else if (item.contentType === 'image') {
                    imageItems.push(item);
                }
                else {
                    stats.skippedItems++;
                }
            }
            if (stats.skippedItems > 0) {
                console.warn(`Skipped ${stats.skippedItems} item(s) with an unsupported content type`);
            }
            if (textItems.length > 0) {
                const textResults = await this.processTextBatches(textItems, embedFunction, stats, errors, progressCallback);
                results.push(...textResults);
            }
            if (imageItems.length > 0) {
                const imageResults = await this.processImageBatches(imageItems, embedFunction, stats, errors, progressCallback);
                results.push(...imageResults);
            }
            // Final figures replace the running average maintained while batches ran.
            stats.processingTimeMs = Date.now() - startTime;
            stats.averageBatchTimeMs = stats.totalBatches > 0 ? stats.processingTimeMs / stats.totalBatches : 0;
            stats.itemsPerSecond = stats.processingTimeMs > 0 ? (stats.processedItems / stats.processingTimeMs) * 1000 : 0;
            const memoryStats = this.memoryMonitor.getStats();
            stats.memoryUsageMB = memoryStats.currentMB;
            stats.peakMemoryUsageMB = memoryStats.peakMB;
            if (progressCallback && this.config.enableProgressReporting) {
                this.notifyProgress(progressCallback, stats);
            }
            return { results, stats, errors };
        }
        catch (error) {
            throw createError.model(`Batch processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            await this.cleanupResources();
        }
    }
    // =============================================================================
    // TEXT BATCH PROCESSING
    // =============================================================================
    /**
     * Run all text items through processTextBatch, one batch after another.
     *
     * @param textItems items to embed
     * @param embedFunction per-item embedding function
     * @param stats shared statistics object (mutated)
     * @param errors shared error list (appended to)
     * @param progressCallback optional progress listener
     * @returns embedding results in batch order
     * @throws RangeError when `config.textBatchSize` is not a positive number
     */
    async processTextBatches(textItems, embedFunction, stats, errors, progressCallback) {
        const results = [];
        const batchSize = this.requireBatchSize(this.config.textBatchSize, 'textBatchSize');
        const totalBatches = Math.ceil(textItems.length / batchSize);
        console.log(`Processing ${textItems.length} text items in ${totalBatches} batches (batch size: ${batchSize})`);
        for (let i = 0; i < textItems.length; i += batchSize) {
            const batch = textItems.slice(i, i + batchSize);
            const batchIndex = Math.floor(i / batchSize);
            const batchResults = await this.runBatchWithRecovery({
                batch,
                batchIndex,
                label: `Text batch ${batchIndex + 1}/${totalBatches}`,
                run: () => this.processTextBatch(batch, embedFunction, batchIndex, stats, errors),
                embedFunction,
                stats,
                errors
            });
            results.push(...batchResults);
            await this.finishBatch(batchIndex, stats, progressCallback, false);
        }
        return results;
    }
    /**
     * Embed one text batch, concurrently or sequentially depending on
     * `config.enableParallelProcessing`. Item failures are recorded, not thrown.
     *
     * @returns results of the items that succeeded, in batch order
     */
    async processTextBatch(batch, embedFunction, batchIndex, stats, errors) {
        const batchStartTime = Date.now();
        try {
            if (this.config.enableParallelProcessing) {
                // A private marker distinguishes "failed" from a legitimate null/undefined
                // embedding result, so both modes keep exactly the same results.
                const failed = Symbol('failed');
                const settled = await Promise.all(batch.map(async (item, itemIndex) => {
                    try {
                        const result = await embedFunction(item);
                        stats.processedItems++;
                        return result;
                    }
                    catch (error) {
                        this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                        return failed;
                    }
                }));
                return settled.filter((result) => result !== failed);
            }
            const results = [];
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                const item = batch[itemIndex];
                try {
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                }
                catch (error) {
                    this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                }
            }
            return results;
        }
        finally {
            this.recordBatchTime(stats, batchStartTime);
        }
    }
    // =============================================================================
    // IMAGE BATCH PROCESSING
    // =============================================================================
    /**
     * Run all image items through processImageBatch, one batch after another, with
     * aggressive memory management between batches.
     *
     * Batch indices continue after the batches already counted in `stats.totalBatches`
     * at the moment this method starts, so text and image batches share one numbering.
     *
     * @throws RangeError when `config.imageBatchSize` is not a positive number
     */
    async processImageBatches(imageItems, embedFunction, stats, errors, progressCallback) {
        const results = [];
        const batchSize = this.requireBatchSize(this.config.imageBatchSize, 'imageBatchSize');
        const totalBatches = Math.ceil(imageItems.length / batchSize);
        console.log(`Processing ${imageItems.length} image items in ${totalBatches} batches (batch size: ${batchSize})`);
        await this.preloadImageProcessingModels();
        // Captured once: reading stats.totalBatches inside the loop would add the
        // image batches already started and make the index advance by two per batch.
        const batchOffset = stats.totalBatches;
        for (let i = 0; i < imageItems.length; i += batchSize) {
            const batch = imageItems.slice(i, i + batchSize);
            const batchIndex = batchOffset + Math.floor(i / batchSize);
            const batchResults = await this.runBatchWithRecovery({
                batch,
                batchIndex,
                label: `Image batch ${batchIndex + 1}`,
                run: () => this.processImageBatch(batch, embedFunction, batchIndex, stats, errors),
                embedFunction,
                stats,
                errors
            });
            results.push(...batchResults);
            await this.finishBatch(batchIndex, stats, progressCallback, true);
        }
        return results;
    }
    /**
     * Embed one image batch strictly sequentially. Before each image the heap
     * threshold is checked and, when monitoring and garbage collection are both
     * enabled in the configuration, a collection is requested if it is exceeded.
     *
     * @returns results of the items that succeeded, in batch order
     */
    async processImageBatch(batch, embedFunction, batchIndex, stats, errors) {
        const batchStartTime = Date.now();
        const mayCollect = this.config.enableMemoryMonitoring && this.config.enableGarbageCollection;
        try {
            const results = [];
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                const item = batch[itemIndex];
                try {
                    if (mayCollect && this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
                        console.warn(`Memory threshold exceeded (${this.memoryMonitor.getCurrentMemoryUsageMB()}MB), forcing garbage collection`);
                        this.memoryMonitor.forceGarbageCollection();
                    }
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                    this.memoryMonitor.updatePeakMemory();
                }
                catch (error) {
                    this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                }
            }
            return results;
        }
        finally {
            this.recordBatchTime(stats, batchStartTime);
        }
    }
    // =============================================================================
    // FALLBACK PROCESSING
    // =============================================================================
    /**
     * Re-run a batch item by item, retrying each failing item up to
     * `config.maxRetries` times with `config.retryDelayMs` between attempts.
     * `stats.retryCount` grows only when another attempt is actually scheduled.
     * A missing or invalid `maxRetries` is treated as zero retries (one attempt).
     *
     * @returns results of the items that eventually succeeded
     */
    async processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors) {
        console.log(`Attempting fallback processing for batch ${batchIndex} (${batch.length} items)`);
        const maxRetries = Math.max(0, Number(this.config.maxRetries) || 0);
        const results = [];
        for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
            const item = batch[itemIndex];
            for (let attempt = 0;; attempt++) {
                try {
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                    break;
                }
                catch (error) {
                    if (attempt >= maxRetries) {
                        // Retry budget exhausted: record the last error and move on.
                        this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                        break;
                    }
                    stats.retryCount++;
                    console.warn(`Retry ${attempt + 1}/${this.config.maxRetries} for item ${itemIndex} in batch ${batchIndex}`);
                    await this.delay(this.config.retryDelayMs);
                }
            }
        }
        return results;
    }
    // =============================================================================
    // RESOURCE MANAGEMENT
    // =============================================================================
    /**
     * Preload the image helpers when at least one item has `contentType === 'image'`.
     */
    async preloadRequiredModels(items) {
        const hasImages = items.some(item => item.contentType === 'image');
        if (hasImages) {
            await this.preloadImageProcessingModels();
        }
    }
    /**
     * Load the image-to-text processor and the image metadata extractor through
     * LazyMultimodalLoader unless they are already in the resource pool, and
     * register each with the resource manager. Load failures are logged and ignored.
     */
    async preloadImageProcessingModels() {
        try {
            if (!this.resourcePool.has('imageToText')) {
                console.log('Preloading image-to-text processor...');
                const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
                this.resourcePool.set('imageToText', processor);
                this.resourceManager.registerImageProcessor(processor, 'image-to-text');
            }
            if (!this.resourcePool.has('metadataExtractor')) {
                console.log('Preloading image metadata extractor...');
                const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
                this.resourcePool.set('metadataExtractor', extractor);
                this.resourceManager.registerImageProcessor(extractor, 'metadata-extractor');
            }
        }
        catch (error) {
            console.warn(`Failed to preload image processing models: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Between-batch housekeeping. Does nothing when memory monitoring is disabled.
     * Otherwise requests a garbage collection when `aggressive` is set or the heap
     * threshold is exceeded (and GC is enabled), then refreshes the recorded peak.
     *
     * @param aggressive request a collection regardless of the threshold
     */
    async performMemoryManagement(aggressive = false) {
        if (!this.config.enableMemoryMonitoring) {
            return;
        }
        if (aggressive || this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
            if (this.config.enableGarbageCollection) {
                this.memoryMonitor.forceGarbageCollection();
            }
        }
        this.memoryMonitor.updatePeakMemory();
    }
    /**
     * End-of-run cleanup. When resource pooling is disabled, every pooled helper
     * exposing a `cleanup()` function is cleaned up and the pool is emptied. When
     * garbage collection is enabled, the resource manager is asked to optimize
     * memory. Errors are logged and never propagated.
     */
    async cleanupResources() {
        try {
            if (!this.config.enableResourcePooling) {
                for (const [key, processor] of this.resourcePool) {
                    try {
                        if (processor && typeof processor.cleanup === 'function') {
                            await processor.cleanup();
                        }
                    }
                    catch (error) {
                        console.warn(`Failed to cleanup processor ${key}: ${error instanceof Error ? error.message : 'Unknown error'}`);
                    }
                }
                this.resourcePool.clear();
            }
            if (this.config.enableGarbageCollection) {
                await this.resourceManager.optimizeMemory();
            }
        }
        catch (error) {
            console.warn(`Error during batch processing cleanup: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    // =============================================================================
    // UTILITY METHODS
    // =============================================================================
    /**
     * Decide whether a progress report is due after the batch with this zero-based index.
     */
    shouldReportProgress(batchIndex) {
        return this.config.enableProgressReporting &&
            (batchIndex + 1) % this.config.progressReportInterval === 0;
    }
    /**
     * Resolve after the given number of milliseconds.
     */
    delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
    /**
     * @returns a shallow copy of the current configuration
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merge the given keys into the current configuration.
     */
    updateConfig(updates) {
        this.config = { ...this.config, ...updates };
    }
    /**
     * @returns the memory monitor's current statistics
     */
    getMemoryStats() {
        return this.memoryMonitor.getStats();
    }
    // =============================================================================
    // INTERNAL HELPERS
    // =============================================================================
    /**
     * Run one batch and keep the batch counters in `stats` up to date. If the batch
     * runner throws, the batch is counted as failed and, when fallback processing is
     * enabled, re-run item by item.
     *
     * Only the batch runner itself sits inside the try block, so memory housekeeping
     * or progress reporting can never cause a finished batch to be embedded twice.
     */
    async runBatchWithRecovery({ batch, batchIndex, label, run, embedFunction, stats, errors }) {
        stats.totalBatches++;
        try {
            const batchResults = await run();
            stats.completedBatches++;
            return batchResults;
        }
        catch (error) {
            stats.failedBatches++;
            console.warn(`${label} failed: ${error instanceof Error ? error.message : String(error)}`);
            if (!this.config.enableFallbackProcessing) {
                return [];
            }
            const fallbackResults = await this.processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors);
            stats.fallbackCount++;
            return fallbackResults;
        }
    }
    /**
     * Post-batch housekeeping: memory management followed by an optional progress report.
     */
    async finishBatch(batchIndex, stats, progressCallback, aggressive) {
        await this.performMemoryManagement(aggressive);
        if (progressCallback && this.shouldReportProgress(batchIndex)) {
            this.notifyProgress(progressCallback, { ...stats });
        }
    }
    /**
     * Invoke the progress callback, logging (not propagating) anything it throws.
     */
    notifyProgress(progressCallback, snapshot) {
        try {
            progressCallback(snapshot);
        }
        catch (error) {
            console.warn(`Progress callback failed: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Count one failed item and append its details to the shared error list.
     */
    recordItemFailure(stats, errors, item, error, batchIndex, itemIndex) {
        stats.failedItems++;
        errors.push({
            item,
            error: error instanceof Error ? error.message : String(error),
            batchIndex,
            itemIndex
        });
    }
    /**
     * Fold the duration of the batch that just ended into the running mean in
     * `stats.averageBatchTimeMs`, weighted by the batches finished before it.
     */
    recordBatchTime(stats, batchStartTime) {
        const batchTime = Date.now() - batchStartTime;
        const finishedBatches = stats.completedBatches + stats.failedBatches;
        stats.averageBatchTimeMs = ((stats.averageBatchTimeMs * finishedBatches) + batchTime) / (finishedBatches + 1);
    }
    /**
     * Validate a configured batch size; anything that is not greater than zero
     * would keep the batching loop from ever advancing.
     */
    requireBatchSize(value, optionName) {
        if (!(value > 0)) {
            throw new RangeError(`${optionName} must be a positive number, received ${String(value)}`);
        }
        return value;
    }
}
|
|
506
|
+
// =============================================================================
|
|
507
|
+
// FACTORY FUNCTIONS
|
|
508
|
+
// =============================================================================
|
|
509
|
+
/**
 * Build a BatchProcessingOptimizer.
 * @param config optional configuration overrides, forwarded unchanged to the constructor
 * @returns the newly constructed optimizer
 */
export function createBatchProcessor(config) {
    const optimizer = new BatchProcessingOptimizer(config);
    return optimizer;
}
|
|
515
|
+
/**
 * Build a BatchProcessingOptimizer preset for large image collections:
 * small batches, a low memory threshold, sequential processing and
 * frequent progress reports.
 * @returns the newly constructed optimizer
 */
export function createImageBatchProcessor() {
    const imagePreset = {
        imageBatchSize: 2, // tiny image batches to keep memory use down
        textBatchSize: 8,
        memoryThresholdMB: 128, // threshold lowered for image workloads
        enableMemoryMonitoring: true,
        enableGarbageCollection: true,
        enableParallelProcessing: false, // one item at a time for tighter memory control
        progressReportInterval: 2 // report progress more often
    };
    return new BatchProcessingOptimizer(imagePreset);
}
|
|
529
|
+
/**
 * Build a BatchProcessingOptimizer preset for text workloads:
 * larger text batches, parallel processing and a higher memory threshold.
 * @returns the newly constructed optimizer
 */
export function createTextBatchProcessor() {
    const textPreset = {
        textBatchSize: 32, // text tolerates bigger batches
        imageBatchSize: 4,
        enableParallelProcessing: true, // embed the items of a batch concurrently
        memoryThresholdMB: 512, // threshold raised for text workloads
        progressReportInterval: 10
    };
    return new BatchProcessingOptimizer(textPreset);
}
|
|
541
|
+
//# sourceMappingURL=batch-processing-optimizer.js.map
|
package/dist/core/chunker.d.ts
CHANGED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Database Utilities - Database access helpers for CLI commands
|
|
3
|
+
* Provides database locking detection and retry mechanisms for CLI operations
|
|
4
|
+
* Prevents conflicts between CLI commands and long-running processes like MCP server
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Options accepted by the CLI database-access helpers declared in this file.
 * Every field is optional; the fallback values are chosen by the implementation,
 * which is not visible in this declaration file.
 */
export interface CLIDatabaseOptions {
    /** Upper bound, in milliseconds, on how long to wait for database access. */
    maxWaitMs?: number;
    /** Interval, in milliseconds, between retries. */
    retryIntervalMs?: number;
    /** Whether progress messages are shown to the user. */
    showProgress?: boolean;
    /** Name of the invoking command, used to produce better error messages. */
    commandName?: string;
}
|
|
19
|
+
/**
 * Waits until the database becomes available for a CLI operation, with
 * user-friendly progress messages and error handling.
 *
 * @param dbPath - identifies the database to wait for (presumably a file path — confirm against the implementation)
 * @param options - optional wait/retry/progress settings
 * @returns a promise that resolves with no value
 */
export declare function waitForCLIDatabaseAccess(dbPath: string, options?: CLIDatabaseOptions): Promise<void>;
|
|
24
|
+
/**
 * Runs a CLI operation under database access protection: database locking is
 * handled automatically and the user is given feedback.
 *
 * @typeParam T - value produced by the operation
 * @param dbPath - identifies the database the operation works against
 * @param operation - asynchronous work to execute
 * @param options - optional wait/retry/progress settings
 * @returns a promise of type T (presumably the operation's own result — confirm against the implementation)
 */
export declare function withCLIDatabaseAccess<T>(dbPath: string, operation: () => Promise<T>, options?: CLIDatabaseOptions): Promise<T>;
|
|
29
|
+
/**
 * Non-blocking check of whether the database is currently busy; intended for
 * showing warnings or status information.
 *
 * @param dbPath - identifies the database to inspect
 * @returns a promise of an object with a required `isBusy` flag and optional
 *          `reason` and `suggestions` fields
 */
export declare function isDatabaseBusy(dbPath: string): Promise<{
    isBusy: boolean;
    reason?: string;
    suggestions?: string[];
}>;
|
|
38
|
+
/**
 * Shows database status information for debugging; meant as an aid when
 * troubleshooting CLI issues.
 *
 * @param dbPath - identifies the database whose status is shown
 * @returns a promise that resolves with no value
 */
export declare function showDatabaseStatus(dbPath: string): Promise<void>;
|
|
43
|
+
/**
 * Forces cleanup of database connections. Emergency use only: call it with
 * caution and solely to recover from a stuck state.
 *
 * @param dbPath - identifies the database whose connections are cleaned up
 * @returns a promise that resolves with no value
 */
export declare function forceCleanupDatabase(dbPath: string): Promise<void>;
|
|
48
|
+
/**
 * Graceful-shutdown helper for CLI commands: ensures proper cleanup when a
 * command is interrupted.
 *
 * @param dbPath - optional; identifies the database the cleanup applies to
 */
export declare function setupCLICleanup(dbPath?: string): void;
|
|
53
|
+
//# sourceMappingURL=cli-database-utils.d.ts.map
|