rag-lite-ts 1.0.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/README.md +605 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/binary-index-format.d.ts +52 -0
  12. package/dist/core/binary-index-format.js +122 -0
  13. package/dist/core/chunker.d.ts +2 -0
  14. package/dist/core/cli-database-utils.d.ts +53 -0
  15. package/dist/core/cli-database-utils.js +239 -0
  16. package/dist/core/config.js +10 -3
  17. package/dist/core/content-errors.d.ts +111 -0
  18. package/dist/core/content-errors.js +362 -0
  19. package/dist/core/content-manager.d.ts +343 -0
  20. package/dist/core/content-manager.js +1504 -0
  21. package/dist/core/content-performance-optimizer.d.ts +150 -0
  22. package/dist/core/content-performance-optimizer.js +516 -0
  23. package/dist/core/content-resolver.d.ts +104 -0
  24. package/dist/core/content-resolver.js +285 -0
  25. package/dist/core/cross-modal-search.d.ts +164 -0
  26. package/dist/core/cross-modal-search.js +342 -0
  27. package/dist/core/database-connection-manager.d.ts +109 -0
  28. package/dist/core/database-connection-manager.js +304 -0
  29. package/dist/core/db.d.ts +141 -2
  30. package/dist/core/db.js +631 -89
  31. package/dist/core/embedder-factory.d.ts +176 -0
  32. package/dist/core/embedder-factory.js +338 -0
  33. package/dist/core/index.d.ts +3 -1
  34. package/dist/core/index.js +4 -1
  35. package/dist/core/ingestion.d.ts +85 -15
  36. package/dist/core/ingestion.js +510 -45
  37. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  38. package/dist/core/lazy-dependency-loader.js +453 -0
  39. package/dist/core/mode-detection-service.d.ts +150 -0
  40. package/dist/core/mode-detection-service.js +565 -0
  41. package/dist/core/mode-model-validator.d.ts +92 -0
  42. package/dist/core/mode-model-validator.js +203 -0
  43. package/dist/core/model-registry.d.ts +120 -0
  44. package/dist/core/model-registry.js +415 -0
  45. package/dist/core/model-validator.d.ts +217 -0
  46. package/dist/core/model-validator.js +782 -0
  47. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  48. package/dist/core/polymorphic-search-factory.js +344 -0
  49. package/dist/core/raglite-paths.d.ts +121 -0
  50. package/dist/core/raglite-paths.js +145 -0
  51. package/dist/core/reranking-config.d.ts +42 -0
  52. package/dist/core/reranking-config.js +156 -0
  53. package/dist/core/reranking-factory.d.ts +92 -0
  54. package/dist/core/reranking-factory.js +591 -0
  55. package/dist/core/reranking-strategies.d.ts +325 -0
  56. package/dist/core/reranking-strategies.js +720 -0
  57. package/dist/core/resource-cleanup.d.ts +163 -0
  58. package/dist/core/resource-cleanup.js +371 -0
  59. package/dist/core/resource-manager.d.ts +212 -0
  60. package/dist/core/resource-manager.js +564 -0
  61. package/dist/core/search.d.ts +28 -1
  62. package/dist/core/search.js +83 -5
  63. package/dist/core/streaming-operations.d.ts +145 -0
  64. package/dist/core/streaming-operations.js +409 -0
  65. package/dist/core/types.d.ts +3 -0
  66. package/dist/core/universal-embedder.d.ts +177 -0
  67. package/dist/core/universal-embedder.js +139 -0
  68. package/dist/core/validation-messages.d.ts +99 -0
  69. package/dist/core/validation-messages.js +334 -0
  70. package/dist/core/vector-index.d.ts +1 -1
  71. package/dist/core/vector-index.js +37 -39
  72. package/dist/factories/index.d.ts +3 -1
  73. package/dist/factories/index.js +2 -0
  74. package/dist/factories/polymorphic-factory.d.ts +50 -0
  75. package/dist/factories/polymorphic-factory.js +159 -0
  76. package/dist/factories/text-factory.d.ts +128 -34
  77. package/dist/factories/text-factory.js +346 -97
  78. package/dist/file-processor.d.ts +88 -2
  79. package/dist/file-processor.js +720 -17
  80. package/dist/index.d.ts +32 -0
  81. package/dist/index.js +29 -0
  82. package/dist/ingestion.d.ts +16 -0
  83. package/dist/ingestion.js +21 -0
  84. package/dist/mcp-server.d.ts +35 -3
  85. package/dist/mcp-server.js +1107 -31
  86. package/dist/multimodal/clip-embedder.d.ts +327 -0
  87. package/dist/multimodal/clip-embedder.js +992 -0
  88. package/dist/multimodal/index.d.ts +6 -0
  89. package/dist/multimodal/index.js +6 -0
  90. package/dist/run-error-recovery-tests.d.ts +7 -0
  91. package/dist/run-error-recovery-tests.js +101 -0
  92. package/dist/search.d.ts +60 -9
  93. package/dist/search.js +82 -11
  94. package/dist/test-utils.d.ts +8 -26
  95. package/dist/text/chunker.d.ts +1 -0
  96. package/dist/text/embedder.js +15 -8
  97. package/dist/text/index.d.ts +1 -0
  98. package/dist/text/index.js +1 -0
  99. package/dist/text/reranker.d.ts +1 -2
  100. package/dist/text/reranker.js +17 -47
  101. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  102. package/dist/text/sentence-transformer-embedder.js +340 -0
  103. package/dist/types.d.ts +39 -0
  104. package/dist/utils/vector-math.d.ts +31 -0
  105. package/dist/utils/vector-math.js +70 -0
  106. package/package.json +27 -6
  107. package/dist/api-errors.d.ts.map +0 -1
  108. package/dist/api-errors.js.map +0 -1
  109. package/dist/cli/indexer.d.ts.map +0 -1
  110. package/dist/cli/indexer.js.map +0 -1
  111. package/dist/cli/search.d.ts.map +0 -1
  112. package/dist/cli/search.js.map +0 -1
  113. package/dist/cli.d.ts.map +0 -1
  114. package/dist/cli.js.map +0 -1
  115. package/dist/config.d.ts.map +0 -1
  116. package/dist/config.js.map +0 -1
  117. package/dist/core/adapters.d.ts.map +0 -1
  118. package/dist/core/adapters.js.map +0 -1
  119. package/dist/core/chunker.d.ts.map +0 -1
  120. package/dist/core/chunker.js.map +0 -1
  121. package/dist/core/config.d.ts.map +0 -1
  122. package/dist/core/config.js.map +0 -1
  123. package/dist/core/db.d.ts.map +0 -1
  124. package/dist/core/db.js.map +0 -1
  125. package/dist/core/error-handler.d.ts.map +0 -1
  126. package/dist/core/error-handler.js.map +0 -1
  127. package/dist/core/index.d.ts.map +0 -1
  128. package/dist/core/index.js.map +0 -1
  129. package/dist/core/ingestion.d.ts.map +0 -1
  130. package/dist/core/ingestion.js.map +0 -1
  131. package/dist/core/interfaces.d.ts.map +0 -1
  132. package/dist/core/interfaces.js.map +0 -1
  133. package/dist/core/path-manager.d.ts.map +0 -1
  134. package/dist/core/path-manager.js.map +0 -1
  135. package/dist/core/search-example.d.ts +0 -25
  136. package/dist/core/search-example.d.ts.map +0 -1
  137. package/dist/core/search-example.js +0 -138
  138. package/dist/core/search-example.js.map +0 -1
  139. package/dist/core/search-pipeline-example.d.ts +0 -21
  140. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  141. package/dist/core/search-pipeline-example.js +0 -188
  142. package/dist/core/search-pipeline-example.js.map +0 -1
  143. package/dist/core/search-pipeline.d.ts.map +0 -1
  144. package/dist/core/search-pipeline.js.map +0 -1
  145. package/dist/core/search.d.ts.map +0 -1
  146. package/dist/core/search.js.map +0 -1
  147. package/dist/core/types.d.ts.map +0 -1
  148. package/dist/core/types.js.map +0 -1
  149. package/dist/core/vector-index.d.ts.map +0 -1
  150. package/dist/core/vector-index.js.map +0 -1
  151. package/dist/dom-polyfills.d.ts.map +0 -1
  152. package/dist/dom-polyfills.js.map +0 -1
  153. package/dist/examples/clean-api-examples.d.ts +0 -44
  154. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  155. package/dist/examples/clean-api-examples.js +0 -206
  156. package/dist/examples/clean-api-examples.js.map +0 -1
  157. package/dist/factories/index.d.ts.map +0 -1
  158. package/dist/factories/index.js.map +0 -1
  159. package/dist/factories/text-factory.d.ts.map +0 -1
  160. package/dist/factories/text-factory.js.map +0 -1
  161. package/dist/file-processor.d.ts.map +0 -1
  162. package/dist/file-processor.js.map +0 -1
  163. package/dist/index-manager.d.ts.map +0 -1
  164. package/dist/index-manager.js.map +0 -1
  165. package/dist/index.d.ts.map +0 -1
  166. package/dist/index.js.map +0 -1
  167. package/dist/indexer.d.ts.map +0 -1
  168. package/dist/indexer.js.map +0 -1
  169. package/dist/ingestion.d.ts.map +0 -1
  170. package/dist/ingestion.js.map +0 -1
  171. package/dist/mcp-server.d.ts.map +0 -1
  172. package/dist/mcp-server.js.map +0 -1
  173. package/dist/preprocess.d.ts.map +0 -1
  174. package/dist/preprocess.js.map +0 -1
  175. package/dist/preprocessors/index.d.ts.map +0 -1
  176. package/dist/preprocessors/index.js.map +0 -1
  177. package/dist/preprocessors/mdx.d.ts.map +0 -1
  178. package/dist/preprocessors/mdx.js.map +0 -1
  179. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  180. package/dist/preprocessors/mermaid.js.map +0 -1
  181. package/dist/preprocessors/registry.d.ts.map +0 -1
  182. package/dist/preprocessors/registry.js.map +0 -1
  183. package/dist/search-standalone.d.ts.map +0 -1
  184. package/dist/search-standalone.js.map +0 -1
  185. package/dist/search.d.ts.map +0 -1
  186. package/dist/search.js.map +0 -1
  187. package/dist/test-utils.d.ts.map +0 -1
  188. package/dist/test-utils.js.map +0 -1
  189. package/dist/text/chunker.d.ts.map +0 -1
  190. package/dist/text/chunker.js.map +0 -1
  191. package/dist/text/embedder.d.ts.map +0 -1
  192. package/dist/text/embedder.js.map +0 -1
  193. package/dist/text/index.d.ts.map +0 -1
  194. package/dist/text/index.js.map +0 -1
  195. package/dist/text/preprocessors/index.d.ts.map +0 -1
  196. package/dist/text/preprocessors/index.js.map +0 -1
  197. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  198. package/dist/text/preprocessors/mdx.js.map +0 -1
  199. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  200. package/dist/text/preprocessors/mermaid.js.map +0 -1
  201. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  202. package/dist/text/preprocessors/registry.js.map +0 -1
  203. package/dist/text/reranker.d.ts.map +0 -1
  204. package/dist/text/reranker.js.map +0 -1
  205. package/dist/text/tokenizer.d.ts.map +0 -1
  206. package/dist/text/tokenizer.js.map +0 -1
  207. package/dist/types.d.ts.map +0 -1
  208. package/dist/types.js.map +0 -1
@@ -0,0 +1,541 @@
1
+ /**
2
+ * CORE MODULE — Batch Processing Optimizer
3
+ * Optimizes embedding generation for large multimodal content batches
4
+ * Implements efficient image processing pipelines with progress reporting
5
+ * Creates memory-efficient processing for large image collections
6
+ */
7
+ import { LazyMultimodalLoader } from './lazy-dependency-loader.js';
8
+ import { createError } from './error-handler.js';
9
+ import { getResourceManager } from './resource-manager.js';
10
/**
 * Baseline settings for BatchProcessingOptimizer.
 * Values passed to the optimizer's constructor are merged on top of these.
 */
export const DEFAULT_BATCH_CONFIG = {
    // --- Batch sizing (kept conservative to limit memory use) ---
    textBatchSize: 16,
    imageBatchSize: 4, // image processing is memory-intensive, so batches are smaller
    maxConcurrentBatches: 2,
    // --- Memory management: act once heap usage passes 256 MB ---
    memoryThresholdMB: 256,
    enableMemoryMonitoring: true,
    enableGarbageCollection: true,
    // --- Progress reporting: notify the callback every 5th batch ---
    enableProgressReporting: true,
    progressReportInterval: 5,
    // --- Error handling: per-item retries with a fixed delay ---
    maxRetries: 3,
    retryDelayMs: 1000,
    enableFallbackProcessing: true,
    // --- Performance tuning ---
    enableParallelProcessing: true,
    enableResourcePooling: true,
    preloadModels: false // models are loaded lazily unless this is switched on
};
34
+ // =============================================================================
35
+ // MEMORY MONITORING
36
+ // =============================================================================
37
/**
 * Tracks V8 heap usage (in whole megabytes) for the lifetime of one monitor
 * instance: the reading taken at construction, the highest reading observed
 * so far, and the current reading.
 *
 * Only `heapUsed` from `process.memoryUsage()` is measured; memory held
 * outside the JS heap is not reflected in these numbers.
 */
class MemoryMonitor {
    initialMemoryMB;
    peakMemoryMB;
    constructor() {
        this.initialMemoryMB = this.getCurrentMemoryUsageMB();
        this.peakMemoryMB = this.initialMemoryMB;
    }
    /**
     * Current heap usage, rounded to the nearest MB.
     * @returns {number} `heapUsed` in megabytes
     */
    getCurrentMemoryUsageMB() {
        const usage = process.memoryUsage();
        return Math.round(usage.heapUsed / 1024 / 1024);
    }
    /**
     * Raise the recorded peak if the current reading is higher.
     */
    updatePeakMemory() {
        const current = this.getCurrentMemoryUsageMB();
        if (current > this.peakMemoryMB) {
            this.peakMemoryMB = current;
        }
    }
    /**
     * @param thresholdMB limit in megabytes
     * @returns {boolean} true only when current usage is strictly above the limit
     */
    isMemoryThresholdExceeded(thresholdMB) {
        return this.getCurrentMemoryUsageMB() > thresholdMB;
    }
    /**
     * Trigger a garbage collection when `global.gc` is available
     * (it is a no-op otherwise).
     */
    forceGarbageCollection() {
        if (global.gc) {
            global.gc();
        }
    }
    /**
     * Snapshot of current, peak and initial usage.
     *
     * The peak is refreshed from the same reading that is returned as
     * `currentMB`, so the snapshot can never report `currentMB > peakMB`
     * even if `updatePeakMemory()` was not called recently.
     *
     * @returns {{currentMB: number, peakMB: number, initialMB: number}}
     */
    getStats() {
        const currentMB = this.getCurrentMemoryUsageMB();
        if (currentMB > this.peakMemoryMB) {
            this.peakMemoryMB = currentMB;
        }
        return {
            currentMB,
            peakMB: this.peakMemoryMB,
            initialMB: this.initialMemoryMB
        };
    }
}
88
+ // =============================================================================
89
+ // BATCH PROCESSING OPTIMIZER
90
+ // =============================================================================
91
/**
 * Optimized batch processor for multimodal content.
 *
 * Splits the input into text and image items, runs a caller-supplied
 * `embedFunction` over them batch by batch, and collects per-item errors and
 * aggregate statistics instead of aborting the run on a single bad item.
 */
export class BatchProcessingOptimizer {
    config;
    memoryMonitor;
    // Processors preloaded through LazyMultimodalLoader, keyed by role
    // ('imageToText', 'metadataExtractor').
    resourcePool = new Map();
    resourceManager = getResourceManager();
    /**
     * @param config overrides merged on top of DEFAULT_BATCH_CONFIG
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_BATCH_CONFIG, ...config };
        this.memoryMonitor = new MemoryMonitor();
    }
    // =============================================================================
    // PUBLIC API
    // =============================================================================
    /**
     * Process a collection of items, text first and then images.
     *
     * @param items items carrying a `contentType` of 'text' or 'image'; items
     *   with any other `contentType` are not processed and are counted in
     *   `stats.skippedItems`
     * @param embedFunction async function invoked once per item
     * @param progressCallback optional; receives a copy of the statistics
     *   every `progressReportInterval` batches and the final statistics object
     *   once at the end (only when `enableProgressReporting` is on)
     * @returns `{ results, stats, errors }`
     * @throws the error built by `createError.model` when anything outside the
     *   per-item error handling fails
     */
    async processBatch(items, embedFunction, progressCallback) {
        const startTime = Date.now();
        const stats = {
            totalItems: items.length,
            processedItems: 0,
            failedItems: 0,
            skippedItems: 0,
            totalBatches: 0,
            completedBatches: 0,
            failedBatches: 0,
            processingTimeMs: 0,
            averageBatchTimeMs: 0,
            itemsPerSecond: 0,
            memoryUsageMB: this.memoryMonitor.getCurrentMemoryUsageMB(),
            peakMemoryUsageMB: this.memoryMonitor.getCurrentMemoryUsageMB(),
            retryCount: 0,
            fallbackCount: 0
        };
        const results = [];
        const errors = [];
        try {
            if (this.config.preloadModels) {
                await this.preloadRequiredModels(items);
            }
            // Each content type has its own batch size and processing strategy.
            const textItems = items.filter(item => item.contentType === 'text');
            const imageItems = items.filter(item => item.contentType === 'image');
            // Anything that is neither text nor image is never embedded; make
            // that visible in the statistics instead of dropping it silently.
            stats.skippedItems = items.length - textItems.length - imageItems.length;
            if (textItems.length > 0) {
                const textResults = await this.processTextBatches(textItems, embedFunction, stats, errors, progressCallback);
                results.push(...textResults);
            }
            if (imageItems.length > 0) {
                const imageResults = await this.processImageBatches(imageItems, embedFunction, stats, errors, progressCallback);
                results.push(...imageResults);
            }
            // Final statistics: the running average kept while processing is
            // replaced by the overall wall-clock average.
            stats.processingTimeMs = Date.now() - startTime;
            stats.averageBatchTimeMs = stats.totalBatches > 0 ? stats.processingTimeMs / stats.totalBatches : 0;
            stats.itemsPerSecond = stats.processingTimeMs > 0 ? (stats.processedItems / stats.processingTimeMs) * 1000 : 0;
            const memoryStats = this.memoryMonitor.getStats();
            stats.memoryUsageMB = memoryStats.currentMB;
            stats.peakMemoryUsageMB = memoryStats.peakMB;
            if (progressCallback && this.config.enableProgressReporting) {
                progressCallback(stats);
            }
            return { results, stats, errors };
        }
        catch (error) {
            throw createError.model(`Batch processing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            // Runs on success and on failure alike.
            await this.cleanupResources();
        }
    }
    // =============================================================================
    // TEXT BATCH PROCESSING
    // =============================================================================
    /**
     * Run all text items through `processTextBatch`, `textBatchSize` at a time.
     * A batch that throws is retried item by item when
     * `enableFallbackProcessing` is on.
     */
    async processTextBatches(textItems, embedFunction, stats, errors, progressCallback) {
        const results = [];
        const batchSize = this.normalizeBatchSize(this.config.textBatchSize);
        const totalBatches = Math.ceil(textItems.length / batchSize);
        console.log(`Processing ${textItems.length} text items in ${totalBatches} batches (batch size: ${batchSize})`);
        for (let i = 0; i < textItems.length; i += batchSize) {
            const batch = textItems.slice(i, i + batchSize);
            const batchIndex = Math.floor(i / batchSize);
            stats.totalBatches++;
            try {
                const batchResults = await this.processTextBatch(batch, embedFunction, batchIndex, stats, errors);
                results.push(...batchResults);
                stats.completedBatches++;
                await this.performMemoryManagement();
                if (progressCallback && this.shouldReportProgress(batchIndex)) {
                    // A copy, so later mutations do not change what was reported.
                    progressCallback({ ...stats });
                }
            }
            catch (error) {
                stats.failedBatches++;
                console.warn(`Text batch ${batchIndex + 1}/${totalBatches} failed: ${error instanceof Error ? error.message : String(error)}`);
                if (this.config.enableFallbackProcessing) {
                    const fallbackResults = await this.processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors);
                    results.push(...fallbackResults);
                    stats.fallbackCount++;
                }
            }
        }
        return results;
    }
    /**
     * Embed one text batch, concurrently or sequentially depending on
     * `enableParallelProcessing`. Per-item failures are recorded in `errors`
     * and do not reject the batch.
     *
     * Note: in the concurrent path results equal to `null` are filtered out,
     * because `null` is used there to mark a failed item.
     */
    async processTextBatch(batch, embedFunction, batchIndex, stats, errors) {
        const batchStartTime = Date.now();
        try {
            if (this.config.enableParallelProcessing) {
                const pending = batch.map(async (item, itemIndex) => {
                    try {
                        const result = await embedFunction(item);
                        stats.processedItems++;
                        return result;
                    }
                    catch (error) {
                        this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                        return null;
                    }
                });
                const settled = await Promise.all(pending);
                return settled.filter((result) => result !== null);
            }
            const results = [];
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                const item = batch[itemIndex];
                try {
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                }
                catch (error) {
                    this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                }
            }
            return results;
        }
        finally {
            this.recordBatchTime(stats, batchStartTime);
        }
    }
    // =============================================================================
    // IMAGE BATCH PROCESSING
    // =============================================================================
    /**
     * Run all image items through `processImageBatch`, `imageBatchSize` at a
     * time, with a memory-management pass after every batch.
     *
     * Batch indices continue after the batches already counted in
     * `stats.totalBatches` when this method starts.
     */
    async processImageBatches(imageItems, embedFunction, stats, errors, progressCallback) {
        const results = [];
        const batchSize = this.normalizeBatchSize(this.config.imageBatchSize);
        const totalBatches = Math.ceil(imageItems.length / batchSize);
        console.log(`Processing ${imageItems.length} image items in ${totalBatches} batches (batch size: ${batchSize})`);
        await this.preloadImageProcessingModels();
        // Captured once: stats.totalBatches grows inside the loop, so reading it
        // per iteration would make the indices skip (offset, offset + 2, ...).
        const firstBatchIndex = stats.totalBatches;
        for (let i = 0; i < imageItems.length; i += batchSize) {
            const batch = imageItems.slice(i, i + batchSize);
            const batchIndex = firstBatchIndex + Math.floor(i / batchSize);
            stats.totalBatches++;
            try {
                const batchResults = await this.processImageBatch(batch, embedFunction, batchIndex, stats, errors);
                results.push(...batchResults);
                stats.completedBatches++;
                // Aggressive mode: request GC after every image batch.
                await this.performMemoryManagement(true);
                if (progressCallback && this.shouldReportProgress(batchIndex)) {
                    progressCallback({ ...stats });
                }
            }
            catch (error) {
                stats.failedBatches++;
                console.warn(`Image batch ${batchIndex + 1} failed: ${error instanceof Error ? error.message : String(error)}`);
                if (this.config.enableFallbackProcessing) {
                    const fallbackResults = await this.processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors);
                    results.push(...fallbackResults);
                    stats.fallbackCount++;
                }
            }
        }
        return results;
    }
    /**
     * Embed one image batch strictly sequentially, checking the memory
     * threshold before every item. Per-item failures are recorded in `errors`.
     */
    async processImageBatch(batch, embedFunction, batchIndex, stats, errors) {
        const batchStartTime = Date.now();
        try {
            const results = [];
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                const item = batch[itemIndex];
                try {
                    // NOTE: this collection is requested regardless of
                    // config.enableGarbageCollection.
                    if (this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
                        console.warn(`Memory threshold exceeded (${this.memoryMonitor.getCurrentMemoryUsageMB()}MB), forcing garbage collection`);
                        this.memoryMonitor.forceGarbageCollection();
                    }
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                    this.memoryMonitor.updatePeakMemory();
                }
                catch (error) {
                    this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                }
            }
            return results;
        }
        finally {
            this.recordBatchTime(stats, batchStartTime);
        }
    }
    // =============================================================================
    // FALLBACK PROCESSING
    // =============================================================================
    /**
     * Re-process a failed batch one item at a time. Each item gets one initial
     * attempt plus up to `maxRetries` retries, `retryDelayMs` apart.
     * `stats.retryCount` counts only the retries that were actually performed.
     */
    async processBatchWithFallback(batch, embedFunction, batchIndex, stats, errors) {
        console.log(`Attempting fallback processing for batch ${batchIndex} (${batch.length} items)`);
        const results = [];
        for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
            const item = batch[itemIndex];
            for (let attempt = 0;; attempt++) {
                try {
                    const result = await embedFunction(item);
                    results.push(result);
                    stats.processedItems++;
                    break;
                }
                catch (error) {
                    // Written as `attempt < maxRetries` so that a non-numeric
                    // maxRetries ends the loop instead of retrying forever.
                    if (attempt < this.config.maxRetries) {
                        stats.retryCount++;
                        console.warn(`Retry ${attempt + 1}/${this.config.maxRetries} for item ${itemIndex} in batch ${batchIndex}`);
                        await this.delay(this.config.retryDelayMs);
                    }
                    else {
                        this.recordItemFailure(stats, errors, item, error, batchIndex, itemIndex);
                        break;
                    }
                }
            }
        }
        return results;
    }
    // =============================================================================
    // RESOURCE MANAGEMENT
    // =============================================================================
    /**
     * Preload image models when at least one item is an image.
     */
    async preloadRequiredModels(items) {
        const hasImages = items.some(item => item.contentType === 'image');
        if (hasImages) {
            await this.preloadImageProcessingModels();
        }
    }
    /**
     * Load the image-to-text processor and the metadata extractor once, cache
     * them in `resourcePool`, and register them with the resource manager.
     * Failures are logged as warnings and not rethrown.
     */
    async preloadImageProcessingModels() {
        try {
            if (!this.resourcePool.has('imageToText')) {
                console.log('Preloading image-to-text processor...');
                const processor = await LazyMultimodalLoader.loadImageToTextProcessor();
                this.resourcePool.set('imageToText', processor);
                this.resourceManager.registerImageProcessor(processor, 'image-to-text');
            }
            if (!this.resourcePool.has('metadataExtractor')) {
                console.log('Preloading image metadata extractor...');
                const extractor = await LazyMultimodalLoader.loadImageMetadataExtractor();
                this.resourcePool.set('metadataExtractor', extractor);
                this.resourceManager.registerImageProcessor(extractor, 'metadata-extractor');
            }
        }
        catch (error) {
            console.warn(`Failed to preload image processing models: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Request a garbage collection when the threshold is exceeded (or always,
     * in aggressive mode) and refresh the peak-memory reading.
     * Does nothing when `enableMemoryMonitoring` is off.
     *
     * @param aggressive request GC even when the threshold is not exceeded
     */
    async performMemoryManagement(aggressive = false) {
        if (!this.config.enableMemoryMonitoring) {
            return;
        }
        if (aggressive || this.memoryMonitor.isMemoryThresholdExceeded(this.config.memoryThresholdMB)) {
            if (this.config.enableGarbageCollection) {
                this.memoryMonitor.forceGarbageCollection();
            }
        }
        this.memoryMonitor.updatePeakMemory();
    }
    /**
     * Release resources after a run. Pooled processors are kept when
     * `enableResourcePooling` is on; otherwise each one that exposes a
     * `cleanup()` function is cleaned up and the pool is emptied.
     * Failures are logged as warnings and not rethrown.
     */
    async cleanupResources() {
        try {
            if (!this.config.enableResourcePooling) {
                for (const [key, processor] of this.resourcePool) {
                    try {
                        if (processor && typeof processor.cleanup === 'function') {
                            await processor.cleanup();
                        }
                    }
                    catch (error) {
                        console.warn(`Failed to cleanup processor ${key}: ${error instanceof Error ? error.message : 'Unknown error'}`);
                    }
                }
                this.resourcePool.clear();
            }
            if (this.config.enableGarbageCollection) {
                await this.resourceManager.optimizeMemory();
            }
        }
        catch (error) {
            console.warn(`Error during batch processing cleanup: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    // =============================================================================
    // UTILITY METHODS
    // =============================================================================
    /**
     * Coerce a configured batch size to a whole number of at least 1, so the
     * `i += batchSize` loops always advance (0, negative or NaN would otherwise
     * never terminate).
     */
    normalizeBatchSize(requested) {
        return Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 1;
    }
    /**
     * Count one failed item and append its details to the shared error list.
     */
    recordItemFailure(stats, errors, item, error, batchIndex, itemIndex) {
        stats.failedItems++;
        errors.push({
            item,
            error: error instanceof Error ? error.message : String(error),
            batchIndex,
            itemIndex
        });
    }
    /**
     * Fold the duration of the batch that just ended into the running average.
     * The batch is not yet counted as completed or failed at this point, hence
     * the `+ 1` in the divisor.
     */
    recordBatchTime(stats, batchStartTime) {
        const batchTime = Date.now() - batchStartTime;
        const finishedBatches = stats.completedBatches + stats.failedBatches;
        stats.averageBatchTimeMs = ((stats.averageBatchTimeMs * finishedBatches) + batchTime) / (finishedBatches + 1);
    }
    /**
     * True when reporting is enabled and `batchIndex + 1` is a multiple of
     * `progressReportInterval`.
     */
    shouldReportProgress(batchIndex) {
        return this.config.enableProgressReporting &&
            (batchIndex + 1) % this.config.progressReportInterval === 0;
    }
    /**
     * Resolve after `ms` milliseconds.
     */
    delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
    /**
     * @returns a shallow copy of the active configuration
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merge `updates` into the active configuration.
     */
    updateConfig(updates) {
        this.config = { ...this.config, ...updates };
    }
    /**
     * @returns the memory monitor's current/peak/initial snapshot
     */
    getMemoryStats() {
        return this.memoryMonitor.getStats();
    }
}
506
+ // =============================================================================
507
+ // FACTORY FUNCTIONS
508
+ // =============================================================================
509
/**
 * Build a BatchProcessingOptimizer.
 * `config` is handed to the constructor unchanged, which merges it over
 * DEFAULT_BATCH_CONFIG.
 */
export function createBatchProcessor(config) {
    const optimizer = new BatchProcessingOptimizer(config);
    return optimizer;
}
515
/**
 * Build a BatchProcessingOptimizer preset for large image collections:
 * small batches, a low memory threshold and sequential processing.
 */
export function createImageBatchProcessor() {
    const imagePreset = {
        imageBatchSize: 2, // very small batches keep memory use down
        textBatchSize: 8,
        memoryThresholdMB: 128, // lower threshold than the default
        enableMemoryMonitoring: true,
        enableGarbageCollection: true,
        enableParallelProcessing: false, // sequential, for tighter memory control
        progressReportInterval: 2 // report progress more often
    };
    return new BatchProcessingOptimizer(imagePreset);
}
529
/**
 * Build a BatchProcessingOptimizer preset for text-heavy workloads:
 * larger text batches, parallel processing and a higher memory threshold.
 */
export function createTextBatchProcessor() {
    const textPreset = {
        textBatchSize: 32, // text tolerates larger batches
        imageBatchSize: 4,
        enableParallelProcessing: true, // embed the items of a batch concurrently
        memoryThresholdMB: 512, // higher threshold than the default
        progressReportInterval: 10
    };
    return new BatchProcessingOptimizer(textPreset);
}
541
+ //# sourceMappingURL=batch-processing-optimizer.js.map
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
/**
 * Index contents exchanged with `BinaryIndexFormat.save` and
 * `BinaryIndexFormat.load`.
 *
 * The first six fields are the values listed for the 24-byte file header
 * (6 × uint32); `vectors` holds the per-vector records that follow it.
 */
export interface BinaryIndexData {
    /** Header field; each vector record stores `dimensions × 4` bytes of data. */
    dimensions: number;
    /** Header field. */
    maxElements: number;
    /** Header field (HNSW parameter; exact meaning is defined by the index implementation). */
    M: number;
    /** Header field (HNSW parameter; exact meaning is defined by the index implementation). */
    efConstruction: number;
    /** Header field. */
    seed: number;
    /** Header field. */
    currentSize: number;
    /** One record per stored vector: a 4-byte id followed by the vector data. */
    vectors: {
        id: number;
        vector: Float32Array;
    }[];
}
29
+ export declare class BinaryIndexFormat {
30
+ /**
31
+ * Serialize `data` and save it as a binary index file at `indexPath`.
32
+ *
33
+ * File structure:
34
+ * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
35
+ * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
36
+ *
37
+ * @param indexPath Path to save the binary index file
38
+ * @param data Index data to serialize
+ * @returns Promise with no value; presumably settles once the file has been
+ * written (implementation is not part of this declaration file; confirm there)
39
+ */
40
+ static save(indexPath: string, data: BinaryIndexData): Promise<void>;
41
+ /**
42
+ * Load index data from a binary index file.
43
+ *
44
+ * Vector data is read through Float32Array views over the loaded buffer
45
+ * (no intermediate parsing); the views are then copied so the returned
+ * vectors stay valid after that buffer is released.
+ * NOTE(review): described from the original comment only; the implementation
+ * is not visible in this declaration file.
46
+ *
47
+ * @param indexPath Path to the binary index file
48
+ * @returns Deserialized index data
49
+ */
50
+ static load(indexPath: string): Promise<BinaryIndexData>;
51
+ }
52
+ //# sourceMappingURL=binary-index-format.d.ts.map