rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,296 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { getChunksByEmbeddingIds } from './db.js';
6
+ import { config } from './config.js';
7
+ import { createMissingDependencyError } from './actionable-error-messages.js';
8
+ /**
9
+ * Search engine that provides semantic search capabilities
10
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
11
+ * Uses explicit dependency injection for clean architecture
12
+ */
13
+ export class SearchEngine {
14
+ embedFn;
15
+ indexManager;
16
+ db;
17
+ rerankFn;
18
+ contentResolver;
19
+ /**
20
+ * Creates a new SearchEngine with explicit dependency injection
21
+ *
22
+ * DEPENDENCY INJECTION PATTERN:
23
+ * This constructor requires all dependencies to be explicitly provided, enabling:
24
+ * - Clean separation between core logic and implementation-specific components
25
+ * - Support for different embedding models (text-only, multimodal, custom)
26
+ * - Testability through mock injection
27
+ * - Future extensibility without core changes
28
+ *
29
+ * @param embedFn - Function to embed queries into vectors
30
+ * - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
31
+ * - Examples:
32
+ * - Text: const embedFn = (query) => textEmbedder.embedSingle(query)
33
+ * - Multimodal: const embedFn = (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
34
+ * - Custom: const embedFn = (query) => customModel.embed(query)
35
+ *
36
+ * @param indexManager - Vector index manager for similarity search
37
+ * - Handles vector storage and retrieval operations
38
+ * - Works with any embedding dimensions (384, 512, 768, etc.)
39
+ * - Example: new IndexManager('./index.bin')
40
+ *
41
+ * @param db - Database connection for metadata retrieval
42
+ * - Provides access to document and chunk metadata
43
+ * - Supports different content types through metadata fields
44
+ * - Example: await openDatabase('./db.sqlite')
45
+ *
46
+ * @param rerankFn - Optional function to rerank search results
47
+ * - Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
48
+ * - Examples:
49
+ * - Text: const rerankFn = (query, results) => textReranker.rerank(query, results)
50
+ * - Custom: const rerankFn = (query, results) => customReranker.rerank(query, results)
51
+ * - Disabled: undefined (no reranking)
52
+ *
53
+ * USAGE EXAMPLES:
54
+ * ```typescript
55
+ * // Text-only search engine
56
+ * const textEmbedFn = await createTextEmbedder();
57
+ * const textRerankFn = await createTextReranker();
58
+ * const indexManager = new IndexManager('./index.bin');
59
+ * const db = await openDatabase('./db.sqlite');
60
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
61
+ *
62
+ * // Search engine without reranking
63
+ * const search = new SearchEngine(textEmbedFn, indexManager, db);
64
+ *
65
+ * // Custom embedding implementation
66
+ * const customEmbedFn = async (query) => ({
67
+ * embedding_id: generateId(),
68
+ * vector: await myCustomModel.embed(query)
69
+ * });
70
+ * const search = new SearchEngine(customEmbedFn, indexManager, db);
71
+ * ```
72
+ */
73
+ constructor(embedFn, indexManager, db, rerankFn, contentResolver) {
74
+ this.embedFn = embedFn;
75
+ this.indexManager = indexManager;
76
+ this.db = db;
77
+ this.rerankFn = rerankFn;
78
+ // Validate required dependencies
79
+ if (!embedFn || typeof embedFn !== 'function') {
80
+ throw createMissingDependencyError('embedFn', 'function', {
81
+ operationContext: 'SearchEngine constructor'
82
+ });
83
+ }
84
+ if (!indexManager) {
85
+ throw createMissingDependencyError('indexManager', 'object', {
86
+ operationContext: 'SearchEngine constructor'
87
+ });
88
+ }
89
+ if (!db) {
90
+ throw createMissingDependencyError('db', 'object', {
91
+ operationContext: 'SearchEngine constructor'
92
+ });
93
+ }
94
+ // Initialize ContentResolver if provided, or create lazily when needed
95
+ this.contentResolver = contentResolver;
96
+ }
97
+ /**
98
+ * Perform semantic search on the indexed documents
99
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
100
+ * @param query - Search query string
101
+ * @param options - Search options including top_k and rerank settings
102
+ * @returns Promise resolving to array of search results
103
+ */
104
+ async search(query, options = {}) {
105
+ if (!query || query.trim().length === 0) {
106
+ return [];
107
+ }
108
+ const startTime = performance.now();
109
+ const topK = options.top_k || config.top_k || 10;
110
+ const shouldRerank = options.rerank !== undefined ? options.rerank : (this.rerankFn !== undefined);
111
+ try {
112
+ // Step 1: Build query embedding using injected embed function
113
+ const embeddingStartTime = performance.now();
114
+ const queryEmbedding = await this.embedFn(query);
115
+ const embeddingTime = performance.now() - embeddingStartTime;
116
+ // Step 2: Search using IndexManager (which handles hash mapping properly)
117
+ const searchStartTime = performance.now();
118
+ let searchResult;
119
+ try {
120
+ searchResult = this.indexManager.search(queryEmbedding.vector, topK);
121
+ }
122
+ catch (error) {
123
+ if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
124
+ console.warn(`Hash mapping issue detected: ${error.message}`);
125
+ console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
126
+ return [];
127
+ }
128
+ throw error;
129
+ }
130
+ const vectorSearchTime = performance.now() - searchStartTime;
131
+ if (searchResult.embeddingIds.length === 0) {
132
+ const totalTime = performance.now() - startTime;
133
+ console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
134
+ return [];
135
+ }
136
+ // Step 3: Retrieve chunks from database using embedding IDs
137
+ const retrievalStartTime = performance.now();
138
+ const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
139
+ const retrievalTime = performance.now() - retrievalStartTime;
140
+ // Step 4: Format results as JSON with text, score, and document metadata
141
+ let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
142
+ // Step 5: Optional reranking with injected rerank function
143
+ let rerankTime = 0;
144
+ if (shouldRerank && this.rerankFn && results.length > 1) {
145
+ try {
146
+ const rerankStartTime = performance.now();
147
+ results = await this.rerankFn(query, results);
148
+ rerankTime = performance.now() - rerankStartTime;
149
+ }
150
+ catch (error) {
151
+ // Fallback to vector search results and log the error
152
+ console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
153
+ }
154
+ }
155
+ const totalTime = performance.now() - startTime;
156
+ // Measure latency without premature optimization - just log for monitoring
157
+ console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
158
+ `(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
159
+ `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
160
+ return results;
161
+ }
162
+ catch (error) {
163
+ throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
164
+ }
165
+ }
166
+ /**
167
+ * Format search results with proper structure
168
+ * @param chunks - Database chunks with metadata
169
+ * @param distances - Similarity distances from vector search
170
+ * @param embeddingIds - Embedding IDs in search result order
171
+ * @returns Formatted search results
172
+ */
173
+ formatSearchResults(chunks, distances, embeddingIds) {
174
+ const results = [];
175
+ // Create a map for quick chunk lookup by embedding_id
176
+ const chunkMap = new Map();
177
+ chunks.forEach(chunk => {
178
+ chunkMap.set(chunk.embedding_id, chunk);
179
+ });
180
+ // Build results in the order of search results
181
+ for (let i = 0; i < embeddingIds.length; i++) {
182
+ const embeddingId = embeddingIds[i];
183
+ const chunk = chunkMap.get(embeddingId);
184
+ if (chunk) {
185
+ // Convert cosine distance to similarity score (1 - distance)
186
+ // hnswlib-wasm returns cosine distance, we want similarity
187
+ const score = Math.max(0, 1 - distances[i]);
188
+ results.push({
189
+ content: chunk.content,
190
+ score: score,
191
+ contentType: chunk.content_type || 'text',
192
+ document: {
193
+ id: chunk.document_id,
194
+ source: chunk.document_source,
195
+ title: chunk.document_title,
196
+ contentType: chunk.document_content_type || 'text',
197
+ contentId: chunk.document_content_id || undefined
198
+ }
199
+ });
200
+ }
201
+ }
202
+ return results;
203
+ }
204
+ /**
205
+ * Get search engine statistics
206
+ * @returns Object with current search engine stats
207
+ */
208
+ async getStats() {
209
+ const indexStats = await this.indexManager.getStats();
210
+ return {
211
+ totalChunks: indexStats.totalVectors,
212
+ indexSize: indexStats.totalVectors,
213
+ rerankingEnabled: this.rerankFn !== undefined
214
+ };
215
+ }
216
+ /**
217
+ * Retrieve content by ID in the specified format
218
+ * @param contentId - Content ID to retrieve
219
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
220
+ * @returns Promise that resolves to content in requested format
221
+ */
222
+ async getContent(contentId, format = 'file') {
223
+ // Lazy initialization of ContentResolver
224
+ if (!this.contentResolver) {
225
+ const { ContentResolver } = await import('./content-resolver.js');
226
+ this.contentResolver = new ContentResolver(this.db);
227
+ }
228
+ return this.contentResolver.getContent(contentId, format);
229
+ }
230
+ /**
231
+ * Retrieve multiple content items efficiently in batch
232
+ * @param contentIds - Array of content IDs to retrieve
233
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
234
+ * @returns Promise that resolves to array of content in requested format
235
+ */
236
+ async getContentBatch(contentIds, format = 'file') {
237
+ // Lazy initialization of ContentResolver
238
+ if (!this.contentResolver) {
239
+ const { ContentResolver } = await import('./content-resolver.js');
240
+ this.contentResolver = new ContentResolver(this.db);
241
+ }
242
+ // Convert contentIds array to ContentRequest array
243
+ const requests = contentIds.map(contentId => ({ contentId, format }));
244
+ const results = await this.contentResolver.getContentBatch(requests);
245
+ // Extract content from results, maintaining order and handling errors
246
+ return results.map(result => {
247
+ if (!result.success) {
248
+ throw new Error(`Failed to retrieve content ${result.contentId}: ${result.error}`);
249
+ }
250
+ return result.content;
251
+ });
252
+ }
253
+ /**
254
+ * Retrieve content metadata for result enhancement
255
+ * @param contentId - Content ID to get metadata for
256
+ * @returns Promise that resolves to content metadata
257
+ */
258
+ async getContentMetadata(contentId) {
259
+ // Lazy initialization of ContentResolver
260
+ if (!this.contentResolver) {
261
+ const { ContentResolver } = await import('./content-resolver.js');
262
+ this.contentResolver = new ContentResolver(this.db);
263
+ }
264
+ return this.contentResolver.getContentMetadata(contentId);
265
+ }
266
+ /**
267
+ * Verify that content exists and is accessible
268
+ * @param contentId - Content ID to verify
269
+ * @returns Promise that resolves to true if content exists, false otherwise
270
+ */
271
+ async verifyContentExists(contentId) {
272
+ // Lazy initialization of ContentResolver
273
+ if (!this.contentResolver) {
274
+ const { ContentResolver } = await import('./content-resolver.js');
275
+ this.contentResolver = new ContentResolver(this.db);
276
+ }
277
+ return this.contentResolver.verifyContentExists(contentId);
278
+ }
279
+ /**
280
+ * Clean up resources - explicit cleanup method
281
+ */
282
+ async cleanup() {
283
+ try {
284
+ // Clean up ContentResolver to prevent resource leaks
285
+ if (this.contentResolver && typeof this.contentResolver.cleanup === 'function') {
286
+ this.contentResolver.cleanup();
287
+ }
288
+ await this.db.close();
289
+ await this.indexManager.close();
290
+ }
291
+ catch (error) {
292
+ console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
293
+ }
294
+ }
295
+ }
296
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Streaming Operations for Large Content - Task 9.1 Implementation
3
+ * Provides memory-efficient streaming operations for content ingestion and retrieval
4
+ * Minimizes memory usage for large files through streaming algorithms
5
+ */
6
+ /**
7
+ * Progress callback for long-running operations
+ *
+ * Call signature: receives the number of bytes processed and, optionally, the
+ * total number of bytes. The return value is ignored (`void`).
+ * NOTE(review): `bytesProcessed` is presumably a cumulative count and
+ * `totalBytes` is presumably omitted when the total size is unknown — confirm
+ * against the implementation, which is not visible in this declaration file.
8
+ */
9
+ export interface ProgressCallback {
10
+ (bytesProcessed: number, totalBytes?: number): void;
11
+ }
12
+ /**
13
+ * Streaming hash calculation result
+ *
+ * Resolved by `calculateFileHashStreaming` and `calculateBufferHashStreaming`.
14
+ */
15
+ export interface StreamingHashResult {
16
+ hash: string; // SHA-256 digest per the hashing methods' docs; string encoding (hex?) not shown here — TODO confirm
17
+ bytesProcessed: number; // number of bytes that were hashed
18
+ processingTimeMs: number; // duration of the operation, in milliseconds (per the field name)
19
+ }
20
+ /**
21
+ * Streaming file copy result
+ *
+ * Resolved by `copyFileStreaming` and `writeBufferStreaming`.
22
+ */
23
+ export interface StreamingCopyResult {
24
+ bytesWritten: number; // number of bytes written to the destination
25
+ processingTimeMs: number; // duration of the operation, in milliseconds (per the field name)
26
+ hash?: string; // optional; presumably only set when StreamingConfig.enableHashing is true — TODO confirm
27
+ }
28
+ /**
29
+ * Configuration for streaming operations
+ *
+ * All fields are required here; callers of `StreamingOperations` and
+ * `createStreamingOperations` pass a `Partial<StreamingConfig>`, so defaults
+ * are supplied by the implementation (values not visible in this declaration file).
30
+ */
31
+ export interface StreamingConfig {
32
+ chunkSize: number; // size of each streamed chunk; presumably in bytes — TODO confirm
33
+ enableProgress: boolean; // presumably toggles invocation of progress callbacks — TODO confirm
34
+ enableHashing: boolean; // presumably toggles hash computation during copy/write — TODO confirm
35
+ timeout: number; // operation timeout; presumably in milliseconds (see withTimeout's timeoutMs) — TODO confirm
36
+ }
37
+ /**
38
+ * StreamingOperations class provides memory-efficient operations for large content
+ *
+ * Public surface: streaming SHA-256 hashing of files and buffers, streaming
+ * file copy / buffer write, base64 file reading, hash-based integrity
+ * validation, and file metadata lookup. All public methods are asynchronous
+ * and return Promises. This is a declaration only; behaviour details beyond
+ * the signatures and comments below live in the implementation file.
39
+ */
40
+ export declare class StreamingOperations {
41
+ private config; // internal configuration; presumably the StreamingConfig built from the constructor argument — TODO confirm
42
+ constructor(config?: Partial<StreamingConfig>); // config is optional and may contain any subset of StreamingConfig fields
43
+ /**
44
+ * Calculates SHA-256 hash of a file using streaming to minimize memory usage
45
+ * @param filePath - Path to the file to hash
46
+ * @param progressCallback - Optional callback for progress reporting
47
+ * @returns Promise that resolves to hash result
48
+ */
49
+ calculateFileHashStreaming(filePath: string, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
50
+ /**
51
+ * Calculates SHA-256 hash of a buffer using streaming to minimize memory usage
52
+ * @param content - Buffer to hash
53
+ * @param progressCallback - Optional callback for progress reporting
54
+ * @returns Promise that resolves to hash result
55
+ */
56
+ calculateBufferHashStreaming(content: Buffer, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
57
+ /**
58
+ * Copies a file using streaming operations with optional hashing
59
+ * @param sourcePath - Source file path
60
+ * @param destinationPath - Destination file path
61
+ * @param progressCallback - Optional callback for progress reporting
62
+ * @returns Promise that resolves to copy result
63
+ */
64
+ copyFileStreaming(sourcePath: string, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
65
+ /**
66
+ * Writes buffer content to file using streaming operations
67
+ * @param content - Buffer to write
68
+ * @param destinationPath - Destination file path
69
+ * @param progressCallback - Optional callback for progress reporting
70
+ * @returns Promise that resolves to write result
71
+ */
72
+ writeBufferStreaming(content: Buffer, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
73
+ /**
74
+ * Reads file content and converts to base64 using streaming to minimize memory usage
75
+ * @param filePath - Path to the file to read
76
+ * @param progressCallback - Optional callback for progress reporting
77
+ * @returns Promise that resolves to base64 string
78
+ */
79
+ readFileAsBase64Streaming(filePath: string, progressCallback?: ProgressCallback): Promise<string>;
80
+ /**
81
+ * Validates file integrity by comparing streaming hash with expected hash
82
+ * @param filePath - Path to the file to validate
83
+ * @param expectedHash - Expected SHA-256 hash
84
+ * @param progressCallback - Optional callback for progress reporting
85
+ * @returns Promise that resolves to validation result
86
+ */
87
+ validateFileIntegrityStreaming(filePath: string, expectedHash: string, progressCallback?: ProgressCallback): Promise<{
88
+ isValid: boolean; // presumably true when actualHash equals expectedHash — TODO confirm comparison rules (case sensitivity)
89
+ actualHash: string; // hash computed from the file's current content
90
+ bytesProcessed: number; // number of bytes read while hashing
91
+ }>;
92
+ /**
93
+ * Gets file information without loading content into memory
94
+ * @param filePath - Path to the file
95
+ * @returns Promise that resolves to file information
96
+ */
97
+ getFileInfo(filePath: string): Promise<{
98
+ size: number; // file size; presumably in bytes — TODO confirm
99
+ isFile: boolean; // whether the path refers to a regular file
100
+ isDirectory: boolean; // whether the path refers to a directory
101
+ lastModified: Date; // last modification time
102
+ canRead: boolean; // presumably whether the current process may read the path — TODO confirm
103
+ canWrite: boolean; // presumably whether the current process may write the path — TODO confirm
104
+ }>;
105
+ /**
106
+ * Converts buffer to chunks for streaming
107
+ * @param buffer - Buffer to chunk
108
+ * @returns Generator that yields buffer chunks
109
+ */
110
+ private bufferToChunks;
111
+ /**
112
+ * Wraps a promise with timeout functionality
113
+ * @param promise - Promise to wrap
114
+ * @param timeoutMs - Timeout in milliseconds
115
+ * @param errorMessage - Error message for timeout
116
+ * @returns Promise that rejects if timeout is reached
117
+ */
118
+ private withTimeout;
119
+ }
120
+ /**
121
+ * Creates a StreamingOperations instance with default configuration
+ *
+ * Factory counterpart of `new StreamingOperations(config)`; takes the same
+ * optional `Partial<StreamingConfig>` argument.
122
+ * @param config - Optional configuration overrides
+ *   (any subset of StreamingConfig fields; default values are not visible in this declaration file)
123
+ * @returns StreamingOperations instance
124
+ */
125
+ export declare function createStreamingOperations(config?: Partial<StreamingConfig>): StreamingOperations;
126
+ /**
127
+ * Utility function to format bytes for progress reporting
+ *
+ * NOTE(review): whether units are 1000- or 1024-based, and how many decimal
+ * places are used, is not visible in this declaration file — confirm in the
+ * implementation before relying on the exact output.
128
+ * @param bytes - Number of bytes
129
+ * @returns Formatted string (e.g., "1.5 MB")
130
+ */
131
+ export declare function formatBytes(bytes: number): string;
132
+ /**
133
+ * Utility function to format processing time
+ *
+ * NOTE(review): the threshold at which the output switches from milliseconds
+ * to seconds is not visible in this declaration file — confirm in the
+ * implementation.
134
+ * @param milliseconds - Processing time in milliseconds
135
+ * @returns Formatted string (e.g., "1.5s" or "150ms")
136
+ */
137
+ export declare function formatProcessingTime(milliseconds: number): string;
138
+ /**
139
+ * Utility function to calculate processing speed
+ *
+ * NOTE(review): the result for `milliseconds === 0` (possible division by
+ * zero) and the MB definition (10^6 vs 2^20 bytes) are not visible in this
+ * declaration file — confirm in the implementation.
140
+ * @param bytes - Number of bytes processed
141
+ * @param milliseconds - Processing time in milliseconds
142
+ * @returns Speed in MB/s
143
+ */
144
+ export declare function calculateProcessingSpeed(bytes: number, milliseconds: number): number;
145
+ //# sourceMappingURL=streaming-operations.d.ts.map