rag-lite-ts 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +94 -65
  2. package/dist/cli/indexer.d.ts.map +1 -1
  3. package/dist/cli/indexer.js +78 -50
  4. package/dist/cli/indexer.js.map +1 -1
  5. package/dist/cli/search.d.ts.map +1 -1
  6. package/dist/cli/search.js +13 -30
  7. package/dist/cli/search.js.map +1 -1
  8. package/dist/cli.js +2 -2
  9. package/dist/cli.js.map +1 -1
  10. package/dist/config.d.ts +34 -73
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +50 -255
  13. package/dist/config.js.map +1 -1
  14. package/dist/core/adapters.d.ts +93 -0
  15. package/dist/core/adapters.d.ts.map +1 -0
  16. package/dist/core/adapters.js +139 -0
  17. package/dist/core/adapters.js.map +1 -0
  18. package/dist/core/chunker.d.ts +117 -0
  19. package/dist/core/chunker.d.ts.map +1 -0
  20. package/dist/core/chunker.js +73 -0
  21. package/dist/core/chunker.js.map +1 -0
  22. package/dist/core/config.d.ts +102 -0
  23. package/dist/core/config.d.ts.map +1 -0
  24. package/dist/core/config.js +240 -0
  25. package/dist/core/config.js.map +1 -0
  26. package/dist/{db.d.ts → core/db.d.ts} +25 -9
  27. package/dist/core/db.d.ts.map +1 -0
  28. package/dist/{db.js → core/db.js} +86 -16
  29. package/dist/core/db.js.map +1 -0
  30. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  31. package/dist/core/error-handler.d.ts.map +1 -0
  32. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  33. package/dist/core/error-handler.js.map +1 -0
  34. package/dist/core/index.d.ts +57 -0
  35. package/dist/core/index.d.ts.map +1 -0
  36. package/dist/core/index.js +66 -0
  37. package/dist/core/index.js.map +1 -0
  38. package/dist/core/ingestion.d.ts +143 -0
  39. package/dist/core/ingestion.d.ts.map +1 -0
  40. package/dist/core/ingestion.js +347 -0
  41. package/dist/core/ingestion.js.map +1 -0
  42. package/dist/core/interfaces.d.ts +408 -0
  43. package/dist/core/interfaces.d.ts.map +1 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/interfaces.js.map +1 -0
  46. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  47. package/dist/core/path-manager.d.ts.map +1 -0
  48. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  49. package/dist/core/path-manager.js.map +1 -0
  50. package/dist/core/search-example.d.ts +25 -0
  51. package/dist/core/search-example.d.ts.map +1 -0
  52. package/dist/core/search-example.js +138 -0
  53. package/dist/core/search-example.js.map +1 -0
  54. package/dist/core/search-pipeline-example.d.ts +21 -0
  55. package/dist/core/search-pipeline-example.d.ts.map +1 -0
  56. package/dist/core/search-pipeline-example.js +188 -0
  57. package/dist/core/search-pipeline-example.js.map +1 -0
  58. package/dist/core/search-pipeline.d.ts +111 -0
  59. package/dist/core/search-pipeline.d.ts.map +1 -0
  60. package/dist/core/search-pipeline.js +287 -0
  61. package/dist/core/search-pipeline.js.map +1 -0
  62. package/dist/core/search.d.ts +104 -0
  63. package/dist/core/search.d.ts.map +1 -0
  64. package/dist/core/search.js +218 -0
  65. package/dist/core/search.js.map +1 -0
  66. package/dist/core/types.d.ts +63 -0
  67. package/dist/core/types.d.ts.map +1 -0
  68. package/dist/core/types.js +6 -0
  69. package/dist/core/types.js.map +1 -0
  70. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  71. package/dist/core/vector-index.d.ts.map +1 -0
  72. package/dist/{vector-index.js → core/vector-index.js} +19 -0
  73. package/dist/core/vector-index.js.map +1 -0
  74. package/dist/dom-polyfills.d.ts +6 -0
  75. package/dist/dom-polyfills.d.ts.map +1 -0
  76. package/dist/dom-polyfills.js +40 -0
  77. package/dist/dom-polyfills.js.map +1 -0
  78. package/dist/examples/clean-api-examples.d.ts +44 -0
  79. package/dist/examples/clean-api-examples.d.ts.map +1 -0
  80. package/dist/examples/clean-api-examples.js +206 -0
  81. package/dist/examples/clean-api-examples.js.map +1 -0
  82. package/dist/factories/index.d.ts +43 -0
  83. package/dist/factories/index.d.ts.map +1 -0
  84. package/dist/factories/index.js +44 -0
  85. package/dist/factories/index.js.map +1 -0
  86. package/dist/factories/text-factory.d.ts +466 -0
  87. package/dist/factories/text-factory.d.ts.map +1 -0
  88. package/dist/factories/text-factory.js +719 -0
  89. package/dist/factories/text-factory.js.map +1 -0
  90. package/dist/file-processor.d.ts +2 -2
  91. package/dist/file-processor.d.ts.map +1 -1
  92. package/dist/file-processor.js +3 -3
  93. package/dist/file-processor.js.map +1 -1
  94. package/dist/index-manager.d.ts +3 -2
  95. package/dist/index-manager.d.ts.map +1 -1
  96. package/dist/index-manager.js +13 -11
  97. package/dist/index-manager.js.map +1 -1
  98. package/dist/index.d.ts +63 -8
  99. package/dist/index.d.ts.map +1 -1
  100. package/dist/index.js +91 -16
  101. package/dist/index.js.map +1 -1
  102. package/dist/indexer.js +1 -1
  103. package/dist/indexer.js.map +1 -1
  104. package/dist/ingestion.d.ts +30 -156
  105. package/dist/ingestion.d.ts.map +1 -1
  106. package/dist/ingestion.js +58 -675
  107. package/dist/ingestion.js.map +1 -1
  108. package/dist/mcp-server.js +86 -55
  109. package/dist/mcp-server.js.map +1 -1
  110. package/dist/preprocess.js +1 -1
  111. package/dist/preprocess.js.map +1 -1
  112. package/dist/search-standalone.js +1 -1
  113. package/dist/search-standalone.js.map +1 -1
  114. package/dist/search.d.ts +32 -76
  115. package/dist/search.d.ts.map +1 -1
  116. package/dist/search.js +80 -428
  117. package/dist/search.js.map +1 -1
  118. package/dist/text/chunker.d.ts +32 -0
  119. package/dist/text/chunker.d.ts.map +1 -0
  120. package/dist/{chunker.js → text/chunker.js} +98 -75
  121. package/dist/text/chunker.js.map +1 -0
  122. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  123. package/dist/text/embedder.d.ts.map +1 -0
  124. package/dist/{embedder.js → text/embedder.js} +71 -4
  125. package/dist/text/embedder.js.map +1 -0
  126. package/dist/text/index.d.ts +7 -0
  127. package/dist/text/index.d.ts.map +1 -0
  128. package/dist/text/index.js +8 -0
  129. package/dist/text/index.js.map +1 -0
  130. package/dist/text/preprocessors/index.d.ts +17 -0
  131. package/dist/text/preprocessors/index.d.ts.map +1 -0
  132. package/dist/text/preprocessors/index.js +38 -0
  133. package/dist/text/preprocessors/index.js.map +1 -0
  134. package/dist/text/preprocessors/mdx.d.ts +25 -0
  135. package/dist/text/preprocessors/mdx.d.ts.map +1 -0
  136. package/dist/text/preprocessors/mdx.js +101 -0
  137. package/dist/text/preprocessors/mdx.js.map +1 -0
  138. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  139. package/dist/text/preprocessors/mermaid.d.ts.map +1 -0
  140. package/dist/text/preprocessors/mermaid.js +330 -0
  141. package/dist/text/preprocessors/mermaid.js.map +1 -0
  142. package/dist/text/preprocessors/registry.d.ts +56 -0
  143. package/dist/text/preprocessors/registry.d.ts.map +1 -0
  144. package/dist/text/preprocessors/registry.js +180 -0
  145. package/dist/text/preprocessors/registry.js.map +1 -0
  146. package/dist/text/reranker.d.ts +60 -0
  147. package/dist/text/reranker.d.ts.map +1 -0
  148. package/dist/{reranker.js → text/reranker.js} +134 -19
  149. package/dist/text/reranker.js.map +1 -0
  150. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  151. package/dist/text/tokenizer.d.ts.map +1 -0
  152. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  153. package/dist/text/tokenizer.js.map +1 -0
  154. package/dist/types.d.ts +1 -1
  155. package/dist/types.d.ts.map +1 -1
  156. package/package.json +2 -2
  157. package/dist/chunker.d.ts +0 -47
  158. package/dist/chunker.d.ts.map +0 -1
  159. package/dist/chunker.js.map +0 -1
  160. package/dist/db.d.ts.map +0 -1
  161. package/dist/db.js.map +0 -1
  162. package/dist/embedder.d.ts.map +0 -1
  163. package/dist/embedder.js.map +0 -1
  164. package/dist/error-handler.d.ts.map +0 -1
  165. package/dist/error-handler.js.map +0 -1
  166. package/dist/path-manager.d.ts.map +0 -1
  167. package/dist/path-manager.js.map +0 -1
  168. package/dist/reranker.d.ts +0 -40
  169. package/dist/reranker.d.ts.map +0 -1
  170. package/dist/reranker.js.map +0 -1
  171. package/dist/resource-manager-demo.d.ts +0 -7
  172. package/dist/resource-manager-demo.d.ts.map +0 -1
  173. package/dist/resource-manager-demo.js +0 -52
  174. package/dist/resource-manager-demo.js.map +0 -1
  175. package/dist/resource-manager.d.ts +0 -129
  176. package/dist/resource-manager.d.ts.map +0 -1
  177. package/dist/resource-manager.js +0 -389
  178. package/dist/resource-manager.js.map +0 -1
  179. package/dist/tokenizer.d.ts.map +0 -1
  180. package/dist/tokenizer.js.map +0 -1
  181. package/dist/vector-index.d.ts.map +0 -1
  182. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,287 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Core search pipeline coordinator
7
+ * Orchestrates the search pipeline: query processing → vector search → metadata retrieval → optional reranking
8
+ * Remains completely independent of specific embedding models or transformer libraries
9
+ */
10
export class SearchPipelineCoordinator {
    // Injected dependencies; each one stays null until its setter is called.
    embedQueryFn = null;
    rerankResultsFn = null;
    indexManager = null;
    dbConnection = null;
    // Content type used when neither the search options nor a stored chunk supply one.
    defaultContentType = 'text';
    /**
     * Turn any thrown value into the text embedded in wrapped error messages.
     * @param {unknown} error - Value caught by a `catch` clause
     * @returns {string} The error's message, or 'Unknown error' for non-Error values
     */
    static #messageOf(error) {
        return error instanceof Error ? error.message : 'Unknown error';
    }
    /**
     * Set the embedding function for query processing
     * @param embedFn - Called as embedFn(query, contentType); its result must expose `vector`
     */
    setEmbedFunction(embedFn) {
        this.embedQueryFn = embedFn;
    }
    /**
     * Set the reranking function for result reranking
     * @param rerankFn - Called as rerankFn(query, results, contentType)
     */
    setRerankFunction(rerankFn) {
        this.rerankResultsFn = rerankFn;
    }
    /**
     * Set the index manager for vector search
     * @param indexManager - Object exposing search(queryVector, topK)
     */
    setIndexManager(indexManager) {
        this.indexManager = indexManager;
    }
    /**
     * Set the database connection for metadata retrieval
     * @param dbConnection - Connection handed to getChunksByEmbeddingIds from './db.js'
     */
    setDatabaseConnection(dbConnection) {
        this.dbConnection = dbConnection;
    }
    /**
     * Set the default content type
     * @param contentType - Fallback content type for queries and formatted results
     */
    setDefaultContentType(contentType) {
        this.defaultContentType = contentType;
    }
    /**
     * Execute the complete search pipeline
     * Coordinates all steps without knowledge of specific embedding models
     *
     * @param query - Search query; an empty or whitespace-only query yields []
     * @param options - Optional settings read here: top_k, rerank, contentType
     * @returns Promise resolving to the formatted (and possibly reranked) results
     * @throws Error when a dependency is missing or when embedding, vector search
     *         or metadata retrieval fails; the original error is attached as `cause`
     */
    async executeSearchPipeline(query, options = {}) {
        if (!query || query.trim().length === 0) {
            return [];
        }
        // The default parameter only covers `undefined`; tolerate an explicit null too.
        const opts = options ?? {};
        const startTime = performance.now();
        // `||` is kept deliberately: a top_k of 0 falls back to 10, as it always has.
        const topK = opts.top_k || 10;
        // Rerank by default whenever a rerank function is configured.
        const shouldRerank = opts.rerank !== undefined ? opts.rerank : (this.rerankResultsFn !== null);
        const contentType = opts.contentType || this.defaultContentType;
        // Validate dependencies (outside the try so the message is not re-wrapped)
        this.validateDependencies();
        try {
            // Step 1: Query processing and embedding
            const embeddingStartTime = performance.now();
            const queryEmbedding = await this.embedQuery(query, contentType);
            const embeddingTime = performance.now() - embeddingStartTime;
            // Step 2: Vector search
            const searchStartTime = performance.now();
            const searchResult = await this.vectorSearch(queryEmbedding.vector, topK);
            const vectorSearchTime = performance.now() - searchStartTime;
            if (searchResult.embeddingIds.length === 0) {
                const totalTime = performance.now() - startTime;
                console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
                return [];
            }
            // Step 3: Metadata retrieval
            const retrievalStartTime = performance.now();
            const chunks = await this.retrieveMetadata(searchResult.embeddingIds);
            const retrievalTime = performance.now() - retrievalStartTime;
            // Step 4: Format initial results
            let results = this.formatResults(chunks, searchResult.distances, searchResult.embeddingIds);
            // Step 5: Optional reranking (pointless for fewer than two results)
            let rerankTime = 0;
            if (shouldRerank && this.rerankResultsFn && results.length > 1) {
                try {
                    const rerankStartTime = performance.now();
                    results = await this.rerankResults(query, results, contentType);
                    rerankTime = performance.now() - rerankStartTime;
                }
                catch (error) {
                    // Reranking is best-effort: keep the vector-search ordering.
                    console.warn(`Reranking failed, using vector search results: ${SearchPipelineCoordinator.#messageOf(error)}`);
                }
            }
            const totalTime = performance.now() - startTime;
            console.log(`Search pipeline completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
                `(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
                `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
            return results;
        }
        catch (error) {
            throw new Error(`Search pipeline failed: ${SearchPipelineCoordinator.#messageOf(error)}`, { cause: error });
        }
    }
    /**
     * Step 1: Process and embed the query
     * Uses injected embedding function without knowledge of specific models
     *
     * @param query - Query text passed through to the embedding function
     * @param contentType - Content type passed through to the embedding function
     * @returns Promise resolving to whatever the embedding function returns
     * @throws Error when no embedding function is set or the function fails
     */
    async embedQuery(query, contentType) {
        if (!this.embedQueryFn) {
            throw new Error('No embedding function provided. Set embedding function before executing pipeline.');
        }
        try {
            return await this.embedQueryFn(query, contentType);
        }
        catch (error) {
            throw new Error(`Query embedding failed: ${SearchPipelineCoordinator.#messageOf(error)}`, { cause: error });
        }
    }
    /**
     * Step 2: Perform vector search
     * Uses index manager without knowledge of specific embedding models
     *
     * @param queryVector - Query embedding vector
     * @param topK - Number of neighbours requested from the index manager
     * @returns Promise resolving to the index manager's result; an empty
     *          { embeddingIds, distances } pair when a hash-mapping error is detected
     * @throws Error when the index manager is missing or the search fails otherwise
     */
    async vectorSearch(queryVector, topK) {
        if (!this.indexManager) {
            throw new Error('Index manager not set. Set index manager before executing pipeline.');
        }
        try {
            // `await` is required here: without it a rejected promise from an
            // asynchronous index manager would skip the catch block below.
            return await this.indexManager.search(queryVector, topK);
        }
        catch (error) {
            if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
                console.warn(`Hash mapping issue detected: ${error.message}`);
                console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
                return { embeddingIds: [], distances: [] };
            }
            throw new Error(`Vector search failed: ${SearchPipelineCoordinator.#messageOf(error)}`, { cause: error });
        }
    }
    /**
     * Step 3: Retrieve metadata from database
     * Uses database connection without knowledge of specific data formats
     *
     * @param embeddingIds - Embedding ids returned by the vector search
     * @returns Promise resolving to the chunks returned by getChunksByEmbeddingIds
     * @throws Error when the database connection is missing or the lookup fails
     */
    async retrieveMetadata(embeddingIds) {
        if (!this.dbConnection) {
            throw new Error('Database connection not set. Set database connection before executing pipeline.');
        }
        try {
            // Loaded lazily, so './db.js' is only resolved when metadata is actually needed.
            const { getChunksByEmbeddingIds } = await import('./db.js');
            return await getChunksByEmbeddingIds(this.dbConnection, embeddingIds);
        }
        catch (error) {
            throw new Error(`Metadata retrieval failed: ${SearchPipelineCoordinator.#messageOf(error)}`, { cause: error });
        }
    }
    /**
     * Step 4: Format initial results
     * Formats results in core format without knowledge of specific content types
     *
     * @param chunks - Chunk rows; each is looked up by its `embedding_id`
     * @param distances - Distances aligned by index with `embeddingIds`
     * @param embeddingIds - Embedding ids in search-result order
     * @returns Results in search order; ids without a matching chunk are skipped
     */
    formatResults(chunks, distances, embeddingIds) {
        const results = [];
        // Create a map for quick chunk lookup by embedding_id
        const chunkMap = new Map();
        for (const chunk of chunks) {
            chunkMap.set(chunk.embedding_id, chunk);
        }
        // Build results in the order of search results
        embeddingIds.forEach((embeddingId, i) => {
            const chunk = chunkMap.get(embeddingId);
            if (!chunk) {
                return;
            }
            results.push({
                content: chunk.text,
                // Convert cosine distance to similarity score (1 - distance), floored at 0
                score: Math.max(0, 1 - distances[i]),
                contentType: chunk.content_type || this.defaultContentType,
                document: {
                    id: chunk.document_id,
                    source: chunk.document_source,
                    title: chunk.document_title,
                    contentType: chunk.document_content_type || this.defaultContentType
                },
                metadata: chunk.metadata ? this.parseMetadata(chunk.metadata) : undefined
            });
        });
        return results;
    }
    /**
     * Step 5: Optional reranking
     * Uses injected reranking function without knowledge of specific models
     *
     * @param query - Original query text
     * @param results - Formatted results to reorder
     * @param contentType - Content type passed through to the rerank function
     * @returns Promise resolving to the reranked results, or to `results` unchanged
     *          when no rerank function is set or the function fails
     */
    async rerankResults(query, results, contentType) {
        if (!this.rerankResultsFn) {
            return results; // No reranking function available
        }
        try {
            return await this.rerankResultsFn(query, results, contentType);
        }
        catch (error) {
            console.warn(`Reranking failed: ${SearchPipelineCoordinator.#messageOf(error)}`);
            return results; // Return original results on reranking failure
        }
    }
    /**
     * Validate that all required dependencies are set
     * @throws Error naming every missing dependency (the rerank function is optional)
     */
    validateDependencies() {
        const missing = [];
        if (!this.embedQueryFn) {
            missing.push('embedding function');
        }
        if (!this.indexManager) {
            missing.push('index manager');
        }
        if (!this.dbConnection) {
            missing.push('database connection');
        }
        if (missing.length > 0) {
            throw new Error(`Missing required dependencies: ${missing.join(', ')}. Set all dependencies before executing search pipeline.`);
        }
    }
    /**
     * Parse metadata safely
     * @param metadata - JSON text stored with a chunk
     * @returns The parsed value, or undefined (after a warning) when parsing fails
     */
    parseMetadata(metadata) {
        try {
            return JSON.parse(metadata);
        }
        catch (error) {
            console.warn(`Failed to parse metadata: ${metadata}`);
            return undefined;
        }
    }
    /**
     * Check if the pipeline is ready to execute
     * @returns true when embedding function, index manager and database connection are all set
     */
    isReady() {
        return !!(this.embedQueryFn && this.indexManager && this.dbConnection);
    }
    /**
     * Get pipeline status information
     * @returns One boolean per dependency plus the combined `isReady` flag
     */
    getStatus() {
        return {
            hasEmbedFunction: this.embedQueryFn !== null,
            hasRerankFunction: this.rerankResultsFn !== null,
            hasIndexManager: this.indexManager !== null,
            hasDatabaseConnection: this.dbConnection !== null,
            isReady: this.isReady()
        };
    }
    /**
     * Reset all dependencies (useful for testing or reconfiguration)
     * Also restores the default content type to 'text'.
     */
    reset() {
        this.embedQueryFn = null;
        this.rerankResultsFn = null;
        this.indexManager = null;
        this.dbConnection = null;
        this.defaultContentType = 'text';
    }
}
262
+ /**
263
+ * Factory for creating search pipeline coordinators
264
+ */
265
export class SearchPipelineFactory {
    /**
     * Build a coordinator and wire in every dependency in a single call.
     *
     * @param embedFn - Embedding function handed to setEmbedFunction
     * @param indexManager - Index manager handed to setIndexManager
     * @param dbConnection - Database connection handed to setDatabaseConnection
     * @param rerankFn - Optional rerank function; only registered when truthy
     * @param defaultContentType - Fallback content type, 'text' unless overridden
     * @returns The configured SearchPipelineCoordinator
     */
    static create(embedFn, indexManager, dbConnection, rerankFn, defaultContentType = 'text') {
        const pipeline = new SearchPipelineCoordinator();
        pipeline.setEmbedFunction(embedFn);
        pipeline.setIndexManager(indexManager);
        pipeline.setDatabaseConnection(dbConnection);
        pipeline.setDefaultContentType(defaultContentType);
        // Without a rerank function the coordinator is already fully configured.
        if (!rerankFn) {
            return pipeline;
        }
        pipeline.setRerankFunction(rerankFn);
        return pipeline;
    }
    /**
     * Hand back a coordinator with no dependencies set, for manual configuration.
     *
     * @returns A fresh, unconfigured SearchPipelineCoordinator
     */
    static createEmpty() {
        const blank = new SearchPipelineCoordinator();
        return blank;
    }
}
287
+ //# sourceMappingURL=search-pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search-pipeline.js","sourceRoot":"","sources":["../../src/core/search-pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAaH;;;;GAIG;AACH,MAAM,OAAO,yBAAyB;IAC5B,YAAY,GAAyB,IAAI,CAAC;IAC1C,eAAe,GAA0B,IAAI,CAAC;IAC9C,YAAY,GAAe,IAAI,CAAC;IAChC,YAAY,GAAe,IAAI,CAAC;IAChC,kBAAkB,GAAW,MAAM,CAAC;IAE5C;;OAEG;IACH,gBAAgB,CAAC,OAAsB;QACrC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,QAAwB;QACxC,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,YAAiB;QAC/B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,qBAAqB,CAAC,YAAiB;QACrC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,qBAAqB,CAAC,WAAmB;QACvC,IAAI,CAAC,kBAAkB,GAAG,WAAW,CAAC;IACxC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,qBAAqB,CACzB,KAAa,EACb,UAAyB,EAAE;QAE3B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,KAAK,IAAI,CAAC,CAAC;QACrG,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,kBAAkB,CAAC;QAEnE,wBAAwB;QACxB,IAAI,CAAC,oBAAoB,EAAE,CAAC;QAE5B,IAAI,CAAC;YACH,yCAAyC;YACzC,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;YACjE,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,wBAAwB;YACxB,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC1C,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC1E,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;YAE7D,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3C,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,+BAA+B,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;gBAC5E,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,6BAA6B;YAC7B,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACtE,MAAM,aAAa,GAA
G,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,iCAAiC;YACjC,IAAI,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,YAAY,CAAC,CAAC;YAE5F,6BAA6B;YAC7B,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,IAAI,YAAY,IAAI,IAAI,CAAC,eAAe,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/D,IAAI,CAAC;oBACH,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;oBAC1C,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,WAAW,CAAC,CAAC;oBAChE,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;gBACnD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,IAAI,CAAC,kDAAkD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;gBAC7H,CAAC;YACH,CAAC;YAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAChD,OAAO,CAAC,GAAG,CAAC,8BAA8B,OAAO,CAAC,MAAM,eAAe,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBAC9F,WAAW,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;gBACnF,cAAc,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAE9G,OAAO,OAAO,CAAC;QAEjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,2BAA2B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QACzG,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,WAAoB;QAClD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,mFAAmF,CAAC,CAAC;QACvG,CAAC;QAED,IAAI,CAAC;YACH,OAAO,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACrD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,2BAA2B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QACzG,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,YAAY,CAAC,WAAyB,EAAE,IAAY;QAIxD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;QACzF,CAAC;QAED,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;QACrD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC,EAAE,CAAC;gBACvF,OAAO,CAAC,IAAI,CAAC,gCAAgC,KAAK,CAAC,OAAO,EAAE,CAAC,C
AAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,4FAA4F,CAAC,CAAC;gBAC3G,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YAC7C,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,yBAAyB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QACvG,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,gBAAgB,CAAC,YAAsB;QAC3C,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACrG,CAAC;QAED,IAAI,CAAC;YACH,MAAM,EAAE,uBAAuB,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YAC5D,OAAO,MAAM,uBAAuB,CAAC,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAC5G,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,aAAa,CAAC,MAAa,EAAE,SAAmB,EAAE,YAAsB;QACtE,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,sDAAsD;QACtD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAExC,IAAI,KAAK,EAAE,CAAC;gBACV,6DAA6D;gBAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE5C,OAAO,CAAC,IAAI,CAAC;oBACX,OAAO,EAAE,KAAK,CAAC,IAAI;oBACnB,KAAK,EAAE,KAAK;oBACZ,WAAW,EAAE,KAAK,CAAC,YAAY,IAAI,IAAI,CAAC,kBAAkB;oBAC1D,QAAQ,EAAE;wBACR,EAAE,EAAE,KAAK,CAAC,WAAW;wBACrB,MAAM,EAAE,KAAK,CAAC,eAAe;wBAC7B,KAAK,EAAE,KAAK,CAAC,cAAc;wBAC3B,WAAW,EAAE,KAAK,CAAC,qBAAqB,IAAI,IAAI,CAAC,kBAAkB;qBACpE;oBACD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS;iBAC1E,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,aAAa,CAAC,KAAa,EAAE,OAAuB,EAAE,WAAoB;QAC9E,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,OAAO,OAAO,CAAC,CAAC,kCAAkC;QACpD,CAAC;QAED,IAAI,CAAC;YACH,OAAO,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,OAAO,EAAE,WA
AW,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YAC9F,OAAO,OAAO,CAAC,CAAC,+CAA+C;QACjE,CAAC;IACH,CAAC;IAED;;OAEG;IACK,oBAAoB;QAC1B,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QACrC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,kCAAkC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;QAClI,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAgB;QACpC,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,6BAA6B,QAAQ,EAAE,CAAC,CAAC;YACtD,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,OAAO;QACL,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;IACzE,CAAC;IAED;;OAEG;IACH,SAAS;QAOP,OAAO;YACL,gBAAgB,EAAE,IAAI,CAAC,YAAY,KAAK,IAAI;YAC5C,iBAAiB,EAAE,IAAI,CAAC,eAAe,KAAK,IAAI;YAChD,eAAe,EAAE,IAAI,CAAC,YAAY,KAAK,IAAI;YAC3C,qBAAqB,EAAE,IAAI,CAAC,YAAY,KAAK,IAAI;YACjD,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;SACxB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,IAAI,CAAC,kBAAkB,GAAG,MAAM,CAAC;IACnC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAChC;;OAEG;IACH,MAAM,CAAC,MAAM,CACX,OAAsB,EACtB,YAAiB,EACjB,YAAiB,EACjB,QAAyB,EACzB,qBAA6B,MAAM;QAEnC,MAAM,WAAW,GAAG,IAAI,yBAAyB,EAAE,CAAC;QACpD,WAAW,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QACtC,WAAW,CAAC,eAAe,CAAC,YAAY,CAAC,CAAC;QAC1C,WAAW,CAAC,qBAAqB,CAAC,YAAY,CAAC,CAAC;QAChD,WAAW,CAAC,qBAAqB,CAAC,kBAAkB,CAAC,CAAC;QAEtD,IAAI,QAAQ,EAAE,CAAC;YACb,WAAW,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,WAA
W;QAChB,OAAO,IAAI,yBAAyB,EAAE,CAAC;IACzC,CAAC;CACF"}
@@ -0,0 +1,104 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { IndexManager } from '../index-manager.js';
6
+ import { DatabaseConnection } from './db.js';
7
+ import type { SearchResult, SearchOptions } from './types.js';
8
+ import type { EmbedFunction, RerankFunction } from './interfaces.js';
9
+ /**
10
+ * Search engine that provides semantic search capabilities
11
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
12
+ * Uses explicit dependency injection for clean architecture
13
+ */
14
+ export declare class SearchEngine {
15
+ private embedFn;
16
+ private indexManager;
17
+ private db;
18
+ private rerankFn?;
19
+ /**
20
+ * Creates a new SearchEngine with explicit dependency injection
21
+ *
22
+ * DEPENDENCY INJECTION PATTERN:
23
+ * This constructor requires all dependencies to be explicitly provided, enabling:
24
+ * - Clean separation between core logic and implementation-specific components
25
+ * - Support for different embedding models (text-only, multimodal, custom)
26
+ * - Testability through mock injection
27
+ * - Future extensibility without core changes
28
+ *
29
+ * @param embedFn - Function to embed queries into vectors
30
+ * - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
31
+ * - Examples:
32
+ * - Text: const embedFn = (query) => textEmbedder.embedSingle(query)
33
+ * - Multimodal: const embedFn = (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
34
+ * - Custom: const embedFn = (query) => customModel.embed(query)
35
+ *
36
+ * @param indexManager - Vector index manager for similarity search
37
+ * - Handles vector storage and retrieval operations
38
+ * - Works with any embedding dimensions (384, 512, 768, etc.)
39
+ * - Example: new IndexManager('./index.bin')
40
+ *
41
+ * @param db - Database connection for metadata retrieval
42
+ * - Provides access to document and chunk metadata
43
+ * - Supports different content types through metadata fields
44
+ * - Example: await openDatabase('./db.sqlite')
45
+ *
46
+ * @param rerankFn - Optional function to rerank search results
47
+ * - Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
48
+ * - Examples:
49
+ * - Text: const rerankFn = (query, results) => textReranker.rerank(query, results)
50
+ * - Custom: const rerankFn = (query, results) => customReranker.rerank(query, results)
51
+ * - Disabled: undefined (no reranking)
52
+ *
53
+ * USAGE EXAMPLES (each variable has its own name so the snippet can be pasted as one unit):
54
+ * ```typescript
55
+ * // Text-only search engine
56
+ * const textEmbedFn = await createTextEmbedder();
57
+ * const textRerankFn = await createTextReranker();
58
+ * const indexManager = new IndexManager('./index.bin');
59
+ * const db = await openDatabase('./db.sqlite');
60
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
61
+ *
62
+ * // Search engine without reranking
63
+ * const searchWithoutRerank = new SearchEngine(textEmbedFn, indexManager, db);
64
+ *
65
+ * // Custom embedding implementation
66
+ * const customEmbedFn = async (query) => ({
67
+ * embedding_id: generateId(),
68
+ * vector: await myCustomModel.embed(query)
69
+ * });
70
+ * const customSearch = new SearchEngine(customEmbedFn, indexManager, db);
71
+ * ```
72
+ */
73
+ constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined);
74
+ /**
75
+ * Perform semantic search on the indexed documents
76
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
77
+ * @param query - Search query string
78
+ * @param options - Optional search options including top_k and rerank settings
79
+ * @returns Promise resolving to array of search results
80
+ */
81
+ search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
82
+ /**
83
+ * Format search results with proper structure
84
+ * @param chunks - Database chunks with metadata
85
+ * @param distances - Similarity distances from vector search
86
+ * @param embeddingIds - Embedding IDs in search result order
87
+ * @returns Formatted search results
88
+ */
89
+ private formatSearchResults;
90
+ /**
91
+ * Get search engine statistics
92
+ * @returns Promise resolving to an object with totalChunks, indexSize and rerankingEnabled
93
+ */
94
+ getStats(): Promise<{
95
+ totalChunks: number;
96
+ indexSize: number;
97
+ rerankingEnabled: boolean;
98
+ }>;
99
+ /**
100
+ * Clean up resources - explicit cleanup method; the returned Promise resolves with no value
101
+ */
102
+ cleanup(): Promise<void>;
103
+ }
104
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/core/search.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAA2B,MAAM,SAAS,CAAC;AACtE,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC9D,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGrE;;;;GAIG;AACH,qBAAa,YAAY;IAwDrB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,EAAE;IACV,OAAO,CAAC,QAAQ,CAAC;IA1DnB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAqDG;gBAEO,OAAO,EAAE,aAAa,EACtB,YAAY,EAAE,YAAY,EAC1B,EAAE,EAAE,kBAAkB,EACtB,QAAQ,CAAC,EAAE,cAAc,YAAA;IAcnC;;;;;;OAMG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAuEjF;;;;;;OAMG;IACH,OAAO,CAAC,mBAAmB;IAwC3B;;;OAGG;IACG,QAAQ,IAAI,OAAO,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,EAAE,OAAO,CAAC;KAC3B,CAAC;IASF;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ/B"}
@@ -0,0 +1,218 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { getChunksByEmbeddingIds } from './db.js';
6
+ import { config } from './config.js';
7
/**
 * Search engine that provides semantic search capabilities.
 *
 * Pipeline: query embedding → vector search → metadata retrieval → optional reranking.
 * Every collaborator (embedder, index, database, reranker) is injected through the
 * constructor, so this class contains no model- or modality-specific logic.
 */
export class SearchEngine {
    embedFn;
    indexManager;
    db;
    rerankFn;

    /**
     * Creates a new SearchEngine with explicit dependency injection.
     *
     * @param embedFn - Function that embeds a query; called as `embedFn(query)` and
     *   expected to resolve to an object exposing a `vector` property.
     *   Example: `(query) => textEmbedder.embedSingle(query)`
     * @param indexManager - Vector index manager; must provide `search(vector, topK)`,
     *   `getStats()` and `close()`. Example: `new IndexManager('./index.bin')`
     * @param db - Database connection used for chunk metadata retrieval; must provide
     *   `close()`. Example: `await openDatabase('./db.sqlite')`
     * @param rerankFn - Optional function called as `rerankFn(query, results)` that
     *   resolves to the reordered results. Omit it to disable reranking.
     * @throws {Error} If `embedFn` is not a function, or `indexManager` / `db` is missing.
     *
     * @example
     * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
     * const searchWithoutRerank = new SearchEngine(textEmbedFn, indexManager, db);
     */
    constructor(embedFn, indexManager, db, rerankFn) {
        // Validate required dependencies before touching instance state.
        if (!embedFn || typeof embedFn !== 'function') {
            throw new Error('embedFn must be a valid function');
        }
        if (!indexManager) {
            throw new Error('indexManager is required');
        }
        if (!db) {
            throw new Error('db connection is required');
        }
        this.embedFn = embedFn;
        this.indexManager = indexManager;
        this.db = db;
        this.rerankFn = rerankFn;
    }

    /**
     * Perform semantic search on the indexed documents.
     *
     * @param query - Search query string; an empty or whitespace-only query yields `[]`.
     * @param options - Search options: `top_k` (result count; falls back to
     *   `config.top_k`, then 10) and `rerank` (defaults to whether a reranker was injected).
     * @returns Promise resolving to an array of search results.
     * @throws {Error} "Search failed: …" wrapping any pipeline failure; the original
     *   error is preserved on `error.cause`.
     */
    async search(query, options = {}) {
        if (!query || query.trim().length === 0) {
            return [];
        }
        const startTime = performance.now();
        const topK = options.top_k || config.top_k || 10;
        const hasReranker = typeof this.rerankFn === 'function';
        const shouldRerank = options.rerank !== undefined ? options.rerank : hasReranker;
        try {
            // Step 1: Build query embedding using injected embed function
            const embeddingStartTime = performance.now();
            const queryEmbedding = await this.embedFn(query);
            const embeddingTime = performance.now() - embeddingStartTime;

            // Step 2: Search using IndexManager (which handles hash mapping properly)
            const searchStartTime = performance.now();
            let searchResult;
            try {
                searchResult = this.indexManager.search(queryEmbedding.vector, topK);
            }
            catch (error) {
                // A missing hash → embedding-id mapping is reported as "no results"
                // with a hint, rather than failing the whole search.
                if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
                    console.warn(`Hash mapping issue detected: ${error.message}`);
                    console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
                    return [];
                }
                throw error;
            }
            const vectorSearchTime = performance.now() - searchStartTime;
            if (searchResult.embeddingIds.length === 0) {
                const totalTime = performance.now() - startTime;
                console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
                return [];
            }

            // Step 3: Retrieve chunks from database using embedding IDs
            const retrievalStartTime = performance.now();
            const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
            const retrievalTime = performance.now() - retrievalStartTime;

            // Step 4: Format results with text, score, and document metadata
            let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);

            // Step 5: Optional reranking with injected rerank function
            let rerankTime = 0;
            if (shouldRerank && hasReranker && results.length > 1) {
                try {
                    const rerankStartTime = performance.now();
                    results = await this.rerankFn(query, results);
                    rerankTime = performance.now() - rerankStartTime;
                }
                catch (error) {
                    // Best effort: fall back to vector search results and log the error
                    console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
                }
            }

            const totalTime = performance.now() - startTime;
            // Latency is only logged for monitoring; nothing is tuned on it here.
            console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
                `(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
                `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
            return results;
        }
        catch (error) {
            // Keep the original error reachable via `cause` so its stack is not lost.
            throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }

    /**
     * Format search results with proper structure.
     *
     * @param chunks - Database chunks with metadata (looked up by `embedding_id`).
     * @param distances - Distances from vector search, parallel to `embeddingIds`.
     * @param embeddingIds - Embedding IDs in search result order.
     * @returns Formatted search results in `embeddingIds` order; IDs with no matching
     *   chunk are skipped.
     */
    formatSearchResults(chunks, distances, embeddingIds) {
        // Index chunks by embedding_id for constant-time lookup.
        const chunkMap = new Map(chunks.map((chunk) => [chunk.embedding_id, chunk]));
        const results = [];
        embeddingIds.forEach((embeddingId, i) => {
            const chunk = chunkMap.get(embeddingId);
            if (!chunk) {
                return;
            }
            // Distances are treated as cosine distances: similarity = 1 - distance,
            // clamped so the score never goes negative.
            const score = Math.max(0, 1 - distances[i]);
            results.push({
                content: chunk.content,
                score,
                contentType: chunk.content_type || 'text',
                document: {
                    id: chunk.document_id,
                    source: chunk.document_source,
                    title: chunk.document_title,
                    contentType: chunk.document_content_type || 'text',
                },
            });
        });
        return results;
    }

    /**
     * Get search engine statistics.
     *
     * @returns Object with `totalChunks` and `indexSize` (both taken from the index's
     *   `totalVectors`) and `rerankingEnabled` (true only when a rerank function is set).
     */
    async getStats() {
        const indexStats = await this.indexManager.getStats();
        return {
            totalChunks: indexStats.totalVectors,
            indexSize: indexStats.totalVectors,
            rerankingEnabled: typeof this.rerankFn === 'function',
        };
    }

    /**
     * Clean up resources - explicit cleanup method.
     *
     * Closes the database and then the index. Each close is attempted independently,
     * so a failure in one does not prevent the other from being released. Failures
     * are logged and never thrown.
     */
    async cleanup() {
        const closers = [
            () => this.db.close(),
            () => this.indexManager.close(),
        ];
        for (const close of closers) {
            try {
                await close();
            }
            catch (error) {
                console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
            }
        }
    }
}
218
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/core/search.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAsB,uBAAuB,EAAE,MAAM,SAAS,CAAC;AAGtE,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC;;;;GAIG;AACH,MAAM,OAAO,YAAY;IAwDb;IACA;IACA;IACA;IA1DV;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAqDG;IACH,YACU,OAAsB,EACtB,YAA0B,EAC1B,EAAsB,EACtB,QAAyB;QAHzB,YAAO,GAAP,OAAO,CAAe;QACtB,iBAAY,GAAZ,YAAY,CAAc;QAC1B,OAAE,GAAF,EAAE,CAAoB;QACtB,aAAQ,GAAR,QAAQ,CAAiB;QAEjC,iCAAiC;QACjC,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,UAAU,EAAE,CAAC;YAC9C,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QACD,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC9C,CAAC;QACD,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC;QAEnG,IAAI,CAAC;YACH,8DAA8D;YAC9D,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YACjD,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,0EAA0E;YAC1E,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC;YACjB,IAAI,CAAC;gBACH,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACvE,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC,EAAE,CAAC;oBACvF,OAAO,CAAC,IAAI,CAAC,gCAAgC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;oBAC9D,OAAO,CAAC,IAAI,CAAC,4FAA4F,CAAC,CAAC;oBAC3G,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;YAE7D,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3C,MAAM,SAAS,GAAG,W
AAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,+BAA+B,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;gBAC5E,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,4DAA4D;YAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,IAAI,CAAC,EAAE,EAAE,YAAY,CAAC,YAAY,CAAC,CAAC;YACjF,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,yEAAyE;YACzE,IAAI,OAAO,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,YAAY,CAAC,CAAC;YAElG,2DAA2D;YAC3D,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,IAAI,YAAY,IAAI,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxD,IAAI,CAAC;oBACH,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;oBAC1C,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;oBAC9C,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;gBACnD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,sDAAsD;oBACtD,OAAO,CAAC,IAAI,CAAC,kDAAkD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;gBAC7H,CAAC;YACH,CAAC;YAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEhD,2EAA2E;YAC3E,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,MAAM,eAAe,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACrF,WAAW,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;gBACnF,cAAc,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAE9G,OAAO,OAAO,CAAC;QAEjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,mBAAmB,CACzB,MAAa,EACb,SAAmB,EACnB,YAAsB;QAEtB,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,sDAAsD;QACtD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YA
ExC,IAAI,KAAK,EAAE,CAAC;gBACV,6DAA6D;gBAC7D,2DAA2D;gBAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE5C,OAAO,CAAC,IAAI,CAAC;oBACX,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,KAAK,EAAE,KAAK;oBACZ,WAAW,EAAE,KAAK,CAAC,YAAY,IAAI,MAAM;oBACzC,QAAQ,EAAE;wBACR,EAAE,EAAE,KAAK,CAAC,WAAW;wBACrB,MAAM,EAAE,KAAK,CAAC,eAAe;wBAC7B,KAAK,EAAE,KAAK,CAAC,cAAc;wBAC3B,WAAW,EAAE,KAAK,CAAC,qBAAqB,IAAI,MAAM;qBACnD;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ;QAKZ,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC;QACtD,OAAO;YACL,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,SAAS,EAAE,UAAU,CAAC,YAAY;YAClC,gBAAgB,EAAE,IAAI,CAAC,QAAQ,KAAK,SAAS;SAC9C,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,CAAC;QAClC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAAC;CACF"}