rag-lite-ts 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/README.md +94 -65
  2. package/dist/cli/indexer.d.ts.map +1 -1
  3. package/dist/cli/indexer.js +78 -50
  4. package/dist/cli/indexer.js.map +1 -1
  5. package/dist/cli/search.d.ts.map +1 -1
  6. package/dist/cli/search.js +13 -30
  7. package/dist/cli/search.js.map +1 -1
  8. package/dist/cli.js +2 -2
  9. package/dist/cli.js.map +1 -1
  10. package/dist/config.d.ts +34 -73
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +50 -255
  13. package/dist/config.js.map +1 -1
  14. package/dist/core/adapters.d.ts +93 -0
  15. package/dist/core/adapters.d.ts.map +1 -0
  16. package/dist/core/adapters.js +139 -0
  17. package/dist/core/adapters.js.map +1 -0
  18. package/dist/core/chunker.d.ts +117 -0
  19. package/dist/core/chunker.d.ts.map +1 -0
  20. package/dist/core/chunker.js +73 -0
  21. package/dist/core/chunker.js.map +1 -0
  22. package/dist/core/config.d.ts +102 -0
  23. package/dist/core/config.d.ts.map +1 -0
  24. package/dist/core/config.js +240 -0
  25. package/dist/core/config.js.map +1 -0
  26. package/dist/{db.d.ts → core/db.d.ts} +25 -9
  27. package/dist/core/db.d.ts.map +1 -0
  28. package/dist/{db.js → core/db.js} +86 -16
  29. package/dist/core/db.js.map +1 -0
  30. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  31. package/dist/core/error-handler.d.ts.map +1 -0
  32. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  33. package/dist/core/error-handler.js.map +1 -0
  34. package/dist/core/index.d.ts +57 -0
  35. package/dist/core/index.d.ts.map +1 -0
  36. package/dist/core/index.js +66 -0
  37. package/dist/core/index.js.map +1 -0
  38. package/dist/core/ingestion.d.ts +143 -0
  39. package/dist/core/ingestion.d.ts.map +1 -0
  40. package/dist/core/ingestion.js +347 -0
  41. package/dist/core/ingestion.js.map +1 -0
  42. package/dist/core/interfaces.d.ts +408 -0
  43. package/dist/core/interfaces.d.ts.map +1 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/interfaces.js.map +1 -0
  46. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  47. package/dist/core/path-manager.d.ts.map +1 -0
  48. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  49. package/dist/core/path-manager.js.map +1 -0
  50. package/dist/core/search-example.d.ts +25 -0
  51. package/dist/core/search-example.d.ts.map +1 -0
  52. package/dist/core/search-example.js +138 -0
  53. package/dist/core/search-example.js.map +1 -0
  54. package/dist/core/search-pipeline-example.d.ts +21 -0
  55. package/dist/core/search-pipeline-example.d.ts.map +1 -0
  56. package/dist/core/search-pipeline-example.js +188 -0
  57. package/dist/core/search-pipeline-example.js.map +1 -0
  58. package/dist/core/search-pipeline.d.ts +111 -0
  59. package/dist/core/search-pipeline.d.ts.map +1 -0
  60. package/dist/core/search-pipeline.js +287 -0
  61. package/dist/core/search-pipeline.js.map +1 -0
  62. package/dist/core/search.d.ts +104 -0
  63. package/dist/core/search.d.ts.map +1 -0
  64. package/dist/core/search.js +218 -0
  65. package/dist/core/search.js.map +1 -0
  66. package/dist/core/types.d.ts +63 -0
  67. package/dist/core/types.d.ts.map +1 -0
  68. package/dist/core/types.js +6 -0
  69. package/dist/core/types.js.map +1 -0
  70. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  71. package/dist/core/vector-index.d.ts.map +1 -0
  72. package/dist/{vector-index.js → core/vector-index.js} +19 -0
  73. package/dist/core/vector-index.js.map +1 -0
  74. package/dist/dom-polyfills.d.ts +6 -0
  75. package/dist/dom-polyfills.d.ts.map +1 -0
  76. package/dist/dom-polyfills.js +40 -0
  77. package/dist/dom-polyfills.js.map +1 -0
  78. package/dist/examples/clean-api-examples.d.ts +44 -0
  79. package/dist/examples/clean-api-examples.d.ts.map +1 -0
  80. package/dist/examples/clean-api-examples.js +206 -0
  81. package/dist/examples/clean-api-examples.js.map +1 -0
  82. package/dist/factories/index.d.ts +43 -0
  83. package/dist/factories/index.d.ts.map +1 -0
  84. package/dist/factories/index.js +44 -0
  85. package/dist/factories/index.js.map +1 -0
  86. package/dist/factories/text-factory.d.ts +466 -0
  87. package/dist/factories/text-factory.d.ts.map +1 -0
  88. package/dist/factories/text-factory.js +719 -0
  89. package/dist/factories/text-factory.js.map +1 -0
  90. package/dist/file-processor.d.ts +2 -2
  91. package/dist/file-processor.d.ts.map +1 -1
  92. package/dist/file-processor.js +3 -3
  93. package/dist/file-processor.js.map +1 -1
  94. package/dist/index-manager.d.ts +3 -2
  95. package/dist/index-manager.d.ts.map +1 -1
  96. package/dist/index-manager.js +13 -11
  97. package/dist/index-manager.js.map +1 -1
  98. package/dist/index.d.ts +63 -8
  99. package/dist/index.d.ts.map +1 -1
  100. package/dist/index.js +91 -16
  101. package/dist/index.js.map +1 -1
  102. package/dist/indexer.js +1 -1
  103. package/dist/indexer.js.map +1 -1
  104. package/dist/ingestion.d.ts +30 -156
  105. package/dist/ingestion.d.ts.map +1 -1
  106. package/dist/ingestion.js +58 -675
  107. package/dist/ingestion.js.map +1 -1
  108. package/dist/mcp-server.js +86 -55
  109. package/dist/mcp-server.js.map +1 -1
  110. package/dist/preprocess.js +1 -1
  111. package/dist/preprocess.js.map +1 -1
  112. package/dist/search-standalone.js +1 -1
  113. package/dist/search-standalone.js.map +1 -1
  114. package/dist/search.d.ts +32 -76
  115. package/dist/search.d.ts.map +1 -1
  116. package/dist/search.js +80 -428
  117. package/dist/search.js.map +1 -1
  118. package/dist/text/chunker.d.ts +32 -0
  119. package/dist/text/chunker.d.ts.map +1 -0
  120. package/dist/{chunker.js → text/chunker.js} +98 -75
  121. package/dist/text/chunker.js.map +1 -0
  122. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  123. package/dist/text/embedder.d.ts.map +1 -0
  124. package/dist/{embedder.js → text/embedder.js} +71 -4
  125. package/dist/text/embedder.js.map +1 -0
  126. package/dist/text/index.d.ts +7 -0
  127. package/dist/text/index.d.ts.map +1 -0
  128. package/dist/text/index.js +8 -0
  129. package/dist/text/index.js.map +1 -0
  130. package/dist/text/preprocessors/index.d.ts +17 -0
  131. package/dist/text/preprocessors/index.d.ts.map +1 -0
  132. package/dist/text/preprocessors/index.js +38 -0
  133. package/dist/text/preprocessors/index.js.map +1 -0
  134. package/dist/text/preprocessors/mdx.d.ts +25 -0
  135. package/dist/text/preprocessors/mdx.d.ts.map +1 -0
  136. package/dist/text/preprocessors/mdx.js +101 -0
  137. package/dist/text/preprocessors/mdx.js.map +1 -0
  138. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  139. package/dist/text/preprocessors/mermaid.d.ts.map +1 -0
  140. package/dist/text/preprocessors/mermaid.js +330 -0
  141. package/dist/text/preprocessors/mermaid.js.map +1 -0
  142. package/dist/text/preprocessors/registry.d.ts +56 -0
  143. package/dist/text/preprocessors/registry.d.ts.map +1 -0
  144. package/dist/text/preprocessors/registry.js +180 -0
  145. package/dist/text/preprocessors/registry.js.map +1 -0
  146. package/dist/text/reranker.d.ts +60 -0
  147. package/dist/text/reranker.d.ts.map +1 -0
  148. package/dist/{reranker.js → text/reranker.js} +134 -19
  149. package/dist/text/reranker.js.map +1 -0
  150. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  151. package/dist/text/tokenizer.d.ts.map +1 -0
  152. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  153. package/dist/text/tokenizer.js.map +1 -0
  154. package/dist/types.d.ts +1 -1
  155. package/dist/types.d.ts.map +1 -1
  156. package/package.json +2 -2
  157. package/dist/chunker.d.ts +0 -47
  158. package/dist/chunker.d.ts.map +0 -1
  159. package/dist/chunker.js.map +0 -1
  160. package/dist/db.d.ts.map +0 -1
  161. package/dist/db.js.map +0 -1
  162. package/dist/embedder.d.ts.map +0 -1
  163. package/dist/embedder.js.map +0 -1
  164. package/dist/error-handler.d.ts.map +0 -1
  165. package/dist/error-handler.js.map +0 -1
  166. package/dist/path-manager.d.ts.map +0 -1
  167. package/dist/path-manager.js.map +0 -1
  168. package/dist/reranker.d.ts +0 -40
  169. package/dist/reranker.d.ts.map +0 -1
  170. package/dist/reranker.js.map +0 -1
  171. package/dist/resource-manager-demo.d.ts +0 -7
  172. package/dist/resource-manager-demo.d.ts.map +0 -1
  173. package/dist/resource-manager-demo.js +0 -52
  174. package/dist/resource-manager-demo.js.map +0 -1
  175. package/dist/resource-manager.d.ts +0 -129
  176. package/dist/resource-manager.d.ts.map +0 -1
  177. package/dist/resource-manager.js +0 -389
  178. package/dist/resource-manager.js.map +0 -1
  179. package/dist/tokenizer.d.ts.map +0 -1
  180. package/dist/tokenizer.js.map +0 -1
  181. package/dist/vector-index.d.ts.map +0 -1
  182. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,408 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * This module defines the core interfaces for dependency injection in the search engine.
6
+ * These interfaces enable:
7
+ * 1. Different embedding implementations (text-only, multimodal, etc.)
8
+ * 2. Different reranking strategies (cross-encoder, neural, etc.)
9
+ * 3. Support for multiple content types (text, image, etc.)
10
+ * 4. Different embedding dimensions (384, 512, 768, etc.)
11
+ *
12
+ * DEPENDENCY INJECTION PATTERNS:
13
+ *
14
+ * 1. Direct Function Injection (Advanced Users):
15
+ * ```typescript
16
+ * // Text-only implementation
17
+ * const textEmbedFn: EmbedFunction = async (query) => textEmbedder.embedSingle(query);
18
+ * const textRerankFn: RerankFunction = async (query, results) => textReranker.rerank(query, results);
19
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
20
+ *
21
+ * // Custom implementation
22
+ * const customEmbedFn: EmbedFunction = async (query) => ({
23
+ * embedding_id: generateId(),
24
+ * vector: await myCustomModel.embed(query)
25
+ * });
26
+ * const search = new SearchEngine(customEmbedFn, indexManager, db);
27
+ * ```
28
+ *
29
+ * 2. Factory Pattern (Recommended for Common Use Cases):
30
+ * ```typescript
31
+ * // Using factory for convenience
32
+ * const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
33
+ * embeddingModel: 'all-MiniLM-L6-v2',
34
+ * enableReranking: true
35
+ * });
36
+ *
37
+ * // Factory with custom configuration
38
+ * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
39
+ * chunkSize: 300,
40
+ * chunkOverlap: 50
41
+ * });
42
+ * ```
43
+ *
44
+ * 3. Interface-Based Implementation (Plugin Architecture):
45
+ * ```typescript
46
+ * // Implement interfaces for custom behavior
47
+ * class CustomEmbeddingInterface implements EmbeddingQueryInterface {
48
+ * async embedQuery(query: string): Promise<EmbeddingResult> {
49
+ * return { embedding_id: generateId(), vector: await this.model.embed(query) };
50
+ * }
51
+ * supportedContentTypes = ['text', 'code'];
52
+ * embeddingDimensions = 384;
53
+ * modelIdentifier = 'custom-model-v1';
54
+ * }
55
+ *
56
+ * const customInterface = new CustomEmbeddingInterface();
57
+ * const embedFn = customInterface.embedQuery.bind(customInterface);
58
+ * const search = new SearchEngine(embedFn, indexManager, db);
59
+ * ```
60
+ *
61
+ * 4. Multimodal Implementation (Future):
62
+ * ```typescript
63
+ * // Multimodal embedding function
64
+ * const multimodalEmbedFn: EmbedFunction = async (query, contentType) => {
65
+ * if (contentType === 'image') return clipEmbedder.embedImage(query);
66
+ * return clipEmbedder.embedText(query);
67
+ * };
68
+ *
69
+ * // Multimodal reranking function
70
+ * const multimodalRerankFn: RerankFunction = async (query, results, contentType) => {
71
+ * return multimodalReranker.rerank(query, results, contentType);
72
+ * };
73
+ *
74
+ * const search = new SearchEngine(multimodalEmbedFn, indexManager, db, multimodalRerankFn);
75
+ * ```
76
+ */
77
+ import type { SearchResult, EmbeddingResult } from './types.js';
78
+ /**
79
+ * Core embedding function interface for dependency injection
80
+ * Supports different content types and embedding dimensions (384, 512, 768, etc.)
81
+ */
82
+ export type EmbedFunction = (query: string, contentType?: string) => Promise<EmbeddingResult>;
83
+ /**
84
+ * Core reranking function interface for dependency injection
85
+ * Supports different content types and query-result pairs
86
+ */
87
+ export type RerankFunction = (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>;
88
+ /**
89
+ * Interface for embedding query operations
90
+ * Enables dependency injection of different embedding implementations
91
+ *
92
+ * This interface provides a standardized way to interact with different
93
+ * embedding models while maintaining compatibility checking and metadata.
94
+ * Implementations can be text-only, multimodal, or custom models.
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * // Text embedding implementation
99
+ * class TextEmbeddingInterface implements EmbeddingQueryInterface {
100
+ * embedQuery = async (query: string) => textEmbedder.embedSingle(query);
101
+ * supportedContentTypes = ['text', 'code'];
102
+ * embeddingDimensions = 384;
103
+ * modelIdentifier = 'all-MiniLM-L6-v2';
104
+ * }
105
+ *
106
+ * // Use with SearchEngine
107
+ * const embeddingInterface = new TextEmbeddingInterface();
108
+ * const embedFn = embeddingInterface.embedQuery.bind(embeddingInterface);
109
+ * const search = new SearchEngine(embedFn, indexManager, db);
110
+ * ```
111
+ */
112
+ export interface EmbeddingQueryInterface {
113
+ /**
114
+ * Function to embed a query string into a vector
115
+ * Should handle the specific content types listed in supportedContentTypes
116
+ */
117
+ embedQuery: EmbedFunction;
118
+ /**
119
+ * Content types this embedder supports (e.g., ['text'], ['text', 'image'])
120
+ * Used for validation and routing of different content types
121
+ */
122
+ supportedContentTypes: string[];
123
+ /**
124
+ * Dimensions of the embedding vectors this embedder produces
125
+ * Must match the vector index dimensions for compatibility
126
+ */
127
+ embeddingDimensions: number;
128
+ /**
129
+ * Model name or identifier for compatibility checking
130
+ * Used to ensure consistent model usage across sessions
131
+ */
132
+ modelIdentifier: string;
133
+ }
134
+ /**
135
+ * Interface for reranking operations
136
+ * Enables dependency injection of different reranking implementations
137
+ *
138
+ * This interface provides a standardized way to interact with different
139
+ * reranking models. Reranking improves search quality by re-scoring
140
+ * initial search results using more sophisticated models.
141
+ *
142
+ * @example
143
+ * ```typescript
144
+ * // Text reranking implementation
145
+ * class TextRerankingInterface implements RerankingInterface {
146
+ * rerankResults = async (query: string, results: SearchResult[]) =>
147
+ * textReranker.rerank(query, results);
148
+ * supportedContentTypes = ['text'];
149
+ * isEnabled = true;
150
+ * modelIdentifier = 'cross-encoder/ms-marco-MiniLM-L-6-v2';
151
+ * }
152
+ *
153
+ * // Use with SearchEngine
154
+ * const rerankingInterface = new TextRerankingInterface();
155
+ * const rerankFn = rerankingInterface.rerankResults.bind(rerankingInterface);
156
+ * const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
157
+ * ```
158
+ */
159
+ export interface RerankingInterface {
160
+ /**
161
+ * Function to rerank search results
162
+ * Takes a query and initial results, returns reordered results with updated scores
163
+ */
164
+ rerankResults: RerankFunction;
165
+ /**
166
+ * Content types this reranker supports
167
+ * Should match or be a subset of the embedding interface content types
168
+ */
169
+ supportedContentTypes: string[];
170
+ /**
171
+ * Whether reranking is currently enabled and available
172
+ * Can be used to gracefully disable reranking if models fail to load
173
+ */
174
+ isEnabled: boolean;
175
+ /**
176
+ * Model name or identifier for the reranking model
177
+ * Used for logging and compatibility tracking
178
+ */
179
+ modelIdentifier: string;
180
+ }
181
+ /**
182
+ * Configuration interface for search engine dependency injection
183
+ * Allows different implementations to be plugged in with optional settings
184
+ *
185
+ * This interface provides a way to configure SearchEngine instances with
186
+ * different embedding and reranking implementations, along with default
187
+ * behaviors for content type handling and initialization.
188
+ *
189
+ * @example
190
+ * ```typescript
191
+ * // Configuration with custom interfaces
192
+ * const config: SearchEngineConfig = {
193
+ * embeddingInterface: new CustomEmbeddingInterface(),
194
+ * rerankingInterface: new CustomRerankingInterface(),
195
+ * defaultContentType: 'text',
196
+ * autoInitialize: true
197
+ * };
198
+ *
199
+ * // Use configuration (implementation-specific)
200
+ * const search = new ConfigurableSearchEngine(config);
201
+ * ```
202
+ */
203
+ export interface SearchEngineConfig {
204
+ /**
205
+ * Optional embedding interface for dependency injection
206
+ * If provided, will be used instead of direct function injection
207
+ */
208
+ embeddingInterface?: EmbeddingQueryInterface;
209
+ /**
210
+ * Optional reranking interface for dependency injection
211
+ * If provided, will be used for result reranking
212
+ */
213
+ rerankingInterface?: RerankingInterface;
214
+ /**
215
+ * Default content type for queries when not specified
216
+ * Used when content type cannot be inferred from context
217
+ */
218
+ defaultContentType?: string;
219
+ /**
220
+ * Whether to enable automatic initialization
221
+ * When true, models and resources are loaded lazily on first use
222
+ */
223
+ autoInitialize?: boolean;
224
+ }
225
+ /**
226
+ * Interface for content type strategies
227
+ * Enables different handling for different content types
228
+ */
229
+ export interface ContentTypeStrategy {
230
+ /**
231
+ * Content type this strategy handles
232
+ */
233
+ contentType: string;
234
+ /**
235
+ * Whether this strategy can handle the given content type
236
+ */
237
+ canHandle(contentType: string): boolean;
238
+ /**
239
+ * Process query for this content type before embedding
240
+ */
241
+ preprocessQuery?(query: string): string;
242
+ /**
243
+ * Post-process search results for this content type
244
+ */
245
+ postprocessResults?(results: SearchResult[]): SearchResult[];
246
+ }
247
+ /**
248
+ * Generic interface for model-agnostic operations
249
+ * Base interface that all model-specific implementations should extend
250
+ */
251
+ export interface ModelAgnosticInterface {
252
+ /**
253
+ * Initialize the interface (load models, set up resources, etc.)
254
+ */
255
+ initialize(): Promise<void>;
256
+ /**
257
+ * Clean up resources
258
+ */
259
+ cleanup(): Promise<void>;
260
+ /**
261
+ * Check if the interface is ready for use
262
+ */
263
+ isReady(): boolean;
264
+ /**
265
+ * Get interface metadata
266
+ */
267
+ getMetadata(): {
268
+ name: string;
269
+ version: string;
270
+ supportedContentTypes: string[];
271
+ };
272
+ }
273
+ /**
274
+ * Extended embedding interface that includes model-agnostic operations
275
+ */
276
+ export interface ExtendedEmbeddingInterface extends EmbeddingQueryInterface, ModelAgnosticInterface {
277
+ /**
278
+ * Batch embed multiple queries for efficiency
279
+ */
280
+ embedBatch?(queries: string[], contentType?: string): Promise<EmbeddingResult[]>;
281
+ }
282
+ /**
283
+ * Extended reranking interface that includes model-agnostic operations
284
+ */
285
+ export interface ExtendedRerankingInterface extends RerankingInterface, ModelAgnosticInterface {
286
+ /**
287
+ * Batch rerank multiple query-result pairs for efficiency
288
+ */
289
+ rerankBatch?(queries: string[], resultSets: SearchResult[][], contentType?: string): Promise<SearchResult[][]>;
290
+ }
291
+ /**
292
+ * Interface for search pipeline coordination
293
+ * Defines the core search pipeline steps that are model-agnostic
294
+ */
295
+ export interface SearchPipelineInterface {
296
+ /**
297
+ * Step 1: Process and embed the query
298
+ */
299
+ embedQuery(query: string, contentType?: string): Promise<EmbeddingResult>;
300
+ /**
301
+ * Step 2: Perform vector search
302
+ */
303
+ vectorSearch(queryVector: Float32Array, topK: number): Promise<{
304
+ embeddingIds: string[];
305
+ distances: number[];
306
+ }>;
307
+ /**
308
+ * Step 3: Retrieve metadata from database
309
+ */
310
+ retrieveMetadata(embeddingIds: string[]): Promise<any[]>;
311
+ /**
312
+ * Step 4: Format initial results
313
+ */
314
+ formatResults(chunks: any[], distances: number[], embeddingIds: string[]): SearchResult[];
315
+ /**
316
+ * Step 5: Optional reranking
317
+ */
318
+ rerankResults?(query: string, results: SearchResult[], contentType?: string): Promise<SearchResult[]>;
319
+ }
320
+ /**
321
+ * Factory interface for creating embedding and reranking functions
322
+ * Enables clean dependency injection patterns and simplifies common use cases
323
+ *
324
+ * FACTORY PATTERN BENEFITS:
325
+ * - Handles complex initialization logic (model loading, configuration)
326
+ * - Provides simple API for common use cases
327
+ * - Maintains access to underlying dependency injection architecture
328
+ * - Supports different content types and embedding models
329
+ *
330
+ * USAGE EXAMPLES:
331
+ * ```typescript
332
+ * // Text factory implementation
333
+ * class TextSearchDependencyFactory implements SearchDependencyFactory {
334
+ * createEmbedFunction(contentType = 'text'): EmbedFunction {
335
+ * const embedder = new TextEmbeddingEngine();
336
+ * return async (query) => embedder.embedSingle(query);
337
+ * }
338
+ *
339
+ * createRerankFunction(contentType = 'text'): RerankFunction {
340
+ * const reranker = new CrossEncoderReranker();
341
+ * return async (query, results) => reranker.rerank(query, results);
342
+ * }
343
+ * }
344
+ *
345
+ * // Factory usage in practice
346
+ * const factory = new TextSearchDependencyFactory();
347
+ * const embedFn = factory.createEmbedFunction();
348
+ * const rerankFn = factory.createRerankFunction();
349
+ * const search = new SearchEngine(embedFn, indexManager, db, rerankFn);
350
+ *
351
+ * // Multimodal factory (future)
352
+ * class MultimodalSearchDependencyFactory implements SearchDependencyFactory {
353
+ * createEmbedFunction(contentType = 'text'): EmbedFunction {
354
+ * const clipModel = new CLIPEmbeddingEngine();
355
+ * return async (query, type) => {
356
+ * if (type === 'image') return clipModel.embedImage(query);
357
+ * return clipModel.embedText(query);
358
+ * };
359
+ * }
360
+ * }
361
+ * ```
362
+ */
363
+ export interface SearchDependencyFactory {
364
+ /**
365
+ * Create an embedding function for the specified content type
366
+ * @param contentType - Content type to create embedder for ('text', 'image', etc.)
367
+ * @returns EmbedFunction that can handle the specified content type
368
+ */
369
+ createEmbedFunction(contentType?: string): EmbedFunction;
370
+ /**
371
+ * Create a reranking function for the specified content type
372
+ * @param contentType - Content type to create reranker for ('text', 'image', etc.)
373
+ * @returns RerankFunction for the content type, or undefined if not supported
374
+ */
375
+ createRerankFunction(contentType?: string): RerankFunction | undefined;
376
+ /**
377
+ * Get supported content types for this factory
378
+ * @returns Array of supported content type strings
379
+ */
380
+ getSupportedContentTypes(): string[];
381
+ /**
382
+ * Get embedding dimensions for compatibility checking
383
+ * @returns Number of dimensions in embedding vectors produced by this factory
384
+ */
385
+ getEmbeddingDimensions(): number;
386
+ }
387
+ /**
388
+ * Validation utilities for interface compatibility
389
+ */
390
+ export declare class InterfaceValidator {
391
+ /**
392
+ * Validate that an EmbedFunction is compatible with expected interface
393
+ */
394
+ static validateEmbedFunction(embedFn: EmbedFunction): boolean;
395
+ /**
396
+ * Validate that a RerankFunction is compatible with expected interface
397
+ */
398
+ static validateRerankFunction(rerankFn: RerankFunction): boolean;
399
+ /**
400
+ * Validate embedding dimensions compatibility
401
+ */
402
+ static validateEmbeddingDimensions(expected: number, actual: number): boolean;
403
+ /**
404
+ * Validate content type support
405
+ */
406
+ static validateContentTypeSupport(supportedTypes: string[], requestedType: string): boolean;
407
+ }
408
+ //# sourceMappingURL=interfaces.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.d.ts","sourceRoot":"","sources":["../../src/core/interfaces.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2EG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEhE;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,eAAe,CAAC,CAAC;AAE9F;;;GAGG;AACH,MAAM,MAAM,cAAc,GAAG,CAC3B,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EAAE,EACvB,WAAW,CAAC,EAAE,MAAM,KACjB,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;AAE7B;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,UAAU,EAAE,aAAa,CAAC;IAE1B;;;OAGG;IACH,qBAAqB,EAAE,MAAM,EAAE,CAAC;IAEhC;;;OAGG;IACH,mBAAmB,EAAE,MAAM,CAAC;IAE5B;;;OAGG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;OAGG;IACH,aAAa,EAAE,cAAc,CAAC;IAE9B;;;OAGG;IACH,qBAAqB,EAAE,MAAM,EAAE,CAAC;IAEhC;;;OAGG;IACH,SAAS,EAAE,OAAO,CAAC;IAEnB;;;OAGG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;OAGG;IACH,kBAAkB,CAAC,EAAE,uBAAuB,CAAC;IAE7C;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;OAEG;IACH,SAAS,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IAExC;;OAEG;IACH,eAAe,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC;IAExC;;OAEG;IACH,kBAAkB,CAAC,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAAC;CAC9D;AAED;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC;;OAEG;IACH,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE5B;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzB;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC;IAEnB;;OAEG;IACH,WAAW,IAAI;QACb,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,qBAAqB,EAAE,MAAM,EAAE,CAAC;KACjC,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,0BAA2B,SAAQ,uBAAuB,EAAE,sBAAsB;IACjG;;OAEG;IACH,UAAU,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;CAClF;AAED;;GAEG;AACH,MAAM,WAAW,0BAA2B,SAAQ,kBAAkB,EAA
E,sBAAsB;IAC5F;;OAEG;IACH,WAAW,CAAC,CACV,OAAO,EAAE,MAAM,EAAE,EACjB,UAAU,EAAE,YAAY,EAAE,EAAE,EAC5B,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;CAC9B;AAED;;;GAGG;AACH,MAAM,WAAW,uBAAuB;IACtC;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAE1E;;OAEG;IACH,YAAY,CAAC,WAAW,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;QAC7D,YAAY,EAAE,MAAM,EAAE,CAAC;QACvB,SAAS,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC,CAAC;IAEH;;OAEG;IACH,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAEzD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,YAAY,EAAE,CAAC;IAE1F;;OAEG;IACH,aAAa,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CACvG;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,WAAW,uBAAuB;IACtC;;;;OAIG;IACH,mBAAmB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;IAEzD;;;;OAIG;IACH,oBAAoB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,cAAc,GAAG,SAAS,CAAC;IAEvE;;;OAGG;IACH,wBAAwB,IAAI,MAAM,EAAE,CAAC;IAErC;;;OAGG;IACH,sBAAsB,IAAI,MAAM,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,kBAAkB;IAC7B;;OAEG;IACH,MAAM,CAAC,qBAAqB,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO;IAI7D;;OAEG;IACH,MAAM,CAAC,sBAAsB,CAAC,QAAQ,EAAE,cAAc,GAAG,OAAO;IAIhE;;OAEG;IACH,MAAM,CAAC,2BAA2B,CAChC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,GACb,OAAO;IAIV;;OAEG;IACH,MAAM,CAAC,0BAA0B,CAC/B,cAAc,EAAE,MAAM,EAAE,EACxB,aAAa,EAAE,MAAM,GACpB,OAAO;CAGX"}
@@ -0,0 +1,106 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * This module defines the core interfaces for dependency injection in the search engine.
6
+ * These interfaces enable:
7
+ * 1. Different embedding implementations (text-only, multimodal, etc.)
8
+ * 2. Different reranking strategies (cross-encoder, neural, etc.)
9
+ * 3. Support for multiple content types (text, image, etc.)
10
+ * 4. Different embedding dimensions (384, 512, 768, etc.)
11
+ *
12
+ * DEPENDENCY INJECTION PATTERNS:
13
+ *
14
+ * 1. Direct Function Injection (Advanced Users):
15
+ * ```typescript
16
+ * // Text-only implementation
17
+ * const textEmbedFn: EmbedFunction = async (query) => textEmbedder.embedSingle(query);
18
+ * const textRerankFn: RerankFunction = async (query, results) => textReranker.rerank(query, results);
19
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
20
+ *
21
+ * // Custom implementation
22
+ * const customEmbedFn: EmbedFunction = async (query) => ({
23
+ * embedding_id: generateId(),
24
+ * vector: await myCustomModel.embed(query)
25
+ * });
26
+ * const search = new SearchEngine(customEmbedFn, indexManager, db);
27
+ * ```
28
+ *
29
+ * 2. Factory Pattern (Recommended for Common Use Cases):
30
+ * ```typescript
31
+ * // Using factory for convenience
32
+ * const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
33
+ * embeddingModel: 'all-MiniLM-L6-v2',
34
+ * enableReranking: true
35
+ * });
36
+ *
37
+ * // Factory with custom configuration
38
+ * const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
39
+ * chunkSize: 300,
40
+ * chunkOverlap: 50
41
+ * });
42
+ * ```
43
+ *
44
+ * 3. Interface-Based Implementation (Plugin Architecture):
45
+ * ```typescript
46
+ * // Implement interfaces for custom behavior
47
+ * class CustomEmbeddingInterface implements EmbeddingQueryInterface {
48
+ * async embedQuery(query: string): Promise<EmbeddingResult> {
49
+ * return { embedding_id: generateId(), vector: await this.model.embed(query) };
50
+ * }
51
+ * supportedContentTypes = ['text', 'code'];
52
+ * embeddingDimensions = 384;
53
+ * modelIdentifier = 'custom-model-v1';
54
+ * }
55
+ *
56
+ * const customInterface = new CustomEmbeddingInterface();
57
+ * const embedFn = customInterface.embedQuery.bind(customInterface);
58
+ * const search = new SearchEngine(embedFn, indexManager, db);
59
+ * ```
60
+ *
61
+ * 4. Multimodal Implementation (Future):
62
+ * ```typescript
63
+ * // Multimodal embedding function
64
+ * const multimodalEmbedFn: EmbedFunction = async (query, contentType) => {
65
+ * if (contentType === 'image') return clipEmbedder.embedImage(query);
66
+ * return clipEmbedder.embedText(query);
67
+ * };
68
+ *
69
+ * // Multimodal reranking function
70
+ * const multimodalRerankFn: RerankFunction = async (query, results, contentType) => {
71
+ * return multimodalReranker.rerank(query, results, contentType);
72
+ * };
73
+ *
74
+ * const search = new SearchEngine(multimodalEmbedFn, indexManager, db, multimodalRerankFn);
75
+ * ```
76
+ */
77
+ /**
78
+ * Validation utilities for interface compatibility
79
+ */
80
+ export class InterfaceValidator {
81
+ /**
82
+ * Validate that an EmbedFunction is compatible with expected interface
83
+ */
84
+ static validateEmbedFunction(embedFn) {
85
+ return typeof embedFn === 'function';
86
+ }
87
+ /**
88
+ * Validate that a RerankFunction is compatible with expected interface
89
+ */
90
+ static validateRerankFunction(rerankFn) {
91
+ return typeof rerankFn === 'function';
92
+ }
93
+ /**
94
+ * Validate embedding dimensions compatibility
95
+ */
96
+ static validateEmbeddingDimensions(expected, actual) {
97
+ return expected === actual;
98
+ }
99
+ /**
100
+ * Validate content type support
101
+ */
102
+ static validateContentTypeSupport(supportedTypes, requestedType) {
103
+ return supportedTypes.includes(requestedType);
104
+ }
105
+ }
106
+ //# sourceMappingURL=interfaces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../../src/core/interfaces.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2EG;AAkWH;;GAEG;AACH,MAAM,OAAO,kBAAkB;IAC7B;;OAEG;IACH,MAAM,CAAC,qBAAqB,CAAC,OAAsB;QACjD,OAAO,OAAO,OAAO,KAAK,UAAU,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,sBAAsB,CAAC,QAAwB;QACpD,OAAO,OAAO,QAAQ,KAAK,UAAU,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,2BAA2B,CAChC,QAAgB,EAChB,MAAc;QAEd,OAAO,QAAQ,KAAK,MAAM,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,0BAA0B,CAC/B,cAAwB,EACxB,aAAqB;QAErB,OAAO,cAAc,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;IAChD,CAAC;CACF"}
@@ -1,5 +1,10 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
1
5
  /**
2
6
  * Manages document path storage and resolution strategies
7
+ * Model-agnostic - works with any content type (text, image, etc.)
3
8
  */
4
9
  export declare class DocumentPathManager {
5
10
  private strategy;
@@ -0,0 +1 @@
1
+ {"version":3,"file":"path-manager.d.ts","sourceRoot":"","sources":["../../src/core/path-manager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;;GAGG;AACH,qBAAa,mBAAmB;IAExB,OAAO,CAAC,QAAQ;IAChB,OAAO,CAAC,QAAQ;gBADR,QAAQ,EAAE,UAAU,GAAG,UAAU,EACjC,QAAQ,EAAE,MAAM;IAG5B;;;;OAIG;IACH,aAAa,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM;IAS3C;;;;OAIG;IACH,cAAc,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM;IAS3C;;OAEG;IACH,WAAW,IAAI,UAAU,GAAG,UAAU;IAItC;;OAEG;IACH,WAAW,IAAI,MAAM;IAIrB;;;;OAIG;IACH,YAAY,CAAC,WAAW,EAAE,MAAM,GAAG,mBAAmB;IAItD;;;;;OAKG;IACH,YAAY,CAAC,WAAW,EAAE,UAAU,GAAG,UAAU,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,mBAAmB;CAGhG"}
@@ -1,6 +1,11 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
1
5
  import { relative, resolve, isAbsolute } from 'path';
2
6
  /**
3
7
  * Manages document path storage and resolution strategies
8
+ * Model-agnostic - works with any content type (text, image, etc.)
4
9
  */
5
10
  export class DocumentPathManager {
6
11
  strategy;
@@ -0,0 +1 @@
1
+ {"version":3,"file":"path-manager.js","sourceRoot":"","sources":["../../src/core/path-manager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,MAAM,CAAC;AAErD;;;GAGG;AACH,MAAM,OAAO,mBAAmB;IAEhB;IACA;IAFZ,YACY,QAAiC,EACjC,QAAgB;QADhB,aAAQ,GAAR,QAAQ,CAAyB;QACjC,aAAQ,GAAR,QAAQ,CAAQ;IACxB,CAAC;IAEL;;;;OAIG;IACH,aAAa,CAAC,YAAoB;QAC9B,IAAI,IAAI,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC/B,OAAO,YAAY,CAAC;QACxB,CAAC;QAED,qDAAqD;QACrD,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACjD,CAAC;IAED;;;;OAIG;IACH,cAAc,CAAC,WAAmB;QAC9B,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC1B,OAAO,WAAW,CAAC;QACvB,CAAC;QAED,0CAA0C;QAC1C,OAAO,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,WAAW;QACP,OAAO,IAAI,CAAC,QAAQ,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,WAAW;QACP,OAAO,IAAI,CAAC,QAAQ,CAAC;IACzB,CAAC;IAED;;;;OAIG;IACH,YAAY,CAAC,WAAmB;QAC5B,OAAO,IAAI,mBAAmB,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;IAC/D,CAAC;IAED;;;;;OAKG;IACH,YAAY,CAAC,WAAoC,EAAE,WAAoB;QACnE,OAAO,IAAI,mBAAmB,CAAC,WAAW,EAAE,WAAW,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC9E,CAAC;CACJ"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * CORE MODULE — Example usage of the new SearchEngine with dependency injection
3
+ * This file demonstrates how the refactored SearchEngine works with injected dependencies
4
+ */
5
+ import type { EmbedFunction, RerankFunction } from './interfaces.js';
6
+ /**
7
+ * Example embedding function that could be injected (module-level constant typed as EmbedFunction, re-exported below).
8
+ * In practice, this would be created from a text embedder or multimodal embedder
9
+ */
10
+ declare const exampleEmbedFunction: EmbedFunction;
11
+ /**
12
+ * Example reranking function that could be injected (module-level constant typed as RerankFunction, re-exported below).
13
+ * In practice, this would be created from a cross-encoder reranker
14
+ */
15
+ declare const exampleRerankFunction: RerankFunction;
16
+ /**
17
+ * Example usage of the new SearchEngine with dependency injection. Asynchronous: the returned promise resolves with no value.
18
+ */
19
+ export declare function exampleUsage(): Promise<void>;
20
+ /**
21
+ * Example of how to create embedding and reranking functions from existing components. Synchronous: takes no arguments and returns nothing.
22
+ */
23
+ export declare function createAdapterExample(): void;
24
+ export { exampleEmbedFunction, exampleRerankFunction };
25
+ //# sourceMappingURL=search-example.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search-example.d.ts","sourceRoot":"","sources":["../../src/core/search-example.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAIrE;;;GAGG;AACH,QAAA,MAAM,oBAAoB,EAAE,aAU3B,CAAC;AAEF;;;GAGG;AACH,QAAA,MAAM,qBAAqB,EAAE,cAS5B,CAAC;AAEF;;GAEG;AACH,wBAAsB,YAAY,kBA4CjC;AAED;;GAEG;AACH,wBAAgB,oBAAoB,SAoFnC;AAGD,OAAO,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,CAAC"}