rag-lite-ts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/README.md +606 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/chunker.d.ts +2 -0
  12. package/dist/core/cli-database-utils.d.ts +53 -0
  13. package/dist/core/cli-database-utils.js +239 -0
  14. package/dist/core/config.js +10 -3
  15. package/dist/core/content-errors.d.ts +111 -0
  16. package/dist/core/content-errors.js +362 -0
  17. package/dist/core/content-manager.d.ts +343 -0
  18. package/dist/core/content-manager.js +1504 -0
  19. package/dist/core/content-performance-optimizer.d.ts +150 -0
  20. package/dist/core/content-performance-optimizer.js +516 -0
  21. package/dist/core/content-resolver.d.ts +104 -0
  22. package/dist/core/content-resolver.js +285 -0
  23. package/dist/core/cross-modal-search.d.ts +164 -0
  24. package/dist/core/cross-modal-search.js +342 -0
  25. package/dist/core/database-connection-manager.d.ts +109 -0
  26. package/dist/core/database-connection-manager.js +304 -0
  27. package/dist/core/db.d.ts +141 -2
  28. package/dist/core/db.js +631 -89
  29. package/dist/core/embedder-factory.d.ts +176 -0
  30. package/dist/core/embedder-factory.js +338 -0
  31. package/dist/core/index.d.ts +3 -1
  32. package/dist/core/index.js +4 -1
  33. package/dist/core/ingestion.d.ts +85 -15
  34. package/dist/core/ingestion.js +510 -45
  35. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  36. package/dist/core/lazy-dependency-loader.js +453 -0
  37. package/dist/core/mode-detection-service.d.ts +150 -0
  38. package/dist/core/mode-detection-service.js +565 -0
  39. package/dist/core/mode-model-validator.d.ts +92 -0
  40. package/dist/core/mode-model-validator.js +203 -0
  41. package/dist/core/model-registry.d.ts +120 -0
  42. package/dist/core/model-registry.js +415 -0
  43. package/dist/core/model-validator.d.ts +217 -0
  44. package/dist/core/model-validator.js +782 -0
  45. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  46. package/dist/core/polymorphic-search-factory.js +344 -0
  47. package/dist/core/raglite-paths.d.ts +121 -0
  48. package/dist/core/raglite-paths.js +145 -0
  49. package/dist/core/reranking-config.d.ts +42 -0
  50. package/dist/core/reranking-config.js +156 -0
  51. package/dist/core/reranking-factory.d.ts +92 -0
  52. package/dist/core/reranking-factory.js +591 -0
  53. package/dist/core/reranking-strategies.d.ts +325 -0
  54. package/dist/core/reranking-strategies.js +720 -0
  55. package/dist/core/resource-cleanup.d.ts +163 -0
  56. package/dist/core/resource-cleanup.js +371 -0
  57. package/dist/core/resource-manager.d.ts +212 -0
  58. package/dist/core/resource-manager.js +564 -0
  59. package/dist/core/search.d.ts +28 -1
  60. package/dist/core/search.js +83 -5
  61. package/dist/core/streaming-operations.d.ts +145 -0
  62. package/dist/core/streaming-operations.js +409 -0
  63. package/dist/core/types.d.ts +3 -0
  64. package/dist/core/universal-embedder.d.ts +177 -0
  65. package/dist/core/universal-embedder.js +139 -0
  66. package/dist/core/validation-messages.d.ts +99 -0
  67. package/dist/core/validation-messages.js +334 -0
  68. package/dist/core/vector-index.js +7 -8
  69. package/dist/factories/index.d.ts +1 -1
  70. package/dist/factories/text-factory.d.ts +128 -34
  71. package/dist/factories/text-factory.js +346 -97
  72. package/dist/file-processor.d.ts +88 -2
  73. package/dist/file-processor.js +720 -17
  74. package/dist/index.d.ts +9 -0
  75. package/dist/index.js +11 -0
  76. package/dist/ingestion.d.ts +16 -0
  77. package/dist/ingestion.js +21 -0
  78. package/dist/mcp-server.d.ts +35 -3
  79. package/dist/mcp-server.js +1107 -31
  80. package/dist/multimodal/clip-embedder.d.ts +314 -0
  81. package/dist/multimodal/clip-embedder.js +945 -0
  82. package/dist/multimodal/index.d.ts +6 -0
  83. package/dist/multimodal/index.js +6 -0
  84. package/dist/run-error-recovery-tests.d.ts +7 -0
  85. package/dist/run-error-recovery-tests.js +101 -0
  86. package/dist/search.d.ts +26 -0
  87. package/dist/search.js +54 -1
  88. package/dist/test-utils.d.ts +8 -26
  89. package/dist/text/chunker.d.ts +1 -0
  90. package/dist/text/embedder.js +15 -8
  91. package/dist/text/index.d.ts +1 -0
  92. package/dist/text/index.js +1 -0
  93. package/dist/text/reranker.d.ts +1 -2
  94. package/dist/text/reranker.js +17 -47
  95. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  96. package/dist/text/sentence-transformer-embedder.js +340 -0
  97. package/dist/types.d.ts +39 -0
  98. package/dist/utils/vector-math.d.ts +31 -0
  99. package/dist/utils/vector-math.js +70 -0
  100. package/package.json +15 -3
  101. package/dist/api-errors.d.ts.map +0 -1
  102. package/dist/api-errors.js.map +0 -1
  103. package/dist/cli/indexer.d.ts.map +0 -1
  104. package/dist/cli/indexer.js.map +0 -1
  105. package/dist/cli/search.d.ts.map +0 -1
  106. package/dist/cli/search.js.map +0 -1
  107. package/dist/cli.d.ts.map +0 -1
  108. package/dist/cli.js.map +0 -1
  109. package/dist/config.d.ts.map +0 -1
  110. package/dist/config.js.map +0 -1
  111. package/dist/core/adapters.d.ts.map +0 -1
  112. package/dist/core/adapters.js.map +0 -1
  113. package/dist/core/chunker.d.ts.map +0 -1
  114. package/dist/core/chunker.js.map +0 -1
  115. package/dist/core/config.d.ts.map +0 -1
  116. package/dist/core/config.js.map +0 -1
  117. package/dist/core/db.d.ts.map +0 -1
  118. package/dist/core/db.js.map +0 -1
  119. package/dist/core/error-handler.d.ts.map +0 -1
  120. package/dist/core/error-handler.js.map +0 -1
  121. package/dist/core/index.d.ts.map +0 -1
  122. package/dist/core/index.js.map +0 -1
  123. package/dist/core/ingestion.d.ts.map +0 -1
  124. package/dist/core/ingestion.js.map +0 -1
  125. package/dist/core/interfaces.d.ts.map +0 -1
  126. package/dist/core/interfaces.js.map +0 -1
  127. package/dist/core/path-manager.d.ts.map +0 -1
  128. package/dist/core/path-manager.js.map +0 -1
  129. package/dist/core/search-example.d.ts +0 -25
  130. package/dist/core/search-example.d.ts.map +0 -1
  131. package/dist/core/search-example.js +0 -138
  132. package/dist/core/search-example.js.map +0 -1
  133. package/dist/core/search-pipeline-example.d.ts +0 -21
  134. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  135. package/dist/core/search-pipeline-example.js +0 -188
  136. package/dist/core/search-pipeline-example.js.map +0 -1
  137. package/dist/core/search-pipeline.d.ts.map +0 -1
  138. package/dist/core/search-pipeline.js.map +0 -1
  139. package/dist/core/search.d.ts.map +0 -1
  140. package/dist/core/search.js.map +0 -1
  141. package/dist/core/types.d.ts.map +0 -1
  142. package/dist/core/types.js.map +0 -1
  143. package/dist/core/vector-index.d.ts.map +0 -1
  144. package/dist/core/vector-index.js.map +0 -1
  145. package/dist/dom-polyfills.d.ts.map +0 -1
  146. package/dist/dom-polyfills.js.map +0 -1
  147. package/dist/examples/clean-api-examples.d.ts +0 -44
  148. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  149. package/dist/examples/clean-api-examples.js +0 -206
  150. package/dist/examples/clean-api-examples.js.map +0 -1
  151. package/dist/factories/index.d.ts.map +0 -1
  152. package/dist/factories/index.js.map +0 -1
  153. package/dist/factories/text-factory.d.ts.map +0 -1
  154. package/dist/factories/text-factory.js.map +0 -1
  155. package/dist/file-processor.d.ts.map +0 -1
  156. package/dist/file-processor.js.map +0 -1
  157. package/dist/index-manager.d.ts.map +0 -1
  158. package/dist/index-manager.js.map +0 -1
  159. package/dist/index.d.ts.map +0 -1
  160. package/dist/index.js.map +0 -1
  161. package/dist/indexer.d.ts.map +0 -1
  162. package/dist/indexer.js.map +0 -1
  163. package/dist/ingestion.d.ts.map +0 -1
  164. package/dist/ingestion.js.map +0 -1
  165. package/dist/mcp-server.d.ts.map +0 -1
  166. package/dist/mcp-server.js.map +0 -1
  167. package/dist/preprocess.d.ts.map +0 -1
  168. package/dist/preprocess.js.map +0 -1
  169. package/dist/preprocessors/index.d.ts.map +0 -1
  170. package/dist/preprocessors/index.js.map +0 -1
  171. package/dist/preprocessors/mdx.d.ts.map +0 -1
  172. package/dist/preprocessors/mdx.js.map +0 -1
  173. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  174. package/dist/preprocessors/mermaid.js.map +0 -1
  175. package/dist/preprocessors/registry.d.ts.map +0 -1
  176. package/dist/preprocessors/registry.js.map +0 -1
  177. package/dist/search-standalone.d.ts.map +0 -1
  178. package/dist/search-standalone.js.map +0 -1
  179. package/dist/search.d.ts.map +0 -1
  180. package/dist/search.js.map +0 -1
  181. package/dist/test-utils.d.ts.map +0 -1
  182. package/dist/test-utils.js.map +0 -1
  183. package/dist/text/chunker.d.ts.map +0 -1
  184. package/dist/text/chunker.js.map +0 -1
  185. package/dist/text/embedder.d.ts.map +0 -1
  186. package/dist/text/embedder.js.map +0 -1
  187. package/dist/text/index.d.ts.map +0 -1
  188. package/dist/text/index.js.map +0 -1
  189. package/dist/text/preprocessors/index.d.ts.map +0 -1
  190. package/dist/text/preprocessors/index.js.map +0 -1
  191. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  192. package/dist/text/preprocessors/mdx.js.map +0 -1
  193. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  194. package/dist/text/preprocessors/mermaid.js.map +0 -1
  195. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  196. package/dist/text/preprocessors/registry.js.map +0 -1
  197. package/dist/text/reranker.d.ts.map +0 -1
  198. package/dist/text/reranker.js.map +0 -1
  199. package/dist/text/tokenizer.d.ts.map +0 -1
  200. package/dist/text/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
@@ -0,0 +1,720 @@
1
+ /**
2
+ * Cross-Encoder Reranking Strategy for Text Mode
3
+ *
4
+ * Adapts the existing CrossEncoderReranker to work with the new RerankingStrategy
5
+ * interface defined in the Chameleon Multimodal Architecture.
6
+ */
7
+ import { CrossEncoderReranker } from '../text/reranker.js';
8
/**
 * Reranking strategy for text results backed by a cross-encoder model.
 *
 * Wraps a CrossEncoderReranker instance from the text module and exposes it
 * through the strategy interface (name, supportedContentTypes, rerank,
 * configure, getMetadata, isReady, cleanup). The model is loaded lazily the
 * first time it is needed.
 */
export class CrossEncoderRerankingStrategy {
    name = 'cross-encoder';
    supportedContentTypes = ['text'];
    isEnabled = true;
    reranker;
    modelName;
    initialized = false;
    /**
     * @param {string} [modelName] - Optional model name assigned onto the
     *   underlying reranker; when omitted the reranker keeps its own default.
     */
    constructor(modelName) {
        this.modelName = modelName;
        const reranker = new CrossEncoderReranker();
        if (modelName) {
            reranker.modelName = modelName;
        }
        this.reranker = reranker;
    }
    /**
     * Load the reranker model once.
     *
     * A load that completes but leaves the reranker unloaded, or a load that
     * throws, disables the strategy instead of propagating the failure.
     */
    async ensureInitialized() {
        if (this.initialized) {
            return;
        }
        try {
            await this.reranker.loadModel();
            this.initialized = true;
            const loaded = this.reranker.isLoaded();
            this.isEnabled = loaded;
            if (!loaded) {
                console.warn('Cross-encoder reranker failed to load, strategy disabled');
            }
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.warn(`Cross-encoder reranker initialization failed: ${reason}`);
            this.isEnabled = false;
        }
    }
    /**
     * Rerank results with the cross-encoder.
     *
     * Declared as an arrow-function field so it can be handed out detached
     * from the instance and still see `this`.
     *
     * @param {string} query - Search query.
     * @param {Array<object>} results - Search results to reorder.
     * @param {string} [contentType] - Optional content type; must be supported.
     * @returns {Promise<Array<object>>} Reranked text results followed by any
     *   non-text results, or the input unchanged when disabled or on failure.
     * @throws {Error} When `contentType` is given and not supported.
     */
    rerank = async (query, results, contentType) => {
        // A disabled strategy is a pass-through and never touches the model.
        if (!this.isEnabled) {
            return results;
        }
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            const supported = this.supportedContentTypes.join(', ');
            throw new Error(`Cross-encoder strategy does not support content type '${contentType}'. Supported types: ${supported}`);
        }
        await this.ensureInitialized();
        // Initialization may have just switched the strategy off.
        if (!this.isEnabled) {
            console.warn('Cross-encoder reranker not enabled, returning results unchanged');
            return results;
        }
        // Results without a content type are treated as text.
        const isTextResult = (entry) => !entry.contentType || entry.contentType === 'text';
        const textResults = results.filter(isTextResult);
        if (textResults.length === 0) {
            return results;
        }
        const skippedCount = results.length - textResults.length;
        if (skippedCount !== 0) {
            console.warn(`Cross-encoder reranker filtering ${skippedCount} non-text results from reranking`);
        }
        try {
            const rerankedTextResults = await this.reranker.rerank(query, textResults);
            if (skippedCount === 0) {
                return rerankedTextResults;
            }
            // Non-text results keep their original scores and go last.
            const nonTextResults = results.filter((entry) => !isTextResult(entry));
            return [...rerankedTextResults, ...nonTextResults];
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.warn(`Cross-encoder reranking failed: ${reason}. Returning original results.`);
            return results;
        }
    };
    /**
     * Apply configuration.
     *
     * @param {object} config
     * @param {string} [config.modelName] - Switches to a fresh reranker for
     *   this model and forces re-initialization on next use.
     * @param {*} [config.enabled] - Coerced to boolean and stored as the
     *   enabled flag when not undefined.
     */
    configure(config) {
        const { modelName, enabled } = config;
        if (typeof modelName === 'string' && modelName) {
            this.modelName = modelName;
            this.initialized = false;
            const replacement = new CrossEncoderReranker();
            replacement.modelName = modelName;
            this.reranker = replacement;
        }
        if (enabled !== undefined) {
            this.isEnabled = Boolean(enabled);
        }
    }
    /**
     * Describe the strategy: summary text, model list and config options.
     */
    getMetadata() {
        const defaultModel = 'Xenova/ms-marco-MiniLM-L-6-v2';
        const requiredModels = [
            defaultModel,
            'cross-encoder/ms-marco-MiniLM-L-6-v2',
            'cross-encoder/ms-marco-MiniLM-L-2-v2'
        ];
        const configOptions = {
            modelName: {
                type: 'string',
                description: 'Cross-encoder model name to use for reranking',
                default: defaultModel
            },
            enabled: {
                type: 'boolean',
                description: 'Enable or disable cross-encoder reranking',
                default: true
            }
        };
        return {
            description: 'Cross-encoder reranking using transformer models for improved text relevance scoring',
            requiredModels,
            configOptions
        };
    }
    /**
     * Initialize if needed, then report whether the strategy is enabled and
     * the reranker reports itself loaded.
     */
    async isReady() {
        await this.ensureInitialized();
        return this.isEnabled && this.reranker.isLoaded();
    }
    /**
     * Model name as reported by the underlying reranker.
     */
    getModelName() {
        return this.reranker.getModelName();
    }
    /**
     * Reset the initialization flag; the wrapped reranker is not given an
     * explicit teardown call here.
     */
    async cleanup() {
        this.initialized = false;
    }
}
156
/**
 * Build a cross-encoder reranking strategy.
 *
 * A plain function is used here on purpose, in place of a heavier factory
 * abstraction.
 *
 * @param {string} [modelName] - Optional cross-encoder model name, passed
 *   straight to the strategy constructor.
 * @returns {CrossEncoderRerankingStrategy} A new strategy instance.
 */
export function createCrossEncoderStrategy(modelName) {
    const strategy = new CrossEncoderRerankingStrategy(modelName);
    return strategy;
}
165
/**
 * Text-Derived Reranking Strategy Implementation
 *
 * Converts image results to text descriptions with an image-to-text pipeline,
 * reranks everything with the cross-encoder strategy, then restores the
 * original image content on the way out. This lets image results take part
 * in text-based reranking.
 */
export class TextDerivedRerankingStrategy {
    name = 'text-derived';
    supportedContentTypes = ['text', 'image'];
    isEnabled = true;
    crossEncoderReranker;
    imageToTextModel = null;
    imageToTextModelName = 'Xenova/vit-gpt2-image-captioning';
    initialized = false;
    /**
     * @param {string} [imageToTextModelName] - Overrides the default
     *   captioning model name when provided.
     * @param {string} [crossEncoderModelName] - Forwarded to the underlying
     *   cross-encoder strategy.
     */
    constructor(imageToTextModelName, crossEncoderModelName) {
        if (imageToTextModelName) {
            this.imageToTextModelName = imageToTextModelName;
        }
        // Text reranking is delegated to the cross-encoder strategy.
        this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
    }
    /**
     * Load the image-to-text pipeline if it has not been loaded yet.
     *
     * Any failure (including a missing '@huggingface/transformers' package)
     * is logged and disables the strategy instead of being thrown.
     */
    async ensureInitialized() {
        if (!this.initialized) {
            try {
                console.log(`Loading image-to-text model: ${this.imageToTextModelName}`);
                // Set up polyfills for transformers.js
                this.ensurePolyfills();
                const { pipeline } = await import('@huggingface/transformers');
                this.imageToTextModel = await pipeline('image-to-text', this.imageToTextModelName);
                this.initialized = true;
                console.log(`Image-to-text model loaded successfully: ${this.imageToTextModelName}`);
            }
            catch (error) {
                console.warn(`Image-to-text model initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
                this.isEnabled = false;
            }
        }
    }
    /**
     * Make sure a `self` global exists when there is no `window`.
     *
     * `global` only exists in Node.js, so it is checked before being
     * dereferenced; touching it unguarded throws a ReferenceError in other
     * window-less runtimes (e.g. Web Workers), which would disable the
     * strategy during initialization.
     */
    ensurePolyfills() {
        if (typeof window !== 'undefined' || typeof globalThis === 'undefined') {
            return;
        }
        if (typeof globalThis.self === 'undefined') {
            globalThis.self = globalThis;
        }
        if (typeof global !== 'undefined' && typeof global.self === 'undefined') {
            global.self = global;
        }
    }
    /**
     * Generate a text description for an image.
     *
     * @param {string} imagePath - Value handed to the image-to-text pipeline.
     * @returns {Promise<string>} The generated text, or `Image file: <name>`
     *   when the pipeline call fails.
     * @throws {Error} When the image-to-text model is not loaded.
     */
    async generateImageDescription(imagePath) {
        await this.ensureInitialized();
        if (!this.imageToTextModel) {
            throw new Error('Image-to-text model not loaded');
        }
        try {
            const result = await this.imageToTextModel(imagePath);
            // Handle different response formats from the pipeline
            if (Array.isArray(result) && result.length > 0) {
                return result[0].generated_text || result[0].text || String(result[0]);
            }
            else if (result && typeof result === 'object') {
                return result.generated_text || result.text || String(result);
            }
            else {
                return String(result);
            }
        }
        catch (error) {
            console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
            // Fall back to a filename-based description. Split on both
            // separators so Windows paths do not leak the whole path.
            const pathText = String(imagePath);
            const filename = pathText.split(/[\\/]/).pop() || pathText;
            return `Image file: ${filename}`;
        }
    }
    /**
     * Rerank search results using the text-derived approach.
     *
     * Declared as an arrow-function field so it can be handed out detached
     * from the instance and still see `this`.
     *
     * @param {string} query - Search query.
     * @param {Array<object>} results - Search results; entries whose
     *   `contentType` is 'image' are captioned before reranking.
     * @param {string} [contentType] - Optional content type; must be supported.
     * @returns {Promise<Array<object>>} Reranked results with image content
     *   restored, or the input unchanged when disabled or on failure.
     * @throws {Error} When `contentType` is given and not supported.
     */
    rerank = async (query, results, contentType) => {
        // If strategy is disabled, return results unchanged
        if (!this.isEnabled) {
            return results;
        }
        // Validate content type
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
                `Supported types: ${this.supportedContentTypes.join(', ')}`);
        }
        // Ensure models are initialized
        await this.ensureInitialized();
        // If initialization failed, return results unchanged
        if (!this.isEnabled) {
            console.warn('Text-derived reranker not enabled, returning results unchanged');
            return results;
        }
        try {
            // Step 1: Convert images to text descriptions
            const processedResults = await Promise.all(results.map(async (result) => {
                if (result.contentType === 'image') {
                    const description = await this.generateImageDescription(result.content);
                    return {
                        ...result,
                        content: description,
                        // Present the caption as text: the cross-encoder
                        // strategy only reranks text results and would
                        // otherwise set image results aside unscored.
                        contentType: 'text',
                        originalContent: result.content,
                        originalContentType: result.contentType,
                        metadata: {
                            ...result.metadata,
                            originalImagePath: result.content,
                            generatedDescription: description
                        }
                    };
                }
                return result;
            }));
            // Step 2: Use cross-encoder reranking on the text descriptions
            const rerankedResults = await this.crossEncoderReranker.rerank(query, processedResults);
            // Step 3: Restore original content and content type for images
            return rerankedResults.map(result => {
                if (result.originalContent && result.originalContentType) {
                    return {
                        ...result,
                        content: result.originalContent,
                        contentType: result.originalContentType,
                        // Keep the generated description in metadata for reference
                        metadata: {
                            ...result.metadata,
                            generatedDescription: result.content
                        }
                    };
                }
                return result;
            });
        }
        catch (error) {
            console.warn(`Text-derived reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}. ` +
                `Returning original results.`);
            return results;
        }
    };
    /**
     * Configure the reranking strategy.
     *
     * @param {object} config
     * @param {string} [config.imageToTextModel] - New captioning model name;
     *   drops the loaded pipeline so it is reloaded on next use.
     * @param {string} [config.crossEncoderModel] - Forwarded to the
     *   cross-encoder strategy as its `modelName`.
     * @param {*} [config.enabled] - Coerced to boolean when not undefined.
     */
    configure(config) {
        if (config.imageToTextModel && typeof config.imageToTextModel === 'string') {
            this.imageToTextModelName = config.imageToTextModel;
            // Reset initialization to use new model
            this.initialized = false;
            this.imageToTextModel = null;
        }
        if (config.crossEncoderModel && typeof config.crossEncoderModel === 'string') {
            this.crossEncoderReranker.configure({ modelName: config.crossEncoderModel });
        }
        if (config.enabled !== undefined) {
            this.isEnabled = Boolean(config.enabled);
        }
    }
    /**
     * Get metadata about this reranking strategy: summary text, model list
     * and config options.
     */
    getMetadata() {
        return {
            description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
            requiredModels: [
                'Xenova/vit-gpt2-image-captioning', // Image-to-text model
                'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
            ],
            configOptions: {
                imageToTextModel: {
                    type: 'string',
                    description: 'Image-to-text model name for generating descriptions',
                    default: 'Xenova/vit-gpt2-image-captioning'
                },
                crossEncoderModel: {
                    type: 'string',
                    description: 'Cross-encoder model name for text reranking',
                    default: 'Xenova/ms-marco-MiniLM-L-6-v2'
                },
                enabled: {
                    type: 'boolean',
                    description: 'Enable or disable text-derived reranking',
                    default: true
                }
            }
        };
    }
    /**
     * Initialize if needed, then report whether the strategy is enabled, the
     * image-to-text pipeline is loaded and the cross-encoder strategy is ready.
     */
    async isReady() {
        await this.ensureInitialized();
        const crossEncoderReady = await this.crossEncoderReranker.isReady();
        return this.isEnabled && this.imageToTextModel !== null && crossEncoderReady;
    }
    /**
     * Get the current model names being used.
     *
     * @returns {{imageToText: string, crossEncoder: *}}
     */
    getModelNames() {
        return {
            imageToText: this.imageToTextModelName,
            crossEncoder: this.crossEncoderReranker.getModelName()
        };
    }
    /**
     * Drop the loaded image-to-text pipeline and clean up the cross-encoder
     * strategy.
     */
    async cleanup() {
        this.initialized = false;
        this.imageToTextModel = null;
        await this.crossEncoderReranker.cleanup();
    }
}
385
/**
 * Build a text-derived reranking strategy.
 *
 * @param {string} [imageToTextModelName] - Optional captioning model name.
 * @param {string} [crossEncoderModelName] - Optional cross-encoder model name.
 * @returns {TextDerivedRerankingStrategy} A new strategy instance.
 */
export function createTextDerivedStrategy(imageToTextModelName, crossEncoderModelName) {
    const strategy = new TextDerivedRerankingStrategy(imageToTextModelName, crossEncoderModelName);
    return strategy;
}
391
/**
 * Build a standalone rerank function backed by a new text-derived strategy.
 *
 * @param {string} [imageToTextModelName] - Optional captioning model name.
 * @param {string} [crossEncoderModelName] - Optional cross-encoder model name.
 * @returns {Function} The `rerank` member of the newly created strategy.
 */
export function createTextDerivedRerankFunction(imageToTextModelName, crossEncoderModelName) {
    const { rerank } = createTextDerivedStrategy(imageToTextModelName, crossEncoderModelName);
    return rerank;
}
398
/**
 * Metadata-Based Reranking Strategy Implementation
 *
 * Reranks search results from filename patterns, content type and metadata
 * fields, blended with each result's original score. No model is required.
 */
export class MetadataRerankingStrategy {
    name = 'metadata';
    supportedContentTypes = ['text', 'image', 'pdf', 'docx'];
    isEnabled = true;
    config;
    /**
     * @param {object} [config] - Optional overrides, merged key-by-key over
     *   the defaults.
     * @param {object} [config.weights] - Weights for the filename,
     *   contentType and metadata components.
     * @param {object} [config.boostFactors] - Multipliers keyed by a
     *   substring looked for in the filename.
     * @param {object} [config.keywordBoosts] - Multipliers keyed by a keyword
     *   looked for in the filename.
     */
    constructor(config) {
        // Default configuration with reasonable weights and boost factors
        const defaultConfig = {
            weights: {
                filename: 0.4,
                contentType: 0.3,
                metadata: 0.3
            },
            boostFactors: {
                diagram: 1.5,
                chart: 1.4,
                graph: 1.4,
                image: 1.2,
                screenshot: 1.3,
                figure: 1.3
            },
            keywordBoosts: {
                // Technical terms
                'api': 1.2,
                'architecture': 1.3,
                'design': 1.2,
                'implementation': 1.2,
                'configuration': 1.2,
                'setup': 1.2,
                'guide': 1.2,
                'tutorial': 1.2,
                'example': 1.1,
                'demo': 1.1,
                // Visual content indicators
                'visual': 1.3,
                'overview': 1.2,
                'flow': 1.3,
                'process': 1.2,
                'workflow': 1.3
            }
        };
        this.config = {
            weights: { ...defaultConfig.weights, ...config?.weights },
            boostFactors: { ...defaultConfig.boostFactors, ...config?.boostFactors },
            keywordBoosts: { ...defaultConfig.keywordBoosts, ...config?.keywordBoosts }
        };
    }
    /**
     * Calculate the filename-based score.
     *
     * @param {string} query - Search query.
     * @param {string} filename - File name to compare against the query.
     * @returns {number} Score capped at 2.0.
     */
    calculateFilenameScore(query, filename) {
        const queryLower = query.toLowerCase();
        const filenameLower = filename.toLowerCase();
        let score = 0;
        // Whole query contained in the filename gets the largest increment
        if (filenameLower.includes(queryLower)) {
            score += 1.0;
        }
        // Word-level matching; words of two characters or fewer are ignored
        const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
        const filenameWords = filenameLower.split(/[_\-\s\.]+/).filter(word => word.length > 2);
        for (const queryWord of queryWords) {
            for (const filenameWord of filenameWords) {
                if (filenameWord.includes(queryWord) || queryWord.includes(filenameWord)) {
                    score += 0.3;
                }
            }
        }
        // Boosts are multiplicative, so they only amplify an existing match
        for (const [keyword, boost] of Object.entries(this.config.keywordBoosts)) {
            if (filenameLower.includes(keyword)) {
                score *= boost;
            }
        }
        for (const [pattern, boost] of Object.entries(this.config.boostFactors)) {
            if (filenameLower.includes(pattern)) {
                score *= boost;
            }
        }
        return Math.min(score, 2.0); // Cap at 2.0 to prevent extreme scores
    }
    /**
     * Calculate the content-type-based score.
     *
     * @param {string} query - Search query.
     * @param {string} [contentType] - Content type of the result.
     * @returns {number} Base score for the type (0.5 when unknown), doubled
     *   for images on visual queries or multiplied by 1.8 for documents on
     *   text-oriented queries.
     */
    calculateContentTypeScore(query, contentType) {
        const queryLower = query.toLowerCase();
        // Base scores for different content types
        const contentTypeScores = {
            'image': 0.8,
            'text': 1.0,
            'pdf': 0.9,
            'docx': 0.9
        };
        // Own-property lookup only: an inherited key such as 'constructor'
        // would otherwise yield a function and poison the score with NaN.
        const isKnownType = Object.prototype.hasOwnProperty.call(contentTypeScores, contentType);
        let score = isKnownType ? contentTypeScores[contentType] : 0.5;
        // Boost image content for visual-related queries
        if (contentType === 'image') {
            const visualKeywords = ['diagram', 'chart', 'graph', 'image', 'visual', 'screenshot', 'figure', 'illustration', 'visualization'];
            if (visualKeywords.some(keyword => queryLower.includes(keyword))) {
                score *= 2.0;
            }
        }
        // Boost document content for text-heavy queries
        if (contentType === 'text' || contentType === 'pdf' || contentType === 'docx') {
            const textKeywords = ['documentation', 'guide', 'tutorial', 'explanation', 'description', 'details'];
            if (textKeywords.some(keyword => queryLower.includes(keyword))) {
                score *= 1.8;
            }
        }
        return score;
    }
    /**
     * Calculate the metadata-based score.
     *
     * @param {string} query - Search query.
     * @param {object|null} [metadata] - Result metadata; null and undefined
     *   are both treated as empty.
     * @returns {number} Non-negative score.
     */
    calculateMetadataScore(query, metadata = {}) {
        // The default parameter only covers undefined; guard null as well.
        const fields = metadata ?? {};
        let score = 0;
        const queryLower = query.toLowerCase();
        // Query words do not depend on the field, so compute them once.
        const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
        // Check various metadata fields
        const metadataFields = ['title', 'description', 'tags', 'category', 'type'];
        for (const field of metadataFields) {
            const value = fields[field];
            if (typeof value === 'string') {
                const valueLower = value.toLowerCase();
                if (valueLower.includes(queryLower)) {
                    score += 0.5;
                }
                // Word-level matching
                for (const word of queryWords) {
                    if (valueLower.includes(word)) {
                        score += 0.2;
                    }
                }
            }
            else if (Array.isArray(value)) {
                // Handle tag arrays
                for (const item of value) {
                    if (typeof item === 'string' && item.toLowerCase().includes(queryLower)) {
                        score += 0.3;
                    }
                }
            }
        }
        // Special handling for image metadata
        if (fields.dimensions) {
            // Larger images might be more important
            const { width, height } = fields.dimensions;
            if (width && height && width * height > 500000) { // > 500k pixels
                score += 0.5;
            }
        }
        // File size considerations
        if (fields.fileSize) {
            // Very small files might be less important
            if (fields.fileSize < 1000) {
                score -= 0.1;
            }
        }
        return Math.max(score, 0); // Ensure non-negative
    }
    /**
     * Rerank search results using metadata-based scoring.
     *
     * Declared as an arrow-function field so it can be handed out detached
     * from the instance and still see `this`.
     *
     * @param {string} query - Search query.
     * @param {Array<object>} results - Search results; each is read for
     *   `document.source`, `contentType`, `metadata` and `score`.
     * @param {string} [contentType] - Optional content type; an unsupported
     *   value only produces a warning.
     * @returns {Promise<Array<object>>} New array sorted by combined score
     *   (descending), each entry carrying `metadata.rerankingScores`; the
     *   input is returned unchanged when disabled or on failure.
     */
    rerank = async (query, results, contentType) => {
        // If strategy is disabled, return results unchanged
        if (!this.isEnabled) {
            return results;
        }
        // Validate content type if specified
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            console.warn(`Metadata strategy does not support content type '${contentType}'. ` +
                `Supported types: ${this.supportedContentTypes.join(', ')}. Proceeding anyway.`);
        }
        try {
            const { weights } = this.config;
            // Calculate metadata scores for each result
            const scoredResults = results.map(result => {
                // A result without a document source simply gets no filename
                // score instead of aborting the whole rerank. Both path
                // separators are honoured so directory names never leak into
                // the filename score on Windows-style paths.
                const source = String(result.document?.source ?? '');
                const filename = source.split(/[\\/]/).pop() || source;
                // Calculate individual scores
                const filenameScore = this.calculateFilenameScore(query, filename);
                const contentTypeScore = this.calculateContentTypeScore(query, result.contentType);
                const metadataScore = this.calculateMetadataScore(query, result.metadata);
                // Combine scores using configured weights
                const metadataBoost = (filenameScore * weights.filename +
                    contentTypeScore * weights.contentType +
                    metadataScore * weights.metadata);
                // A missing or non-finite original score counts as 0 so the
                // combined score stays a number and the sort stays defined.
                const baseScore = Number.isFinite(result.score) ? result.score : 0;
                // Weighted blend where the metadata boost has the larger share
                const combinedScore = baseScore * 0.4 + metadataBoost * 0.6;
                return {
                    ...result,
                    score: combinedScore,
                    metadata: {
                        ...result.metadata,
                        rerankingScores: {
                            original: result.score,
                            filename: filenameScore,
                            contentType: contentTypeScore,
                            metadata: metadataScore,
                            combined: combinedScore
                        }
                    }
                };
            });
            // Sort by combined score (descending)
            scoredResults.sort((a, b) => b.score - a.score);
            return scoredResults;
        }
        catch (error) {
            console.warn(`Metadata reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}. ` +
                `Returning original results.`);
            return results;
        }
    };
    /**
     * Configure the reranking strategy. Object-valued options are merged
     * over the current values; `enabled` is coerced to boolean.
     *
     * @param {object} config
     * @param {object} [config.weights]
     * @param {object} [config.boostFactors]
     * @param {object} [config.keywordBoosts]
     * @param {*} [config.enabled]
     */
    configure(config) {
        if (config.weights && typeof config.weights === 'object') {
            this.config.weights = { ...this.config.weights, ...config.weights };
        }
        if (config.boostFactors && typeof config.boostFactors === 'object') {
            this.config.boostFactors = { ...this.config.boostFactors, ...config.boostFactors };
        }
        if (config.keywordBoosts && typeof config.keywordBoosts === 'object') {
            this.config.keywordBoosts = { ...this.config.keywordBoosts, ...config.keywordBoosts };
        }
        if (config.enabled !== undefined) {
            this.isEnabled = Boolean(config.enabled);
        }
    }
    /**
     * Get metadata about this reranking strategy. The `default` entries
     * reflect the current configuration and are copies, so editing the
     * returned object cannot change the strategy's state.
     */
    getMetadata() {
        return {
            description: 'Metadata-based reranking using filename patterns, content types, and file metadata',
            requiredModels: [], // No models required
            configOptions: {
                weights: {
                    type: 'object',
                    description: 'Weights for different scoring components',
                    default: { ...this.config.weights },
                    properties: {
                        filename: { type: 'number', min: 0, max: 1 },
                        contentType: { type: 'number', min: 0, max: 1 },
                        metadata: { type: 'number', min: 0, max: 1 }
                    }
                },
                boostFactors: {
                    type: 'object',
                    description: 'Boost factors for specific file patterns',
                    default: { ...this.config.boostFactors }
                },
                keywordBoosts: {
                    type: 'object',
                    description: 'Boost factors for specific keywords in filenames',
                    default: { ...this.config.keywordBoosts }
                },
                enabled: {
                    type: 'boolean',
                    description: 'Enable or disable metadata-based reranking',
                    default: true
                }
            }
        };
    }
    /**
     * Check if the strategy is ready to use. No model loading is involved,
     * so readiness equals the enabled flag.
     */
    async isReady() {
        return this.isEnabled;
    }
    /**
     * Get a snapshot of the current configuration.
     *
     * The nested maps are copied as well; a shallow copy would hand out the
     * live `weights`/`boostFactors`/`keywordBoosts` objects and let callers
     * change scoring by accident.
     */
    getConfig() {
        return {
            ...this.config,
            weights: { ...this.config.weights },
            boostFactors: { ...this.config.boostFactors },
            keywordBoosts: { ...this.config.keywordBoosts }
        };
    }
    /**
     * Clean up resources (no-op for metadata strategy)
     */
    async cleanup() {
        // No resources to clean up for metadata-based reranking
    }
}
697
/**
 * Build a metadata reranking strategy.
 *
 * @param {object} [config] - Optional configuration passed straight to the
 *   strategy constructor.
 * @returns {MetadataRerankingStrategy} A new strategy instance.
 */
export function createMetadataStrategy(config) {
    const strategy = new MetadataRerankingStrategy(config);
    return strategy;
}
703
/**
 * Build a standalone rerank function backed by a new metadata strategy.
 *
 * @param {object} [config] - Optional configuration for the strategy.
 * @returns {Function} The `rerank` member of the newly created strategy.
 */
export function createMetadataRerankFunction(config) {
    const { rerank } = createMetadataStrategy(config);
    return rerank;
}
710
/**
 * Build a standalone rerank function backed by a new cross-encoder strategy.
 *
 * Keeps the plain rerank-function call shape available to existing callers
 * while the work is done by the strategy-based implementation.
 *
 * @param {string} [modelName] - Optional cross-encoder model name.
 * @returns {Function} The `rerank` member of the newly created strategy.
 */
export function createCrossEncoderRerankFunction(modelName) {
    const { rerank } = createCrossEncoderStrategy(modelName);
    return rerank;
}
720
+ //# sourceMappingURL=reranking-strategies.js.map