rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
@@ -1,82 +1,82 @@
1
- import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';
2
- import { config } from './config.js';
1
+ // Set up polyfills immediately before any other imports
2
+ // Force polyfills in Node.js environment regardless of window state
3
+ if (typeof globalThis !== 'undefined') {
4
+ if (typeof globalThis.self === 'undefined') {
5
+ globalThis.self = globalThis;
6
+ }
7
+ if (typeof global.self === 'undefined') {
8
+ global.self = global;
9
+ }
10
+ }
11
+ import '../dom-polyfills.js';
12
+ import { config } from '../core/config.js';
3
13
  /**
4
- * Cross-encoder reranker for improving search result quality
5
- * Uses a cross-encoder model to rerank initial vector search results
14
+ * Embedding-based reranker for improving search result quality
15
+ * Uses embedding similarity to rerank initial vector search results
6
16
  */
7
17
  export class CrossEncoderReranker {
8
18
  model = null; // Use any to avoid complex transformers.js typing issues
9
19
  tokenizer = null;
10
- modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working model as default
11
- // Alternative models in case the primary fails
12
- static FALLBACK_MODELS = [
13
- 'Xenova/ms-marco-MiniLM-L-6-v2', // Primary - optimized for transformers.js
14
- 'cross-encoder/ms-marco-MiniLM-L-6-v2', // Original (likely to fail)
15
- 'cross-encoder/ms-marco-MiniLM-L-2-v2', // Smaller original (likely to fail)
16
- // Note: sentence-transformers/all-MiniLM-L6-v2 is a bi-encoder, not cross-encoder, so removed
17
- ];
20
+ modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
18
21
  /**
19
- * Load the cross-encoder model with graceful fallback
22
+ * Ensure DOM polyfills are set up for transformers.js
20
23
  */
21
- async loadModel() {
22
- // Try primary model first (should work since it's Xenova)
23
- if (await this.tryLoadModel(this.modelName)) {
24
- return;
25
- }
26
- // Try fallback models if primary fails
27
- console.warn(`Primary model ${this.modelName} failed, trying fallbacks...`);
28
- for (const fallbackModel of CrossEncoderReranker.FALLBACK_MODELS) {
29
- if (fallbackModel === this.modelName)
30
- continue; // Skip already tried model
31
- console.warn(`Trying fallback model: ${fallbackModel}`);
32
- if (await this.tryLoadModel(fallbackModel)) {
33
- this.modelName = fallbackModel;
34
- return;
24
+ ensurePolyfills() {
25
+ // Use the exact same approach as the working standalone version
26
+ if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
27
+ if (typeof globalThis.self === 'undefined') {
28
+ globalThis.self = globalThis;
35
29
  }
30
+ if (typeof global.self === 'undefined') {
31
+ global.self = global;
32
+ }
33
+ // Polyfills already set up at module level
36
34
  }
37
- console.warn('All cross-encoder models failed to load. Reranking will be disabled.');
38
- this.model = null;
39
- this.tokenizer = null;
35
+ }
36
+ /**
37
+ * Load the embedding model
38
+ */
39
+ async loadModel() {
40
+ await this.tryLoadModel(this.modelName);
40
41
  }
41
42
  /**
42
43
  * Try to load a specific model
43
44
  */
44
45
  async tryLoadModel(modelName) {
45
- try {
46
- console.log(`Loading cross-encoder model: ${modelName}`);
47
- // Load model and tokenizer separately for proper cross-encoder usage
48
- this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
49
- cache_dir: config.model_cache_path,
50
- dtype: 'fp32' // Suppress dtype warnings
51
- });
52
- this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
53
- cache_dir: config.model_cache_path
54
- });
55
- console.log(`Cross-encoder model loaded successfully: ${modelName}`);
56
- return true;
57
- }
58
- catch (error) {
59
- console.warn(`Failed to load model ${modelName}: ${error instanceof Error ? error.message : 'Unknown error'}`);
60
- return false;
61
- }
46
+ console.log(`Loading cross-encoder model: ${modelName}`);
47
+ // Ensure polyfills are set up exactly like the working standalone version
48
+ this.ensurePolyfills();
49
+ // Use the exact same approach as the working standalone test
50
+ const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
51
+ console.log('Loading model...');
52
+ this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
53
+ cache_dir: config.model_cache_path,
54
+ dtype: 'fp32'
55
+ });
56
+ console.log('Loading tokenizer...');
57
+ this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
58
+ cache_dir: config.model_cache_path
59
+ });
60
+ console.log(`Cross-encoder model loaded successfully: ${modelName}`);
62
61
  }
63
62
  /**
64
- * Rerank search results using cross-encoder scoring
63
+ * Rerank search results using embedding similarity scoring
65
64
  * @param query - Original search query
66
65
  * @param results - Initial search results from vector search
67
66
  * @returns Promise resolving to reranked results
68
67
  */
69
68
  async rerank(query, results) {
70
- if (!this.model || !this.tokenizer) {
69
+ if (!this.model) {
71
70
  throw new Error('Cross-encoder model not loaded. Call loadModel() first.');
72
71
  }
73
72
  if (results.length === 0) {
74
73
  return results;
75
74
  }
76
75
  try {
77
- // Prepare queries and documents for proper cross-encoder format
76
+ // Use cross-encoder approach - prepare queries and documents for proper cross-encoder format
78
77
  const queries = results.map(() => query);
79
- const documents = results.map(result => result.text);
78
+ const documents = results.map(result => result.content);
79
+ console.log('Tokenizing query-document pairs...');
80
80
  // Tokenize using the proper cross-encoder format with text_pair
81
81
  const features = this.tokenizer(queries, {
82
82
  text_pair: documents,
@@ -84,6 +84,7 @@ export class CrossEncoderReranker {
84
84
  truncation: true,
85
85
  return_tensors: 'pt'
86
86
  });
87
+ console.log('Running cross-encoder inference...');
87
88
  // Get model predictions
88
89
  const output = await this.model(features);
89
90
  // Extract logits - these are the raw relevance scores
@@ -134,15 +135,33 @@ export class CrossEncoderReranker {
134
135
  getModelName() {
135
136
  return this.modelName;
136
137
  }
138
+ /**
139
+ * Compute cosine similarity between two Float32Array embeddings
140
+ */
141
+ cosineSimilarity(a, b) {
142
+ if (a.length !== b.length) {
143
+ return 0;
144
+ }
145
+ let dotProduct = 0;
146
+ let normA = 0;
147
+ let normB = 0;
148
+ for (let i = 0; i < a.length; i++) {
149
+ dotProduct += a[i] * b[i];
150
+ normA += a[i] * a[i];
151
+ normB += b[i] * b[i];
152
+ }
153
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
154
+ return magnitude === 0 ? 0 : dotProduct / magnitude;
155
+ }
137
156
  /**
138
157
  * Simple text-based reranking using keyword matching and text similarity
139
- * This is a fallback when cross-encoder models don't work well for the content
158
+ * This is a fallback when embedding similarity doesn't provide good discrimination
140
159
  */
141
160
  simpleTextReranking(query, results) {
142
161
  const queryLower = query.toLowerCase();
143
162
  const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
144
163
  const rerankedResults = results.map(result => {
145
- const textLower = result.text.toLowerCase();
164
+ const textLower = result.content.toLowerCase();
146
165
  const titleLower = result.document.title?.toLowerCase() || '';
147
166
  let score = result.score; // Start with vector search score
148
167
  let bonus = 0;
@@ -209,4 +228,70 @@ export class CrossEncoderReranker {
209
228
  return rerankedResults;
210
229
  }
211
230
  }
231
+ /*
232
+ *
233
+ * Create a RerankFunction implementation using the embedding-based reranker
234
+ * This function implements the core RerankFunction interface for dependency injection
235
+ * @param modelName - Optional model name override
236
+ * @returns RerankFunction that can be injected into core components
237
+ */
238
+ export function createTextRerankFunction(modelName) {
239
+ let reranker = null;
240
+ const rerankFunction = async (query, results, contentType) => {
241
+ // Only support text content type
242
+ if (contentType && contentType !== 'text') {
243
+ throw new Error(`Text reranker only supports 'text' content type, got: ${contentType}`);
244
+ }
245
+ // Initialize reranker if not already done
246
+ if (!reranker) {
247
+ reranker = new CrossEncoderReranker();
248
+ if (modelName) {
249
+ // Set custom model name if provided
250
+ reranker.modelName = modelName;
251
+ }
252
+ await reranker.loadModel();
253
+ }
254
+ // If reranker failed to load, return results unchanged
255
+ if (!reranker.isLoaded()) {
256
+ console.warn('Text reranker not loaded, returning results unchanged');
257
+ return results;
258
+ }
259
+ // Use the existing rerank method
260
+ return await reranker.rerank(query, results);
261
+ };
262
+ return rerankFunction;
263
+ }
264
+ /**
265
+ * Create a text reranker factory function
266
+ * @param modelName - Optional model name override
267
+ * @returns Factory function that creates initialized rerankers
268
+ */
269
+ export function createTextReranker(modelName) {
270
+ return {
271
+ async rerank(query, results) {
272
+ const reranker = new CrossEncoderReranker();
273
+ if (modelName) {
274
+ reranker.modelName = modelName;
275
+ }
276
+ await reranker.loadModel();
277
+ if (!reranker.isLoaded()) {
278
+ console.warn('Text reranker not loaded, returning results unchanged');
279
+ return results;
280
+ }
281
+ return reranker.rerank(query, results);
282
+ },
283
+ async loadModel() {
284
+ const reranker = new CrossEncoderReranker();
285
+ if (modelName) {
286
+ reranker.modelName = modelName;
287
+ }
288
+ await reranker.loadModel();
289
+ },
290
+ isLoaded() {
291
+ // For the factory version, we create new instances each time
292
+ // so we can't track loaded state
293
+ return true;
294
+ }
295
+ };
296
+ }
212
297
  //# sourceMappingURL=reranker.js.map
@@ -0,0 +1,96 @@
1
+ /**
2
+ * TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
3
+ * Implements UniversalEmbedder interface for sentence-transformer models
4
+ * Adapts existing text embedding logic to the universal interface
5
+ */
6
+ import '../dom-polyfills.js';
7
+ import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
8
+ import type { EmbeddingResult } from '../types.js';
9
+ /**
10
+ * Sentence transformer embedder implementation
11
+ * Supports sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2
12
+ * Ensures consistent EmbeddingResult format with contentType='text'
13
+ * Adapts existing EmbeddingEngine to UniversalEmbedder interface
14
+ */
15
+ export declare class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
16
+ private embeddingEngine;
17
+ private resourceManager;
18
+ private embedderResourceId?;
19
+ private engineResourceId?;
20
+ constructor(modelName: string, options?: EmbedderOptions);
21
+ /**
22
+ * Load the sentence transformer model using existing EmbeddingEngine
23
+ */
24
+ loadModel(): Promise<void>;
25
+ /**
26
+ * Clean up model resources with comprehensive disposal
27
+ */
28
+ cleanup(): Promise<void>;
29
+ /**
30
+ * Embed text using the existing EmbeddingEngine
31
+ */
32
+ embedText(text: string): Promise<EmbeddingResult>;
33
+ /**
34
+ * Optimized batch processing using existing EmbeddingEngine and BatchProcessingOptimizer
35
+ * Overrides the base implementation for better performance with progress reporting
36
+ */
37
+ protected processBatch(batch: Array<{
38
+ content: string;
39
+ contentType: string;
40
+ metadata?: Record<string, any>;
41
+ }>): Promise<EmbeddingResult[]>;
42
+ /**
43
+ * Get model-specific information
44
+ */
45
+ getModelInfo(): {
46
+ capabilities: {
47
+ supportsSemanticSimilarity: boolean;
48
+ supportsTextClassification: boolean;
49
+ supportsTextClustering: boolean;
50
+ recommendedUseCase: string;
51
+ supportsText: boolean;
52
+ supportsImages: boolean;
53
+ supportsBatchProcessing: boolean;
54
+ supportsMetadata: boolean;
55
+ maxBatchSize?: number;
56
+ maxTextLength?: number;
57
+ supportedImageFormats?: readonly string[];
58
+ supportsMultimodal?: boolean;
59
+ supportsCrossModalSearch?: boolean;
60
+ unifiedEmbeddingSpace?: boolean;
61
+ reliableImplementation?: boolean;
62
+ };
63
+ name: string;
64
+ type: import("../core/universal-embedder.js").ModelType;
65
+ dimensions: number;
66
+ version: string;
67
+ supportedContentTypes: readonly string[];
68
+ requirements: import("../types.js").ModelRequirements;
69
+ };
70
+ /**
71
+ * Check if the model is suitable for a specific task
72
+ */
73
+ isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval'): boolean;
74
+ /**
75
+ * Embed document batch using existing EmbeddingEngine's optimized method
76
+ * This method provides compatibility with the existing document ingestion pipeline
77
+ */
78
+ embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
79
+ /**
80
+ * Get the model version from the underlying EmbeddingEngine
81
+ */
82
+ getModelVersion(): string;
83
+ /**
84
+ * Get the batch size from the underlying EmbeddingEngine
85
+ */
86
+ getBatchSize(): number;
87
+ /**
88
+ * Check if the underlying EmbeddingEngine is loaded
89
+ */
90
+ isEngineLoaded(): boolean;
91
+ /**
92
+ * Override isLoaded to check both internal state and engine state
93
+ */
94
+ isLoaded(): boolean;
95
+ }
96
+ //# sourceMappingURL=sentence-transformer-embedder.d.ts.map