rag-lite-ts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/README.md +606 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/chunker.d.ts +2 -0
  12. package/dist/core/cli-database-utils.d.ts +53 -0
  13. package/dist/core/cli-database-utils.js +239 -0
  14. package/dist/core/config.js +10 -3
  15. package/dist/core/content-errors.d.ts +111 -0
  16. package/dist/core/content-errors.js +362 -0
  17. package/dist/core/content-manager.d.ts +343 -0
  18. package/dist/core/content-manager.js +1504 -0
  19. package/dist/core/content-performance-optimizer.d.ts +150 -0
  20. package/dist/core/content-performance-optimizer.js +516 -0
  21. package/dist/core/content-resolver.d.ts +104 -0
  22. package/dist/core/content-resolver.js +285 -0
  23. package/dist/core/cross-modal-search.d.ts +164 -0
  24. package/dist/core/cross-modal-search.js +342 -0
  25. package/dist/core/database-connection-manager.d.ts +109 -0
  26. package/dist/core/database-connection-manager.js +304 -0
  27. package/dist/core/db.d.ts +141 -2
  28. package/dist/core/db.js +631 -89
  29. package/dist/core/embedder-factory.d.ts +176 -0
  30. package/dist/core/embedder-factory.js +338 -0
  31. package/dist/core/index.d.ts +3 -1
  32. package/dist/core/index.js +4 -1
  33. package/dist/core/ingestion.d.ts +85 -15
  34. package/dist/core/ingestion.js +510 -45
  35. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  36. package/dist/core/lazy-dependency-loader.js +453 -0
  37. package/dist/core/mode-detection-service.d.ts +150 -0
  38. package/dist/core/mode-detection-service.js +565 -0
  39. package/dist/core/mode-model-validator.d.ts +92 -0
  40. package/dist/core/mode-model-validator.js +203 -0
  41. package/dist/core/model-registry.d.ts +120 -0
  42. package/dist/core/model-registry.js +415 -0
  43. package/dist/core/model-validator.d.ts +217 -0
  44. package/dist/core/model-validator.js +782 -0
  45. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  46. package/dist/core/polymorphic-search-factory.js +344 -0
  47. package/dist/core/raglite-paths.d.ts +121 -0
  48. package/dist/core/raglite-paths.js +145 -0
  49. package/dist/core/reranking-config.d.ts +42 -0
  50. package/dist/core/reranking-config.js +156 -0
  51. package/dist/core/reranking-factory.d.ts +92 -0
  52. package/dist/core/reranking-factory.js +591 -0
  53. package/dist/core/reranking-strategies.d.ts +325 -0
  54. package/dist/core/reranking-strategies.js +720 -0
  55. package/dist/core/resource-cleanup.d.ts +163 -0
  56. package/dist/core/resource-cleanup.js +371 -0
  57. package/dist/core/resource-manager.d.ts +212 -0
  58. package/dist/core/resource-manager.js +564 -0
  59. package/dist/core/search.d.ts +28 -1
  60. package/dist/core/search.js +83 -5
  61. package/dist/core/streaming-operations.d.ts +145 -0
  62. package/dist/core/streaming-operations.js +409 -0
  63. package/dist/core/types.d.ts +3 -0
  64. package/dist/core/universal-embedder.d.ts +177 -0
  65. package/dist/core/universal-embedder.js +139 -0
  66. package/dist/core/validation-messages.d.ts +99 -0
  67. package/dist/core/validation-messages.js +334 -0
  68. package/dist/core/vector-index.js +7 -8
  69. package/dist/factories/index.d.ts +1 -1
  70. package/dist/factories/text-factory.d.ts +128 -34
  71. package/dist/factories/text-factory.js +346 -97
  72. package/dist/file-processor.d.ts +88 -2
  73. package/dist/file-processor.js +720 -17
  74. package/dist/index.d.ts +9 -0
  75. package/dist/index.js +11 -0
  76. package/dist/ingestion.d.ts +16 -0
  77. package/dist/ingestion.js +21 -0
  78. package/dist/mcp-server.d.ts +35 -3
  79. package/dist/mcp-server.js +1107 -31
  80. package/dist/multimodal/clip-embedder.d.ts +314 -0
  81. package/dist/multimodal/clip-embedder.js +945 -0
  82. package/dist/multimodal/index.d.ts +6 -0
  83. package/dist/multimodal/index.js +6 -0
  84. package/dist/run-error-recovery-tests.d.ts +7 -0
  85. package/dist/run-error-recovery-tests.js +101 -0
  86. package/dist/search.d.ts +26 -0
  87. package/dist/search.js +54 -1
  88. package/dist/test-utils.d.ts +8 -26
  89. package/dist/text/chunker.d.ts +1 -0
  90. package/dist/text/embedder.js +15 -8
  91. package/dist/text/index.d.ts +1 -0
  92. package/dist/text/index.js +1 -0
  93. package/dist/text/reranker.d.ts +1 -2
  94. package/dist/text/reranker.js +17 -47
  95. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  96. package/dist/text/sentence-transformer-embedder.js +340 -0
  97. package/dist/types.d.ts +39 -0
  98. package/dist/utils/vector-math.d.ts +31 -0
  99. package/dist/utils/vector-math.js +70 -0
  100. package/package.json +15 -3
  101. package/dist/api-errors.d.ts.map +0 -1
  102. package/dist/api-errors.js.map +0 -1
  103. package/dist/cli/indexer.d.ts.map +0 -1
  104. package/dist/cli/indexer.js.map +0 -1
  105. package/dist/cli/search.d.ts.map +0 -1
  106. package/dist/cli/search.js.map +0 -1
  107. package/dist/cli.d.ts.map +0 -1
  108. package/dist/cli.js.map +0 -1
  109. package/dist/config.d.ts.map +0 -1
  110. package/dist/config.js.map +0 -1
  111. package/dist/core/adapters.d.ts.map +0 -1
  112. package/dist/core/adapters.js.map +0 -1
  113. package/dist/core/chunker.d.ts.map +0 -1
  114. package/dist/core/chunker.js.map +0 -1
  115. package/dist/core/config.d.ts.map +0 -1
  116. package/dist/core/config.js.map +0 -1
  117. package/dist/core/db.d.ts.map +0 -1
  118. package/dist/core/db.js.map +0 -1
  119. package/dist/core/error-handler.d.ts.map +0 -1
  120. package/dist/core/error-handler.js.map +0 -1
  121. package/dist/core/index.d.ts.map +0 -1
  122. package/dist/core/index.js.map +0 -1
  123. package/dist/core/ingestion.d.ts.map +0 -1
  124. package/dist/core/ingestion.js.map +0 -1
  125. package/dist/core/interfaces.d.ts.map +0 -1
  126. package/dist/core/interfaces.js.map +0 -1
  127. package/dist/core/path-manager.d.ts.map +0 -1
  128. package/dist/core/path-manager.js.map +0 -1
  129. package/dist/core/search-example.d.ts +0 -25
  130. package/dist/core/search-example.d.ts.map +0 -1
  131. package/dist/core/search-example.js +0 -138
  132. package/dist/core/search-example.js.map +0 -1
  133. package/dist/core/search-pipeline-example.d.ts +0 -21
  134. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  135. package/dist/core/search-pipeline-example.js +0 -188
  136. package/dist/core/search-pipeline-example.js.map +0 -1
  137. package/dist/core/search-pipeline.d.ts.map +0 -1
  138. package/dist/core/search-pipeline.js.map +0 -1
  139. package/dist/core/search.d.ts.map +0 -1
  140. package/dist/core/search.js.map +0 -1
  141. package/dist/core/types.d.ts.map +0 -1
  142. package/dist/core/types.js.map +0 -1
  143. package/dist/core/vector-index.d.ts.map +0 -1
  144. package/dist/core/vector-index.js.map +0 -1
  145. package/dist/dom-polyfills.d.ts.map +0 -1
  146. package/dist/dom-polyfills.js.map +0 -1
  147. package/dist/examples/clean-api-examples.d.ts +0 -44
  148. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  149. package/dist/examples/clean-api-examples.js +0 -206
  150. package/dist/examples/clean-api-examples.js.map +0 -1
  151. package/dist/factories/index.d.ts.map +0 -1
  152. package/dist/factories/index.js.map +0 -1
  153. package/dist/factories/text-factory.d.ts.map +0 -1
  154. package/dist/factories/text-factory.js.map +0 -1
  155. package/dist/file-processor.d.ts.map +0 -1
  156. package/dist/file-processor.js.map +0 -1
  157. package/dist/index-manager.d.ts.map +0 -1
  158. package/dist/index-manager.js.map +0 -1
  159. package/dist/index.d.ts.map +0 -1
  160. package/dist/index.js.map +0 -1
  161. package/dist/indexer.d.ts.map +0 -1
  162. package/dist/indexer.js.map +0 -1
  163. package/dist/ingestion.d.ts.map +0 -1
  164. package/dist/ingestion.js.map +0 -1
  165. package/dist/mcp-server.d.ts.map +0 -1
  166. package/dist/mcp-server.js.map +0 -1
  167. package/dist/preprocess.d.ts.map +0 -1
  168. package/dist/preprocess.js.map +0 -1
  169. package/dist/preprocessors/index.d.ts.map +0 -1
  170. package/dist/preprocessors/index.js.map +0 -1
  171. package/dist/preprocessors/mdx.d.ts.map +0 -1
  172. package/dist/preprocessors/mdx.js.map +0 -1
  173. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  174. package/dist/preprocessors/mermaid.js.map +0 -1
  175. package/dist/preprocessors/registry.d.ts.map +0 -1
  176. package/dist/preprocessors/registry.js.map +0 -1
  177. package/dist/search-standalone.d.ts.map +0 -1
  178. package/dist/search-standalone.js.map +0 -1
  179. package/dist/search.d.ts.map +0 -1
  180. package/dist/search.js.map +0 -1
  181. package/dist/test-utils.d.ts.map +0 -1
  182. package/dist/test-utils.js.map +0 -1
  183. package/dist/text/chunker.d.ts.map +0 -1
  184. package/dist/text/chunker.js.map +0 -1
  185. package/dist/text/embedder.d.ts.map +0 -1
  186. package/dist/text/embedder.js.map +0 -1
  187. package/dist/text/index.d.ts.map +0 -1
  188. package/dist/text/index.js.map +0 -1
  189. package/dist/text/preprocessors/index.d.ts.map +0 -1
  190. package/dist/text/preprocessors/index.js.map +0 -1
  191. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  192. package/dist/text/preprocessors/mdx.js.map +0 -1
  193. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  194. package/dist/text/preprocessors/mermaid.js.map +0 -1
  195. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  196. package/dist/text/preprocessors/registry.js.map +0 -1
  197. package/dist/text/reranker.d.ts.map +0 -1
  198. package/dist/text/reranker.js.map +0 -1
  199. package/dist/text/tokenizer.d.ts.map +0 -1
  200. package/dist/text/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ declare function runTests(): Promise<boolean>;
6
+ export { runTests };
7
+ //# sourceMappingURL=run-error-recovery-tests.d.ts.map
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ import { spawn } from 'child_process';
6
+ import { fileURLToPath } from 'url';
7
+ import { dirname } from 'path';
8
+ const __filename = fileURLToPath(import.meta.url);
9
+ const __dirname = dirname(__filename);
10
/**
 * Run a command through the shell and capture everything it prints.
 *
 * Both output streams are always consumed: a child whose piped output is never read
 * blocks once the pipe buffer fills, which would leave the caller waiting forever.
 *
 * @param command - Executable to launch
 * @param args - Arguments passed to the executable
 * @returns Promise resolving to `{ code, output, errorOutput }` (exit code, stdout text,
 *          stderr text); rejects if the process emits an 'error' event
 */
async function runCommand(command, args) {
    const child = spawn(command, args, {
        stdio: 'pipe',
        shell: true
    });
    let output = '';
    let errorOutput = '';
    child.stdout?.on('data', (data) => {
        output += data.toString();
    });
    child.stderr?.on('data', (data) => {
        errorOutput += data.toString();
    });
    const code = await new Promise((resolve, reject) => {
        // Without an 'error' listener a failed spawn surfaces as an uncaught exception.
        child.on('error', reject);
        child.on('close', resolve);
    });
    return { code, output, errorOutput };
}
/**
 * Build the test project, then run each Chameleon error-recovery test file with
 * `node --test`, printing its output and a final summary.
 *
 * Test counts are an approximation based on pass/fail glyphs found in stdout. A file
 * that exits non-zero (or cannot be run at all) always records at least one failure,
 * so the return value cannot report success while a test file failed.
 *
 * @returns Promise resolving to true when no failure was recorded, false otherwise
 */
async function runTests() {
    console.log('🧪 Running Chameleon Error Recovery and Reliability Tests...\n');
    const testFiles = [
        'chameleon-error-recovery.test.ts',
        'chameleon-reliability-integration.test.ts',
        'chameleon-stress-testing.test.ts',
        'chameleon-error-simulation.test.ts'
    ];
    let totalTests = 0;
    let passedTests = 0;
    let failedTests = 0;
    // The build command is the same for every test file, so compile once up front.
    // A build failure is remembered and re-thrown per file below, which keeps the
    // per-file "Failed to run" reporting and failure count.
    let buildError = null;
    try {
        const build = await runCommand('npx', ['tsc', '--project', 'tsconfig.test.json']);
        if (build.code !== 0) {
            const diagnostics = `${build.output}${build.errorOutput}`.trim();
            if (diagnostics) {
                console.error(diagnostics);
            }
            buildError = new Error(`Build failed with code ${build.code}`);
        }
    }
    catch (error) {
        buildError = error;
    }
    for (const testFile of testFiles) {
        console.log(`\n📋 Running ${testFile}...`);
        try {
            if (buildError !== null) {
                throw buildError;
            }
            // Swap only the trailing extension, not the first ".ts" found in the name.
            const compiledTest = `dist/${testFile.replace(/\.ts$/, '.js')}`;
            const { code, output, errorOutput } = await runCommand('node', ['--test', compiledTest]);
            console.log(`Exit code: ${code}`);
            if (output) {
                console.log('Output:', output);
            }
            if (errorOutput) {
                console.log('Errors:', errorOutput);
            }
            // Count tests (this is a simple approximation).
            // NOTE(review): relies on these glyphs appearing in the reporter output;
            // confirm the reporter used when stdout is piped actually prints them.
            const testMatches = output.match(/✓|×/g);
            const currentTests = testMatches ? testMatches.length : 0;
            totalTests += currentTests;
            if (code === 0) {
                passedTests += currentTests;
                console.log(`✅ ${testFile} completed successfully`);
            }
            else {
                // A failing file must register as a failure even when no glyph matched.
                failedTests += Math.max(currentTests, 1);
                console.log(`❌ ${testFile} failed`);
            }
        }
        catch (error) {
            console.error(`❌ Failed to run ${testFile}:`, error instanceof Error ? error.message : String(error));
            failedTests++;
        }
    }
    console.log('\n📊 Test Summary:');
    console.log(`Total Tests: ${totalTests}`);
    console.log(`Passed: ${passedTests}`);
    console.log(`Failed: ${failedTests}`);
    if (failedTests === 0) {
        console.log('\n🎉 All error recovery tests completed!');
        console.log('✅ System demonstrates robust error handling and recovery mechanisms');
    }
    else {
        console.log('\n⚠️ Some tests failed - this may be expected in test environments');
        console.log('🔍 Review the output above for specific failure details');
    }
    return failedTests === 0;
}
96
// Run tests if this file is executed directly.
// Compare filesystem paths rather than a hand-built `file://` string: import.meta.url
// is a percent-encoded URL (and `file:///C:/...` on Windows), so concatenating
// process.argv[1] onto "file://" fails to match for such paths.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
    runTests().catch(console.error);
}
100
+ export { runTests };
101
+ //# sourceMappingURL=run-error-recovery-tests.js.map
package/dist/search.d.ts CHANGED
@@ -40,6 +40,32 @@ export declare class SearchEngine {
40
40
  * Perform semantic search
41
41
  */
42
42
  search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
43
+ /**
44
+ * Retrieve content by ID in the specified format
45
+ * @param contentId - Content ID to retrieve
46
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
47
+ * @returns Promise that resolves to content in requested format
48
+ */
49
+ getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
50
+ /**
51
+ * Retrieve multiple content items efficiently in batch
52
+ * @param contentIds - Array of content IDs to retrieve
53
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
54
+ * @returns Promise that resolves to array of content in requested format
55
+ */
56
+ getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
57
+ /**
58
+ * Retrieve content metadata for result enhancement
59
+ * @param contentId - Content ID to get metadata for
60
+ * @returns Promise that resolves to content metadata
61
+ */
62
+ getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
63
+ /**
64
+ * Verify that content exists and is accessible
65
+ * @param contentId - Content ID to verify
66
+ * @returns Promise that resolves to true if content exists, false otherwise
67
+ */
68
+ verifyContentExists(contentId: string): Promise<boolean>;
43
69
  /**
44
70
  * Clean up resources
45
71
  */
package/dist/search.js CHANGED
@@ -74,8 +74,11 @@ export class SearchEngine {
74
74
  const db = await openDatabase(this.dbPath);
75
75
  const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
76
76
  await indexManager.initialize();
77
+ // Create ContentResolver for unified content system
78
+ const { ContentResolver } = await import('./core/content-resolver.js');
79
+ const contentResolver = new ContentResolver(db);
77
80
  // Create core engine with dependency injection
78
- this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn);
81
+ this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
79
82
  }
80
83
  else {
81
84
  // Use factory for standard initialization
@@ -94,6 +97,56 @@ export class SearchEngine {
94
97
  }
95
98
  return this.coreEngine.search(query, options);
96
99
  }
100
+ /**
101
+ * Retrieve content by ID in the specified format
102
+ * @param contentId - Content ID to retrieve
103
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
104
+ * @returns Promise that resolves to content in requested format
105
+ */
106
+ async getContent(contentId, format = 'file') {
107
+ await this.initialize();
108
+ if (!this.coreEngine) {
109
+ throw new Error('SearchEngine failed to initialize');
110
+ }
111
+ return this.coreEngine.getContent(contentId, format);
112
+ }
113
+ /**
114
+ * Retrieve multiple content items efficiently in batch
115
+ * @param contentIds - Array of content IDs to retrieve
116
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
117
+ * @returns Promise that resolves to array of content in requested format
118
+ */
119
+ async getContentBatch(contentIds, format = 'file') {
120
+ await this.initialize();
121
+ if (!this.coreEngine) {
122
+ throw new Error('SearchEngine failed to initialize');
123
+ }
124
+ return this.coreEngine.getContentBatch(contentIds, format);
125
+ }
126
+ /**
127
+ * Retrieve content metadata for result enhancement
128
+ * @param contentId - Content ID to get metadata for
129
+ * @returns Promise that resolves to content metadata
130
+ */
131
+ async getContentMetadata(contentId) {
132
+ await this.initialize();
133
+ if (!this.coreEngine) {
134
+ throw new Error('SearchEngine failed to initialize');
135
+ }
136
+ return this.coreEngine.getContentMetadata(contentId);
137
+ }
138
+ /**
139
+ * Verify that content exists and is accessible
140
+ * @param contentId - Content ID to verify
141
+ * @returns Promise that resolves to true if content exists, false otherwise
142
+ */
143
+ async verifyContentExists(contentId) {
144
+ await this.initialize();
145
+ if (!this.coreEngine) {
146
+ throw new Error('SearchEngine failed to initialize');
147
+ }
148
+ return this.coreEngine.verifyContentExists(contentId);
149
+ }
97
150
  /**
98
151
  * Clean up resources
99
152
  */
@@ -2,35 +2,17 @@
2
2
  * Test utilities for multi-model support
3
3
  * Provides common configurations and helpers for testing with different embedding models
4
4
  */
5
- export declare const TEST_MODELS: readonly [{
6
- readonly name: "sentence-transformers/all-MiniLM-L6-v2";
7
- readonly dimensions: 384;
8
- readonly chunkSize: 250;
9
- readonly batchSize: 16;
10
- }, {
11
- readonly name: "Xenova/all-mpnet-base-v2";
12
- readonly dimensions: 768;
13
- readonly chunkSize: 400;
14
- readonly batchSize: 8;
15
- }];
5
+ export interface TestModel {
6
+ name: string;
7
+ dimensions: number;
8
+ chunkSize: number;
9
+ batchSize: number;
10
+ }
11
+ export declare const TEST_MODELS: TestModel[];
16
12
  /**
17
13
  * Retrieve model configuration by name
18
14
  * @param modelName - The name of the model to retrieve
19
15
  * @returns Model configuration object or undefined if not found
20
16
  */
21
- export declare function getTestModel(modelName: string): {
22
- readonly name: "sentence-transformers/all-MiniLM-L6-v2";
23
- readonly dimensions: 384;
24
- readonly chunkSize: 250;
25
- readonly batchSize: 16;
26
- } | {
27
- readonly name: "Xenova/all-mpnet-base-v2";
28
- readonly dimensions: 768;
29
- readonly chunkSize: 400;
30
- readonly batchSize: 8;
31
- } | undefined;
32
- /**
33
- * Type for test model configuration
34
- */
35
- export type TestModel = typeof TEST_MODELS[number];
17
+ export declare function getTestModel(modelName: string): TestModel | undefined;
36
18
  //# sourceMappingURL=test-utils.d.ts.map
@@ -11,6 +11,7 @@ export interface Document {
11
11
  source: string;
12
12
  title: string;
13
13
  content: string;
14
+ metadata?: Record<string, any>;
14
15
  }
15
16
  export interface Chunk {
16
17
  text: string;
@@ -2,6 +2,7 @@ import '../dom-polyfills.js';
2
2
  import { createHash } from 'crypto';
3
3
  import { config } from '../core/config.js';
4
4
  import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
5
+ import { createModelLoadingError, createInvalidContentError, createMissingDependencyError } from '../core/actionable-error-messages.js';
5
6
  /**
6
7
  * List of supported embedding models
7
8
  */
@@ -22,8 +23,7 @@ export class EmbeddingEngine {
22
23
  this.batchSize = batchSize || config.batch_size;
23
24
  // Validate that the model is supported
24
25
  if (!SUPPORTED_MODELS.includes(this.modelName)) {
25
- throw new Error(`Unsupported model: ${this.modelName}\n` +
26
- `Supported models: ${SUPPORTED_MODELS.join(', ')}`);
26
+ throw createModelLoadingError(this.modelName, `Model not in supported list. Supported models: ${SUPPORTED_MODELS.join(', ')}`, { operationContext: 'EmbeddingEngine constructor' });
27
27
  }
28
28
  console.log(`🤖 EmbeddingEngine initialized with model: ${this.modelName}, batchSize: ${this.batchSize}`);
29
29
  }
@@ -88,7 +88,10 @@ export class EmbeddingEngine {
88
88
  */
89
89
  async embedBatch(texts) {
90
90
  if (!this.model) {
91
- throw new Error('Model not loaded. Call loadModel() first.');
91
+ throw createMissingDependencyError('model', 'object', {
92
+ operationContext: 'embedBatch',
93
+ includeTroubleshooting: true
94
+ });
92
95
  }
93
96
  if (texts.length === 0) {
94
97
  return [];
@@ -123,7 +126,8 @@ export class EmbeddingEngine {
123
126
  const vector = new Float32Array(embeddingData[i]);
124
127
  results.push({
125
128
  embedding_id,
126
- vector
129
+ vector,
130
+ contentType: 'text'
127
131
  });
128
132
  }
129
133
  return results;
@@ -173,7 +177,8 @@ export class EmbeddingEngine {
173
177
  const vector = new Float32Array(embeddingData[0]);
174
178
  return {
175
179
  embedding_id,
176
- vector
180
+ vector,
181
+ contentType: 'text'
177
182
  };
178
183
  }
179
184
  catch (error) {
@@ -189,7 +194,9 @@ export class EmbeddingEngine {
189
194
  async embedSingle(text) {
190
195
  const results = await this.embedBatch([text]);
191
196
  if (results.length === 0) {
192
- throw new Error('Failed to generate embedding for single text');
197
+ throw createInvalidContentError('text', 'empty', {
198
+ operationContext: 'embedText'
199
+ });
193
200
  }
194
201
  return results[0];
195
202
  }
@@ -357,10 +364,10 @@ export function createTextEmbedFunction(modelName, batchSize) {
357
364
  }
358
365
  // Use the existing embedSingle method
359
366
  const result = await engine.embedSingle(query);
360
- // Add contentType to the result
367
+ // Ensure contentType is present (should already be included from embedSingle)
361
368
  return {
362
369
  ...result,
363
- contentType: 'text'
370
+ contentType: result.contentType || 'text'
364
371
  };
365
372
  };
366
373
  return embedFunction;
@@ -3,5 +3,6 @@ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } fr
3
3
  export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
4
4
  export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
5
5
  export { type ChunkConfig } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
6
7
  export * from './preprocessors/index.js';
7
8
  //# sourceMappingURL=index.d.ts.map
@@ -3,6 +3,7 @@ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createT
3
3
  export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
4
4
  export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
5
5
  export { chunkDocument } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
6
7
  // Re-export preprocessors
7
8
  export * from './preprocessors/index.js';
8
9
  //# sourceMappingURL=index.js.map
@@ -8,13 +8,12 @@ export declare class CrossEncoderReranker {
8
8
  private model;
9
9
  private tokenizer;
10
10
  private modelName;
11
- private static readonly FALLBACK_MODELS;
12
11
  /**
13
12
  * Ensure DOM polyfills are set up for transformers.js
14
13
  */
15
14
  private ensurePolyfills;
16
15
  /**
17
- * Load the embedding model with graceful fallback
16
+ * Load the embedding model
18
17
  */
19
18
  loadModel(): Promise<void>;
20
19
  /**
@@ -18,12 +18,6 @@ export class CrossEncoderReranker {
18
18
  model = null; // Use any to avoid complex transformers.js typing issues
19
19
  tokenizer = null;
20
20
  modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
21
- // Alternative models in case the primary fails
22
- static FALLBACK_MODELS = [
23
- 'Xenova/ms-marco-MiniLM-L-6-v2', // Primary - proven to work in standalone test
24
- 'cross-encoder/ms-marco-MiniLM-L-6-v2', // Original (may have issues)
25
- 'cross-encoder/ms-marco-MiniLM-L-2-v2', // Smaller original (may have issues)
26
- ];
27
21
  /**
28
22
  * Ensure DOM polyfills are set up for transformers.js
29
23
  */
@@ -40,54 +34,30 @@ export class CrossEncoderReranker {
40
34
  }
41
35
  }
42
36
  /**
43
- * Load the embedding model with graceful fallback
37
+ * Load the embedding model
44
38
  */
45
39
  async loadModel() {
46
- // Try primary model first (should work since it's Xenova)
47
- if (await this.tryLoadModel(this.modelName)) {
48
- return;
49
- }
50
- // Try fallback models if primary fails
51
- console.warn(`Primary model ${this.modelName} failed, trying fallbacks...`);
52
- for (const fallbackModel of CrossEncoderReranker.FALLBACK_MODELS) {
53
- if (fallbackModel === this.modelName)
54
- continue; // Skip already tried model
55
- console.warn(`Trying fallback model: ${fallbackModel}`);
56
- if (await this.tryLoadModel(fallbackModel)) {
57
- this.modelName = fallbackModel;
58
- return;
59
- }
60
- }
61
- console.warn('All embedding models failed to load. Reranking will be disabled.');
62
- this.model = null;
63
- this.tokenizer = null;
40
+ await this.tryLoadModel(this.modelName);
64
41
  }
65
42
  /**
66
43
  * Try to load a specific model
67
44
  */
68
45
  async tryLoadModel(modelName) {
69
- try {
70
- console.log(`Loading cross-encoder model: ${modelName}`);
71
- // Ensure polyfills are set up exactly like the working standalone version
72
- this.ensurePolyfills();
73
- // Use the exact same approach as the working standalone test
74
- const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
75
- console.log('Loading model...');
76
- this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
77
- cache_dir: config.model_cache_path,
78
- dtype: 'fp32'
79
- });
80
- console.log('Loading tokenizer...');
81
- this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
82
- cache_dir: config.model_cache_path
83
- });
84
- console.log(`Cross-encoder model loaded successfully: ${modelName}`);
85
- return true;
86
- }
87
- catch (error) {
88
- console.warn(`Failed to load model ${modelName}: ${error instanceof Error ? error.message : 'Unknown error'}`);
89
- return false;
90
- }
46
+ console.log(`Loading cross-encoder model: ${modelName}`);
47
+ // Ensure polyfills are set up exactly like the working standalone version
48
+ this.ensurePolyfills();
49
+ // Use the exact same approach as the working standalone test
50
+ const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
51
+ console.log('Loading model...');
52
+ this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
53
+ cache_dir: config.model_cache_path,
54
+ dtype: 'fp32'
55
+ });
56
+ console.log('Loading tokenizer...');
57
+ this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
58
+ cache_dir: config.model_cache_path
59
+ });
60
+ console.log(`Cross-encoder model loaded successfully: ${modelName}`);
91
61
  }
92
62
  /**
93
63
  * Rerank search results using embedding similarity scoring
@@ -0,0 +1,96 @@
1
+ /**
2
+ * TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
3
+ * Implements UniversalEmbedder interface for sentence-transformer models
4
+ * Adapts existing text embedding logic to the universal interface
5
+ */
6
+ import '../dom-polyfills.js';
7
+ import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
8
+ import type { EmbeddingResult } from '../types.js';
9
+ /**
10
+ * Sentence transformer embedder implementation
11
+ * Supports sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2
12
+ * Ensures consistent EmbeddingResult format with contentType='text'
13
+ * Adapts existing EmbeddingEngine to UniversalEmbedder interface
14
+ */
15
+ export declare class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
16
+ private embeddingEngine;
17
+ private resourceManager;
18
+ private embedderResourceId?;
19
+ private engineResourceId?;
20
+ constructor(modelName: string, options?: EmbedderOptions);
21
+ /**
22
+ * Load the sentence transformer model using existing EmbeddingEngine
23
+ */
24
+ loadModel(): Promise<void>;
25
+ /**
26
+ * Clean up model resources with comprehensive disposal
27
+ */
28
+ cleanup(): Promise<void>;
29
+ /**
30
+ * Embed text using the existing EmbeddingEngine
31
+ */
32
+ embedText(text: string): Promise<EmbeddingResult>;
33
+ /**
34
+ * Optimized batch processing using existing EmbeddingEngine and BatchProcessingOptimizer
35
+ * Overrides the base implementation for better performance with progress reporting
36
+ */
37
+ protected processBatch(batch: Array<{
38
+ content: string;
39
+ contentType: string;
40
+ metadata?: Record<string, any>;
41
+ }>): Promise<EmbeddingResult[]>;
42
+ /**
43
+ * Get model-specific information
44
+ */
45
+ getModelInfo(): {
46
+ capabilities: {
47
+ supportsSemanticSimilarity: boolean;
48
+ supportsTextClassification: boolean;
49
+ supportsTextClustering: boolean;
50
+ recommendedUseCase: string;
51
+ supportsText: boolean;
52
+ supportsImages: boolean;
53
+ supportsBatchProcessing: boolean;
54
+ supportsMetadata: boolean;
55
+ maxBatchSize?: number;
56
+ maxTextLength?: number;
57
+ supportedImageFormats?: readonly string[];
58
+ supportsMultimodal?: boolean;
59
+ supportsCrossModalSearch?: boolean;
60
+ unifiedEmbeddingSpace?: boolean;
61
+ reliableImplementation?: boolean;
62
+ };
63
+ name: string;
64
+ type: import("../core/universal-embedder.js").ModelType;
65
+ dimensions: number;
66
+ version: string;
67
+ supportedContentTypes: readonly string[];
68
+ requirements: import("../types.js").ModelRequirements;
69
+ };
70
+ /**
71
+ * Check if the model is suitable for a specific task
72
+ */
73
+ isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval'): boolean;
74
+ /**
75
+ * Embed document batch using existing EmbeddingEngine's optimized method
76
+ * This method provides compatibility with the existing document ingestion pipeline
77
+ */
78
+ embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
79
+ /**
80
+ * Get the model version from the underlying EmbeddingEngine
81
+ */
82
+ getModelVersion(): string;
83
+ /**
84
+ * Get the batch size from the underlying EmbeddingEngine
85
+ */
86
+ getBatchSize(): number;
87
+ /**
88
+ * Check if the underlying EmbeddingEngine is loaded
89
+ */
90
+ isEngineLoaded(): boolean;
91
+ /**
92
+ * Override isLoaded to check both internal state and engine state
93
+ */
94
+ isLoaded(): boolean;
95
+ }
96
+ //# sourceMappingURL=sentence-transformer-embedder.d.ts.map