rag-lite-ts 1.0.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/README.md +605 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/binary-index-format.d.ts +52 -0
  12. package/dist/core/binary-index-format.js +122 -0
  13. package/dist/core/chunker.d.ts +2 -0
  14. package/dist/core/cli-database-utils.d.ts +53 -0
  15. package/dist/core/cli-database-utils.js +239 -0
  16. package/dist/core/config.js +10 -3
  17. package/dist/core/content-errors.d.ts +111 -0
  18. package/dist/core/content-errors.js +362 -0
  19. package/dist/core/content-manager.d.ts +343 -0
  20. package/dist/core/content-manager.js +1504 -0
  21. package/dist/core/content-performance-optimizer.d.ts +150 -0
  22. package/dist/core/content-performance-optimizer.js +516 -0
  23. package/dist/core/content-resolver.d.ts +104 -0
  24. package/dist/core/content-resolver.js +285 -0
  25. package/dist/core/cross-modal-search.d.ts +164 -0
  26. package/dist/core/cross-modal-search.js +342 -0
  27. package/dist/core/database-connection-manager.d.ts +109 -0
  28. package/dist/core/database-connection-manager.js +304 -0
  29. package/dist/core/db.d.ts +141 -2
  30. package/dist/core/db.js +631 -89
  31. package/dist/core/embedder-factory.d.ts +176 -0
  32. package/dist/core/embedder-factory.js +338 -0
  33. package/dist/core/index.d.ts +3 -1
  34. package/dist/core/index.js +4 -1
  35. package/dist/core/ingestion.d.ts +85 -15
  36. package/dist/core/ingestion.js +510 -45
  37. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  38. package/dist/core/lazy-dependency-loader.js +453 -0
  39. package/dist/core/mode-detection-service.d.ts +150 -0
  40. package/dist/core/mode-detection-service.js +565 -0
  41. package/dist/core/mode-model-validator.d.ts +92 -0
  42. package/dist/core/mode-model-validator.js +203 -0
  43. package/dist/core/model-registry.d.ts +120 -0
  44. package/dist/core/model-registry.js +415 -0
  45. package/dist/core/model-validator.d.ts +217 -0
  46. package/dist/core/model-validator.js +782 -0
  47. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  48. package/dist/core/polymorphic-search-factory.js +344 -0
  49. package/dist/core/raglite-paths.d.ts +121 -0
  50. package/dist/core/raglite-paths.js +145 -0
  51. package/dist/core/reranking-config.d.ts +42 -0
  52. package/dist/core/reranking-config.js +156 -0
  53. package/dist/core/reranking-factory.d.ts +92 -0
  54. package/dist/core/reranking-factory.js +591 -0
  55. package/dist/core/reranking-strategies.d.ts +325 -0
  56. package/dist/core/reranking-strategies.js +720 -0
  57. package/dist/core/resource-cleanup.d.ts +163 -0
  58. package/dist/core/resource-cleanup.js +371 -0
  59. package/dist/core/resource-manager.d.ts +212 -0
  60. package/dist/core/resource-manager.js +564 -0
  61. package/dist/core/search.d.ts +28 -1
  62. package/dist/core/search.js +83 -5
  63. package/dist/core/streaming-operations.d.ts +145 -0
  64. package/dist/core/streaming-operations.js +409 -0
  65. package/dist/core/types.d.ts +3 -0
  66. package/dist/core/universal-embedder.d.ts +177 -0
  67. package/dist/core/universal-embedder.js +139 -0
  68. package/dist/core/validation-messages.d.ts +99 -0
  69. package/dist/core/validation-messages.js +334 -0
  70. package/dist/core/vector-index.d.ts +1 -1
  71. package/dist/core/vector-index.js +37 -39
  72. package/dist/factories/index.d.ts +3 -1
  73. package/dist/factories/index.js +2 -0
  74. package/dist/factories/polymorphic-factory.d.ts +50 -0
  75. package/dist/factories/polymorphic-factory.js +159 -0
  76. package/dist/factories/text-factory.d.ts +128 -34
  77. package/dist/factories/text-factory.js +346 -97
  78. package/dist/file-processor.d.ts +88 -2
  79. package/dist/file-processor.js +720 -17
  80. package/dist/index.d.ts +32 -0
  81. package/dist/index.js +29 -0
  82. package/dist/ingestion.d.ts +16 -0
  83. package/dist/ingestion.js +21 -0
  84. package/dist/mcp-server.d.ts +35 -3
  85. package/dist/mcp-server.js +1107 -31
  86. package/dist/multimodal/clip-embedder.d.ts +327 -0
  87. package/dist/multimodal/clip-embedder.js +992 -0
  88. package/dist/multimodal/index.d.ts +6 -0
  89. package/dist/multimodal/index.js +6 -0
  90. package/dist/run-error-recovery-tests.d.ts +7 -0
  91. package/dist/run-error-recovery-tests.js +101 -0
  92. package/dist/search.d.ts +60 -9
  93. package/dist/search.js +82 -11
  94. package/dist/test-utils.d.ts +8 -26
  95. package/dist/text/chunker.d.ts +1 -0
  96. package/dist/text/embedder.js +15 -8
  97. package/dist/text/index.d.ts +1 -0
  98. package/dist/text/index.js +1 -0
  99. package/dist/text/reranker.d.ts +1 -2
  100. package/dist/text/reranker.js +17 -47
  101. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  102. package/dist/text/sentence-transformer-embedder.js +340 -0
  103. package/dist/types.d.ts +39 -0
  104. package/dist/utils/vector-math.d.ts +31 -0
  105. package/dist/utils/vector-math.js +70 -0
  106. package/package.json +27 -6
  107. package/dist/api-errors.d.ts.map +0 -1
  108. package/dist/api-errors.js.map +0 -1
  109. package/dist/cli/indexer.d.ts.map +0 -1
  110. package/dist/cli/indexer.js.map +0 -1
  111. package/dist/cli/search.d.ts.map +0 -1
  112. package/dist/cli/search.js.map +0 -1
  113. package/dist/cli.d.ts.map +0 -1
  114. package/dist/cli.js.map +0 -1
  115. package/dist/config.d.ts.map +0 -1
  116. package/dist/config.js.map +0 -1
  117. package/dist/core/adapters.d.ts.map +0 -1
  118. package/dist/core/adapters.js.map +0 -1
  119. package/dist/core/chunker.d.ts.map +0 -1
  120. package/dist/core/chunker.js.map +0 -1
  121. package/dist/core/config.d.ts.map +0 -1
  122. package/dist/core/config.js.map +0 -1
  123. package/dist/core/db.d.ts.map +0 -1
  124. package/dist/core/db.js.map +0 -1
  125. package/dist/core/error-handler.d.ts.map +0 -1
  126. package/dist/core/error-handler.js.map +0 -1
  127. package/dist/core/index.d.ts.map +0 -1
  128. package/dist/core/index.js.map +0 -1
  129. package/dist/core/ingestion.d.ts.map +0 -1
  130. package/dist/core/ingestion.js.map +0 -1
  131. package/dist/core/interfaces.d.ts.map +0 -1
  132. package/dist/core/interfaces.js.map +0 -1
  133. package/dist/core/path-manager.d.ts.map +0 -1
  134. package/dist/core/path-manager.js.map +0 -1
  135. package/dist/core/search-example.d.ts +0 -25
  136. package/dist/core/search-example.d.ts.map +0 -1
  137. package/dist/core/search-example.js +0 -138
  138. package/dist/core/search-example.js.map +0 -1
  139. package/dist/core/search-pipeline-example.d.ts +0 -21
  140. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  141. package/dist/core/search-pipeline-example.js +0 -188
  142. package/dist/core/search-pipeline-example.js.map +0 -1
  143. package/dist/core/search-pipeline.d.ts.map +0 -1
  144. package/dist/core/search-pipeline.js.map +0 -1
  145. package/dist/core/search.d.ts.map +0 -1
  146. package/dist/core/search.js.map +0 -1
  147. package/dist/core/types.d.ts.map +0 -1
  148. package/dist/core/types.js.map +0 -1
  149. package/dist/core/vector-index.d.ts.map +0 -1
  150. package/dist/core/vector-index.js.map +0 -1
  151. package/dist/dom-polyfills.d.ts.map +0 -1
  152. package/dist/dom-polyfills.js.map +0 -1
  153. package/dist/examples/clean-api-examples.d.ts +0 -44
  154. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  155. package/dist/examples/clean-api-examples.js +0 -206
  156. package/dist/examples/clean-api-examples.js.map +0 -1
  157. package/dist/factories/index.d.ts.map +0 -1
  158. package/dist/factories/index.js.map +0 -1
  159. package/dist/factories/text-factory.d.ts.map +0 -1
  160. package/dist/factories/text-factory.js.map +0 -1
  161. package/dist/file-processor.d.ts.map +0 -1
  162. package/dist/file-processor.js.map +0 -1
  163. package/dist/index-manager.d.ts.map +0 -1
  164. package/dist/index-manager.js.map +0 -1
  165. package/dist/index.d.ts.map +0 -1
  166. package/dist/index.js.map +0 -1
  167. package/dist/indexer.d.ts.map +0 -1
  168. package/dist/indexer.js.map +0 -1
  169. package/dist/ingestion.d.ts.map +0 -1
  170. package/dist/ingestion.js.map +0 -1
  171. package/dist/mcp-server.d.ts.map +0 -1
  172. package/dist/mcp-server.js.map +0 -1
  173. package/dist/preprocess.d.ts.map +0 -1
  174. package/dist/preprocess.js.map +0 -1
  175. package/dist/preprocessors/index.d.ts.map +0 -1
  176. package/dist/preprocessors/index.js.map +0 -1
  177. package/dist/preprocessors/mdx.d.ts.map +0 -1
  178. package/dist/preprocessors/mdx.js.map +0 -1
  179. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  180. package/dist/preprocessors/mermaid.js.map +0 -1
  181. package/dist/preprocessors/registry.d.ts.map +0 -1
  182. package/dist/preprocessors/registry.js.map +0 -1
  183. package/dist/search-standalone.d.ts.map +0 -1
  184. package/dist/search-standalone.js.map +0 -1
  185. package/dist/search.d.ts.map +0 -1
  186. package/dist/search.js.map +0 -1
  187. package/dist/test-utils.d.ts.map +0 -1
  188. package/dist/test-utils.js.map +0 -1
  189. package/dist/text/chunker.d.ts.map +0 -1
  190. package/dist/text/chunker.js.map +0 -1
  191. package/dist/text/embedder.d.ts.map +0 -1
  192. package/dist/text/embedder.js.map +0 -1
  193. package/dist/text/index.d.ts.map +0 -1
  194. package/dist/text/index.js.map +0 -1
  195. package/dist/text/preprocessors/index.d.ts.map +0 -1
  196. package/dist/text/preprocessors/index.js.map +0 -1
  197. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  198. package/dist/text/preprocessors/mdx.js.map +0 -1
  199. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  200. package/dist/text/preprocessors/mermaid.js.map +0 -1
  201. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  202. package/dist/text/preprocessors/registry.js.map +0 -1
  203. package/dist/text/reranker.d.ts.map +0 -1
  204. package/dist/text/reranker.js.map +0 -1
  205. package/dist/text/tokenizer.d.ts.map +0 -1
  206. package/dist/text/tokenizer.js.map +0 -1
  207. package/dist/types.d.ts.map +0 -1
  208. package/dist/types.js.map +0 -1
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ declare function runTests(): Promise<boolean>;
6
+ export { runTests };
7
+ //# sourceMappingURL=run-error-recovery-tests.d.ts.map
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ import { spawn } from 'child_process';
6
+ import { fileURLToPath } from 'url';
7
+ import { dirname } from 'path';
8
+ const __filename = fileURLToPath(import.meta.url);
9
+ const __dirname = dirname(__filename);
10
+ async function runTests() {
11
+ console.log('🧪 Running Chameleon Error Recovery and Reliability Tests...\n');
12
+ const testFiles = [
13
+ 'chameleon-error-recovery.test.ts',
14
+ 'chameleon-reliability-integration.test.ts',
15
+ 'chameleon-stress-testing.test.ts',
16
+ 'chameleon-error-simulation.test.ts'
17
+ ];
18
+ let totalTests = 0;
19
+ let passedTests = 0;
20
+ let failedTests = 0;
21
+ for (const testFile of testFiles) {
22
+ console.log(`\n📋 Running ${testFile}...`);
23
+ try {
24
+ // Build the test file first
25
+ const buildProcess = spawn('npx', ['tsc', '--project', 'tsconfig.test.json'], {
26
+ stdio: 'pipe',
27
+ shell: true
28
+ });
29
+ await new Promise((resolve, reject) => {
30
+ buildProcess.on('close', (code) => {
31
+ if (code === 0) {
32
+ resolve(code);
33
+ }
34
+ else {
35
+ reject(new Error(`Build failed with code ${code}`));
36
+ }
37
+ });
38
+ });
39
+ // Run the compiled test
40
+ const testProcess = spawn('node', ['--test', `dist/${testFile.replace('.ts', '.js')}`], {
41
+ stdio: 'pipe',
42
+ shell: true
43
+ });
44
+ let output = '';
45
+ let errorOutput = '';
46
+ testProcess.stdout?.on('data', (data) => {
47
+ output += data.toString();
48
+ });
49
+ testProcess.stderr?.on('data', (data) => {
50
+ errorOutput += data.toString();
51
+ });
52
+ await new Promise((resolve) => {
53
+ testProcess.on('close', (code) => {
54
+ console.log(`Exit code: ${code}`);
55
+ if (output) {
56
+ console.log('Output:', output);
57
+ }
58
+ if (errorOutput) {
59
+ console.log('Errors:', errorOutput);
60
+ }
61
+ // Count tests (this is a simple approximation)
62
+ const testMatches = output.match(/✓|×/g);
63
+ const currentTests = testMatches ? testMatches.length : 0;
64
+ totalTests += currentTests;
65
+ if (code === 0) {
66
+ passedTests += currentTests;
67
+ console.log(`✅ ${testFile} completed successfully`);
68
+ }
69
+ else {
70
+ failedTests += currentTests;
71
+ console.log(`❌ ${testFile} failed`);
72
+ }
73
+ resolve(code);
74
+ });
75
+ });
76
+ }
77
+ catch (error) {
78
+ console.error(`❌ Failed to run ${testFile}:`, error instanceof Error ? error.message : String(error));
79
+ failedTests++;
80
+ }
81
+ }
82
+ console.log('\n📊 Test Summary:');
83
+ console.log(`Total Tests: ${totalTests}`);
84
+ console.log(`Passed: ${passedTests}`);
85
+ console.log(`Failed: ${failedTests}`);
86
+ if (failedTests === 0) {
87
+ console.log('\n🎉 All error recovery tests completed!');
88
+ console.log('✅ System demonstrates robust error handling and recovery mechanisms');
89
+ }
90
+ else {
91
+ console.log('\n⚠️ Some tests failed - this may be expected in test environments');
92
+ console.log('🔍 Review the output above for specific failure details');
93
+ }
94
+ return failedTests === 0;
95
+ }
96
+ // Run tests if this file is executed directly
97
+ if (import.meta.url === `file://${process.argv[1]}`) {
98
+ runTests().catch(console.error);
99
+ }
100
+ export { runTests };
101
+ //# sourceMappingURL=run-error-recovery-tests.js.map
package/dist/search.d.ts CHANGED
@@ -1,25 +1,44 @@
1
1
  /**
2
- * Public API SearchEngine - Simple constructor interface with internal factory usage
2
+ * Public API SearchEngine - Simple constructor with Chameleon Architecture
3
3
  *
4
- * This class provides a clean, simple API while using the new core architecture
5
- * internally. It handles dependency injection automatically.
4
+ * This class provides a clean, simple API that automatically adapts to the mode
5
+ * (text or multimodal) stored in the database during ingestion. The system detects
6
+ * the mode and creates the appropriate embedder and reranker without user intervention.
7
+ *
8
+ * Chameleon Architecture Features:
9
+ * - Automatic mode detection from database configuration
10
+ * - Seamless switching between text and multimodal modes
11
+ * - Appropriate embedder selection (sentence-transformer or CLIP)
12
+ * - Mode-specific reranking strategies
6
13
  *
7
14
  * @example
8
15
  * ```typescript
9
- * // Simple usage
16
+ * // Simple usage - mode automatically detected from database
10
17
  * const search = new SearchEngine('./index.bin', './db.sqlite');
11
18
  * const results = await search.search('query');
12
19
  *
13
- * // With options
20
+ * // Works for both text and multimodal databases
21
+ * // Text mode: uses sentence-transformer embeddings
22
+ * // Multimodal mode: uses CLIP embeddings for cross-modal search
23
+ *
24
+ * // With options (advanced)
14
25
  * const search = new SearchEngine('./index.bin', './db.sqlite', {
15
- * embeddingModel: 'all-MiniLM-L6-v2',
16
26
  * enableReranking: true
17
27
  * });
18
28
  * ```
19
29
  */
20
- import { type TextSearchOptions } from './factories/index.js';
21
30
  import type { SearchResult, SearchOptions, EmbedFunction, RerankFunction } from './core/types.js';
22
- export interface SearchEngineOptions extends TextSearchOptions {
31
+ export interface SearchEngineOptions {
32
+ /** Embedding model name override */
33
+ embeddingModel?: string;
34
+ /** Embedding batch size override */
35
+ batchSize?: number;
36
+ /** Reranking model name override */
37
+ rerankingModel?: string;
38
+ /** Whether to enable reranking (default: true) */
39
+ enableReranking?: boolean;
40
+ /** Top-k results to return (default: from config) */
41
+ topK?: number;
23
42
  /** Custom embedding function (advanced usage) */
24
43
  embedFn?: EmbedFunction;
25
44
  /** Custom reranking function (advanced usage) */
@@ -33,13 +52,45 @@ export declare class SearchEngine {
33
52
  private initPromise;
34
53
  constructor(indexPath: string, dbPath: string, options?: SearchEngineOptions);
35
54
  /**
36
- * Initialize the search engine using the factory or direct injection
55
+ * Initialize the search engine using polymorphic factory or direct injection
56
+ *
57
+ * Chameleon Architecture Implementation:
58
+ * - Automatically detects mode from database (text or multimodal)
59
+ * - Creates appropriate embedder based on detected mode
60
+ * - Applies mode-specific reranking strategies
61
+ * - Provides seamless polymorphic behavior
37
62
  */
38
63
  private initialize;
39
64
  /**
40
65
  * Perform semantic search
41
66
  */
42
67
  search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
68
+ /**
69
+ * Retrieve content by ID in the specified format
70
+ * @param contentId - Content ID to retrieve
71
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
72
+ * @returns Promise that resolves to content in requested format
73
+ */
74
+ getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
75
+ /**
76
+ * Retrieve multiple content items efficiently in batch
77
+ * @param contentIds - Array of content IDs to retrieve
78
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
79
+ * @returns Promise that resolves to array of content in requested format
80
+ */
81
+ getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
82
+ /**
83
+ * Retrieve content metadata for result enhancement
84
+ * @param contentId - Content ID to get metadata for
85
+ * @returns Promise that resolves to content metadata
86
+ */
87
+ getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
88
+ /**
89
+ * Verify that content exists and is accessible
90
+ * @param contentId - Content ID to verify
91
+ * @returns Promise that resolves to true if content exists, false otherwise
92
+ */
93
+ verifyContentExists(contentId: string): Promise<boolean>;
43
94
  /**
44
95
  * Clean up resources
45
96
  */
package/dist/search.js CHANGED
@@ -1,24 +1,33 @@
1
1
  /**
2
- * Public API SearchEngine - Simple constructor interface with internal factory usage
2
+ * Public API SearchEngine - Simple constructor with Chameleon Architecture
3
3
  *
4
- * This class provides a clean, simple API while using the new core architecture
5
- * internally. It handles dependency injection automatically.
4
+ * This class provides a clean, simple API that automatically adapts to the mode
5
+ * (text or multimodal) stored in the database during ingestion. The system detects
6
+ * the mode and creates the appropriate embedder and reranker without user intervention.
7
+ *
8
+ * Chameleon Architecture Features:
9
+ * - Automatic mode detection from database configuration
10
+ * - Seamless switching between text and multimodal modes
11
+ * - Appropriate embedder selection (sentence-transformer or CLIP)
12
+ * - Mode-specific reranking strategies
6
13
  *
7
14
  * @example
8
15
  * ```typescript
9
- * // Simple usage
16
+ * // Simple usage - mode automatically detected from database
10
17
  * const search = new SearchEngine('./index.bin', './db.sqlite');
11
18
  * const results = await search.search('query');
12
19
  *
13
- * // With options
20
+ * // Works for both text and multimodal databases
21
+ * // Text mode: uses sentence-transformer embeddings
22
+ * // Multimodal mode: uses CLIP embeddings for cross-modal search
23
+ *
24
+ * // With options (advanced)
14
25
  * const search = new SearchEngine('./index.bin', './db.sqlite', {
15
- * embeddingModel: 'all-MiniLM-L6-v2',
16
26
  * enableReranking: true
17
27
  * });
18
28
  * ```
19
29
  */
20
30
  import { SearchEngine as CoreSearchEngine } from './core/search.js';
21
- import { TextSearchFactory } from './factories/index.js';
22
31
  export class SearchEngine {
23
32
  indexPath;
24
33
  dbPath;
@@ -42,7 +51,13 @@ export class SearchEngine {
42
51
  }
43
52
  }
44
53
  /**
45
- * Initialize the search engine using the factory or direct injection
54
+ * Initialize the search engine using polymorphic factory or direct injection
55
+ *
56
+ * Chameleon Architecture Implementation:
57
+ * - Automatically detects mode from database (text or multimodal)
58
+ * - Creates appropriate embedder based on detected mode
59
+ * - Applies mode-specific reranking strategies
60
+ * - Provides seamless polymorphic behavior
46
61
  */
47
62
  async initialize() {
48
63
  if (this.coreEngine) {
@@ -74,12 +89,18 @@ export class SearchEngine {
74
89
  const db = await openDatabase(this.dbPath);
75
90
  const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
76
91
  await indexManager.initialize();
92
+ // Create ContentResolver for unified content system
93
+ const { ContentResolver } = await import('./core/content-resolver.js');
94
+ const contentResolver = new ContentResolver(db);
77
95
  // Create core engine with dependency injection
78
- this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn);
96
+ this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
79
97
  }
80
98
  else {
81
- // Use factory for standard initialization
82
- this.coreEngine = await TextSearchFactory.create(this.indexPath, this.dbPath, this.options);
99
+ // Use core polymorphic factory for automatic mode detection (Chameleon Architecture)
100
+ // This enables SearchEngine to automatically adapt to text or multimodal mode
101
+ // based on the configuration stored in the database during ingestion
102
+ const { PolymorphicSearchFactory } = await import('./core/polymorphic-search-factory.js');
103
+ this.coreEngine = await PolymorphicSearchFactory.create(this.indexPath, this.dbPath);
83
104
  }
84
105
  })();
85
106
  return this.initPromise;
@@ -94,6 +115,56 @@ export class SearchEngine {
94
115
  }
95
116
  return this.coreEngine.search(query, options);
96
117
  }
118
+ /**
119
+ * Retrieve content by ID in the specified format
120
+ * @param contentId - Content ID to retrieve
121
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
122
+ * @returns Promise that resolves to content in requested format
123
+ */
124
+ async getContent(contentId, format = 'file') {
125
+ await this.initialize();
126
+ if (!this.coreEngine) {
127
+ throw new Error('SearchEngine failed to initialize');
128
+ }
129
+ return this.coreEngine.getContent(contentId, format);
130
+ }
131
+ /**
132
+ * Retrieve multiple content items efficiently in batch
133
+ * @param contentIds - Array of content IDs to retrieve
134
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
135
+ * @returns Promise that resolves to array of content in requested format
136
+ */
137
+ async getContentBatch(contentIds, format = 'file') {
138
+ await this.initialize();
139
+ if (!this.coreEngine) {
140
+ throw new Error('SearchEngine failed to initialize');
141
+ }
142
+ return this.coreEngine.getContentBatch(contentIds, format);
143
+ }
144
+ /**
145
+ * Retrieve content metadata for result enhancement
146
+ * @param contentId - Content ID to get metadata for
147
+ * @returns Promise that resolves to content metadata
148
+ */
149
+ async getContentMetadata(contentId) {
150
+ await this.initialize();
151
+ if (!this.coreEngine) {
152
+ throw new Error('SearchEngine failed to initialize');
153
+ }
154
+ return this.coreEngine.getContentMetadata(contentId);
155
+ }
156
+ /**
157
+ * Verify that content exists and is accessible
158
+ * @param contentId - Content ID to verify
159
+ * @returns Promise that resolves to true if content exists, false otherwise
160
+ */
161
+ async verifyContentExists(contentId) {
162
+ await this.initialize();
163
+ if (!this.coreEngine) {
164
+ throw new Error('SearchEngine failed to initialize');
165
+ }
166
+ return this.coreEngine.verifyContentExists(contentId);
167
+ }
97
168
  /**
98
169
  * Clean up resources
99
170
  */
@@ -2,35 +2,17 @@
2
2
  * Test utilities for multi-model support
3
3
  * Provides common configurations and helpers for testing with different embedding models
4
4
  */
5
- export declare const TEST_MODELS: readonly [{
6
- readonly name: "sentence-transformers/all-MiniLM-L6-v2";
7
- readonly dimensions: 384;
8
- readonly chunkSize: 250;
9
- readonly batchSize: 16;
10
- }, {
11
- readonly name: "Xenova/all-mpnet-base-v2";
12
- readonly dimensions: 768;
13
- readonly chunkSize: 400;
14
- readonly batchSize: 8;
15
- }];
5
+ export interface TestModel {
6
+ name: string;
7
+ dimensions: number;
8
+ chunkSize: number;
9
+ batchSize: number;
10
+ }
11
+ export declare const TEST_MODELS: TestModel[];
16
12
  /**
17
13
  * Retrieve model configuration by name
18
14
  * @param modelName - The name of the model to retrieve
19
15
  * @returns Model configuration object or undefined if not found
20
16
  */
21
- export declare function getTestModel(modelName: string): {
22
- readonly name: "sentence-transformers/all-MiniLM-L6-v2";
23
- readonly dimensions: 384;
24
- readonly chunkSize: 250;
25
- readonly batchSize: 16;
26
- } | {
27
- readonly name: "Xenova/all-mpnet-base-v2";
28
- readonly dimensions: 768;
29
- readonly chunkSize: 400;
30
- readonly batchSize: 8;
31
- } | undefined;
32
- /**
33
- * Type for test model configuration
34
- */
35
- export type TestModel = typeof TEST_MODELS[number];
17
+ export declare function getTestModel(modelName: string): TestModel | undefined;
36
18
  //# sourceMappingURL=test-utils.d.ts.map
@@ -11,6 +11,7 @@ export interface Document {
11
11
  source: string;
12
12
  title: string;
13
13
  content: string;
14
+ metadata?: Record<string, any>;
14
15
  }
15
16
  export interface Chunk {
16
17
  text: string;
@@ -2,6 +2,7 @@ import '../dom-polyfills.js';
2
2
  import { createHash } from 'crypto';
3
3
  import { config } from '../core/config.js';
4
4
  import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
5
+ import { createModelLoadingError, createInvalidContentError, createMissingDependencyError } from '../core/actionable-error-messages.js';
5
6
  /**
6
7
  * List of supported embedding models
7
8
  */
@@ -22,8 +23,7 @@ export class EmbeddingEngine {
22
23
  this.batchSize = batchSize || config.batch_size;
23
24
  // Validate that the model is supported
24
25
  if (!SUPPORTED_MODELS.includes(this.modelName)) {
25
- throw new Error(`Unsupported model: ${this.modelName}\n` +
26
- `Supported models: ${SUPPORTED_MODELS.join(', ')}`);
26
+ throw createModelLoadingError(this.modelName, `Model not in supported list. Supported models: ${SUPPORTED_MODELS.join(', ')}`, { operationContext: 'EmbeddingEngine constructor' });
27
27
  }
28
28
  console.log(`🤖 EmbeddingEngine initialized with model: ${this.modelName}, batchSize: ${this.batchSize}`);
29
29
  }
@@ -88,7 +88,10 @@ export class EmbeddingEngine {
88
88
  */
89
89
  async embedBatch(texts) {
90
90
  if (!this.model) {
91
- throw new Error('Model not loaded. Call loadModel() first.');
91
+ throw createMissingDependencyError('model', 'object', {
92
+ operationContext: 'embedBatch',
93
+ includeTroubleshooting: true
94
+ });
92
95
  }
93
96
  if (texts.length === 0) {
94
97
  return [];
@@ -123,7 +126,8 @@ export class EmbeddingEngine {
123
126
  const vector = new Float32Array(embeddingData[i]);
124
127
  results.push({
125
128
  embedding_id,
126
- vector
129
+ vector,
130
+ contentType: 'text'
127
131
  });
128
132
  }
129
133
  return results;
@@ -173,7 +177,8 @@ export class EmbeddingEngine {
173
177
  const vector = new Float32Array(embeddingData[0]);
174
178
  return {
175
179
  embedding_id,
176
- vector
180
+ vector,
181
+ contentType: 'text'
177
182
  };
178
183
  }
179
184
  catch (error) {
@@ -189,7 +194,9 @@ export class EmbeddingEngine {
189
194
  async embedSingle(text) {
190
195
  const results = await this.embedBatch([text]);
191
196
  if (results.length === 0) {
192
- throw new Error('Failed to generate embedding for single text');
197
+ throw createInvalidContentError('text', 'empty', {
198
+ operationContext: 'embedText'
199
+ });
193
200
  }
194
201
  return results[0];
195
202
  }
@@ -357,10 +364,10 @@ export function createTextEmbedFunction(modelName, batchSize) {
357
364
  }
358
365
  // Use the existing embedSingle method
359
366
  const result = await engine.embedSingle(query);
360
- // Add contentType to the result
367
+ // Ensure contentType is present (should already be included from embedSingle)
361
368
  return {
362
369
  ...result,
363
- contentType: 'text'
370
+ contentType: result.contentType || 'text'
364
371
  };
365
372
  };
366
373
  return embedFunction;
@@ -3,5 +3,6 @@ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } fr
3
3
  export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
4
4
  export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
5
5
  export { type ChunkConfig } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
6
7
  export * from './preprocessors/index.js';
7
8
  //# sourceMappingURL=index.d.ts.map
@@ -3,6 +3,7 @@ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createT
3
3
  export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
4
4
  export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
5
5
  export { chunkDocument } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
6
7
  // Re-export preprocessors
7
8
  export * from './preprocessors/index.js';
8
9
  //# sourceMappingURL=index.js.map
@@ -8,13 +8,12 @@ export declare class CrossEncoderReranker {
8
8
  private model;
9
9
  private tokenizer;
10
10
  private modelName;
11
- private static readonly FALLBACK_MODELS;
12
11
  /**
13
12
  * Ensure DOM polyfills are set up for transformers.js
14
13
  */
15
14
  private ensurePolyfills;
16
15
  /**
17
- * Load the embedding model with graceful fallback
16
+ * Load the embedding model
18
17
  */
19
18
  loadModel(): Promise<void>;
20
19
  /**
@@ -18,12 +18,6 @@ export class CrossEncoderReranker {
18
18
  model = null; // Use any to avoid complex transformers.js typing issues
19
19
  tokenizer = null;
20
20
  modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
21
- // Alternative models in case the primary fails
22
- static FALLBACK_MODELS = [
23
- 'Xenova/ms-marco-MiniLM-L-6-v2', // Primary - proven to work in standalone test
24
- 'cross-encoder/ms-marco-MiniLM-L-6-v2', // Original (may have issues)
25
- 'cross-encoder/ms-marco-MiniLM-L-2-v2', // Smaller original (may have issues)
26
- ];
27
21
  /**
28
22
  * Ensure DOM polyfills are set up for transformers.js
29
23
  */
@@ -40,54 +34,30 @@ export class CrossEncoderReranker {
40
34
  }
41
35
  }
42
36
  /**
43
- * Load the embedding model with graceful fallback
37
+ * Load the embedding model
44
38
  */
45
39
  async loadModel() {
46
- // Try primary model first (should work since it's Xenova)
47
- if (await this.tryLoadModel(this.modelName)) {
48
- return;
49
- }
50
- // Try fallback models if primary fails
51
- console.warn(`Primary model ${this.modelName} failed, trying fallbacks...`);
52
- for (const fallbackModel of CrossEncoderReranker.FALLBACK_MODELS) {
53
- if (fallbackModel === this.modelName)
54
- continue; // Skip already tried model
55
- console.warn(`Trying fallback model: ${fallbackModel}`);
56
- if (await this.tryLoadModel(fallbackModel)) {
57
- this.modelName = fallbackModel;
58
- return;
59
- }
60
- }
61
- console.warn('All embedding models failed to load. Reranking will be disabled.');
62
- this.model = null;
63
- this.tokenizer = null;
40
+ await this.tryLoadModel(this.modelName);
64
41
  }
65
42
  /**
66
43
  * Try to load a specific model
67
44
  */
68
45
  async tryLoadModel(modelName) {
69
- try {
70
- console.log(`Loading cross-encoder model: ${modelName}`);
71
- // Ensure polyfills are set up exactly like the working standalone version
72
- this.ensurePolyfills();
73
- // Use the exact same approach as the working standalone test
74
- const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
75
- console.log('Loading model...');
76
- this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
77
- cache_dir: config.model_cache_path,
78
- dtype: 'fp32'
79
- });
80
- console.log('Loading tokenizer...');
81
- this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
82
- cache_dir: config.model_cache_path
83
- });
84
- console.log(`Cross-encoder model loaded successfully: ${modelName}`);
85
- return true;
86
- }
87
- catch (error) {
88
- console.warn(`Failed to load model ${modelName}: ${error instanceof Error ? error.message : 'Unknown error'}`);
89
- return false;
90
- }
46
+ console.log(`Loading cross-encoder model: ${modelName}`);
47
+ // Ensure polyfills are set up exactly like the working standalone version
48
+ this.ensurePolyfills();
49
+ // Use the exact same approach as the working standalone test
50
+ const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
51
+ console.log('Loading model...');
52
+ this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
53
+ cache_dir: config.model_cache_path,
54
+ dtype: 'fp32'
55
+ });
56
+ console.log('Loading tokenizer...');
57
+ this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
58
+ cache_dir: config.model_cache_path
59
+ });
60
+ console.log(`Cross-encoder model loaded successfully: ${modelName}`);
91
61
  }
92
62
  /**
93
63
  * Rerank search results using embedding similarity scoring