rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Multimodal implementation layer exports
3
+ * Provides CLIP-based embedders for cross-modal search capabilities
4
+ */
5
+ export { CLIPEmbedder } from './clip-embedder.js';
6
+ //# sourceMappingURL=index.js.map
@@ -1,4 +1,4 @@
1
- import { mergePreprocessingConfig, validatePreprocessingConfig } from './config.js';
1
+ import { mergePreprocessingConfig, validatePreprocessingConfig } from './core/config.js';
2
2
  import { preprocessorRegistry, ContentTypeDetector } from './preprocessors/index.js';
3
3
  /**
4
4
  * Main preprocessing function that processes document content based on configuration
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ declare function runTests(): Promise<boolean>;
6
+ export { runTests };
7
+ //# sourceMappingURL=run-error-recovery-tests.d.ts.map
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Test Runner for Chameleon Error Recovery and Reliability Tests
3
+ * Runs the comprehensive error recovery test suite
4
+ */
5
+ import { spawn } from 'child_process';
6
+ import { fileURLToPath } from 'url';
7
+ import { dirname } from 'path';
8
+ const __filename = fileURLToPath(import.meta.url);
9
+ const __dirname = dirname(__filename);
10
+ async function runTests() {
11
+ console.log('🧪 Running Chameleon Error Recovery and Reliability Tests...\n');
12
+ const testFiles = [
13
+ 'chameleon-error-recovery.test.ts',
14
+ 'chameleon-reliability-integration.test.ts',
15
+ 'chameleon-stress-testing.test.ts',
16
+ 'chameleon-error-simulation.test.ts'
17
+ ];
18
+ let totalTests = 0;
19
+ let passedTests = 0;
20
+ let failedTests = 0;
21
+ for (const testFile of testFiles) {
22
+ console.log(`\n📋 Running ${testFile}...`);
23
+ try {
24
+ // Build the test file first
25
+ const buildProcess = spawn('npx', ['tsc', '--project', 'tsconfig.test.json'], {
26
+ stdio: 'pipe',
27
+ shell: true
28
+ });
29
+ await new Promise((resolve, reject) => {
30
+ buildProcess.on('close', (code) => {
31
+ if (code === 0) {
32
+ resolve(code);
33
+ }
34
+ else {
35
+ reject(new Error(`Build failed with code ${code}`));
36
+ }
37
+ });
38
+ });
39
+ // Run the compiled test
40
+ const testProcess = spawn('node', ['--test', `dist/${testFile.replace('.ts', '.js')}`], {
41
+ stdio: 'pipe',
42
+ shell: true
43
+ });
44
+ let output = '';
45
+ let errorOutput = '';
46
+ testProcess.stdout?.on('data', (data) => {
47
+ output += data.toString();
48
+ });
49
+ testProcess.stderr?.on('data', (data) => {
50
+ errorOutput += data.toString();
51
+ });
52
+ await new Promise((resolve) => {
53
+ testProcess.on('close', (code) => {
54
+ console.log(`Exit code: ${code}`);
55
+ if (output) {
56
+ console.log('Output:', output);
57
+ }
58
+ if (errorOutput) {
59
+ console.log('Errors:', errorOutput);
60
+ }
61
+ // Count tests (this is a simple approximation)
62
+ const testMatches = output.match(/✓|×/g);
63
+ const currentTests = testMatches ? testMatches.length : 0;
64
+ totalTests += currentTests;
65
+ if (code === 0) {
66
+ passedTests += currentTests;
67
+ console.log(`✅ ${testFile} completed successfully`);
68
+ }
69
+ else {
70
+ failedTests += currentTests;
71
+ console.log(`❌ ${testFile} failed`);
72
+ }
73
+ resolve(code);
74
+ });
75
+ });
76
+ }
77
+ catch (error) {
78
+ console.error(`❌ Failed to run ${testFile}:`, error instanceof Error ? error.message : String(error));
79
+ failedTests++;
80
+ }
81
+ }
82
+ console.log('\n📊 Test Summary:');
83
+ console.log(`Total Tests: ${totalTests}`);
84
+ console.log(`Passed: ${passedTests}`);
85
+ console.log(`Failed: ${failedTests}`);
86
+ if (failedTests === 0) {
87
+ console.log('\n🎉 All error recovery tests completed!');
88
+ console.log('✅ System demonstrates robust error handling and recovery mechanisms');
89
+ }
90
+ else {
91
+ console.log('\n⚠️ Some tests failed - this may be expected in test environments');
92
+ console.log('🔍 Review the output above for specific failure details');
93
+ }
94
+ return failedTests === 0;
95
+ }
96
+ // Run tests if this file is executed directly
97
+ if (import.meta.url === `file://${process.argv[1]}`) {
98
+ runTests().catch(console.error);
99
+ }
100
+ export { runTests };
101
+ //# sourceMappingURL=run-error-recovery-tests.js.map
@@ -4,7 +4,7 @@
4
4
  * Usage: node search.js <query> [--top-k <number>] [--rerank|--no-rerank]
5
5
  */
6
6
  import { runSearch } from './cli/search.js';
7
- import { EXIT_CODES, ConfigurationError } from './config.js';
7
+ import { EXIT_CODES, ConfigurationError } from './core/config.js';
8
8
  function parseArgs() {
9
9
  const args = process.argv.slice(2);
10
10
  if (args.length === 0) {
package/dist/search.d.ts CHANGED
@@ -1,91 +1,73 @@
1
- import { EmbeddingEngine } from './embedder.js';
2
- import { IndexManager } from './index-manager.js';
3
- import { DatabaseConnection } from './db.js';
4
- import type { SearchResult, SearchOptions } from './types.js';
5
1
  /**
6
- * User-friendly error class with actionable suggestions
2
+ * Public API SearchEngine - Simple constructor interface with internal factory usage
3
+ *
4
+ * This class provides a clean, simple API while using the new core architecture
5
+ * internally. It handles dependency injection automatically.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * // Simple usage
10
+ * const search = new SearchEngine('./index.bin', './db.sqlite');
11
+ * const results = await search.search('query');
12
+ *
13
+ * // With options
14
+ * const search = new SearchEngine('./index.bin', './db.sqlite', {
15
+ * embeddingModel: 'all-MiniLM-L6-v2',
16
+ * enableReranking: true
17
+ * });
18
+ * ```
7
19
  */
8
- export declare class SearchError extends Error {
9
- code: string;
10
- suggestions: string[];
11
- constructor(message: string, code: string, suggestions: string[]);
20
+ import { type TextSearchOptions } from './factories/index.js';
21
+ import type { SearchResult, SearchOptions, EmbedFunction, RerankFunction } from './core/types.js';
22
+ export interface SearchEngineOptions extends TextSearchOptions {
23
+ /** Custom embedding function (advanced usage) */
24
+ embedFn?: EmbedFunction;
25
+ /** Custom reranking function (advanced usage) */
26
+ rerankFn?: RerankFunction;
12
27
  }
13
- /**
14
- * Search engine that provides semantic search capabilities
15
- * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
16
- * Supports concurrent read operations for multiple simultaneous queries
17
- */
18
28
  export declare class SearchEngine {
19
- private static instances;
20
- private static cleanupHandlersSet;
21
- private embedder;
22
- private indexManager;
23
- private dbConnection;
24
- private reranker;
25
- private isInitialized;
26
29
  private indexPath;
27
30
  private dbPath;
28
- private enableReranking;
29
- /**
30
- * Creates a new SearchEngine with simplified constructor
31
- * Search engine is ready to use immediately without requiring initialization calls (Requirement 3.5)
32
- * @param indexPath - Path to vector index file (defaults to './vector-index.bin')
33
- * @param dbPath - Path to database file (defaults to './db.sqlite')
34
- */
35
- constructor(indexPath?: string, dbPath?: string);
36
- /**
37
- * Legacy constructor for backward compatibility
38
- * @deprecated Use the simple constructor new SearchEngine(indexPath?, dbPath?) instead
39
- */
40
- static createWithComponents(embedder: EmbeddingEngine, indexManager: IndexManager, dbConnection: DatabaseConnection, enableReranking?: boolean): SearchEngine;
41
- /**
42
- * Automatically initialize resources on first use with user-friendly error handling
43
- * Implements lazy initialization as required by Requirements 3.5, 4.3, 5.1, 5.2
44
- */
45
- private ensureInitialized;
31
+ private options;
32
+ private coreEngine;
33
+ private initPromise;
34
+ constructor(indexPath: string, dbPath: string, options?: SearchEngineOptions);
46
35
  /**
47
- * Create user-friendly error messages with actionable suggestions
48
- * Implements requirement 5.3: Clear, actionable error messages with specific next steps
36
+ * Initialize the search engine using the factory or direct injection
49
37
  */
50
- private createUserFriendlyError;
38
+ private initialize;
51
39
  /**
52
- * Initialize the search engine (public method for backward compatibility)
53
- * Sets up database, index manager, and embedding engine
40
+ * Perform semantic search
54
41
  */
55
- initialize(): Promise<void>;
42
+ search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
56
43
  /**
57
- * Perform semantic search on the indexed documents (matches README API)
58
- * Automatically initializes resources on first use (Requirements 4.1, 4.2, 4.4, 4.5)
59
- * Supports concurrent read operations for multiple simultaneous queries
60
- * @param query - Search query string
61
- * @param options - Search options including top_k and rerank settings
62
- * @returns Promise resolving to array of search results
44
+ * Retrieve content by ID in the specified format
45
+ * @param contentId - Content ID to retrieve
46
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
47
+ * @returns Promise that resolves to content in requested format
63
48
  */
64
- search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
49
+ getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
65
50
  /**
66
- * Format search results with proper structure
67
- * @param chunks - Database chunks with metadata
68
- * @param distances - Similarity distances from vector search
69
- * @param embeddingIds - Embedding IDs in search result order
70
- * @returns Formatted search results
51
+ * Retrieve multiple content items efficiently in batch
52
+ * @param contentIds - Array of content IDs to retrieve
53
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
54
+ * @returns Promise that resolves to array of content in requested format
71
55
  */
72
- private formatSearchResults;
56
+ getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
73
57
  /**
74
- * Get search engine statistics
75
- * @returns Object with current search engine stats
58
+ * Retrieve content metadata for result enhancement
59
+ * @param contentId - Content ID to get metadata for
60
+ * @returns Promise that resolves to content metadata
76
61
  */
77
- getStats(): Promise<{
78
- totalChunks: number;
79
- indexSize: number;
80
- rerankingEnabled: boolean;
81
- isInitialized: boolean;
82
- }>;
62
+ getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
83
63
  /**
84
- * Set up automatic cleanup on process exit (Requirement 5.5)
64
+ * Verify that content exists and is accessible
65
+ * @param contentId - Content ID to verify
66
+ * @returns Promise that resolves to true if content exists, false otherwise
85
67
  */
86
- private setupAutomaticCleanup;
68
+ verifyContentExists(contentId: string): Promise<boolean>;
87
69
  /**
88
- * Clean up resources (Requirement 5.5)
70
+ * Clean up resources
89
71
  */
90
72
  cleanup(): Promise<void>;
91
73
  }