rag-lite-ts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/README.md +606 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/chunker.d.ts +2 -0
  12. package/dist/core/cli-database-utils.d.ts +53 -0
  13. package/dist/core/cli-database-utils.js +239 -0
  14. package/dist/core/config.js +10 -3
  15. package/dist/core/content-errors.d.ts +111 -0
  16. package/dist/core/content-errors.js +362 -0
  17. package/dist/core/content-manager.d.ts +343 -0
  18. package/dist/core/content-manager.js +1504 -0
  19. package/dist/core/content-performance-optimizer.d.ts +150 -0
  20. package/dist/core/content-performance-optimizer.js +516 -0
  21. package/dist/core/content-resolver.d.ts +104 -0
  22. package/dist/core/content-resolver.js +285 -0
  23. package/dist/core/cross-modal-search.d.ts +164 -0
  24. package/dist/core/cross-modal-search.js +342 -0
  25. package/dist/core/database-connection-manager.d.ts +109 -0
  26. package/dist/core/database-connection-manager.js +304 -0
  27. package/dist/core/db.d.ts +141 -2
  28. package/dist/core/db.js +631 -89
  29. package/dist/core/embedder-factory.d.ts +176 -0
  30. package/dist/core/embedder-factory.js +338 -0
  31. package/dist/core/index.d.ts +3 -1
  32. package/dist/core/index.js +4 -1
  33. package/dist/core/ingestion.d.ts +85 -15
  34. package/dist/core/ingestion.js +510 -45
  35. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  36. package/dist/core/lazy-dependency-loader.js +453 -0
  37. package/dist/core/mode-detection-service.d.ts +150 -0
  38. package/dist/core/mode-detection-service.js +565 -0
  39. package/dist/core/mode-model-validator.d.ts +92 -0
  40. package/dist/core/mode-model-validator.js +203 -0
  41. package/dist/core/model-registry.d.ts +120 -0
  42. package/dist/core/model-registry.js +415 -0
  43. package/dist/core/model-validator.d.ts +217 -0
  44. package/dist/core/model-validator.js +782 -0
  45. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  46. package/dist/core/polymorphic-search-factory.js +344 -0
  47. package/dist/core/raglite-paths.d.ts +121 -0
  48. package/dist/core/raglite-paths.js +145 -0
  49. package/dist/core/reranking-config.d.ts +42 -0
  50. package/dist/core/reranking-config.js +156 -0
  51. package/dist/core/reranking-factory.d.ts +92 -0
  52. package/dist/core/reranking-factory.js +591 -0
  53. package/dist/core/reranking-strategies.d.ts +325 -0
  54. package/dist/core/reranking-strategies.js +720 -0
  55. package/dist/core/resource-cleanup.d.ts +163 -0
  56. package/dist/core/resource-cleanup.js +371 -0
  57. package/dist/core/resource-manager.d.ts +212 -0
  58. package/dist/core/resource-manager.js +564 -0
  59. package/dist/core/search.d.ts +28 -1
  60. package/dist/core/search.js +83 -5
  61. package/dist/core/streaming-operations.d.ts +145 -0
  62. package/dist/core/streaming-operations.js +409 -0
  63. package/dist/core/types.d.ts +3 -0
  64. package/dist/core/universal-embedder.d.ts +177 -0
  65. package/dist/core/universal-embedder.js +139 -0
  66. package/dist/core/validation-messages.d.ts +99 -0
  67. package/dist/core/validation-messages.js +334 -0
  68. package/dist/core/vector-index.js +7 -8
  69. package/dist/factories/index.d.ts +1 -1
  70. package/dist/factories/text-factory.d.ts +128 -34
  71. package/dist/factories/text-factory.js +346 -97
  72. package/dist/file-processor.d.ts +88 -2
  73. package/dist/file-processor.js +720 -17
  74. package/dist/index.d.ts +9 -0
  75. package/dist/index.js +11 -0
  76. package/dist/ingestion.d.ts +16 -0
  77. package/dist/ingestion.js +21 -0
  78. package/dist/mcp-server.d.ts +35 -3
  79. package/dist/mcp-server.js +1107 -31
  80. package/dist/multimodal/clip-embedder.d.ts +314 -0
  81. package/dist/multimodal/clip-embedder.js +945 -0
  82. package/dist/multimodal/index.d.ts +6 -0
  83. package/dist/multimodal/index.js +6 -0
  84. package/dist/run-error-recovery-tests.d.ts +7 -0
  85. package/dist/run-error-recovery-tests.js +101 -0
  86. package/dist/search.d.ts +26 -0
  87. package/dist/search.js +54 -1
  88. package/dist/test-utils.d.ts +8 -26
  89. package/dist/text/chunker.d.ts +1 -0
  90. package/dist/text/embedder.js +15 -8
  91. package/dist/text/index.d.ts +1 -0
  92. package/dist/text/index.js +1 -0
  93. package/dist/text/reranker.d.ts +1 -2
  94. package/dist/text/reranker.js +17 -47
  95. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  96. package/dist/text/sentence-transformer-embedder.js +340 -0
  97. package/dist/types.d.ts +39 -0
  98. package/dist/utils/vector-math.d.ts +31 -0
  99. package/dist/utils/vector-math.js +70 -0
  100. package/package.json +15 -3
  101. package/dist/api-errors.d.ts.map +0 -1
  102. package/dist/api-errors.js.map +0 -1
  103. package/dist/cli/indexer.d.ts.map +0 -1
  104. package/dist/cli/indexer.js.map +0 -1
  105. package/dist/cli/search.d.ts.map +0 -1
  106. package/dist/cli/search.js.map +0 -1
  107. package/dist/cli.d.ts.map +0 -1
  108. package/dist/cli.js.map +0 -1
  109. package/dist/config.d.ts.map +0 -1
  110. package/dist/config.js.map +0 -1
  111. package/dist/core/adapters.d.ts.map +0 -1
  112. package/dist/core/adapters.js.map +0 -1
  113. package/dist/core/chunker.d.ts.map +0 -1
  114. package/dist/core/chunker.js.map +0 -1
  115. package/dist/core/config.d.ts.map +0 -1
  116. package/dist/core/config.js.map +0 -1
  117. package/dist/core/db.d.ts.map +0 -1
  118. package/dist/core/db.js.map +0 -1
  119. package/dist/core/error-handler.d.ts.map +0 -1
  120. package/dist/core/error-handler.js.map +0 -1
  121. package/dist/core/index.d.ts.map +0 -1
  122. package/dist/core/index.js.map +0 -1
  123. package/dist/core/ingestion.d.ts.map +0 -1
  124. package/dist/core/ingestion.js.map +0 -1
  125. package/dist/core/interfaces.d.ts.map +0 -1
  126. package/dist/core/interfaces.js.map +0 -1
  127. package/dist/core/path-manager.d.ts.map +0 -1
  128. package/dist/core/path-manager.js.map +0 -1
  129. package/dist/core/search-example.d.ts +0 -25
  130. package/dist/core/search-example.d.ts.map +0 -1
  131. package/dist/core/search-example.js +0 -138
  132. package/dist/core/search-example.js.map +0 -1
  133. package/dist/core/search-pipeline-example.d.ts +0 -21
  134. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  135. package/dist/core/search-pipeline-example.js +0 -188
  136. package/dist/core/search-pipeline-example.js.map +0 -1
  137. package/dist/core/search-pipeline.d.ts.map +0 -1
  138. package/dist/core/search-pipeline.js.map +0 -1
  139. package/dist/core/search.d.ts.map +0 -1
  140. package/dist/core/search.js.map +0 -1
  141. package/dist/core/types.d.ts.map +0 -1
  142. package/dist/core/types.js.map +0 -1
  143. package/dist/core/vector-index.d.ts.map +0 -1
  144. package/dist/core/vector-index.js.map +0 -1
  145. package/dist/dom-polyfills.d.ts.map +0 -1
  146. package/dist/dom-polyfills.js.map +0 -1
  147. package/dist/examples/clean-api-examples.d.ts +0 -44
  148. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  149. package/dist/examples/clean-api-examples.js +0 -206
  150. package/dist/examples/clean-api-examples.js.map +0 -1
  151. package/dist/factories/index.d.ts.map +0 -1
  152. package/dist/factories/index.js.map +0 -1
  153. package/dist/factories/text-factory.d.ts.map +0 -1
  154. package/dist/factories/text-factory.js.map +0 -1
  155. package/dist/file-processor.d.ts.map +0 -1
  156. package/dist/file-processor.js.map +0 -1
  157. package/dist/index-manager.d.ts.map +0 -1
  158. package/dist/index-manager.js.map +0 -1
  159. package/dist/index.d.ts.map +0 -1
  160. package/dist/index.js.map +0 -1
  161. package/dist/indexer.d.ts.map +0 -1
  162. package/dist/indexer.js.map +0 -1
  163. package/dist/ingestion.d.ts.map +0 -1
  164. package/dist/ingestion.js.map +0 -1
  165. package/dist/mcp-server.d.ts.map +0 -1
  166. package/dist/mcp-server.js.map +0 -1
  167. package/dist/preprocess.d.ts.map +0 -1
  168. package/dist/preprocess.js.map +0 -1
  169. package/dist/preprocessors/index.d.ts.map +0 -1
  170. package/dist/preprocessors/index.js.map +0 -1
  171. package/dist/preprocessors/mdx.d.ts.map +0 -1
  172. package/dist/preprocessors/mdx.js.map +0 -1
  173. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  174. package/dist/preprocessors/mermaid.js.map +0 -1
  175. package/dist/preprocessors/registry.d.ts.map +0 -1
  176. package/dist/preprocessors/registry.js.map +0 -1
  177. package/dist/search-standalone.d.ts.map +0 -1
  178. package/dist/search-standalone.js.map +0 -1
  179. package/dist/search.d.ts.map +0 -1
  180. package/dist/search.js.map +0 -1
  181. package/dist/test-utils.d.ts.map +0 -1
  182. package/dist/test-utils.js.map +0 -1
  183. package/dist/text/chunker.d.ts.map +0 -1
  184. package/dist/text/chunker.js.map +0 -1
  185. package/dist/text/embedder.d.ts.map +0 -1
  186. package/dist/text/embedder.js.map +0 -1
  187. package/dist/text/index.d.ts.map +0 -1
  188. package/dist/text/index.js.map +0 -1
  189. package/dist/text/preprocessors/index.d.ts.map +0 -1
  190. package/dist/text/preprocessors/index.js.map +0 -1
  191. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  192. package/dist/text/preprocessors/mdx.js.map +0 -1
  193. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  194. package/dist/text/preprocessors/mermaid.js.map +0 -1
  195. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  196. package/dist/text/preprocessors/registry.js.map +0 -1
  197. package/dist/text/reranker.d.ts.map +0 -1
  198. package/dist/text/reranker.js.map +0 -1
  199. package/dist/text/tokenizer.d.ts.map +0 -1
  200. package/dist/text/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
package/dist/index.d.ts CHANGED
@@ -48,11 +48,19 @@ export { SearchEngine as CoreSearchEngine } from './core/search.js';
48
48
  export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
49
49
  export { SearchEngine } from './search.js';
50
50
  export { IngestionPipeline } from './ingestion.js';
51
+ export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
51
52
  export type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface, SearchEngineConfig, ContentTypeStrategy, ModelAgnosticInterface, ExtendedEmbeddingInterface, ExtendedRerankingInterface, SearchPipelineInterface, SearchDependencyFactory } from './core/interfaces.js';
52
53
  export { InterfaceValidator } from './core/interfaces.js';
54
+ export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode, type ModeModelValidationResult } from './core/mode-model-validator.js';
55
+ export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError, type ActionableErrorConfig } from './core/actionable-error-messages.js';
53
56
  export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
57
+ export type { UniversalEmbedder } from './core/universal-embedder.js';
58
+ export { CLIPEmbedder } from './multimodal/clip-embedder.js';
59
+ export { createEmbedder } from './core/embedder-factory.js';
54
60
  export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
55
61
  export { countTokens } from './text/tokenizer.js';
62
+ export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
63
+ export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
56
64
  export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, getModelVersion, setModelVersion, getStoredModelInfo, setStoredModelInfo, type DatabaseConnection } from './core/db.js';
57
65
  export { IndexManager } from './index-manager.js';
58
66
  export { VectorIndex } from './core/vector-index.js';
@@ -60,6 +68,7 @@ export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type
60
68
  export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
61
69
  export { chunkDocument, type ChunkConfig } from './core/chunker.js';
62
70
  export { DocumentPathManager } from './core/path-manager.js';
71
+ export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths, type RagLiteConfig, type RagLitePaths } from './core/raglite-paths.js';
63
72
  export type { SearchResult, SearchOptions, Document, EmbeddingResult, ContentDocument, ContentChunk } from './core/types.js';
64
73
  export type { Chunk, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
65
74
  export type { IngestionOptions, IngestionResult } from './core/ingestion.js';
package/dist/index.js CHANGED
@@ -56,17 +56,26 @@ export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js'
56
56
  // Public API classes
57
57
  export { SearchEngine } from './search.js';
58
58
  export { IngestionPipeline } from './ingestion.js';
59
+ // Lazy loading system for performance optimization
60
+ export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
59
61
  // Interface validation utilities
60
62
  export { InterfaceValidator } from './core/interfaces.js';
63
+ // Mode-model compatibility validation
64
+ export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode } from './core/mode-model-validator.js';
65
+ // Actionable error messages
66
+ export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError } from './core/actionable-error-messages.js';
61
67
  // =============================================================================
62
68
  // TEXT IMPLEMENTATIONS (FOR CUSTOM DEPENDENCY INJECTION)
63
69
  // =============================================================================
64
70
  // Text-specific embedding implementations
65
71
  export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction, createTextEmbedder } from './text/embedder.js';
72
+ export { CLIPEmbedder } from './multimodal/clip-embedder.js';
73
+ export { createEmbedder } from './core/embedder-factory.js';
66
74
  // Text-specific reranking implementations
67
75
  export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './text/reranker.js';
68
76
  // Text tokenization utilities
69
77
  export { countTokens } from './text/tokenizer.js';
78
+ export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
70
79
  // =============================================================================
71
80
  // CORE INFRASTRUCTURE (FOR ADVANCED USERS)
72
81
  // =============================================================================
@@ -86,6 +95,8 @@ export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROC
86
95
  export { chunkDocument } from './core/chunker.js';
87
96
  // Path management
88
97
  export { DocumentPathManager } from './core/path-manager.js';
98
+ // RAG-lite directory structure management
99
+ export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths } from './core/raglite-paths.js';
89
100
  // =============================================================================
90
101
  // ERROR HANDLING
91
102
  // =============================================================================
package/dist/ingestion.d.ts CHANGED
@@ -19,6 +19,7 @@
19
19
  */
20
20
  import { type TextIngestionOptions } from './factories/index.js';
21
21
  import type { IngestionOptions, IngestionResult } from './core/ingestion.js';
22
+ import type { MemoryContentMetadata } from './core/content-manager.js';
22
23
  export interface IngestionPipelineOptions extends TextIngestionOptions {
23
24
  }
24
25
  export declare class IngestionPipeline {
@@ -41,6 +42,21 @@ export declare class IngestionPipeline {
41
42
  * Ingest all documents in a directory
42
43
  */
43
44
  ingestDirectory(directoryPath: string, options?: IngestionOptions): Promise<IngestionResult>;
45
+ /**
46
+ * Ingest content from memory buffer
47
+ * Enables MCP integration and real-time content processing
48
+ *
49
+ * @example
50
+ * ```typescript
51
+ * const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
52
+ * const contentId = await pipeline.ingestFromMemory(buffer, {
53
+ * displayName: 'uploaded-file.txt',
54
+ * contentType: 'text/plain'
55
+ * });
56
+ * console.log('Content ingested with ID:', contentId);
57
+ * ```
58
+ */
59
+ ingestFromMemory(content: Buffer, metadata: MemoryContentMetadata, options?: IngestionOptions): Promise<string>;
44
60
  /**
45
61
  * Clean up resources
46
62
  */
package/dist/ingestion.js CHANGED
@@ -76,6 +76,27 @@ export class IngestionPipeline {
76
76
  }
77
77
  return this.corePipeline.ingestDirectory(directoryPath, options);
78
78
  }
79
+ /**
80
+ * Ingest content from memory buffer
81
+ * Enables MCP integration and real-time content processing
82
+ *
83
+ * @example
84
+ * ```typescript
85
+ * const pipeline = new IngestionPipeline('./db.sqlite', './index.bin');
86
+ * const contentId = await pipeline.ingestFromMemory(buffer, {
87
+ * displayName: 'uploaded-file.txt',
88
+ * contentType: 'text/plain'
89
+ * });
90
+ * console.log('Content ingested with ID:', contentId);
91
+ * ```
92
+ */
93
+ async ingestFromMemory(content, metadata, options) {
94
+ await this.initialize();
95
+ if (!this.corePipeline) {
96
+ throw new Error('IngestionPipeline failed to initialize');
97
+ }
98
+ return this.corePipeline.ingestFromMemory(content, metadata, options);
99
+ }
79
100
  /**
80
101
  * Clean up resources
81
102
  */
package/dist/mcp-server.d.ts CHANGED
@@ -1,14 +1,46 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * MCP server entry point for rag-lite-ts
3
+ * MCP server entry point for rag-lite-ts with Chameleon Multimodal Architecture
4
4
  *
5
- * This is a thin wrapper around existing search and indexing functions
5
+ * This is a thin wrapper around the polymorphic search and ingestion functions
6
6
  * that exposes them as MCP tools without creating REST/GraphQL endpoints.
7
7
  *
8
+ * The MCP server supports both text-only and multimodal modes:
9
+ * - Text mode: Optimized for text documents using sentence-transformer models
10
+ * - Multimodal mode: Supports mixed text and image content using CLIP models
11
+ *
12
+ * Key Features:
13
+ * - Automatic mode detection from database configuration
14
+ * - Polymorphic runtime that adapts to stored mode settings
15
+ * - Support for multiple embedding models and reranking strategies
16
+ * - Content type filtering and multimodal search capabilities
17
+ * - Comprehensive model and strategy information tools
18
+ *
8
19
  * The MCP server lives in the same package as CLI with dual entry points
9
20
  * and provides proper MCP tool definitions for search and indexing capabilities.
10
21
  *
11
- * Requirements addressed: 6.2, 6.4, 6.5, 6.6
22
+ * Requirements addressed: 6.2, 6.4, 6.5, 6.6, 9.1, 9.2, 9.3
12
23
  */
13
24
  export {};
25
+ /**
26
+ * MCP Server Multimodal Integration Complete
27
+ *
28
+ * This implementation addresses task 9.3 requirements:
29
+ * ✅ Updated MCP server configuration to support multimodal parameters
30
+ * ✅ Added new MCP tools for mode configuration and multimodal search
31
+ * ✅ Integrated with polymorphic runtime system and mode detection
32
+ * ✅ Enhanced error handling for multimodal-specific errors
33
+ * ✅ Created comprehensive documentation and examples
34
+ * ✅ Added support for content type filtering and model selection
35
+ * ✅ Implemented reranking strategy configuration
36
+ * ✅ Provided detailed system information and statistics tools
37
+ *
38
+ * Key Features Added:
39
+ * - Multimodal ingestion with mode and model parameters
40
+ * - Content type filtering in search operations
41
+ * - Comprehensive model and strategy information tools
42
+ * - Enhanced error handling with recovery guidance
43
+ * - Automatic mode detection and polymorphic behavior
44
+ * - Detailed documentation and configuration examples
45
+ */
14
46
  //# sourceMappingURL=mcp-server.d.ts.map