rag-lite-ts 1.0.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/README.md +605 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/binary-index-format.d.ts +52 -0
  12. package/dist/core/binary-index-format.js +122 -0
  13. package/dist/core/chunker.d.ts +2 -0
  14. package/dist/core/cli-database-utils.d.ts +53 -0
  15. package/dist/core/cli-database-utils.js +239 -0
  16. package/dist/core/config.js +10 -3
  17. package/dist/core/content-errors.d.ts +111 -0
  18. package/dist/core/content-errors.js +362 -0
  19. package/dist/core/content-manager.d.ts +343 -0
  20. package/dist/core/content-manager.js +1504 -0
  21. package/dist/core/content-performance-optimizer.d.ts +150 -0
  22. package/dist/core/content-performance-optimizer.js +516 -0
  23. package/dist/core/content-resolver.d.ts +104 -0
  24. package/dist/core/content-resolver.js +285 -0
  25. package/dist/core/cross-modal-search.d.ts +164 -0
  26. package/dist/core/cross-modal-search.js +342 -0
  27. package/dist/core/database-connection-manager.d.ts +109 -0
  28. package/dist/core/database-connection-manager.js +304 -0
  29. package/dist/core/db.d.ts +141 -2
  30. package/dist/core/db.js +631 -89
  31. package/dist/core/embedder-factory.d.ts +176 -0
  32. package/dist/core/embedder-factory.js +338 -0
  33. package/dist/core/index.d.ts +3 -1
  34. package/dist/core/index.js +4 -1
  35. package/dist/core/ingestion.d.ts +85 -15
  36. package/dist/core/ingestion.js +510 -45
  37. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  38. package/dist/core/lazy-dependency-loader.js +453 -0
  39. package/dist/core/mode-detection-service.d.ts +150 -0
  40. package/dist/core/mode-detection-service.js +565 -0
  41. package/dist/core/mode-model-validator.d.ts +92 -0
  42. package/dist/core/mode-model-validator.js +203 -0
  43. package/dist/core/model-registry.d.ts +120 -0
  44. package/dist/core/model-registry.js +415 -0
  45. package/dist/core/model-validator.d.ts +217 -0
  46. package/dist/core/model-validator.js +782 -0
  47. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  48. package/dist/core/polymorphic-search-factory.js +344 -0
  49. package/dist/core/raglite-paths.d.ts +121 -0
  50. package/dist/core/raglite-paths.js +145 -0
  51. package/dist/core/reranking-config.d.ts +42 -0
  52. package/dist/core/reranking-config.js +156 -0
  53. package/dist/core/reranking-factory.d.ts +92 -0
  54. package/dist/core/reranking-factory.js +591 -0
  55. package/dist/core/reranking-strategies.d.ts +325 -0
  56. package/dist/core/reranking-strategies.js +720 -0
  57. package/dist/core/resource-cleanup.d.ts +163 -0
  58. package/dist/core/resource-cleanup.js +371 -0
  59. package/dist/core/resource-manager.d.ts +212 -0
  60. package/dist/core/resource-manager.js +564 -0
  61. package/dist/core/search.d.ts +28 -1
  62. package/dist/core/search.js +83 -5
  63. package/dist/core/streaming-operations.d.ts +145 -0
  64. package/dist/core/streaming-operations.js +409 -0
  65. package/dist/core/types.d.ts +3 -0
  66. package/dist/core/universal-embedder.d.ts +177 -0
  67. package/dist/core/universal-embedder.js +139 -0
  68. package/dist/core/validation-messages.d.ts +99 -0
  69. package/dist/core/validation-messages.js +334 -0
  70. package/dist/core/vector-index.d.ts +1 -1
  71. package/dist/core/vector-index.js +37 -39
  72. package/dist/factories/index.d.ts +3 -1
  73. package/dist/factories/index.js +2 -0
  74. package/dist/factories/polymorphic-factory.d.ts +50 -0
  75. package/dist/factories/polymorphic-factory.js +159 -0
  76. package/dist/factories/text-factory.d.ts +128 -34
  77. package/dist/factories/text-factory.js +346 -97
  78. package/dist/file-processor.d.ts +88 -2
  79. package/dist/file-processor.js +720 -17
  80. package/dist/index.d.ts +32 -0
  81. package/dist/index.js +29 -0
  82. package/dist/ingestion.d.ts +16 -0
  83. package/dist/ingestion.js +21 -0
  84. package/dist/mcp-server.d.ts +35 -3
  85. package/dist/mcp-server.js +1107 -31
  86. package/dist/multimodal/clip-embedder.d.ts +327 -0
  87. package/dist/multimodal/clip-embedder.js +992 -0
  88. package/dist/multimodal/index.d.ts +6 -0
  89. package/dist/multimodal/index.js +6 -0
  90. package/dist/run-error-recovery-tests.d.ts +7 -0
  91. package/dist/run-error-recovery-tests.js +101 -0
  92. package/dist/search.d.ts +60 -9
  93. package/dist/search.js +82 -11
  94. package/dist/test-utils.d.ts +8 -26
  95. package/dist/text/chunker.d.ts +1 -0
  96. package/dist/text/embedder.js +15 -8
  97. package/dist/text/index.d.ts +1 -0
  98. package/dist/text/index.js +1 -0
  99. package/dist/text/reranker.d.ts +1 -2
  100. package/dist/text/reranker.js +17 -47
  101. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  102. package/dist/text/sentence-transformer-embedder.js +340 -0
  103. package/dist/types.d.ts +39 -0
  104. package/dist/utils/vector-math.d.ts +31 -0
  105. package/dist/utils/vector-math.js +70 -0
  106. package/package.json +27 -6
  107. package/dist/api-errors.d.ts.map +0 -1
  108. package/dist/api-errors.js.map +0 -1
  109. package/dist/cli/indexer.d.ts.map +0 -1
  110. package/dist/cli/indexer.js.map +0 -1
  111. package/dist/cli/search.d.ts.map +0 -1
  112. package/dist/cli/search.js.map +0 -1
  113. package/dist/cli.d.ts.map +0 -1
  114. package/dist/cli.js.map +0 -1
  115. package/dist/config.d.ts.map +0 -1
  116. package/dist/config.js.map +0 -1
  117. package/dist/core/adapters.d.ts.map +0 -1
  118. package/dist/core/adapters.js.map +0 -1
  119. package/dist/core/chunker.d.ts.map +0 -1
  120. package/dist/core/chunker.js.map +0 -1
  121. package/dist/core/config.d.ts.map +0 -1
  122. package/dist/core/config.js.map +0 -1
  123. package/dist/core/db.d.ts.map +0 -1
  124. package/dist/core/db.js.map +0 -1
  125. package/dist/core/error-handler.d.ts.map +0 -1
  126. package/dist/core/error-handler.js.map +0 -1
  127. package/dist/core/index.d.ts.map +0 -1
  128. package/dist/core/index.js.map +0 -1
  129. package/dist/core/ingestion.d.ts.map +0 -1
  130. package/dist/core/ingestion.js.map +0 -1
  131. package/dist/core/interfaces.d.ts.map +0 -1
  132. package/dist/core/interfaces.js.map +0 -1
  133. package/dist/core/path-manager.d.ts.map +0 -1
  134. package/dist/core/path-manager.js.map +0 -1
  135. package/dist/core/search-example.d.ts +0 -25
  136. package/dist/core/search-example.d.ts.map +0 -1
  137. package/dist/core/search-example.js +0 -138
  138. package/dist/core/search-example.js.map +0 -1
  139. package/dist/core/search-pipeline-example.d.ts +0 -21
  140. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  141. package/dist/core/search-pipeline-example.js +0 -188
  142. package/dist/core/search-pipeline-example.js.map +0 -1
  143. package/dist/core/search-pipeline.d.ts.map +0 -1
  144. package/dist/core/search-pipeline.js.map +0 -1
  145. package/dist/core/search.d.ts.map +0 -1
  146. package/dist/core/search.js.map +0 -1
  147. package/dist/core/types.d.ts.map +0 -1
  148. package/dist/core/types.js.map +0 -1
  149. package/dist/core/vector-index.d.ts.map +0 -1
  150. package/dist/core/vector-index.js.map +0 -1
  151. package/dist/dom-polyfills.d.ts.map +0 -1
  152. package/dist/dom-polyfills.js.map +0 -1
  153. package/dist/examples/clean-api-examples.d.ts +0 -44
  154. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  155. package/dist/examples/clean-api-examples.js +0 -206
  156. package/dist/examples/clean-api-examples.js.map +0 -1
  157. package/dist/factories/index.d.ts.map +0 -1
  158. package/dist/factories/index.js.map +0 -1
  159. package/dist/factories/text-factory.d.ts.map +0 -1
  160. package/dist/factories/text-factory.js.map +0 -1
  161. package/dist/file-processor.d.ts.map +0 -1
  162. package/dist/file-processor.js.map +0 -1
  163. package/dist/index-manager.d.ts.map +0 -1
  164. package/dist/index-manager.js.map +0 -1
  165. package/dist/index.d.ts.map +0 -1
  166. package/dist/index.js.map +0 -1
  167. package/dist/indexer.d.ts.map +0 -1
  168. package/dist/indexer.js.map +0 -1
  169. package/dist/ingestion.d.ts.map +0 -1
  170. package/dist/ingestion.js.map +0 -1
  171. package/dist/mcp-server.d.ts.map +0 -1
  172. package/dist/mcp-server.js.map +0 -1
  173. package/dist/preprocess.d.ts.map +0 -1
  174. package/dist/preprocess.js.map +0 -1
  175. package/dist/preprocessors/index.d.ts.map +0 -1
  176. package/dist/preprocessors/index.js.map +0 -1
  177. package/dist/preprocessors/mdx.d.ts.map +0 -1
  178. package/dist/preprocessors/mdx.js.map +0 -1
  179. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  180. package/dist/preprocessors/mermaid.js.map +0 -1
  181. package/dist/preprocessors/registry.d.ts.map +0 -1
  182. package/dist/preprocessors/registry.js.map +0 -1
  183. package/dist/search-standalone.d.ts.map +0 -1
  184. package/dist/search-standalone.js.map +0 -1
  185. package/dist/search.d.ts.map +0 -1
  186. package/dist/search.js.map +0 -1
  187. package/dist/test-utils.d.ts.map +0 -1
  188. package/dist/test-utils.js.map +0 -1
  189. package/dist/text/chunker.d.ts.map +0 -1
  190. package/dist/text/chunker.js.map +0 -1
  191. package/dist/text/embedder.d.ts.map +0 -1
  192. package/dist/text/embedder.js.map +0 -1
  193. package/dist/text/index.d.ts.map +0 -1
  194. package/dist/text/index.js.map +0 -1
  195. package/dist/text/preprocessors/index.d.ts.map +0 -1
  196. package/dist/text/preprocessors/index.js.map +0 -1
  197. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  198. package/dist/text/preprocessors/mdx.js.map +0 -1
  199. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  200. package/dist/text/preprocessors/mermaid.js.map +0 -1
  201. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  202. package/dist/text/preprocessors/registry.js.map +0 -1
  203. package/dist/text/reranker.d.ts.map +0 -1
  204. package/dist/text/reranker.js.map +0 -1
  205. package/dist/text/tokenizer.d.ts.map +0 -1
  206. package/dist/text/tokenizer.js.map +0 -1
  207. package/dist/types.d.ts.map +0 -1
  208. package/dist/types.js.map +0 -1
@@ -0,0 +1,96 @@
1
+ /**
2
+ * TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
3
+ * Implements UniversalEmbedder interface for sentence-transformer models
4
+ * Adapts existing text embedding logic to the universal interface
5
+ */
6
+ import '../dom-polyfills.js';
7
+ import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
8
+ import type { EmbeddingResult } from '../types.js';
9
+ /**
10
+ * Sentence transformer embedder implementation
11
+ * Supports sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2
12
+ * Ensures consistent EmbeddingResult format with contentType='text'
13
+ * Adapts existing EmbeddingEngine to UniversalEmbedder interface
14
+ */
15
+ export declare class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
16
+ private embeddingEngine; // underlying EmbeddingEngine; null until loadModel() creates it, reset to null by cleanup()
17
+ private resourceManager; // obtained from getResourceManager() when the instance is constructed
18
+ private embedderResourceId?; // id returned by resourceManager.registerEmbedder(this) in the constructor; cleared by cleanup()
19
+ private engineResourceId?; // id returned by resourceManager.registerModel(...) during loadModel()
20
+ constructor(modelName: string, options?: EmbedderOptions); // options defaults to {} in the implementation
21
+ /**
22
+ * Load the sentence transformer model using existing EmbeddingEngine. Returns immediately when the embedder and its engine already report loaded; on failure the loaded flag is reset and the error produced by handleLoadingError() is thrown.
23
+ */
24
+ loadModel(): Promise<void>;
25
+ /**
26
+ * Clean up model resources with comprehensive disposal: releases the engine through the resource manager, drops the engine reference, clears the loaded flag and removes this embedder from the LazyEmbedderLoader cache. Engine disposal and cache removal failures are logged as warnings, not rethrown.
27
+ */
28
+ cleanup(): Promise<void>;
29
+ /**
30
+ * Embed text using the existing EmbeddingEngine. Rejects when the text is empty or whitespace-only, when the engine is not initialized, or when the returned vector length differs from the expected dimensions (errors from the embedding step are re-wrapped as "Failed to embed text: ..."). The result always carries contentType 'text'.
31
+ */
32
+ embedText(text: string): Promise<EmbeddingResult>;
33
+ /**
34
+ * Optimized batch processing using existing EmbeddingEngine and BatchProcessingOptimizer
35
+ * Overrides the base implementation for better performance with progress reporting. Only items whose contentType is 'text' are embedded (an empty array is returned when there are none); up to 10 text items go straight to the engine's embedBatch, larger batches go through the BatchProcessingOptimizer, and failures fall back to the base implementation.
36
+ */
37
+ protected processBatch(batch: Array<{
38
+ content: string;
39
+ contentType: string;
40
+ metadata?: Record<string, any>;
41
+ }>): Promise<EmbeddingResult[]>;
42
+ /**
43
+ * Get model-specific information: the base class model info with the three text capability flags (semantic similarity, text classification, text clustering) set to true and a recommendedUseCase string added
44
+ */
45
+ getModelInfo(): {
46
+ capabilities: {
47
+ supportsSemanticSimilarity: boolean;
48
+ supportsTextClassification: boolean;
49
+ supportsTextClustering: boolean;
50
+ recommendedUseCase: string;
51
+ supportsText: boolean;
52
+ supportsImages: boolean;
53
+ supportsBatchProcessing: boolean;
54
+ supportsMetadata: boolean;
55
+ maxBatchSize?: number;
56
+ maxTextLength?: number;
57
+ supportedImageFormats?: readonly string[];
58
+ supportsMultimodal?: boolean;
59
+ supportsCrossModalSearch?: boolean;
60
+ unifiedEmbeddingSpace?: boolean;
61
+ reliableImplementation?: boolean;
62
+ };
63
+ name: string;
64
+ type: import("../core/universal-embedder.js").ModelType;
65
+ dimensions: number;
66
+ version: string;
67
+ supportedContentTypes: readonly string[];
68
+ requirements: import("../types.js").ModelRequirements;
69
+ };
70
+ /**
71
+ * Check if the model is suitable for a specific task (the implementation returns true for each of the four task names in the signature)
72
+ */
73
+ isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval'): boolean;
74
+ /**
75
+ * Embed document batch using existing EmbeddingEngine's optimized method
76
+ * This method provides compatibility with the existing document ingestion pipeline. It calls ensureLoaded() first and rejects when the engine is not initialized; the engine's results are returned unchanged.
77
+ */
78
+ embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
79
+ /**
80
+ * Get the model version from the underlying EmbeddingEngine; throws when the engine has not been created yet
81
+ */
82
+ getModelVersion(): string;
83
+ /**
84
+ * Get the batch size from the underlying EmbeddingEngine; while no engine exists it falls back to options.maxBatchSize, then the model capabilities' maxBatchSize, then 8
85
+ */
86
+ getBatchSize(): number;
87
+ /**
88
+ * Check if the underlying EmbeddingEngine is loaded (false when no engine exists)
89
+ */
90
+ isEngineLoaded(): boolean;
91
+ /**
92
+ * Override isLoaded to check both internal state and engine state (true only when both report loaded)
93
+ */
94
+ isLoaded(): boolean;
95
+ }
96
+ //# sourceMappingURL=sentence-transformer-embedder.d.ts.map
@@ -0,0 +1,340 @@
1
+ /**
2
+ * TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
3
+ * Implements UniversalEmbedder interface for sentence-transformer models
4
+ * Adapts existing text embedding logic to the universal interface
5
+ */
6
+ // Ensure DOM polyfills are set up before any other imports
7
+ import '../dom-polyfills.js';
8
+ import { BaseUniversalEmbedder } from '../core/abstract-embedder.js';
9
+ import { EmbeddingEngine } from './embedder.js';
10
+ import { getResourceManager } from '../core/resource-manager.js';
11
+ // =============================================================================
12
+ // SENTENCE TRANSFORMER EMBEDDER IMPLEMENTATION
13
+ // =============================================================================
14
/**
 * Sentence transformer embedder implementation.
 *
 * Adapts the existing EmbeddingEngine to the UniversalEmbedder interface
 * (through BaseUniversalEmbedder) for sentence-transformer models such as
 * sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2.
 * Results produced by embedText() and processBatch() always carry
 * contentType='text'.
 */
export class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
    /** Underlying EmbeddingEngine; created by loadModel(), reset to null by cleanup(). */
    embeddingEngine = null;
    /** Resource manager used to register and release the embedder and its engine. */
    resourceManager = getResourceManager();
    /** Id returned by resourceManager.registerEmbedder(); cleared by cleanup(). */
    embedderResourceId;
    /** Id returned by resourceManager.registerModel(); cleared by cleanup(). */
    engineResourceId;
    /**
     * @param modelName - Model identifier, forwarded to the base class and later to EmbeddingEngine
     * @param options - Embedder options; this class reads `maxBatchSize` from them
     */
    constructor(modelName, options = {}) {
        super(modelName, options);
        // Register this embedder with the resource manager
        this.embedderResourceId = this.resourceManager.registerEmbedder(this);
    }
    // =============================================================================
    // MODEL LIFECYCLE METHODS
    // =============================================================================
    /**
     * Load the sentence transformer model using the existing EmbeddingEngine.
     *
     * Returns immediately when both this embedder and its engine already report
     * loaded. Otherwise the engine is created (if missing), loaded, and
     * registered with the resource manager.
     *
     * @throws The error produced by handleLoadingError() when loading fails;
     *         the loaded flag is reset to false first.
     */
    async loadModel() {
        // Check if already loaded and engine is ready
        if (this._isLoaded && this.embeddingEngine?.isLoaded()) {
            return;
        }
        try {
            this.logModelLoading('Loading sentence transformer model');
            // Create EmbeddingEngine if not exists
            if (!this.embeddingEngine) {
                this.embeddingEngine = new EmbeddingEngine(this.modelName, this.options.maxBatchSize || this._modelInfo.capabilities.maxBatchSize || 8);
            }
            // Load the model using the existing engine (only if not already loaded)
            if (!this.embeddingEngine.isLoaded()) {
                await this.embeddingEngine.loadModel();
            }
            // Register the embedding engine with resource manager if not already registered
            if (!this.engineResourceId) {
                this.engineResourceId = this.resourceManager.registerModel(this.embeddingEngine, this.modelName, 'sentence-transformer');
            }
            // Synchronize loading state with what the engine actually reports
            this._isLoaded = this.embeddingEngine.isLoaded();
            if (this._isLoaded) {
                this.logModelLoading('Model loaded successfully');
            }
            else {
                throw new Error('Model loading failed - engine reports not loaded');
            }
        }
        catch (error) {
            // Reset state on failure
            this._isLoaded = false;
            const enhancedError = this.handleLoadingError(error);
            throw enhancedError;
        }
    }
    /**
     * Clean up model resources.
     *
     * Releases the engine through the resource manager, drops the engine
     * reference, clears the loaded flag and removes this embedder from the
     * LazyEmbedderLoader cache. Engine disposal and cache-removal failures are
     * logged as warnings and never rethrown.
     */
    async cleanup() {
        const cleanupErrors = [];
        try {
            // Clean up embedding engine resources
            if (this.embeddingEngine) {
                try {
                    // Use resource manager for proper cleanup
                    if (this.engineResourceId) {
                        await this.resourceManager.cleanupResource(this.engineResourceId);
                        this.engineResourceId = undefined;
                    }
                    // Clear the reference (EmbeddingEngine doesn't have cleanup methods)
                    this.embeddingEngine = null;
                    this.logModelLoading('Sentence transformer embedding engine disposed');
                }
                catch (error) {
                    const errorMsg = `Failed to dispose embedding engine: ${error instanceof Error ? error.message : 'Unknown error'}`;
                    cleanupErrors.push(errorMsg);
                    console.warn(errorMsg);
                    // Force clear reference even if disposal failed
                    this.embeddingEngine = null;
                    // Also drop the id: keeping it would make a later loadModel()
                    // skip registerModel() for the freshly created engine.
                    this.engineResourceId = undefined;
                }
            }
            // Clear embedder resource registration (don't call resource manager to avoid circular cleanup)
            if (this.embedderResourceId) {
                this.embedderResourceId = undefined;
            }
        }
        finally {
            // Always clear loaded state regardless of cleanup success
            this._isLoaded = false;
            // Remove from lazy loading cache to ensure fresh instances
            try {
                const { LazyEmbedderLoader } = await import('../core/lazy-dependency-loader.js');
                LazyEmbedderLoader.removeEmbedderFromCache(this.modelName, 'sentence-transformer');
            }
            catch (error) {
                console.warn('Failed to remove embedder from cache:', error);
            }
            // Force garbage collection when the runtime exposes gc()
            // (globalThis is the same object as `global` in Node, but does not
            // throw a ReferenceError in runtimes without `global`).
            if (typeof globalThis.gc === 'function') {
                globalThis.gc();
                this.logModelLoading('Forced garbage collection after sentence transformer cleanup');
            }
            // Log cleanup completion
            if (cleanupErrors.length === 0) {
                this.logModelLoading('Sentence transformer resources cleaned up successfully');
            }
            else {
                // Don't throw errors during cleanup - just log them
                this.logModelLoading(`Sentence transformer cleanup completed with ${cleanupErrors.length} errors`);
            }
        }
    }
    // =============================================================================
    // EMBEDDING METHODS
    // =============================================================================
    /**
     * Embed a single text using the existing EmbeddingEngine.
     *
     * @param text - Text to embed; it is trimmed and truncated before embedding
     * @returns The engine's result with contentType forced to 'text'
     * @throws Error when the text is empty/whitespace-only, when the engine is
     *         not initialized, or (wrapped as "Failed to embed text: ...", with
     *         the original error attached as `cause`) when validation,
     *         embedding or the dimension check fails.
     */
    async embedText(text) {
        // Validate input first, before checking if model is loaded
        if (!text || text.trim().length === 0) {
            throw new Error('Text input cannot be empty');
        }
        this.ensureLoaded();
        // Update resource usage tracking
        if (this.embedderResourceId) {
            this.resourceManager.updateResourceUsage(this.embedderResourceId);
        }
        if (this.engineResourceId) {
            this.resourceManager.updateResourceUsage(this.engineResourceId);
        }
        if (!this.embeddingEngine) {
            throw new Error('Embedding engine not initialized');
        }
        try {
            // Validate and truncate text if necessary
            this.validateTextLength(text);
            const processedText = this.truncateText(text.trim());
            // Use the existing EmbeddingEngine to generate embeddings
            const result = await this.embeddingEngine.embedSingle(processedText);
            // Validate embedding dimensions
            if (result.vector.length !== this.dimensions) {
                throw new Error(`Embedding dimension mismatch: expected ${this.dimensions}, got ${result.vector.length}`);
            }
            // Ensure contentType is always present for UniversalEmbedder interface
            return {
                ...result,
                contentType: 'text'
            };
        }
        catch (error) {
            // Keep the original error reachable via `cause` so its stack is not lost
            if (error instanceof Error) {
                throw new Error(`Failed to embed text: ${error.message}`, { cause: error });
            }
            throw new Error('Failed to embed text: Unknown error', { cause: error });
        }
    }
    // =============================================================================
    // BATCH PROCESSING OPTIMIZATION
    // =============================================================================
    /**
     * Optimized batch processing using the existing EmbeddingEngine and the
     * BatchProcessingOptimizer. Overrides the base implementation.
     *
     * Only items whose contentType is 'text' are embedded. Up to 10 text items
     * are sent to the engine's embedBatch directly; larger batches go through
     * the optimizer with progress logging. When a path fails, processing falls
     * back (optimizer -> engine batch -> base class implementation).
     *
     * @param batch - Items with `content`, `contentType` and optional `metadata`
     * @returns Embedding results, each with contentType 'text' on the engine
     *          and optimizer paths; an empty array when no text items exist
     * @throws Error when the engine is not initialized
     */
    async processBatch(batch) {
        this.ensureLoaded();
        if (!this.embeddingEngine) {
            throw new Error('Embedding engine not initialized');
        }
        // Filter for text content only (sentence transformers don't support other types)
        const textItems = batch.filter(item => item.contentType === 'text');
        if (textItems.length === 0) {
            return [];
        }
        // Shared by the small-batch path and the optimizer fallback: embed all
        // text items with one engine call, verify dimensions, tag contentType.
        const embedTextItemsAsBatch = async () => {
            const texts = textItems.map(item => this.truncateText(item.content.trim()));
            const results = await this.embeddingEngine.embedBatch(texts);
            for (let i = 0; i < results.length; i++) {
                if (results[i].vector.length !== this.dimensions) {
                    throw new Error(`Embedding dimension mismatch for item ${i}: expected ${this.dimensions}, got ${results[i].vector.length}`);
                }
            }
            // Same guarantee as embedText(): contentType is always 'text'
            return results.map(result => ({ ...result, contentType: 'text' }));
        };
        // For small batches, use the existing EmbeddingEngine directly
        if (textItems.length <= 10) {
            try {
                return await embedTextItemsAsBatch();
            }
            catch (error) {
                // Fall back to individual processing if batch fails
                // NOTE(review): the unfiltered `batch` is handed to the base class here,
                // as in the original code - confirm the base handles non-text items.
                console.warn(`Batch processing failed, falling back to individual processing: ${error}`);
                return super.processBatch(batch);
            }
        }
        // For larger batches, use the BatchProcessingOptimizer
        try {
            const { createTextBatchProcessor } = await import('../core/batch-processing-optimizer.js');
            const batchProcessor = createTextBatchProcessor();
            // Convert to EmbeddingBatchItem format
            const batchItems = textItems.map(item => ({
                content: this.truncateText(item.content.trim()),
                contentType: item.contentType,
                metadata: item.metadata
            }));
            // Create embed function that uses this embedder's engine
            const embedFunction = async (item) => {
                const result = await this.embeddingEngine.embedSingle(item.content);
                // Validate dimensions
                if (result.vector.length !== this.dimensions) {
                    throw new Error(`Embedding dimension mismatch: expected ${this.dimensions}, got ${result.vector.length}`);
                }
                return { ...result, contentType: 'text' };
            };
            // Process with optimization and progress reporting
            const batchResult = await batchProcessor.processBatch(batchItems, embedFunction, (stats) => {
                if (stats.totalItems > 50) { // Only log for larger batches
                    console.log(`Text embedding progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
                }
            });
            // Log final statistics for large batches
            if (batchResult.stats.totalItems > 50) {
                console.log(`✓ Text embedding complete: ${batchResult.stats.processedItems} processed, ${batchResult.stats.failedItems} failed`);
                console.log(` Processing time: ${Math.round(batchResult.stats.processingTimeMs / 1000)}s, Rate: ${Math.round(batchResult.stats.itemsPerSecond)} items/sec`);
                if (batchResult.stats.peakMemoryUsageMB > 100) {
                    console.log(` Peak memory usage: ${batchResult.stats.peakMemoryUsageMB}MB`);
                }
            }
            return batchResult.results;
        }
        catch (error) {
            // Fall back to existing implementation if optimizer fails
            console.warn(`Batch processing optimizer failed, using fallback: ${error}`);
            try {
                return await embedTextItemsAsBatch();
            }
            catch (fallbackError) {
                console.warn(`Fallback batch processing failed, using individual processing: ${fallbackError}`);
                return super.processBatch(batch);
            }
        }
    }
    // =============================================================================
    // UTILITY METHODS
    // =============================================================================
    /**
     * Get model-specific information.
     *
     * @returns The base class model info with the text capability flags
     *          (semantic similarity, classification, clustering) set to true
     *          and a recommendedUseCase string added to `capabilities`
     */
    getModelInfo() {
        const baseInfo = super.getModelInfo();
        return {
            ...baseInfo,
            capabilities: {
                ...baseInfo.capabilities,
                // Sentence transformers are optimized for text similarity
                supportsSemanticSimilarity: true,
                supportsTextClassification: true,
                supportsTextClustering: true,
                recommendedUseCase: 'text similarity and semantic search'
            }
        };
    }
    /**
     * Check if the model is suitable for a specific task.
     *
     * @param task - One of 'similarity', 'classification', 'clustering', 'retrieval'
     * @returns true when `task` is one of the four supported task names
     */
    isSuitableForTask(task) {
        // Sentence transformers are suitable for all text-based tasks
        const supportedTasks = ['similarity', 'classification', 'clustering', 'retrieval'];
        return supportedTasks.includes(task);
    }
    // =============================================================================
    // ADDITIONAL METHODS FOR COMPATIBILITY WITH EXISTING SYSTEM
    // =============================================================================
    /**
     * Embed a batch of document chunks with the engine's embedDocumentBatch.
     * Provides compatibility with the existing document ingestion pipeline;
     * the engine's results are returned unchanged.
     *
     * @param chunks - Chunk texts to embed
     * @throws Error when the engine is not initialized
     */
    async embedDocumentBatch(chunks) {
        this.ensureLoaded();
        if (!this.embeddingEngine) {
            throw new Error('Embedding engine not initialized');
        }
        // Use the existing EmbeddingEngine's document batch processing
        return await this.embeddingEngine.embedDocumentBatch(chunks);
    }
    /**
     * Get the model version from the underlying EmbeddingEngine.
     *
     * @throws Error when the engine has not been created yet
     */
    getModelVersion() {
        if (!this.embeddingEngine) {
            throw new Error('Embedding engine not initialized');
        }
        return this.embeddingEngine.getModelVersion();
    }
    /**
     * Get the batch size from the underlying EmbeddingEngine.
     *
     * @returns The engine's batch size; while no engine exists, the same
     *          fallback chain loadModel() uses to construct it
     *          (options.maxBatchSize, capabilities.maxBatchSize, then 8)
     */
    getBatchSize() {
        if (!this.embeddingEngine) {
            return this.options.maxBatchSize || this._modelInfo.capabilities.maxBatchSize || 8;
        }
        return this.embeddingEngine.getBatchSize();
    }
    /**
     * Check if the underlying EmbeddingEngine is loaded.
     *
     * @returns false when no engine exists, otherwise the engine's isLoaded()
     */
    isEngineLoaded() {
        return this.embeddingEngine ? this.embeddingEngine.isLoaded() : false;
    }
    /**
     * Override isLoaded to require both the internal flag and the engine state.
     */
    isLoaded() {
        return this._isLoaded && this.isEngineLoaded();
    }
}
340
+ //# sourceMappingURL=sentence-transformer-embedder.js.map
package/dist/types.d.ts CHANGED
@@ -19,10 +19,17 @@ export interface Document {
19
19
  source: string;
20
20
  title: string;
21
21
  content: string;
22
+ metadata?: Record<string, any>;
22
23
  }
23
24
  export interface EmbeddingResult { // one embedding vector plus its identifier and optional descriptive fields
24
25
  embedding_id: string;
25
26
  vector: Float32Array; // SentenceTransformerEmbedder compares vector.length against its expected dimensions
27
+ contentType?: string; // optional here; SentenceTransformerEmbedder.embedText always sets it to 'text'
28
+ metadata?: Record<string, any>; // optional free-form key/value data
29
+ }
30
+ export interface EnhancedEmbeddingResult extends EmbeddingResult { // EmbeddingResult variant in which contentType is mandatory
31
+ contentType: string; // required here, optional on the base interface
32
+ metadata?: Record<string, any>; // same optional shape as on EmbeddingResult
26
33
  }
27
34
  export interface Preprocessor {
28
35
  appliesTo(language: string): boolean;
@@ -40,5 +47,37 @@ export interface PreprocessingConfig {
40
47
  code?: 'strip' | 'keep' | 'placeholder';
41
48
  };
42
49
  }
50
+ export type ModeType = 'text' | 'multimodal';
51
+ export type ModelType = 'sentence-transformer' | 'clip';
52
+ export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
53
+ import type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
54
+ export interface SystemInfo { // typed description of the active mode, embedding model and reranking setup
55
+ mode: ModeType; // 'text' | 'multimodal'
56
+ modelName: string;
57
+ modelType: ModelType; // 'sentence-transformer' | 'clip'
58
+ modelDimensions: number; // presumably the embedding vector length - confirm against the code that writes it
59
+ modelVersion: string;
60
+ supportedContentTypes: string[];
61
+ rerankingStrategy: RerankingStrategyType; // type comes from ./core/reranking-config.js
62
+ rerankingModel?: string;
63
+ rerankingConfig?: RerankingConfig; // type comes from ./core/reranking-config.js
64
+ createdAt: Date;
65
+ updatedAt: Date;
66
+ }
67
+ export interface SystemInfoRow { // snake_case, primitives-only counterpart of SystemInfo; presumably the stored row shape - confirm against core/db.js
68
+ id: number;
69
+ mode: string; // SystemInfo.mode types this as ModeType
70
+ model_name: string;
71
+ model_type: string; // SystemInfo.modelType types this as ModelType
72
+ model_dimensions: number;
73
+ model_version: string;
74
+ supported_content_types: string; // a single string here vs string[] on SystemInfo; serialization format not visible here - TODO confirm
75
+ reranking_strategy: string;
76
+ reranking_model?: string;
77
+ reranking_config?: string; // a string here vs RerankingConfig on SystemInfo; serialization format not visible here - TODO confirm
78
+ created_at: string; // string here vs Date on SystemInfo
79
+ updated_at: string; // string here vs Date on SystemInfo
80
+ }
43
81
  export type { DatabaseConnection, ContentChunk } from './core/db.js';
82
+ export type { UniversalEmbedder, ModelInfo, ModelCapabilities, ModelRequirements, EmbeddingBatchItem, ModelValidationResult, CreateEmbedderFunction, EmbedderCreationOptions, ContentType } from './core/universal-embedder.js';
44
83
  //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Vector mathematics utilities for embedding operations
3
+ */
4
+ /**
5
+ * Compute cosine similarity between two vectors (dot product divided by the product of the two L2 norms). Throws an Error when the vectors differ in length.
6
+ * @param a - First vector (Float32Array or number array)
7
+ * @param b - Second vector (Float32Array or number array)
8
+ * @returns Cosine similarity score, nominally between -1 and 1; 0 when either vector has zero magnitude
9
+ */
10
+ export declare function cosineSimilarity(a: Float32Array | number[], b: Float32Array | number[]): number;
11
+ /**
12
+ * Compute Euclidean distance between two vectors (square root of the sum of squared component differences). Throws an Error when the vectors differ in length.
13
+ * @param a - First vector
14
+ * @param b - Second vector
15
+ * @returns Euclidean distance (0 for two empty vectors)
16
+ */
17
+ export declare function euclideanDistance(a: Float32Array | number[], b: Float32Array | number[]): number;
18
+ /**
19
+ * Compute dot product between two vectors (sum of the component-wise products). Throws an Error when the vectors differ in length.
20
+ * @param a - First vector
21
+ * @param b - Second vector
22
+ * @returns Dot product (0 for two empty vectors)
23
+ */
24
+ export declare function dotProduct(a: Float32Array | number[], b: Float32Array | number[]): number;
25
+ /**
26
+ * Compute vector magnitude (L2 norm): the square root of the sum of squared components
27
+ * @param vector - Input vector
28
+ * @returns Vector magnitude (0 for an empty vector)
29
+ */
30
+ export declare function magnitude(vector: Float32Array | number[]): number;
31
+ //# sourceMappingURL=vector-math.d.ts.map
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Vector mathematics utilities for embedding operations
3
+ */
4
/**
 * Compute cosine similarity between two vectors.
 *
 * The dot product and both squared norms are accumulated in a single pass.
 *
 * @param {Float32Array | number[]} a - First vector
 * @param {Float32Array | number[]} b - Second vector (must have the same length as `a`)
 * @returns {number} Cosine similarity in the closed range [-1, 1];
 *   0 when either vector has zero magnitude (including two empty vectors)
 * @throws {Error} If the vectors differ in length
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        throw new Error('Vectors must have the same length');
    }
    // Local names deliberately differ from the exported `dotProduct` and
    // `magnitude` functions of this module to avoid shadowing them.
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    if (denominator === 0) {
        // Similarity is undefined for a zero vector; report "no similarity".
        return 0;
    }
    // Floating-point rounding in sqrt(normA) * sqrt(normB) can push the ratio
    // marginally outside [-1, 1] (e.g. for identical vectors), so clamp it to
    // the documented range. NaN inputs still propagate as NaN.
    return Math.min(1, Math.max(-1, dot / denominator));
}
25
/**
 * Euclidean (straight-line) distance between two equally sized vectors.
 *
 * @param {Float32Array | number[]} a - First vector
 * @param {Float32Array | number[]} b - Second vector (same length as `a`)
 * @returns {number} Square root of the summed squared component differences
 * @throws {Error} If the vectors differ in length
 */
export function euclideanDistance(a, b) {
    if (a.length !== b.length) {
        throw new Error('Vectors must have the same length');
    }
    let squaredTotal = 0;
    let index = 0;
    while (index < a.length) {
        const delta = a[index] - b[index];
        squaredTotal += delta * delta;
        index += 1;
    }
    return Math.sqrt(squaredTotal);
}
42
/**
 * Dot (inner) product of two equally sized vectors.
 *
 * @param {Float32Array | number[]} a - First vector
 * @param {Float32Array | number[]} b - Second vector (same length as `a`)
 * @returns {number} Sum of the pairwise component products (0 for empty vectors)
 * @throws {Error} If the vectors differ in length
 */
export function dotProduct(a, b) {
    const sameLength = a.length === b.length;
    if (!sameLength) {
        throw new Error('Vectors must have the same length');
    }
    let accumulated = 0;
    for (let k = 0; k < a.length; k += 1) {
        const term = a[k] * b[k];
        accumulated += term;
    }
    return accumulated;
}
58
/**
 * Length (L2 norm) of a vector.
 *
 * @param {Float32Array | number[]} vector - Input vector
 * @returns {number} Square root of the sum of squared components (0 for an empty vector)
 */
export function magnitude(vector) {
    let sumOfSquares = 0;
    let position = 0;
    while (position < vector.length) {
        const component = vector[position];
        sumOfSquares += component * component;
        position += 1;
    }
    return Math.sqrt(sumOfSquares);
}
70
+ //# sourceMappingURL=vector-math.js.map
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "rag-lite-ts",
3
- "version": "1.0.2",
4
- "description": "Local-first TypeScript retrieval engine for semantic search over static documents",
3
+ "version": "2.0.1",
4
+ "description": "Local-first TypeScript retrieval engine with Chameleon Multimodal Architecture for semantic search over text and image content",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
@@ -20,7 +20,9 @@
20
20
  }
21
21
  },
22
22
  "files": [
23
- "dist/**/*",
23
+ "dist/**/*.js",
24
+ "dist/**/*.d.ts",
25
+ "!dist/**/*.map",
24
26
  "README.md",
25
27
  "LICENSE"
26
28
  ],
@@ -29,9 +31,16 @@
29
31
  "build:test": "tsc --project tsconfig.test.json",
30
32
  "clean": "rimraf dist",
31
33
  "dev": "tsc --watch",
32
- "test": "npm run build:test && node --test dist/text/tokenizer.test.js dist/core/chunker.test.js dist/text/embedder.test.js dist/core/vector-index.test.js dist/index-manager.test.js dist/core/search.test.js dist/file-processor.test.js dist/mcp-server.test.js dist/preprocess.test.js dist/core/config.test.js dist/preprocessors/integration.test.js dist/cli/cli.test.js",
33
- "test:integration": "npm run build && npm run build:test && node --test dist/integration.test.js",
34
- "test:all": "npm run test && npm run test:integration",
34
+ "test": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors",
35
+ "test:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/core dist/__tests__/text dist/__tests__/preprocessors",
36
+ "test:core": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/core",
37
+ "test:core:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/core",
38
+ "test:text": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/text",
39
+ "test:preprocessors": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/preprocessors",
40
+ "test:integration": "npm run build && npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__/integration",
41
+ "test:integration:verbose": "npm run build && npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__/integration",
42
+ "test:all": "npm run build:test && node --expose-gc --test --test-concurrency=1 dist/__tests__",
43
+ "test:all:verbose": "npm run build:test && node --expose-gc --test --test-concurrency=1 --test-reporter=tap dist/__tests__",
35
44
  "prepublishOnly": "npm run clean && npm run build"
36
45
  },
37
46
  "keywords": [
@@ -41,6 +50,11 @@
41
50
  "embeddings",
42
51
  "typescript",
43
52
  "local-first",
53
+ "multimodal",
54
+ "chameleon-architecture",
55
+ "clip",
56
+ "image-search",
57
+ "polymorphic-runtime",
44
58
  "mdx",
45
59
  "mermaid",
46
60
  "preprocessing",
@@ -64,17 +78,24 @@
64
78
  "dependencies": {
65
79
  "@huggingface/transformers": "^3.7.5",
66
80
  "@modelcontextprotocol/sdk": "^1.18.2",
81
+ "csv-parse": "^6.1.0",
67
82
  "hnswlib-wasm": "^0.8.2",
68
83
  "jsdom": "^27.0.0",
84
+ "lru-cache": "^11.2.2",
69
85
  "mammoth": "^1.11.0",
70
86
  "pdf-parse": "^2.1.10",
71
87
  "sqlite3": "^5.1.6"
72
88
  },
73
89
  "devDependencies": {
74
90
  "@types/jsdom": "^21.1.7",
91
+ "@types/lru-cache": "^7.10.9",
75
92
  "@types/node": "^20.11.0",
76
93
  "js-yaml": "^4.1.0",
77
94
  "rimraf": "^5.0.5",
95
+ "tsx": "^4.20.6",
78
96
  "typescript": "^5.3.0"
97
+ },
98
+ "optionalDependencies": {
99
+ "sharp": "^0.34.5"
79
100
  }
80
101
  }
@@ -1 +0,0 @@
1
- {"version":3,"file":"api-errors.d.ts","sourceRoot":"","sources":["../src/api-errors.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,8BAAsB,QAAS,SAAQ,KAAK;IAGjC,IAAI,EAAE,MAAM;IACZ,WAAW,EAAE,MAAM,EAAE;IACrB,OAAO,CAAC,EAAE,MAAM;gBAHvB,OAAO,EAAE,MAAM,EACR,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,MAAM,EAAE,EACrB,OAAO,CAAC,EAAE,MAAM,YAAA;IAMzB;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAa7B;;OAEG;IACH,QAAQ,IAAI,IAAI;CAejB;AAED;;GAEG;AACH,qBAAa,cAAe,SAAQ,QAAQ;gBAC9B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM;CAGnF;AAED;;GAEG;AACH,qBAAa,WAAY,SAAQ,QAAQ;gBAC3B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM;CAGnF;AAED;;GAEG;AACH,qBAAa,aAAc,SAAQ,QAAQ;gBAC7B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM;CAGnF;AAED;;GAEG;AACH,qBAAa,uBAAwB,SAAQ,QAAQ;gBACvC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM;CAGnF;AAED;;;GAGG;AACH,qBAAa,YAAY;IACvB;;OAEG;IACH,MAAM,CAAC,oBAAoB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,cAAc;IA+G5E;;OAEG;IACH,MAAM,CAAC,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,WAAW;IA4HtE;;OAEG;IACH,MAAM,CAAC,mBAAmB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,aAAa;CA4C3E;AAED;;;GAGG;AACH,eAAO,MAAM,YAAY;IACvB;;OAEG;;IAYH;;OAEG;;IAYH;;OAEG;;IAYH;;OAEG;;CAYJ,CAAC;AAEF;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,GAAG,QAAQ,GAAG,UAAU,GAAG,KAAK,CA4BrH"}