rag-lite-ts 1.0.2 → 2.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +606 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.js +7 -8
- package/dist/factories/index.d.ts +1 -1
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +9 -0
- package/dist/index.js +11 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +26 -0
- package/dist/search.js +54 -1
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +15 -3
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
package/dist/core/ingestion.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { type ChunkConfig } from './chunker.js';
|
|
|
7
7
|
import { IndexManager } from '../index-manager.js';
|
|
8
8
|
import { type DatabaseConnection } from './db.js';
|
|
9
9
|
import type { EmbedFunction } from './interfaces.js';
|
|
10
|
+
import { ContentManager, type MemoryContentMetadata } from './content-manager.js';
|
|
10
11
|
/**
|
|
11
12
|
* Options for the ingestion pipeline
|
|
12
13
|
*/
|
|
@@ -17,6 +18,10 @@ export interface IngestionOptions {
|
|
|
17
18
|
chunkConfig?: ChunkConfig;
|
|
18
19
|
/** Whether to force rebuild the index */
|
|
19
20
|
forceRebuild?: boolean;
|
|
21
|
+
/** Mode for the ingestion pipeline (text or multimodal) */
|
|
22
|
+
mode?: 'text' | 'multimodal';
|
|
23
|
+
/** Content type for the ingested content */
|
|
24
|
+
contentType?: string;
|
|
20
25
|
}
|
|
21
26
|
/**
|
|
22
27
|
* Result of the ingestion process
|
|
@@ -34,6 +39,8 @@ export interface IngestionResult {
|
|
|
34
39
|
embeddingErrors: number;
|
|
35
40
|
/** Processing time in milliseconds */
|
|
36
41
|
processingTimeMs: number;
|
|
42
|
+
/** Content IDs of successfully ingested documents */
|
|
43
|
+
contentIds: string[];
|
|
37
44
|
}
|
|
38
45
|
/**
|
|
39
46
|
* Main ingestion pipeline class
|
|
@@ -46,8 +53,10 @@ export declare class IngestionPipeline {
|
|
|
46
53
|
private db;
|
|
47
54
|
private defaultChunkConfig?;
|
|
48
55
|
private pathManager;
|
|
56
|
+
private contentManager;
|
|
49
57
|
/**
|
|
50
58
|
* Creates a new IngestionPipeline with explicit dependency injection
|
|
59
|
+
* Enhanced with ContentManager integration for unified content system
|
|
51
60
|
*
|
|
52
61
|
* DEPENDENCY INJECTION PATTERN:
|
|
53
62
|
* This constructor requires all dependencies to be explicitly provided, enabling:
|
|
@@ -55,6 +64,7 @@ export declare class IngestionPipeline {
|
|
|
55
64
|
* - Support for different embedding models and content types
|
|
56
65
|
* - Testability through mock injection
|
|
57
66
|
* - Future extensibility for multimodal content processing
|
|
67
|
+
* - Unified content management for both filesystem and memory-based ingestion
|
|
58
68
|
*
|
|
59
69
|
* @param embedFn - Function to embed document chunks into vectors
|
|
60
70
|
* - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
|
|
@@ -74,32 +84,33 @@ export declare class IngestionPipeline {
|
|
|
74
84
|
* - Supports different content types through metadata fields
|
|
75
85
|
* - Example: await openDatabase('./db.sqlite')
|
|
76
86
|
*
|
|
87
|
+
* @param contentManager - Optional ContentManager for unified content system
|
|
88
|
+
* - Handles content storage routing and deduplication
|
|
89
|
+
* - If not provided, creates default instance with standard configuration
|
|
90
|
+
* - Example: new ContentManager(db, { contentDir: '.raglite/content' })
|
|
91
|
+
*
|
|
77
92
|
* USAGE EXAMPLES:
|
|
78
93
|
* ```typescript
|
|
79
|
-
* // Text-only ingestion pipeline
|
|
94
|
+
* // Text-only ingestion pipeline with unified content system
|
|
80
95
|
* const textEmbedFn = await createTextEmbedder();
|
|
81
96
|
* const indexManager = new IndexManager('./index.bin');
|
|
82
97
|
* const db = await openDatabase('./db.sqlite');
|
|
98
|
+
* const contentManager = new ContentManager(db);
|
|
99
|
+
* const ingestion = new IngestionPipeline(textEmbedFn, indexManager, db, undefined, contentManager);
|
|
100
|
+
*
|
|
101
|
+
* // Simple usage (ContentManager created automatically)
|
|
83
102
|
* const ingestion = new IngestionPipeline(textEmbedFn, indexManager, db);
|
|
84
103
|
*
|
|
85
|
-
* // Custom embedding implementation
|
|
104
|
+
* // Custom embedding implementation with memory ingestion
|
|
86
105
|
* const customEmbedFn = async (text) => ({
|
|
87
106
|
* embedding_id: generateId(),
|
|
88
107
|
* vector: await myCustomModel.embed(text)
|
|
89
108
|
* });
|
|
90
109
|
* const ingestion = new IngestionPipeline(customEmbedFn, indexManager, db);
|
|
91
|
-
*
|
|
92
|
-
* // Multimodal ingestion (future)
|
|
93
|
-
* const multimodalEmbedFn = async (content, contentType) => {
|
|
94
|
-
* if (contentType === 'image') {
|
|
95
|
-
* return { embedding_id: generateId(), vector: await clipModel.embedImage(content) };
|
|
96
|
-
* }
|
|
97
|
-
* return { embedding_id: generateId(), vector: await clipModel.embedText(content) };
|
|
98
|
-
* };
|
|
99
|
-
* const ingestion = new IngestionPipeline(multimodalEmbedFn, indexManager, db);
|
|
110
|
+
* await ingestion.ingestFromMemory(buffer, { displayName: 'file.txt' });
|
|
100
111
|
* ```
|
|
101
112
|
*/
|
|
102
|
-
constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, defaultChunkConfig?: ChunkConfig | undefined);
|
|
113
|
+
constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, defaultChunkConfig?: ChunkConfig | undefined, contentManager?: ContentManager);
|
|
103
114
|
/**
|
|
104
115
|
* Ingest documents from a directory
|
|
105
116
|
* @param directoryPath - Path to directory containing documents
|
|
@@ -114,27 +125,86 @@ export declare class IngestionPipeline {
|
|
|
114
125
|
* @returns Promise resolving to ingestion results
|
|
115
126
|
*/
|
|
116
127
|
ingestFile(filePath: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
128
|
+
/**
|
|
129
|
+
* Ingest content from memory buffer
|
|
130
|
+
* Enables MCP integration and real-time content processing
|
|
131
|
+
* @param content - Buffer containing the content to ingest
|
|
132
|
+
* @param metadata - Memory content metadata including display name and content type
|
|
133
|
+
* @param options - Optional ingestion configuration
|
|
134
|
+
* @returns Promise resolving to content ID for the ingested content
|
|
135
|
+
*/
|
|
136
|
+
ingestFromMemory(content: Buffer, metadata: MemoryContentMetadata, options?: IngestionOptions): Promise<string>;
|
|
117
137
|
/**
|
|
118
138
|
* Ingest documents from a path (file or directory)
|
|
119
139
|
* Implements the complete pipeline: file processing → chunking → embedding → storage
|
|
140
|
+
* Enhanced to handle mixed content types (text and images) in multimodal mode
|
|
120
141
|
*/
|
|
121
142
|
ingestPath(path: string, options?: IngestionOptions): Promise<IngestionResult>;
|
|
122
143
|
/**
|
|
123
|
-
*
|
|
144
|
+
* Analyze content types in the document collection
|
|
145
|
+
* @private
|
|
146
|
+
*/
|
|
147
|
+
private analyzeContentTypes;
|
|
148
|
+
/**
|
|
149
|
+
* Chunk all documents and organize results with content-type awareness
|
|
150
|
+
* Enhanced to handle different content types appropriately
|
|
151
|
+
*/
|
|
152
|
+
private chunkDocumentsWithContentTypes;
|
|
153
|
+
/**
|
|
154
|
+
* Chunk all documents and organize results (legacy method for backward compatibility)
|
|
155
|
+
* @deprecated Use chunkDocumentsWithContentTypes for multimodal support
|
|
124
156
|
*/
|
|
125
157
|
private chunkDocuments;
|
|
126
158
|
/**
|
|
127
|
-
* Generate embeddings for all chunks with
|
|
159
|
+
* Generate embeddings for all chunks with content-type support
|
|
160
|
+
* Enhanced to handle different content types and pass metadata to embedding function
|
|
161
|
+
*/
|
|
162
|
+
private generateEmbeddingsWithContentTypes;
|
|
163
|
+
/**
|
|
164
|
+
* Generate embeddings for all chunks with error handling (legacy method for backward compatibility)
|
|
165
|
+
* @deprecated Use generateEmbeddingsWithContentTypes for multimodal support
|
|
128
166
|
*/
|
|
129
167
|
private generateEmbeddings;
|
|
130
168
|
/**
|
|
131
|
-
* Store documents and chunks in database
|
|
169
|
+
* Store documents and chunks in database with content-type support
|
|
170
|
+
* Enhanced to handle content type metadata and multimodal content
|
|
171
|
+
* @returns Array of content IDs for successfully stored documents
|
|
172
|
+
*/
|
|
173
|
+
private storeDocumentsAndChunksWithContentTypes;
|
|
174
|
+
/**
|
|
175
|
+
* Store documents and chunks in database (legacy method for backward compatibility)
|
|
176
|
+
* @deprecated Use storeDocumentsAndChunksWithContentTypes for multimodal support
|
|
132
177
|
*/
|
|
133
178
|
private storeDocumentsAndChunks;
|
|
134
179
|
/**
|
|
135
180
|
* Update vector index with new embeddings
|
|
136
181
|
*/
|
|
137
182
|
private updateVectorIndex;
|
|
183
|
+
/**
|
|
184
|
+
* Converts MIME type to simple content type for embedding function
|
|
185
|
+
* @param mimeType - MIME type string (e.g., 'text/plain', 'image/jpeg')
|
|
186
|
+
* @returns Simple content type ('text', 'image', etc.)
|
|
187
|
+
*/
|
|
188
|
+
private getContentTypeForEmbedding;
|
|
189
|
+
/**
|
|
190
|
+
* Save the vector index to disk
|
|
191
|
+
*/
|
|
192
|
+
saveIndex(): Promise<void>;
|
|
193
|
+
/**
|
|
194
|
+
* Process image content from memory using the existing image processing pipeline
|
|
195
|
+
* @private
|
|
196
|
+
*/
|
|
197
|
+
private processImageFromMemory;
|
|
198
|
+
/**
|
|
199
|
+
* Process PDF content from memory using the existing PDF processing pipeline
|
|
200
|
+
* @private
|
|
201
|
+
*/
|
|
202
|
+
private processPDFFromMemory;
|
|
203
|
+
/**
|
|
204
|
+
* Process DOCX content from memory using the existing DOCX processing pipeline
|
|
205
|
+
* @private
|
|
206
|
+
*/
|
|
207
|
+
private processDOCXFromMemory;
|
|
138
208
|
/**
|
|
139
209
|
* Clean up resources - explicit cleanup method
|
|
140
210
|
*/
|