rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Polymorphic factory for creating mode-aware search engines
|
|
3
|
+
* Automatically detects mode from database and uses appropriate embedder
|
|
4
|
+
*
|
|
5
|
+
* This factory implements the Chameleon Architecture principle:
|
|
6
|
+
* - Detects mode (text/multimodal) from database configuration
|
|
7
|
+
* - Uses appropriate embedder based on detected mode
|
|
8
|
+
* - Provides seamless polymorphic behavior without user intervention
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // Automatically detects mode and creates appropriate search engine
|
|
13
|
+
* const search = await PolymorphicSearchFactory.create('./index.bin', './db.sqlite');
|
|
14
|
+
*
|
|
15
|
+
* // Works for both text and multimodal modes
|
|
16
|
+
* const results = await search.search('query');
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
import { SearchEngine } from '../core/search.js';
|
|
20
|
+
import { IndexManager } from '../index-manager.js';
|
|
21
|
+
import { openDatabase, getSystemInfo } from '../core/db.js';
|
|
22
|
+
import { createTextEmbedFunction } from '../text/embedder.js';
|
|
23
|
+
import { createTextRerankFunction } from '../text/reranker.js';
|
|
24
|
+
import { config, getModelDefaults } from '../core/config.js';
|
|
25
|
+
import { existsSync } from 'fs';
|
|
26
|
+
import { createMissingFileError, createInvalidPathError, createFactoryCreationError } from '../core/actionable-error-messages.js';
|
|
27
|
+
/**
 * Factory for creating mode-aware search engines.
 *
 * Reads the mode (text/multimodal) recorded in the database and wires a
 * SearchEngine with the embedder that matches it.
 */
export class PolymorphicSearchFactory {
    /**
     * Create a SearchEngine that adapts to the mode stored in the database.
     *
     * Steps performed:
     * 1. Validates that both paths are provided and that both files exist
     * 2. Opens the database and reads the system configuration
     * 3. Detects the mode (text/multimodal), falling back to configured defaults
     * 4. Creates the embedder for that mode
     * 5. Optionally creates a reranker when `options.enableReranking === true`
     * 6. Initializes schema, index manager and content resolver
     * 7. Returns the fully configured SearchEngine
     *
     * If any step after opening the database fails, the database handle is
     * released before the error is rethrown so a failed call does not leak it.
     *
     * @param indexPath - Path to the vector index file (must exist)
     * @param dbPath - Path to the SQLite database file (must exist)
     * @param options - Optional configuration overrides (`enableReranking`)
     * @returns Promise resolving to the configured SearchEngine
     * @throws {Error} The error produced by `createFactoryCreationError` for any
     *   failure, including path validation and missing files
     */
    static async create(indexPath, dbPath, options = {}) {
        // Declared outside the try block so the failure path can release it.
        let db;
        try {
            console.log('🏭 PolymorphicSearchFactory: Initializing mode-aware search engine...');
            // Validate input paths
            if (!indexPath || !dbPath) {
                throw createInvalidPathError([
                    { name: 'indexPath', value: indexPath },
                    { name: 'dbPath', value: dbPath }
                ], { operationContext: 'PolymorphicSearchFactory.create' });
            }
            // Check if required files exist
            if (!existsSync(indexPath)) {
                throw createMissingFileError(indexPath, 'index', {
                    operationContext: 'PolymorphicSearchFactory.create'
                });
            }
            if (!existsSync(dbPath)) {
                throw createMissingFileError(dbPath, 'database', {
                    operationContext: 'PolymorphicSearchFactory.create'
                });
            }
            // Step 1: Open database and detect mode
            console.log('💾 Opening database and detecting mode...');
            db = await openDatabase(dbPath);
            let mode = 'text';
            let embeddingModel;
            let modelDimensions;
            try {
                const systemInfo = await getSystemInfo(db);
                if (systemInfo) {
                    // A record without a mode is treated as text mode rather than
                    // propagating `undefined` into the logs and the mode check below.
                    mode = systemInfo.mode || 'text';
                    embeddingModel = systemInfo.modelName;
                    modelDimensions = systemInfo.modelDimensions;
                    console.log(`📊 Detected mode: ${mode}`);
                    console.log(`📊 Detected model: ${embeddingModel} (${modelDimensions} dimensions)`);
                }
                else {
                    // Fallback to default if no system info
                    ({ embeddingModel, modelDimensions } = resolveDefaultModelSettings());
                    console.log(`📊 No system info found, using default: ${embeddingModel} (${modelDimensions} dimensions)`);
                }
            }
            catch (error) {
                // Reading system info is best-effort: report why it failed instead of
                // swallowing the error, then continue with the configured defaults.
                console.warn(`⚠️ Could not read system info: ${error instanceof Error ? error.message : String(error)}`);
                ({ embeddingModel, modelDimensions } = resolveDefaultModelSettings());
                console.log(`📊 Using default configuration: ${embeddingModel} (${modelDimensions} dimensions)`);
            }
            // Step 2: Create appropriate embedder based on mode
            let embedFn;
            if (mode === 'multimodal') {
                console.log('📊 Loading CLIP embedder for multimodal mode...');
                const { createEmbedder } = await import('../core/embedder-factory.js');
                const clipEmbedder = await createEmbedder(embeddingModel);
                // Wrap CLIP embedder to match EmbedFunction signature
                embedFn = async (content, contentType) => {
                    if (contentType === 'image') {
                        return await clipEmbedder.embedImage(content);
                    }
                    return await clipEmbedder.embedText(content);
                };
                console.log('✓ CLIP embedder loaded for multimodal mode');
            }
            else {
                console.log('📊 Loading text embedder for text mode...');
                embedFn = createTextEmbedFunction(embeddingModel);
                console.log('✓ Text embedder loaded');
            }
            // Step 3: Initialize reranking function (optional)
            let rerankFn;
            // `options` may be passed explicitly as null; treat that like "no options".
            if (options && options.enableReranking === true) {
                console.log('🔄 Loading reranking model...');
                rerankFn = createTextRerankFunction();
                // Invoke once with an empty candidate list so load failures surface here.
                await rerankFn('test query', []);
                console.log('✓ Reranking model loaded successfully');
            }
            else {
                console.log('🔄 Reranking disabled (local-first, fast mode)');
            }
            // Step 4: Initialize database schema
            const { initializeSchema } = await import('../core/db.js');
            await initializeSchema(db);
            console.log('✓ Database connection established');
            // Step 5: Initialize index manager
            console.log('📇 Loading vector index...');
            const indexManager = new IndexManager(indexPath, dbPath, modelDimensions, embeddingModel);
            await indexManager.initialize();
            console.log('✓ Vector index loaded successfully');
            // Step 6: Create ContentResolver
            console.log('📁 Initializing content resolver...');
            const { ContentResolver } = await import('../core/content-resolver.js');
            const contentResolver = new ContentResolver(db);
            console.log('✓ Content resolver ready');
            // Step 7: Create SearchEngine with dependency injection
            const searchEngine = new SearchEngine(embedFn, indexManager, db, rerankFn, contentResolver);
            // Step 8: Validate the setup
            const stats = await searchEngine.getStats();
            console.log(`✓ Search engine ready: ${stats.totalChunks} chunks indexed, mode: ${mode}, reranking ${stats.rerankingEnabled ? 'enabled' : 'disabled'}`);
            console.log('🎉 PolymorphicSearchFactory: Mode-aware search engine initialized successfully');
            return searchEngine;
        }
        catch (error) {
            console.error('❌ PolymorphicSearchFactory: Failed to create search engine');
            // The engine is not handed to the caller on this path, so nobody else
            // can release the connection opened above.
            await closeDatabaseQuietly(db);
            throw createFactoryCreationError('PolymorphicSearchFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'polymorphic search engine creation' });
        }
    }
}
/**
 * Resolve the embedding model and its dimensions from the global config.
 * Used when the database carries no usable system info.
 */
function resolveDefaultModelSettings() {
    const embeddingModel = config.embedding_model;
    const modelDefaults = getModelDefaults(embeddingModel);
    return { embeddingModel, modelDimensions: modelDefaults.dimensions };
}
/**
 * Release a database handle without letting a close failure mask the
 * original error.
 * NOTE(review): assumes the object returned by openDatabase exposes an async
 * `close()`; the call is guarded so a handle without it is simply skipped.
 */
async function closeDatabaseQuietly(db) {
    if (!db || typeof db.close !== 'function') {
        return;
    }
    try {
        await db.close();
    }
    catch (closeError) {
        console.warn(`⚠️ PolymorphicSearchFactory: Failed to close database after error: ${closeError instanceof Error ? closeError.message : String(closeError)}`);
    }
}
|
|
159
|
+
//# sourceMappingURL=polymorphic-factory.js.map
|
|
@@ -5,10 +5,21 @@
|
|
|
5
5
|
* FACTORY PATTERN BENEFITS:
|
|
6
6
|
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
7
|
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
-
* -
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
9
|
* - Supports different embedding models and configurations
|
|
10
10
|
* - Enables clean separation between simple usage and advanced customization
|
|
11
11
|
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
12
23
|
* USAGE PATTERNS:
|
|
13
24
|
*
|
|
14
25
|
* 1. Simple Search Setup:
|
|
@@ -43,15 +54,31 @@
|
|
|
43
54
|
* const results = await searchEngine.search('query');
|
|
44
55
|
* ```
|
|
45
56
|
*
|
|
46
|
-
* 4. Error
|
|
57
|
+
* 4. Clear Error Handling:
|
|
47
58
|
* ```typescript
|
|
48
|
-
* // Create with
|
|
49
|
-
* const search = await TextFactoryHelpers.
|
|
59
|
+
* // Create with clear validation and error reporting
|
|
60
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation(
|
|
50
61
|
* './index.bin',
|
|
51
62
|
* './db.sqlite',
|
|
52
|
-
* { enableReranking: true } //
|
|
63
|
+
* { enableReranking: true } // Clear errors if issues occur
|
|
53
64
|
* );
|
|
54
65
|
* ```
|
|
66
|
+
*
|
|
67
|
+
* 5. Mode Selection:
|
|
68
|
+
* ```typescript
|
|
69
|
+
* // Text mode (default) - optimized for text-only content
|
|
70
|
+
* const textIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
|
|
71
|
+
* mode: 'text',
|
|
72
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
73
|
+
* });
|
|
74
|
+
*
|
|
75
|
+
* // Multimodal mode - enables cross-modal search
|
|
76
|
+
* const multimodalIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
|
|
77
|
+
* mode: 'multimodal',
|
|
78
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
79
|
+
* rerankingStrategy: 'text-derived'
|
|
80
|
+
* });
|
|
81
|
+
* ```
|
|
55
82
|
*/
|
|
56
83
|
import { SearchEngine } from '../core/search.js';
|
|
57
84
|
import { IngestionPipeline } from '../core/ingestion.js';
|
|
@@ -70,6 +97,21 @@ export interface TextSearchOptions {
|
|
|
70
97
|
/** Top-k results to return (default: from config) */
|
|
71
98
|
topK?: number;
|
|
72
99
|
}
|
|
100
|
+
/**
 * Configuration options for the content system.
 * All members are optional.
 */
export interface ContentSystemConfig {
    /**
     * Content directory path.
     * Default: '.raglite/content'
     */
    contentDir?: string;
    /**
     * Maximum file size, in bytes.
     * Default: 50MB
     */
    maxFileSize?: number;
    /**
     * Maximum content directory size, in bytes.
     * Default: 2GB
     */
    maxContentDirSize?: number;
    /**
     * Whether content deduplication is enabled.
     * Default: true
     */
    enableDeduplication?: boolean;
    /**
     * Whether storage tracking is enabled.
     * Default: true
     */
    enableStorageTracking?: boolean;
}
|
|
73
115
|
/**
|
|
74
116
|
* Options for text ingestion factory
|
|
75
117
|
*/
|
|
@@ -84,17 +126,30 @@ export interface TextIngestionOptions {
|
|
|
84
126
|
chunkOverlap?: number;
|
|
85
127
|
/** Whether to force rebuild the index */
|
|
86
128
|
forceRebuild?: boolean;
|
|
129
|
+
/** Mode for the ingestion pipeline (text or multimodal) */
|
|
130
|
+
mode?: 'text' | 'multimodal';
|
|
131
|
+
/** Reranking strategy for multimodal mode */
|
|
132
|
+
rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
|
|
133
|
+
/** Content system configuration */
|
|
134
|
+
contentSystemConfig?: ContentSystemConfig;
|
|
87
135
|
}
|
|
88
136
|
/**
|
|
89
137
|
* Factory for creating text-based SearchEngine instances
|
|
90
138
|
* Handles model loading, database initialization, and index setup
|
|
91
139
|
*
|
|
92
140
|
* This factory abstracts the complex initialization process required for text search:
|
|
93
|
-
* 1.
|
|
94
|
-
* 2.
|
|
95
|
-
* 3.
|
|
96
|
-
* 4.
|
|
97
|
-
* 5.
|
|
141
|
+
* 1. Auto-detects embedding model from database configuration
|
|
142
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
143
|
+
* 3. Loads embedding models with clear error reporting
|
|
144
|
+
* 4. Optionally loads reranking models based on configuration
|
|
145
|
+
* 5. Establishes database connections and initializes schema
|
|
146
|
+
* 6. Loads vector indexes with proper model compatibility checking
|
|
147
|
+
* 7. Creates SearchEngine with proper dependency injection
|
|
148
|
+
*
|
|
149
|
+
* Mode Support:
|
|
150
|
+
* - Automatically detects mode from database (text or multimodal)
|
|
151
|
+
* - Each mode uses its optimal implementation without fallbacks
|
|
152
|
+
* - Clear validation ensures mode-model compatibility
|
|
98
153
|
*
|
|
99
154
|
* @example
|
|
100
155
|
* ```typescript
|
|
@@ -122,7 +177,7 @@ export declare class TextSearchFactory {
|
|
|
122
177
|
* This method handles the complete initialization process:
|
|
123
178
|
* - Validates that required files exist
|
|
124
179
|
* - Loads text embedding model (with lazy initialization)
|
|
125
|
-
* - Optionally loads reranking model (with
|
|
180
|
+
* - Optionally loads reranking model (with clear error reporting)
|
|
126
181
|
* - Opens database connection and initializes schema
|
|
127
182
|
* - Loads vector index with compatibility validation
|
|
128
183
|
* - Creates SearchEngine with dependency injection
|
|
@@ -187,10 +242,18 @@ export declare class TextSearchFactory {
|
|
|
187
242
|
*
|
|
188
243
|
* This factory abstracts the complex initialization process required for text ingestion:
|
|
189
244
|
* 1. Creates necessary directories if they don't exist
|
|
190
|
-
* 2.
|
|
191
|
-
* 3.
|
|
192
|
-
* 4.
|
|
193
|
-
* 5.
|
|
245
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
246
|
+
* 3. Loads and validates embedding models with clear error reporting
|
|
247
|
+
* 4. Establishes database connections and initializes schema
|
|
248
|
+
* 5. Stores mode configuration in database for automatic detection
|
|
249
|
+
* 6. Creates or loads vector indexes with proper configuration
|
|
250
|
+
* 7. Creates IngestionPipeline with proper dependency injection
|
|
251
|
+
*
|
|
252
|
+
* Mode Configuration:
|
|
253
|
+
* - Text Mode (default): Uses sentence-transformer models for text-only content
|
|
254
|
+
* - Multimodal Mode: Uses CLIP models for mixed text/image content
|
|
255
|
+
* - Mode is stored in database and auto-detected during search
|
|
256
|
+
* - Clear validation prevents mode-model mismatches
|
|
194
257
|
*
|
|
195
258
|
* @example
|
|
196
259
|
* ```typescript
|
|
@@ -232,20 +295,39 @@ export declare class TextIngestionFactory {
|
|
|
232
295
|
* @param options.chunkSize - Override chunk size (default: from config)
|
|
233
296
|
* @param options.chunkOverlap - Override chunk overlap (default: from config)
|
|
234
297
|
* @param options.forceRebuild - Force rebuild of existing index (default: false)
|
|
298
|
+
* @param options.contentSystemConfig - Content system configuration options
|
|
299
|
+
* @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
|
|
300
|
+
* @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
|
|
301
|
+
* @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
|
|
302
|
+
* @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
|
|
303
|
+
* @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
|
|
235
304
|
* @returns Promise resolving to configured IngestionPipeline
|
|
236
305
|
* @throws {Error} If initialization fails
|
|
237
306
|
*
|
|
238
307
|
* @example
|
|
239
308
|
* ```typescript
|
|
240
|
-
* // Create ingestion pipeline
|
|
309
|
+
* // Create ingestion pipeline with default content system
|
|
241
310
|
* const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin');
|
|
242
311
|
*
|
|
312
|
+
* // Create with custom content system configuration
|
|
313
|
+
* const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin', {
|
|
314
|
+
* contentSystemConfig: {
|
|
315
|
+
* contentDir: './custom-content',
|
|
316
|
+
* maxFileSize: 100 * 1024 * 1024, // 100MB
|
|
317
|
+
* maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
|
|
318
|
+
* enableDeduplication: true
|
|
319
|
+
* }
|
|
320
|
+
* });
|
|
321
|
+
*
|
|
243
322
|
* // Ingest documents from directory
|
|
244
323
|
* const result = await ingestion.ingestDirectory('./documents');
|
|
245
324
|
* console.log(`Processed ${result.documentsProcessed} documents`);
|
|
246
325
|
*
|
|
247
|
-
* // Ingest
|
|
248
|
-
* await ingestion.
|
|
326
|
+
* // Ingest content from memory (MCP integration)
|
|
327
|
+
* const contentId = await ingestion.ingestFromMemory(buffer, {
|
|
328
|
+
* displayName: 'uploaded-file.pdf',
|
|
329
|
+
* contentType: 'application/pdf'
|
|
330
|
+
* });
|
|
249
331
|
*
|
|
250
332
|
* // Clean up when done
|
|
251
333
|
* await ingestion.cleanup();
|
|
@@ -259,6 +341,22 @@ export declare class TextIngestionFactory {
|
|
|
259
341
|
* @returns Promise resolving to configured IngestionPipeline
|
|
260
342
|
*/
|
|
261
343
|
static createWithDefaults(options?: TextIngestionOptions): Promise<IngestionPipeline>;
|
|
344
|
+
/**
|
|
345
|
+
* Handles mode storage during ingestion
|
|
346
|
+
* Creates or validates system info based on the provided mode and options
|
|
347
|
+
* @private
|
|
348
|
+
*/
|
|
349
|
+
private static handleModeStorage;
|
|
350
|
+
/**
|
|
351
|
+
* Updates system info in the database
|
|
352
|
+
* @private
|
|
353
|
+
*/
|
|
354
|
+
private static updateSystemInfo;
|
|
355
|
+
/**
|
|
356
|
+
* Validates and prepares content system configuration
|
|
357
|
+
* @private
|
|
358
|
+
*/
|
|
359
|
+
private static validateAndPrepareContentSystemConfig;
|
|
262
360
|
}
|
|
263
361
|
/**
|
|
264
362
|
* Convenience factory to create both search and ingestion instances
|
|
@@ -365,9 +463,9 @@ export declare class TextRAGFactory {
|
|
|
365
463
|
* const { searchOptions, ingestionOptions } = TextFactoryHelpers.getRecommendedConfig('quality');
|
|
366
464
|
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', searchOptions);
|
|
367
465
|
*
|
|
368
|
-
* // Create with
|
|
369
|
-
* const search = await TextFactoryHelpers.
|
|
370
|
-
* enableReranking: true // Will
|
|
466
|
+
* // Create with clear validation and error reporting
|
|
467
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation('./index.bin', './db.sqlite', {
|
|
468
|
+
* enableReranking: true // Will fail clearly if reranking has issues
|
|
371
469
|
* });
|
|
372
470
|
* ```
|
|
373
471
|
*/
|
|
@@ -433,34 +531,30 @@ export declare class TextFactoryHelpers {
|
|
|
433
531
|
ingestionOptions: TextIngestionOptions;
|
|
434
532
|
};
|
|
435
533
|
/**
|
|
436
|
-
* Create a search engine with
|
|
534
|
+
* Create a search engine with clear error reporting
|
|
437
535
|
*
|
|
438
|
-
* This method
|
|
439
|
-
*
|
|
440
|
-
* disabling reranking, which is a common source of initialization failures).
|
|
441
|
-
* This provides a more robust way to create search engines in environments
|
|
442
|
-
* where reranking models might not be available or might fail to load.
|
|
536
|
+
* This method creates a search engine with the provided options and fails
|
|
537
|
+
* clearly if there are any issues, providing actionable error messages.
|
|
443
538
|
*
|
|
444
539
|
* @param indexPath - Path to vector index file
|
|
445
540
|
* @param dbPath - Path to database file
|
|
446
|
-
* @param options -
|
|
447
|
-
* @returns Promise resolving to SearchEngine
|
|
448
|
-
* @throws {Error} If
|
|
541
|
+
* @param options - Configuration options
|
|
542
|
+
* @returns Promise resolving to SearchEngine
|
|
543
|
+
* @throws {Error} If creation fails with clear error message
|
|
449
544
|
*
|
|
450
545
|
* @example
|
|
451
546
|
* ```typescript
|
|
452
|
-
* //
|
|
453
|
-
* const search = await TextFactoryHelpers.
|
|
547
|
+
* // Create search engine with clear error handling
|
|
548
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation(
|
|
454
549
|
* './index.bin',
|
|
455
550
|
* './db.sqlite',
|
|
456
551
|
* { enableReranking: true, topK: 20 }
|
|
457
552
|
* );
|
|
458
553
|
*
|
|
459
|
-
* // The search engine will work even if reranking model fails to load
|
|
460
554
|
* const results = await search.search('query');
|
|
461
555
|
* console.log(`Search created successfully with ${results.length} results`);
|
|
462
556
|
* ```
|
|
463
557
|
*/
|
|
464
|
-
static
|
|
558
|
+
static createSearchWithValidation(indexPath: string, dbPath: string, options?: TextSearchOptions): Promise<SearchEngine>;
|
|
465
559
|
}
|
|
466
560
|
//# sourceMappingURL=text-factory.d.ts.map
|