rag-lite-ts 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +606 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.js +7 -8
- package/dist/factories/index.d.ts +1 -1
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +9 -0
- package/dist/index.js +11 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +26 -0
- package/dist/search.js +54 -1
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +15 -3
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -5,10 +5,21 @@
|
|
|
5
5
|
* FACTORY PATTERN BENEFITS:
|
|
6
6
|
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
7
|
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
-
* -
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
9
|
* - Supports different embedding models and configurations
|
|
10
10
|
* - Enables clean separation between simple usage and advanced customization
|
|
11
11
|
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
12
23
|
* USAGE PATTERNS:
|
|
13
24
|
*
|
|
14
25
|
* 1. Simple Search Setup:
|
|
@@ -43,15 +54,31 @@
|
|
|
43
54
|
* const results = await searchEngine.search('query');
|
|
44
55
|
* ```
|
|
45
56
|
*
|
|
46
|
-
* 4. Error
|
|
57
|
+
* 4. Clear Error Handling:
|
|
47
58
|
* ```typescript
|
|
48
|
-
* // Create with
|
|
49
|
-
* const search = await TextFactoryHelpers.
|
|
59
|
+
* // Create with clear validation and error reporting
|
|
60
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation(
|
|
50
61
|
* './index.bin',
|
|
51
62
|
* './db.sqlite',
|
|
52
|
-
* { enableReranking: true } //
|
|
63
|
+
* { enableReranking: true } // Clear errors if issues occur
|
|
53
64
|
* );
|
|
54
65
|
* ```
|
|
66
|
+
*
|
|
67
|
+
* 5. Mode Selection:
|
|
68
|
+
* ```typescript
|
|
69
|
+
* // Text mode (default) - optimized for text-only content
|
|
70
|
+
* const textIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
|
|
71
|
+
* mode: 'text',
|
|
72
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
73
|
+
* });
|
|
74
|
+
*
|
|
75
|
+
* // Multimodal mode - enables cross-modal search
|
|
76
|
+
* const multimodalIngestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
|
|
77
|
+
* mode: 'multimodal',
|
|
78
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
79
|
+
* rerankingStrategy: 'text-derived'
|
|
80
|
+
* });
|
|
81
|
+
* ```
|
|
55
82
|
*/
|
|
56
83
|
import { SearchEngine } from '../core/search.js';
|
|
57
84
|
import { IngestionPipeline } from '../core/ingestion.js';
|
|
@@ -63,16 +90,26 @@ import { config, getModelDefaults } from '../core/config.js';
|
|
|
63
90
|
import { existsSync } from 'fs';
|
|
64
91
|
import { dirname } from 'path';
|
|
65
92
|
import { mkdirSync } from 'fs';
|
|
93
|
+
import { ContentManager } from '../core/content-manager.js';
|
|
94
|
+
import { validateModeModelCompatibilityOrThrow } from '../core/mode-model-validator.js';
|
|
95
|
+
import { createMissingFileError, createInvalidPathError, createFactoryCreationError, createModeMismatchError } from '../core/actionable-error-messages.js';
|
|
66
96
|
/**
|
|
67
97
|
* Factory for creating text-based SearchEngine instances
|
|
68
98
|
* Handles model loading, database initialization, and index setup
|
|
69
99
|
*
|
|
70
100
|
* This factory abstracts the complex initialization process required for text search:
|
|
71
|
-
* 1.
|
|
72
|
-
* 2.
|
|
73
|
-
* 3.
|
|
74
|
-
* 4.
|
|
75
|
-
* 5.
|
|
101
|
+
* 1. Auto-detects embedding model from database configuration
|
|
102
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
103
|
+
* 3. Loads embedding models with clear error reporting
|
|
104
|
+
* 4. Optionally loads reranking models based on configuration
|
|
105
|
+
* 5. Establishes database connections and initializes schema
|
|
106
|
+
* 6. Loads vector indexes with proper model compatibility checking
|
|
107
|
+
* 7. Creates SearchEngine with proper dependency injection
|
|
108
|
+
*
|
|
109
|
+
* Mode Support:
|
|
110
|
+
* - Automatically detects mode from database (text or multimodal)
|
|
111
|
+
* - Each mode uses its optimal implementation without fallbacks
|
|
112
|
+
* - Clear validation ensures mode-model compatibility
|
|
76
113
|
*
|
|
77
114
|
* @example
|
|
78
115
|
* ```typescript
|
|
@@ -100,7 +137,7 @@ export class TextSearchFactory {
|
|
|
100
137
|
* This method handles the complete initialization process:
|
|
101
138
|
* - Validates that required files exist
|
|
102
139
|
* - Loads text embedding model (with lazy initialization)
|
|
103
|
-
* - Optionally loads reranking model (with
|
|
140
|
+
* - Optionally loads reranking model (with clear error reporting)
|
|
104
141
|
* - Opens database connection and initializes schema
|
|
105
142
|
* - Loads vector index with compatibility validation
|
|
106
143
|
* - Creates SearchEngine with dependency injection
|
|
@@ -135,18 +172,21 @@ export class TextSearchFactory {
|
|
|
135
172
|
console.log('🏭 TextSearchFactory: Initializing text search engine...');
|
|
136
173
|
// Validate input paths
|
|
137
174
|
if (!indexPath || !dbPath) {
|
|
138
|
-
throw
|
|
175
|
+
throw createInvalidPathError([
|
|
176
|
+
{ name: 'indexPath', value: indexPath },
|
|
177
|
+
{ name: 'dbPath', value: dbPath }
|
|
178
|
+
], { operationContext: 'TextSearchFactory.create' });
|
|
139
179
|
}
|
|
140
180
|
// Check if required files exist
|
|
141
181
|
if (!existsSync(indexPath)) {
|
|
142
|
-
throw
|
|
143
|
-
'
|
|
144
|
-
|
|
182
|
+
throw createMissingFileError(indexPath, 'index', {
|
|
183
|
+
operationContext: 'TextSearchFactory.create'
|
|
184
|
+
});
|
|
145
185
|
}
|
|
146
186
|
if (!existsSync(dbPath)) {
|
|
147
|
-
throw
|
|
148
|
-
'
|
|
149
|
-
|
|
187
|
+
throw createMissingFileError(dbPath, 'database', {
|
|
188
|
+
operationContext: 'TextSearchFactory.create'
|
|
189
|
+
});
|
|
150
190
|
}
|
|
151
191
|
// Step 1: Auto-detect embedding model from database
|
|
152
192
|
let embeddingModel = options.embeddingModel;
|
|
@@ -180,6 +220,10 @@ export class TextSearchFactory {
|
|
|
180
220
|
modelDimensions = modelDefaults.dimensions;
|
|
181
221
|
console.log(`📊 Using specified embedding model: ${embeddingModel} (${modelDimensions} dimensions)`);
|
|
182
222
|
}
|
|
223
|
+
// Step 1.5: Validate mode-model compatibility at creation time
|
|
224
|
+
console.log('🔍 Validating mode-model compatibility...');
|
|
225
|
+
validateModeModelCompatibilityOrThrow('text', embeddingModel);
|
|
226
|
+
console.log('✓ Mode-model compatibility validated');
|
|
183
227
|
// Step 2: Initialize embedding function
|
|
184
228
|
console.log('📊 Loading text embedding model...');
|
|
185
229
|
const embedFn = createTextEmbedFunction(embeddingModel, options.batchSize);
|
|
@@ -188,17 +232,11 @@ export class TextSearchFactory {
|
|
|
188
232
|
// Step 3: Initialize reranking function (optional)
|
|
189
233
|
let rerankFn;
|
|
190
234
|
if (options.enableReranking === true) { // Default to disabled for local-first, fast RAG-lite
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
console.log('✓ Text reranking model loaded successfully');
|
|
197
|
-
}
|
|
198
|
-
catch (error) {
|
|
199
|
-
console.warn(`Failed to load reranking model, continuing without reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
200
|
-
rerankFn = undefined;
|
|
201
|
-
}
|
|
235
|
+
console.log('🔄 Loading text reranking model...');
|
|
236
|
+
rerankFn = createTextRerankFunction(options.rerankingModel);
|
|
237
|
+
// Test reranking function - fail clearly if there are issues
|
|
238
|
+
await rerankFn('test query', []);
|
|
239
|
+
console.log('✓ Text reranking model loaded successfully');
|
|
202
240
|
}
|
|
203
241
|
else {
|
|
204
242
|
console.log('🔄 Reranking disabled by default (local-first, fast mode)');
|
|
@@ -215,9 +253,14 @@ export class TextSearchFactory {
|
|
|
215
253
|
const indexManager = new IndexManager(indexPath, dbPath, modelDimensions, embeddingModel);
|
|
216
254
|
await indexManager.initialize();
|
|
217
255
|
console.log('✓ Vector index loaded successfully');
|
|
218
|
-
// Step 7: Create
|
|
219
|
-
|
|
220
|
-
|
|
256
|
+
// Step 7: Create ContentResolver for unified content system
|
|
257
|
+
console.log('📁 Initializing content resolver...');
|
|
258
|
+
const { ContentResolver } = await import('../core/content-resolver.js');
|
|
259
|
+
const contentResolver = new ContentResolver(db);
|
|
260
|
+
console.log('✓ Content resolver ready');
|
|
261
|
+
// Step 8: Create SearchEngine with dependency injection
|
|
262
|
+
const searchEngine = new SearchEngine(embedFn, indexManager, db, rerankFn, contentResolver);
|
|
263
|
+
// Step 9: Validate the setup
|
|
221
264
|
const stats = await searchEngine.getStats();
|
|
222
265
|
console.log(`✓ Search engine ready: ${stats.totalChunks} chunks indexed, reranking ${stats.rerankingEnabled ? 'enabled' : 'disabled'}`);
|
|
223
266
|
console.log('🎉 TextSearchFactory: Search engine initialized successfully');
|
|
@@ -225,7 +268,7 @@ export class TextSearchFactory {
|
|
|
225
268
|
}
|
|
226
269
|
catch (error) {
|
|
227
270
|
console.error('❌ TextSearchFactory: Failed to create search engine');
|
|
228
|
-
throw
|
|
271
|
+
throw createFactoryCreationError('TextSearchFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'search engine creation' });
|
|
229
272
|
}
|
|
230
273
|
}
|
|
231
274
|
/**
|
|
@@ -266,10 +309,18 @@ export class TextSearchFactory {
|
|
|
266
309
|
*
|
|
267
310
|
* This factory abstracts the complex initialization process required for text ingestion:
|
|
268
311
|
* 1. Creates necessary directories if they don't exist
|
|
269
|
-
* 2.
|
|
270
|
-
* 3.
|
|
271
|
-
* 4.
|
|
272
|
-
* 5.
|
|
312
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
313
|
+
* 3. Loads and validates embedding models with clear error reporting
|
|
314
|
+
* 4. Establishes database connections and initializes schema
|
|
315
|
+
* 5. Stores mode configuration in database for automatic detection
|
|
316
|
+
* 6. Creates or loads vector indexes with proper configuration
|
|
317
|
+
* 7. Creates IngestionPipeline with proper dependency injection
|
|
318
|
+
*
|
|
319
|
+
* Mode Configuration:
|
|
320
|
+
* - Text Mode (default): Uses sentence-transformer models for text-only content
|
|
321
|
+
* - Multimodal Mode: Uses CLIP models for mixed text/image content
|
|
322
|
+
* - Mode is stored in database and auto-detected during search
|
|
323
|
+
* - Clear validation prevents mode-model mismatches
|
|
273
324
|
*
|
|
274
325
|
* @example
|
|
275
326
|
* ```typescript
|
|
@@ -311,20 +362,39 @@ export class TextIngestionFactory {
|
|
|
311
362
|
* @param options.chunkSize - Override chunk size (default: from config)
|
|
312
363
|
* @param options.chunkOverlap - Override chunk overlap (default: from config)
|
|
313
364
|
* @param options.forceRebuild - Force rebuild of existing index (default: false)
|
|
365
|
+
* @param options.contentSystemConfig - Content system configuration options
|
|
366
|
+
* @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
|
|
367
|
+
* @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
|
|
368
|
+
* @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
|
|
369
|
+
* @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
|
|
370
|
+
* @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
|
|
314
371
|
* @returns Promise resolving to configured IngestionPipeline
|
|
315
372
|
* @throws {Error} If initialization fails
|
|
316
373
|
*
|
|
317
374
|
* @example
|
|
318
375
|
* ```typescript
|
|
319
|
-
* // Create ingestion pipeline
|
|
376
|
+
* // Create ingestion pipeline with default content system
|
|
320
377
|
* const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin');
|
|
321
378
|
*
|
|
379
|
+
* // Create with custom content system configuration
|
|
380
|
+
* const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin', {
|
|
381
|
+
* contentSystemConfig: {
|
|
382
|
+
* contentDir: './custom-content',
|
|
383
|
+
* maxFileSize: 100 * 1024 * 1024, // 100MB
|
|
384
|
+
* maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
|
|
385
|
+
* enableDeduplication: true
|
|
386
|
+
* }
|
|
387
|
+
* });
|
|
388
|
+
*
|
|
322
389
|
* // Ingest documents from directory
|
|
323
390
|
* const result = await ingestion.ingestDirectory('./documents');
|
|
324
391
|
* console.log(`Processed ${result.documentsProcessed} documents`);
|
|
325
392
|
*
|
|
326
|
-
* // Ingest
|
|
327
|
-
* await ingestion.
|
|
393
|
+
* // Ingest content from memory (MCP integration)
|
|
394
|
+
* const contentId = await ingestion.ingestFromMemory(buffer, {
|
|
395
|
+
* displayName: 'uploaded-file.pdf',
|
|
396
|
+
* contentType: 'application/pdf'
|
|
397
|
+
* });
|
|
328
398
|
*
|
|
329
399
|
* // Clean up when done
|
|
330
400
|
* await ingestion.cleanup();
|
|
@@ -335,7 +405,10 @@ export class TextIngestionFactory {
|
|
|
335
405
|
console.log('🏭 TextIngestionFactory: Initializing text ingestion pipeline...');
|
|
336
406
|
// Validate input paths
|
|
337
407
|
if (!dbPath || !indexPath) {
|
|
338
|
-
throw
|
|
408
|
+
throw createInvalidPathError([
|
|
409
|
+
{ name: 'dbPath', value: dbPath },
|
|
410
|
+
{ name: 'indexPath', value: indexPath }
|
|
411
|
+
], { operationContext: 'TextIngestionFactory.create' });
|
|
339
412
|
}
|
|
340
413
|
// Ensure directories exist
|
|
341
414
|
const dbDir = dirname(dbPath);
|
|
@@ -353,12 +426,35 @@ export class TextIngestionFactory {
|
|
|
353
426
|
const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
|
|
354
427
|
const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
|
|
355
428
|
const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
|
|
356
|
-
// Step
|
|
357
|
-
|
|
358
|
-
const
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
console.log('✓
|
|
429
|
+
// Step 1.5: Validate mode-model compatibility at creation time
|
|
430
|
+
const effectiveMode = options.mode || 'text';
|
|
431
|
+
const effectiveModel = options.embeddingModel || config.embedding_model;
|
|
432
|
+
console.log('🔍 Validating mode-model compatibility...');
|
|
433
|
+
validateModeModelCompatibilityOrThrow(effectiveMode, effectiveModel);
|
|
434
|
+
console.log('✓ Mode-model compatibility validated');
|
|
435
|
+
// Step 2: Initialize embedding function based on mode
|
|
436
|
+
let embedFn;
|
|
437
|
+
if (effectiveMode === 'multimodal') {
|
|
438
|
+
console.log('📊 Loading CLIP embedding model for multimodal mode...');
|
|
439
|
+
const { createEmbedder } = await import('../core/embedder-factory.js');
|
|
440
|
+
const clipEmbedder = await createEmbedder(effectiveModel);
|
|
441
|
+
// Wrap CLIP embedder to match EmbedFunction signature
|
|
442
|
+
embedFn = async (content, contentType) => {
|
|
443
|
+
if (contentType === 'image') {
|
|
444
|
+
// Use CLIP image embedding for image content
|
|
445
|
+
return await clipEmbedder.embedImage(content);
|
|
446
|
+
}
|
|
447
|
+
// Use CLIP text embedding for text content
|
|
448
|
+
return await clipEmbedder.embedText(content);
|
|
449
|
+
};
|
|
450
|
+
console.log('✓ CLIP embedder created for multimodal mode');
|
|
451
|
+
}
|
|
452
|
+
else {
|
|
453
|
+
// Text mode: use sentence-transformer embedder (existing behavior)
|
|
454
|
+
console.log('📊 Loading text embedding model...');
|
|
455
|
+
embedFn = createTextEmbedFunction(options.embeddingModel, effectiveBatchSize);
|
|
456
|
+
console.log('✓ Text embedding function created successfully');
|
|
457
|
+
}
|
|
362
458
|
// Step 3: Initialize database connection
|
|
363
459
|
console.log('💾 Opening database connection...');
|
|
364
460
|
const db = await openDatabase(dbPath);
|
|
@@ -366,13 +462,17 @@ export class TextIngestionFactory {
|
|
|
366
462
|
const { initializeSchema } = await import('../core/db.js');
|
|
367
463
|
await initializeSchema(db);
|
|
368
464
|
console.log('✓ Database connection established');
|
|
465
|
+
// Step 3.1: Handle mode storage during ingestion
|
|
466
|
+
await this.handleModeStorage(db, options, modelDefaults);
|
|
369
467
|
// Step 4: Initialize index manager
|
|
370
468
|
console.log('📇 Initializing vector index...');
|
|
371
469
|
const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, options.embeddingModel || config.embedding_model);
|
|
372
470
|
// Check if we need to force recreation due to model change
|
|
373
471
|
let forceRecreate = false;
|
|
374
472
|
if (options.forceRebuild && existsSync(indexPath) && existsSync(dbPath)) {
|
|
375
|
-
//
|
|
473
|
+
// When forceRebuild is true, always force recreation to handle any model/dimension mismatches
|
|
474
|
+
forceRecreate = true;
|
|
475
|
+
// Check if model has changed during rebuild for logging purposes
|
|
376
476
|
const { getStoredModelInfo } = await import('../core/db.js');
|
|
377
477
|
const tempDb = await openDatabase(dbPath);
|
|
378
478
|
try {
|
|
@@ -381,7 +481,9 @@ export class TextIngestionFactory {
|
|
|
381
481
|
if (storedModel && storedModel.modelName !== currentModel) {
|
|
382
482
|
console.log(`🔄 Model change detected: ${storedModel.modelName} → ${currentModel}`);
|
|
383
483
|
console.log(`🔄 Dimensions change: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
|
|
384
|
-
|
|
484
|
+
}
|
|
485
|
+
else if (storedModel && storedModel.dimensions !== modelDefaults.dimensions) {
|
|
486
|
+
console.log(`🔄 Dimension mismatch detected: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
|
|
385
487
|
}
|
|
386
488
|
}
|
|
387
489
|
finally {
|
|
@@ -411,18 +513,30 @@ export class TextIngestionFactory {
|
|
|
411
513
|
await indexManager.initialize();
|
|
412
514
|
}
|
|
413
515
|
console.log('✓ Vector index ready');
|
|
414
|
-
// Step
|
|
516
|
+
// Step 5: Create ContentManager for unified content system
|
|
517
|
+
console.log('📁 Initializing content management system...');
|
|
518
|
+
const contentSystemConfig = await this.validateAndPrepareContentSystemConfig(options.contentSystemConfig);
|
|
519
|
+
const contentManager = new ContentManager(db, contentSystemConfig);
|
|
520
|
+
console.log('✓ Content management system ready');
|
|
521
|
+
// Step 6: Create IngestionPipeline with dependency injection and chunk configuration
|
|
415
522
|
const chunkConfig = {
|
|
416
523
|
chunkSize: effectiveChunkSize,
|
|
417
524
|
chunkOverlap: effectiveChunkOverlap
|
|
418
525
|
};
|
|
419
|
-
const ingestionPipeline = new IngestionPipeline(embedFn, indexManager, db, chunkConfig);
|
|
526
|
+
const ingestionPipeline = new IngestionPipeline(embedFn, indexManager, db, chunkConfig, contentManager);
|
|
420
527
|
console.log('🎉 TextIngestionFactory: Ingestion pipeline initialized successfully');
|
|
421
528
|
return ingestionPipeline;
|
|
422
529
|
}
|
|
423
530
|
catch (error) {
|
|
424
531
|
console.error('❌ TextIngestionFactory: Failed to create ingestion pipeline');
|
|
425
|
-
|
|
532
|
+
// Preserve custom error messages for model mismatch and mode mismatch
|
|
533
|
+
if (error instanceof Error && (error.message.includes('Model mismatch') ||
|
|
534
|
+
error.message.includes('Mode mismatch') ||
|
|
535
|
+
error.message.includes('--force-rebuild') ||
|
|
536
|
+
error.message.includes('--rebuild-if-needed'))) {
|
|
537
|
+
throw error; // Re-throw custom validation errors as-is
|
|
538
|
+
}
|
|
539
|
+
throw createFactoryCreationError('TextIngestionFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'ingestion pipeline creation' });
|
|
426
540
|
}
|
|
427
541
|
}
|
|
428
542
|
/**
|
|
@@ -436,6 +550,164 @@ export class TextIngestionFactory {
|
|
|
436
550
|
const indexPath = config.index_file || './index.bin';
|
|
437
551
|
return this.create(dbPath, indexPath, options);
|
|
438
552
|
}
|
|
553
|
+
/**
|
|
554
|
+
* Handles mode storage during ingestion
|
|
555
|
+
* Creates or validates system info based on the provided mode and options
|
|
556
|
+
* @private
|
|
557
|
+
*/
|
|
558
|
+
static async handleModeStorage(db, options, modelDefaults) {
|
|
559
|
+
const { getSystemInfo, setSystemInfo } = await import('../core/db.js');
|
|
560
|
+
// Determine the effective mode and model
|
|
561
|
+
const effectiveMode = options.mode || 'text';
|
|
562
|
+
const effectiveModel = options.embeddingModel || config.embedding_model;
|
|
563
|
+
const effectiveRerankingStrategy = options.rerankingStrategy || 'cross-encoder';
|
|
564
|
+
// Determine model type based on model name
|
|
565
|
+
let modelType;
|
|
566
|
+
if (effectiveModel.includes('clip')) {
|
|
567
|
+
modelType = 'clip';
|
|
568
|
+
}
|
|
569
|
+
else {
|
|
570
|
+
modelType = 'sentence-transformer';
|
|
571
|
+
}
|
|
572
|
+
// Determine supported content types based on mode
|
|
573
|
+
const supportedContentTypes = effectiveMode === 'multimodal' ? ['text', 'image'] : ['text'];
|
|
574
|
+
try {
|
|
575
|
+
// Check if system info already exists
|
|
576
|
+
const existingSystemInfo = await getSystemInfo(db);
|
|
577
|
+
if (existingSystemInfo) {
|
|
578
|
+
// Validate mode consistency for subsequent ingestions
|
|
579
|
+
if (existingSystemInfo.mode !== effectiveMode) {
|
|
580
|
+
console.warn(`⚠️ Mode mismatch detected!`);
|
|
581
|
+
console.warn(` Database mode: ${existingSystemInfo.mode}`);
|
|
582
|
+
console.warn(` Requested mode: ${effectiveMode}`);
|
|
583
|
+
if (options.forceRebuild) {
|
|
584
|
+
console.log('🔄 Force rebuild enabled, updating mode configuration...');
|
|
585
|
+
await this.updateSystemInfo(db, effectiveMode, effectiveModel, modelType, modelDefaults, effectiveRerankingStrategy, supportedContentTypes);
|
|
586
|
+
}
|
|
587
|
+
else {
|
|
588
|
+
throw createModeMismatchError(existingSystemInfo.mode, effectiveMode, { operationContext: 'TextIngestionFactory.create' });
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
else if (existingSystemInfo.modelName !== effectiveModel) {
|
|
592
|
+
// Model change within the same mode
|
|
593
|
+
console.log(`🔄 Model change detected: ${existingSystemInfo.modelName} → ${effectiveModel}`);
|
|
594
|
+
if (options.forceRebuild) {
|
|
595
|
+
console.log('🔄 Force rebuild enabled, updating model configuration...');
|
|
596
|
+
await this.updateSystemInfo(db, effectiveMode, effectiveModel, modelType, modelDefaults, effectiveRerankingStrategy, supportedContentTypes);
|
|
597
|
+
}
|
|
598
|
+
else {
|
|
599
|
+
// Create a specific error message for model mismatch with rebuild suggestions
|
|
600
|
+
const errorMessage = [
|
|
601
|
+
`❌ Model mismatch: Database is configured for '${existingSystemInfo.modelName}', but '${effectiveModel}' was requested.`,
|
|
602
|
+
'',
|
|
603
|
+
'🛠️ How to fix this:',
|
|
604
|
+
' 1. Use --force-rebuild to change models:',
|
|
605
|
+
' raglite ingest <path> --model ' + effectiveModel + ' --force-rebuild',
|
|
606
|
+
'',
|
|
607
|
+
' 2. Or use --rebuild-if-needed for automatic handling:',
|
|
608
|
+
' raglite ingest <path> --model ' + effectiveModel + ' --rebuild-if-needed',
|
|
609
|
+
'',
|
|
610
|
+
' 3. Or continue using the existing model:',
|
|
611
|
+
' raglite ingest <path> # Uses ' + existingSystemInfo.modelName,
|
|
612
|
+
'',
|
|
613
|
+
'🔍 Model switching requires rebuilding the vector index because different models',
|
|
614
|
+
' produce embeddings with different dimensions and characteristics.'
|
|
615
|
+
].join('\n');
|
|
616
|
+
throw new Error(errorMessage);
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
else {
|
|
620
|
+
console.log(`✅ Mode consistency validated: ${effectiveMode} mode with ${effectiveModel}`);
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
else {
|
|
624
|
+
// First ingestion - create system info
|
|
625
|
+
console.log(`🔧 First ingestion detected, storing system configuration...`);
|
|
626
|
+
console.log(` Mode: ${effectiveMode}`);
|
|
627
|
+
console.log(` Model: ${effectiveModel} (${modelType})`);
|
|
628
|
+
console.log(` Dimensions: ${modelDefaults.dimensions}`);
|
|
629
|
+
console.log(` Reranking: ${effectiveRerankingStrategy}`);
|
|
630
|
+
console.log(` Content types: ${supportedContentTypes.join(', ')}`);
|
|
631
|
+
await this.updateSystemInfo(db, effectiveMode, effectiveModel, modelType, modelDefaults, effectiveRerankingStrategy, supportedContentTypes);
|
|
632
|
+
console.log('✅ System configuration stored successfully');
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
catch (error) {
|
|
636
|
+
if (error instanceof Error && (error.message.includes('Mode mismatch') || error.message.includes('Model mismatch'))) {
|
|
637
|
+
throw error; // Re-throw validation errors with custom messages
|
|
638
|
+
}
|
|
639
|
+
console.error('❌ Failed to handle mode storage:', error);
|
|
640
|
+
throw new Error(`Mode storage failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
/**
|
|
644
|
+
* Updates system info in the database
|
|
645
|
+
* @private
|
|
646
|
+
*/
|
|
647
|
+
static async updateSystemInfo(db, mode, modelName, modelType, modelDefaults, rerankingStrategy, supportedContentTypes) {
|
|
648
|
+
const { setSystemInfo } = await import('../core/db.js');
|
|
649
|
+
await setSystemInfo(db, {
|
|
650
|
+
mode,
|
|
651
|
+
modelName,
|
|
652
|
+
modelType,
|
|
653
|
+
modelDimensions: modelDefaults.dimensions,
|
|
654
|
+
modelVersion: '1.0.0', // TODO: Get actual version from model
|
|
655
|
+
supportedContentTypes,
|
|
656
|
+
rerankingStrategy: rerankingStrategy,
|
|
657
|
+
rerankingModel: undefined,
|
|
658
|
+
rerankingConfig: undefined
|
|
659
|
+
});
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* Validates and prepares content system configuration
|
|
663
|
+
* @private
|
|
664
|
+
*/
|
|
665
|
+
static async validateAndPrepareContentSystemConfig(userConfig) {
|
|
666
|
+
// Default configuration
|
|
667
|
+
const defaultConfig = {
|
|
668
|
+
contentDir: '.raglite/content',
|
|
669
|
+
maxFileSize: 50 * 1024 * 1024, // 50MB
|
|
670
|
+
maxContentDirSize: 2 * 1024 * 1024 * 1024, // 2GB
|
|
671
|
+
enableDeduplication: true,
|
|
672
|
+
enableStorageTracking: true
|
|
673
|
+
};
|
|
674
|
+
// Merge with user configuration
|
|
675
|
+
const config = { ...defaultConfig, ...userConfig };
|
|
676
|
+
// Validate content directory path
|
|
677
|
+
if (!config.contentDir || typeof config.contentDir !== 'string') {
|
|
678
|
+
throw new Error('Content directory path must be a non-empty string');
|
|
679
|
+
}
|
|
680
|
+
// Validate file size limits
|
|
681
|
+
if (config.maxFileSize && (typeof config.maxFileSize !== 'number' || config.maxFileSize <= 0)) {
|
|
682
|
+
throw new Error('Maximum file size must be a positive number');
|
|
683
|
+
}
|
|
684
|
+
if (config.maxContentDirSize && (typeof config.maxContentDirSize !== 'number' || config.maxContentDirSize <= 0)) {
|
|
685
|
+
throw new Error('Maximum content directory size must be a positive number');
|
|
686
|
+
}
|
|
687
|
+
// Validate that maxFileSize is not larger than maxContentDirSize
|
|
688
|
+
if (config.maxFileSize && config.maxContentDirSize && config.maxFileSize > config.maxContentDirSize) {
|
|
689
|
+
throw new Error('Maximum file size cannot be larger than maximum content directory size');
|
|
690
|
+
}
|
|
691
|
+
// Validate boolean options
|
|
692
|
+
if (config.enableDeduplication !== undefined && typeof config.enableDeduplication !== 'boolean') {
|
|
693
|
+
throw new Error('enableDeduplication must be a boolean value');
|
|
694
|
+
}
|
|
695
|
+
if (config.enableStorageTracking !== undefined && typeof config.enableStorageTracking !== 'boolean') {
|
|
696
|
+
throw new Error('enableStorageTracking must be a boolean value');
|
|
697
|
+
}
|
|
698
|
+
// Create content directory if it doesn't exist
|
|
699
|
+
try {
|
|
700
|
+
const { promises: fs } = await import('fs');
|
|
701
|
+
await fs.mkdir(config.contentDir, { recursive: true });
|
|
702
|
+
// Verify directory is writable
|
|
703
|
+
await fs.access(config.contentDir, (await import('fs')).constants.W_OK);
|
|
704
|
+
console.log(`✓ Content directory validated: ${config.contentDir}`);
|
|
705
|
+
}
|
|
706
|
+
catch (error) {
|
|
707
|
+
throw new Error(`Failed to create or access content directory '${config.contentDir}': ${error instanceof Error ? error.message : 'Unknown error'}. Please check permissions and path validity.`);
|
|
708
|
+
}
|
|
709
|
+
return config;
|
|
710
|
+
}
|
|
439
711
|
}
|
|
440
712
|
/**
|
|
441
713
|
* Convenience factory to create both search and ingestion instances
|
|
@@ -548,9 +820,9 @@ export class TextRAGFactory {
|
|
|
548
820
|
* const { searchOptions, ingestionOptions } = TextFactoryHelpers.getRecommendedConfig('quality');
|
|
549
821
|
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', searchOptions);
|
|
550
822
|
*
|
|
551
|
-
* // Create with
|
|
552
|
-
* const search = await TextFactoryHelpers.
|
|
553
|
-
* enableReranking: true // Will
|
|
823
|
+
* // Create with clear validation and error reporting
|
|
824
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation('./index.bin', './db.sqlite', {
|
|
825
|
+
* enableReranking: true // Will fail clearly if reranking has issues
|
|
554
826
|
* });
|
|
555
827
|
* ```
|
|
556
828
|
*/
|
|
@@ -581,16 +853,14 @@ export class TextFactoryHelpers {
|
|
|
581
853
|
*/
|
|
582
854
|
static validateSearchFiles(indexPath, dbPath) {
|
|
583
855
|
if (!existsSync(indexPath)) {
|
|
584
|
-
throw
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
'Or check if the path is correct.');
|
|
856
|
+
throw createMissingFileError(indexPath, 'index', {
|
|
857
|
+
operationContext: 'search file validation'
|
|
858
|
+
});
|
|
588
859
|
}
|
|
589
860
|
if (!existsSync(dbPath)) {
|
|
590
|
-
throw
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
'Or check if the path is correct.');
|
|
861
|
+
throw createMissingFileError(dbPath, 'database', {
|
|
862
|
+
operationContext: 'search file validation'
|
|
863
|
+
});
|
|
594
864
|
}
|
|
595
865
|
}
|
|
596
866
|
/**
|
|
@@ -664,56 +934,35 @@ export class TextFactoryHelpers {
|
|
|
664
934
|
}
|
|
665
935
|
}
|
|
666
936
|
/**
|
|
667
|
-
* Create a search engine with
|
|
937
|
+
* Create a search engine with clear error reporting
|
|
668
938
|
*
|
|
669
|
-
* This method
|
|
670
|
-
*
|
|
671
|
-
* disabling reranking, which is a common source of initialization failures).
|
|
672
|
-
* This provides a more robust way to create search engines in environments
|
|
673
|
-
* where reranking models might not be available or might fail to load.
|
|
939
|
+
* This method creates a search engine with the provided options and fails
|
|
940
|
+
* clearly if there are any issues, providing actionable error messages.
|
|
674
941
|
*
|
|
675
942
|
* @param indexPath - Path to vector index file
|
|
676
943
|
* @param dbPath - Path to database file
|
|
677
|
-
* @param options -
|
|
678
|
-
* @returns Promise resolving to SearchEngine
|
|
679
|
-
* @throws {Error} If
|
|
944
|
+
* @param options - Configuration options
|
|
945
|
+
* @returns Promise resolving to SearchEngine
|
|
946
|
+
* @throws {Error} If creation fails with clear error message
|
|
680
947
|
*
|
|
681
948
|
* @example
|
|
682
949
|
* ```typescript
|
|
683
|
-
* //
|
|
684
|
-
* const search = await TextFactoryHelpers.
|
|
950
|
+
* // Create search engine with clear error handling
|
|
951
|
+
* const search = await TextFactoryHelpers.createSearchWithValidation(
|
|
685
952
|
* './index.bin',
|
|
686
953
|
* './db.sqlite',
|
|
687
954
|
* { enableReranking: true, topK: 20 }
|
|
688
955
|
* );
|
|
689
956
|
*
|
|
690
|
-
* // The search engine will work even if reranking model fails to load
|
|
691
957
|
* const results = await search.search('query');
|
|
692
958
|
* console.log(`Search created successfully with ${results.length} results`);
|
|
693
959
|
* ```
|
|
694
960
|
*/
|
|
695
|
-
static async
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
catch (error) {
|
|
701
|
-
console.warn(`Initial search creation failed, trying fallback options: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
702
|
-
// Try with reranking disabled as fallback
|
|
703
|
-
const fallbackOptions = {
|
|
704
|
-
...options,
|
|
705
|
-
enableReranking: false
|
|
706
|
-
};
|
|
707
|
-
try {
|
|
708
|
-
return await TextSearchFactory.create(indexPath, dbPath, fallbackOptions);
|
|
709
|
-
}
|
|
710
|
-
catch (fallbackError) {
|
|
711
|
-
console.error('Fallback search creation also failed');
|
|
712
|
-
throw new Error(`Failed to create search engine with both original and fallback options:\n` +
|
|
713
|
-
`Original error: ${error instanceof Error ? error.message : 'Unknown error'}\n` +
|
|
714
|
-
`Fallback error: ${fallbackError instanceof Error ? fallbackError.message : 'Unknown error'}`);
|
|
715
|
-
}
|
|
716
|
-
}
|
|
961
|
+
static async createSearchWithValidation(indexPath, dbPath, options = {}) {
|
|
962
|
+
// Validate files first
|
|
963
|
+
this.validateSearchFiles(indexPath, dbPath);
|
|
964
|
+
// Create with clear error reporting
|
|
965
|
+
return await TextSearchFactory.create(indexPath, dbPath, options);
|
|
717
966
|
}
|
|
718
967
|
}
|
|
719
968
|
//# sourceMappingURL=text-factory.js.map
|