rag-lite-ts 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +94 -65
- package/dist/cli/indexer.d.ts.map +1 -1
- package/dist/cli/indexer.js +78 -50
- package/dist/cli/indexer.js.map +1 -1
- package/dist/cli/search.d.ts.map +1 -1
- package/dist/cli/search.js +13 -30
- package/dist/cli/search.js.map +1 -1
- package/dist/cli.js +2 -2
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +34 -73
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -255
- package/dist/config.js.map +1 -1
- package/dist/core/adapters.d.ts +93 -0
- package/dist/core/adapters.d.ts.map +1 -0
- package/dist/core/adapters.js +139 -0
- package/dist/core/adapters.js.map +1 -0
- package/dist/core/chunker.d.ts +117 -0
- package/dist/core/chunker.d.ts.map +1 -0
- package/dist/core/chunker.js +73 -0
- package/dist/core/chunker.js.map +1 -0
- package/dist/core/config.d.ts +102 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +240 -0
- package/dist/core/config.js.map +1 -0
- package/dist/{db.d.ts → core/db.d.ts} +25 -9
- package/dist/core/db.d.ts.map +1 -0
- package/dist/{db.js → core/db.js} +86 -16
- package/dist/core/db.js.map +1 -0
- package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
- package/dist/core/error-handler.d.ts.map +1 -0
- package/dist/{error-handler.js → core/error-handler.js} +51 -8
- package/dist/core/error-handler.js.map +1 -0
- package/dist/core/index.d.ts +57 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +66 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/ingestion.d.ts +143 -0
- package/dist/core/ingestion.d.ts.map +1 -0
- package/dist/core/ingestion.js +347 -0
- package/dist/core/ingestion.js.map +1 -0
- package/dist/core/interfaces.d.ts +408 -0
- package/dist/core/interfaces.d.ts.map +1 -0
- package/dist/core/interfaces.js +106 -0
- package/dist/core/interfaces.js.map +1 -0
- package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
- package/dist/core/path-manager.d.ts.map +1 -0
- package/dist/{path-manager.js → core/path-manager.js} +5 -0
- package/dist/core/path-manager.js.map +1 -0
- package/dist/core/search-example.d.ts +25 -0
- package/dist/core/search-example.d.ts.map +1 -0
- package/dist/core/search-example.js +138 -0
- package/dist/core/search-example.js.map +1 -0
- package/dist/core/search-pipeline-example.d.ts +21 -0
- package/dist/core/search-pipeline-example.d.ts.map +1 -0
- package/dist/core/search-pipeline-example.js +188 -0
- package/dist/core/search-pipeline-example.js.map +1 -0
- package/dist/core/search-pipeline.d.ts +111 -0
- package/dist/core/search-pipeline.d.ts.map +1 -0
- package/dist/core/search-pipeline.js +287 -0
- package/dist/core/search-pipeline.js.map +1 -0
- package/dist/core/search.d.ts +104 -0
- package/dist/core/search.d.ts.map +1 -0
- package/dist/core/search.js +218 -0
- package/dist/core/search.js.map +1 -0
- package/dist/core/types.d.ts +63 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +6 -0
- package/dist/core/types.js.map +1 -0
- package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
- package/dist/core/vector-index.d.ts.map +1 -0
- package/dist/{vector-index.js → core/vector-index.js} +19 -0
- package/dist/core/vector-index.js.map +1 -0
- package/dist/dom-polyfills.d.ts +6 -0
- package/dist/dom-polyfills.d.ts.map +1 -0
- package/dist/dom-polyfills.js +40 -0
- package/dist/dom-polyfills.js.map +1 -0
- package/dist/examples/clean-api-examples.d.ts +44 -0
- package/dist/examples/clean-api-examples.d.ts.map +1 -0
- package/dist/examples/clean-api-examples.js +206 -0
- package/dist/examples/clean-api-examples.js.map +1 -0
- package/dist/factories/index.d.ts +43 -0
- package/dist/factories/index.d.ts.map +1 -0
- package/dist/factories/index.js +44 -0
- package/dist/factories/index.js.map +1 -0
- package/dist/factories/text-factory.d.ts +466 -0
- package/dist/factories/text-factory.d.ts.map +1 -0
- package/dist/factories/text-factory.js +719 -0
- package/dist/factories/text-factory.js.map +1 -0
- package/dist/file-processor.d.ts +2 -2
- package/dist/file-processor.d.ts.map +1 -1
- package/dist/file-processor.js +3 -3
- package/dist/file-processor.js.map +1 -1
- package/dist/index-manager.d.ts +3 -2
- package/dist/index-manager.d.ts.map +1 -1
- package/dist/index-manager.js +13 -11
- package/dist/index-manager.js.map +1 -1
- package/dist/index.d.ts +63 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -16
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +1 -1
- package/dist/indexer.js.map +1 -1
- package/dist/ingestion.d.ts +30 -156
- package/dist/ingestion.d.ts.map +1 -1
- package/dist/ingestion.js +58 -675
- package/dist/ingestion.js.map +1 -1
- package/dist/mcp-server.js +86 -55
- package/dist/mcp-server.js.map +1 -1
- package/dist/preprocess.js +1 -1
- package/dist/preprocess.js.map +1 -1
- package/dist/search-standalone.js +1 -1
- package/dist/search-standalone.js.map +1 -1
- package/dist/search.d.ts +32 -76
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +80 -428
- package/dist/search.js.map +1 -1
- package/dist/text/chunker.d.ts +32 -0
- package/dist/text/chunker.d.ts.map +1 -0
- package/dist/{chunker.js → text/chunker.js} +98 -75
- package/dist/text/chunker.js.map +1 -0
- package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
- package/dist/text/embedder.d.ts.map +1 -0
- package/dist/{embedder.js → text/embedder.js} +71 -4
- package/dist/text/embedder.js.map +1 -0
- package/dist/text/index.d.ts +7 -0
- package/dist/text/index.d.ts.map +1 -0
- package/dist/text/index.js +8 -0
- package/dist/text/index.js.map +1 -0
- package/dist/text/preprocessors/index.d.ts +17 -0
- package/dist/text/preprocessors/index.d.ts.map +1 -0
- package/dist/text/preprocessors/index.js +38 -0
- package/dist/text/preprocessors/index.js.map +1 -0
- package/dist/text/preprocessors/mdx.d.ts +25 -0
- package/dist/text/preprocessors/mdx.d.ts.map +1 -0
- package/dist/text/preprocessors/mdx.js +101 -0
- package/dist/text/preprocessors/mdx.js.map +1 -0
- package/dist/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/text/preprocessors/mermaid.d.ts.map +1 -0
- package/dist/text/preprocessors/mermaid.js +330 -0
- package/dist/text/preprocessors/mermaid.js.map +1 -0
- package/dist/text/preprocessors/registry.d.ts +56 -0
- package/dist/text/preprocessors/registry.d.ts.map +1 -0
- package/dist/text/preprocessors/registry.js +180 -0
- package/dist/text/preprocessors/registry.js.map +1 -0
- package/dist/text/reranker.d.ts +60 -0
- package/dist/text/reranker.d.ts.map +1 -0
- package/dist/{reranker.js → text/reranker.js} +134 -19
- package/dist/text/reranker.js.map +1 -0
- package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
- package/dist/text/tokenizer.d.ts.map +1 -0
- package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
- package/dist/text/tokenizer.js.map +1 -0
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/dist/chunker.d.ts +0 -47
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js.map +0 -1
- package/dist/db.d.ts.map +0 -1
- package/dist/db.js.map +0 -1
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js.map +0 -1
- package/dist/error-handler.d.ts.map +0 -1
- package/dist/error-handler.js.map +0 -1
- package/dist/path-manager.d.ts.map +0 -1
- package/dist/path-manager.js.map +0 -1
- package/dist/reranker.d.ts +0 -40
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js.map +0 -1
- package/dist/resource-manager-demo.d.ts +0 -7
- package/dist/resource-manager-demo.d.ts.map +0 -1
- package/dist/resource-manager-demo.js +0 -52
- package/dist/resource-manager-demo.js.map +0 -1
- package/dist/resource-manager.d.ts +0 -129
- package/dist/resource-manager.d.ts.map +0 -1
- package/dist/resource-manager.js +0 -389
- package/dist/resource-manager.js.map +0 -1
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js.map +0 -1
- package/dist/vector-index.d.ts.map +0 -1
- package/dist/vector-index.js.map +0 -1
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory functions for creating text-specific search and ingestion instances
|
|
3
|
+
* Handles complex initialization logic while providing clean API for common use cases
|
|
4
|
+
*
|
|
5
|
+
* FACTORY PATTERN BENEFITS:
|
|
6
|
+
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
|
+
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
+
* - Handles error recovery and validation
|
|
9
|
+
* - Supports different embedding models and configurations
|
|
10
|
+
* - Enables clean separation between simple usage and advanced customization
|
|
11
|
+
*
|
|
12
|
+
* USAGE PATTERNS:
|
|
13
|
+
*
|
|
14
|
+
* 1. Simple Search Setup:
|
|
15
|
+
* ```typescript
|
|
16
|
+
* // Create search engine with defaults
|
|
17
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
18
|
+
* const results = await search.search('query');
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* 2. Custom Configuration:
|
|
22
|
+
* ```typescript
|
|
23
|
+
* // Create with custom options
|
|
24
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
|
|
25
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
26
|
+
* enableReranking: true,
|
|
27
|
+
* topK: 20
|
|
28
|
+
* });
|
|
29
|
+
* ```
|
|
30
|
+
*
|
|
31
|
+
* 3. Complete RAG System:
|
|
32
|
+
* ```typescript
|
|
33
|
+
* // Create both ingestion and search
|
|
34
|
+
* const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
|
|
35
|
+
* './index.bin',
|
|
36
|
+
* './db.sqlite'
|
|
37
|
+
* );
|
|
38
|
+
*
|
|
39
|
+
* // Ingest documents
|
|
40
|
+
* await ingestionPipeline.ingestDirectory('./docs');
|
|
41
|
+
*
|
|
42
|
+
* // Search documents
|
|
43
|
+
* const results = await searchEngine.search('query');
|
|
44
|
+
* ```
|
|
45
|
+
*
|
|
46
|
+
* 4. Error Recovery:
|
|
47
|
+
* ```typescript
|
|
48
|
+
* // Create with automatic fallback options
|
|
49
|
+
* const search = await TextFactoryHelpers.createSearchWithFallback(
|
|
50
|
+
* './index.bin',
|
|
51
|
+
* './db.sqlite',
|
|
52
|
+
* { enableReranking: true } // Will fallback to disabled if reranking fails
|
|
53
|
+
* );
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
import { SearchEngine } from '../core/search.js';
|
|
57
|
+
import { IngestionPipeline } from '../core/ingestion.js';
|
|
58
|
+
import { IndexManager } from '../index-manager.js';
|
|
59
|
+
import { openDatabase } from '../core/db.js';
|
|
60
|
+
import { createTextEmbedFunction } from '../text/embedder.js';
|
|
61
|
+
import { createTextRerankFunction } from '../text/reranker.js';
|
|
62
|
+
import { config, getModelDefaults } from '../core/config.js';
|
|
63
|
+
import { existsSync } from 'fs';
|
|
64
|
+
import { dirname } from 'path';
|
|
65
|
+
import { mkdirSync } from 'fs';
|
|
66
|
+
/**
|
|
67
|
+
* Factory for creating text-based SearchEngine instances
|
|
68
|
+
* Handles model loading, database initialization, and index setup
|
|
69
|
+
*
|
|
70
|
+
* This factory abstracts the complex initialization process required for text search:
|
|
71
|
+
* 1. Loads and validates text embedding models
|
|
72
|
+
* 2. Optionally loads reranking models with fallback handling
|
|
73
|
+
* 3. Establishes database connections and initializes schema
|
|
74
|
+
* 4. Loads vector indexes with proper model compatibility checking
|
|
75
|
+
* 5. Creates SearchEngine with proper dependency injection
|
|
76
|
+
*
|
|
77
|
+
* @example
|
|
78
|
+
* ```typescript
|
|
79
|
+
* // Basic usage
|
|
80
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
81
|
+
* const results = await search.search('What is machine learning?');
|
|
82
|
+
*
|
|
83
|
+
* // With custom configuration
|
|
84
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', {
|
|
85
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
86
|
+
* enableReranking: true,
|
|
87
|
+
* topK: 15
|
|
88
|
+
* });
|
|
89
|
+
*
|
|
90
|
+
* // With defaults (uses config file paths)
|
|
91
|
+
* const search = await TextSearchFactory.createWithDefaults({
|
|
92
|
+
* enableReranking: false // Faster search
|
|
93
|
+
* });
|
|
94
|
+
* ```
|
|
95
|
+
*/
|
|
96
|
+
export class TextSearchFactory {
    /**
     * Create a SearchEngine configured for text search.
     *
     * Initialization steps performed by this method:
     * - Validates that both paths were supplied and that both files exist
     * - Resolves the embedding model: explicit option, else the model info stored
     *   in the database, else `config.embedding_model`
     * - Creates the text embedding function (not exercised until first use)
     * - Optionally creates the reranking function and probes it once; any failure
     *   is logged and search continues without reranking
     * - Opens the database connection and initializes the schema
     * - Creates and initializes the IndexManager
     * - Creates the SearchEngine and reads its stats as a final sanity check
     *
     * If any step after the database connection is opened fails, the connection
     * is closed before the error is rethrown, so a failed call does not leak it.
     *
     * @param indexPath - Path to the vector index file (must exist)
     * @param dbPath - Path to the SQLite database file (must exist)
     * @param options - Optional configuration overrides
     * @param options.embeddingModel - Override embedding model (default: auto-detected from the database, then config)
     * @param options.batchSize - Embedding batch size, passed through to createTextEmbedFunction
     * @param options.rerankingModel - Reranking model, passed through to createTextRerankFunction
     * @param options.enableReranking - Reranking is enabled only when this is exactly `true` (default: disabled)
     * @returns Promise resolving to the configured SearchEngine
     * @throws {Error} If required files don't exist or initialization fails; the
     *   original error is attached as `cause`
     *
     * @example
     * ```typescript
     * // Create search engine for existing index
     * const search = await TextSearchFactory.create('./my-index.bin', './my-db.sqlite');
     *
     * // Search with the created engine
     * const results = await search.search('artificial intelligence');
     * console.log(`Found ${results.length} results`);
     *
     * // Clean up when done
     * await search.cleanup();
     * ```
     */
    static async create(indexPath, dbPath, options = {}) {
        // Declared outside the try block so the catch handler can release the
        // connection when a later initialization step fails.
        let db;
        try {
            console.log('🏭 TextSearchFactory: Initializing text search engine...');
            // Validate input paths
            if (!indexPath || !dbPath) {
                throw new Error('Both indexPath and dbPath are required');
            }
            // Check if required files exist
            if (!existsSync(indexPath)) {
                throw new Error(`Vector index not found at: ${indexPath}\n` +
                    'Run ingestion first to create the index, or check the path.\n' +
                    'Example: const ingestion = await IngestionFactory.create(dbPath, indexPath);');
            }
            if (!existsSync(dbPath)) {
                throw new Error(`Database not found at: ${dbPath}\n` +
                    'Run ingestion first to create the database, or check the path.\n' +
                    'Example: const ingestion = await IngestionFactory.create(dbPath, indexPath);');
            }
            // Step 1: Resolve the embedding model and its dimensions
            let embeddingModel = options.embeddingModel;
            let modelDimensions;
            if (!embeddingModel) {
                // Auto-detect model from database using a short-lived connection
                const { getStoredModelInfo } = await import('../core/db.js');
                const probeDb = await openDatabase(dbPath);
                try {
                    const storedModelInfo = await getStoredModelInfo(probeDb);
                    if (storedModelInfo) {
                        embeddingModel = storedModelInfo.modelName;
                        modelDimensions = storedModelInfo.dimensions;
                        console.log(`📊 Auto-detected embedding model: ${embeddingModel} (${modelDimensions} dimensions)`);
                    }
                    else {
                        // Fallback to config default
                        embeddingModel = config.embedding_model;
                        modelDimensions = getModelDefaults(embeddingModel).dimensions;
                        console.log(`📊 Using default embedding model: ${embeddingModel} (${modelDimensions} dimensions)`);
                    }
                }
                finally {
                    await probeDb.close();
                }
            }
            else {
                // Use provided model
                modelDimensions = getModelDefaults(embeddingModel).dimensions;
                console.log(`📊 Using specified embedding model: ${embeddingModel} (${modelDimensions} dimensions)`);
            }
            // Step 2: Initialize embedding function
            console.log('📊 Loading text embedding model...');
            const embedFn = createTextEmbedFunction(embeddingModel, options.batchSize);
            // Embedding function created successfully (will be tested on first use)
            console.log('✓ Text embedding function created successfully');
            // Step 3: Initialize reranking function (optional, opt-in only)
            let rerankFn;
            if (options.enableReranking === true) {
                try {
                    console.log('🔄 Loading text reranking model...');
                    rerankFn = createTextRerankFunction(options.rerankingModel);
                    // Probe the reranker once so failures surface here, not at query time
                    await rerankFn('test query', []);
                    console.log('✓ Text reranking model loaded successfully');
                }
                catch (error) {
                    // Deliberate best-effort: search still works without reranking
                    console.warn(`Failed to load reranking model, continuing without reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
                    rerankFn = undefined;
                }
            }
            else {
                console.log('🔄 Reranking disabled by default (local-first, fast mode)');
            }
            // Step 4: Initialize database connection
            console.log('💾 Opening database connection...');
            db = await openDatabase(dbPath);
            // Initialize database schema if needed
            const { initializeSchema } = await import('../core/db.js');
            await initializeSchema(db);
            console.log('✓ Database connection established');
            // Step 5: Initialize index manager
            console.log('📇 Loading vector index...');
            const indexManager = new IndexManager(indexPath, dbPath, modelDimensions, embeddingModel);
            await indexManager.initialize();
            console.log('✓ Vector index loaded successfully');
            // Step 6: Create SearchEngine with dependency injection
            const searchEngine = new SearchEngine(embedFn, indexManager, db, rerankFn);
            // Step 7: Validate the setup
            const stats = await searchEngine.getStats();
            console.log(`✓ Search engine ready: ${stats.totalChunks} chunks indexed, reranking ${stats.rerankingEnabled ? 'enabled' : 'disabled'}`);
            console.log('🎉 TextSearchFactory: Search engine initialized successfully');
            return searchEngine;
        }
        catch (error) {
            // The engine is not handed to the caller on failure, so nobody else
            // can close this connection; release it here.
            if (db) {
                try {
                    await db.close();
                }
                catch (closeError) {
                    console.warn(`Failed to close database after initialization error: ${closeError instanceof Error ? closeError.message : 'Unknown error'}`);
                }
            }
            console.error('❌ TextSearchFactory: Failed to create search engine');
            throw new Error(`TextSearchFactory.create failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }
    /**
     * Create a SearchEngine using the paths from configuration.
     *
     * Reads `config.index_file` and `config.db_file`, falling back to
     * './index.bin' and './database.sqlite' when those are unset or empty,
     * then delegates to {@link TextSearchFactory.create}.
     *
     * @param options - Optional configuration overrides, forwarded unchanged to create()
     * @returns Promise resolving to the configured SearchEngine
     * @throws {Error} If the resolved files don't exist or initialization fails
     *
     * @example
     * ```typescript
     * // Use default paths from config
     * const search = await TextSearchFactory.createWithDefaults();
     *
     * // Use defaults with reranking turned on
     * const search = await TextSearchFactory.createWithDefaults({
     *   enableReranking: true
     * });
     * ```
     */
    static async createWithDefaults(options = {}) {
        const indexPath = config.index_file || './index.bin';
        const dbPath = config.db_file || './database.sqlite';
        return this.create(indexPath, dbPath, options);
    }
}
|
|
263
|
+
/**
|
|
264
|
+
* Factory for creating text-based IngestionPipeline instances
|
|
265
|
+
* Handles model loading, database initialization, and index setup
|
|
266
|
+
*
|
|
267
|
+
* This factory abstracts the complex initialization process required for text ingestion:
|
|
268
|
+
* 1. Creates necessary directories if they don't exist
|
|
269
|
+
* 2. Loads and validates text embedding models
|
|
270
|
+
* 3. Establishes database connections and initializes schema
|
|
271
|
+
* 4. Creates or loads vector indexes with proper configuration
|
|
272
|
+
* 5. Creates IngestionPipeline with proper dependency injection
|
|
273
|
+
*
|
|
274
|
+
* @example
|
|
275
|
+
* ```typescript
|
|
276
|
+
* // Basic usage
|
|
277
|
+
* const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin');
|
|
278
|
+
* await ingestion.ingestDirectory('./documents');
|
|
279
|
+
*
|
|
280
|
+
* // With custom configuration
|
|
281
|
+
* const ingestion = await TextIngestionFactory.create('./db.sqlite', './index.bin', {
|
|
282
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
283
|
+
* chunkSize: 512,
|
|
284
|
+
* chunkOverlap: 50,
|
|
285
|
+
* forceRebuild: true
|
|
286
|
+
* });
|
|
287
|
+
*
|
|
288
|
+
* // With defaults
|
|
289
|
+
* const ingestion = await TextIngestionFactory.createWithDefaults({
|
|
290
|
+
* batchSize: 32 // Faster processing
|
|
291
|
+
* });
|
|
292
|
+
* ```
|
|
293
|
+
*/
|
|
294
|
+
export class TextIngestionFactory {
    /**
     * Create an IngestionPipeline configured for text ingestion.
     *
     * Initialization steps performed by this method:
     * - Validates that both paths were supplied
     * - Creates the parent directories of the database and index files if missing
     * - Resolves the embedding model once (explicit option, else
     *   `config.embedding_model`) and derives batch/chunk defaults from it
     * - Creates the text embedding function (not exercised until first use)
     * - Opens the database connection and initializes the schema
     * - Creates the IndexManager and either loads the existing index or
     *   creates/rebuilds it
     * - On a forced rebuild, records the current model name and dimensions in
     *   the database
     * - Creates the IngestionPipeline with the resolved chunk configuration
     *
     * If any step after the database connection is opened fails, the connection
     * is closed before the error is rethrown, so a failed call does not leak it.
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Optional configuration overrides
     * @param options.embeddingModel - Override embedding model (default: config.embedding_model)
     * @param options.batchSize - Override embedding batch size (default: model default)
     * @param options.chunkSize - Override chunk size (default: model default)
     * @param options.chunkOverlap - Override chunk overlap (default: model default)
     * @param options.forceRebuild - Force rebuild of existing index (default: false)
     * @returns Promise resolving to the configured IngestionPipeline
     * @throws {Error} If initialization fails; the original error is attached as `cause`
     *
     * @example
     * ```typescript
     * // Create ingestion pipeline
     * const ingestion = await TextIngestionFactory.create('./my-db.sqlite', './my-index.bin');
     *
     * // Ingest documents from directory
     * const result = await ingestion.ingestDirectory('./documents');
     * console.log(`Processed ${result.documentsProcessed} documents`);
     *
     * // Clean up when done
     * await ingestion.cleanup();
     * ```
     */
    static async create(dbPath, indexPath, options = {}) {
        // Declared outside the try block so the catch handler can release the
        // connection when a later initialization step fails.
        let db;
        try {
            console.log('🏭 TextIngestionFactory: Initializing text ingestion pipeline...');
            // Validate input paths
            if (!dbPath || !indexPath) {
                throw new Error('Both dbPath and indexPath are required');
            }
            // Ensure directories exist
            const dbDir = dirname(dbPath);
            const indexDir = dirname(indexPath);
            if (!existsSync(dbDir)) {
                console.log(`📁 Creating database directory: ${dbDir}`);
                mkdirSync(dbDir, { recursive: true });
            }
            if (!existsSync(indexDir)) {
                console.log(`📁 Creating index directory: ${indexDir}`);
                mkdirSync(indexDir, { recursive: true });
            }
            // Step 1: Resolve the model once so the embedder, the index manager and
            // the stored model info can never disagree, then merge model defaults
            // with the caller's options.
            const currentModel = options.embeddingModel || config.embedding_model;
            const modelDefaults = getModelDefaults(currentModel);
            const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
            const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
            const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
            // Step 2: Initialize embedding function
            console.log('📊 Loading text embedding model...');
            const embedFn = createTextEmbedFunction(currentModel, effectiveBatchSize);
            // Embedding function created successfully (will be tested on first use)
            console.log('✓ Text embedding function created successfully');
            // Step 3: Initialize database connection
            console.log('💾 Opening database connection...');
            db = await openDatabase(dbPath);
            // Initialize database schema if needed
            const { initializeSchema, getStoredModelInfo, setStoredModelInfo } = await import('../core/db.js');
            await initializeSchema(db);
            console.log('✓ Database connection established');
            // Step 4: Initialize index manager
            console.log('📇 Initializing vector index...');
            const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, currentModel);
            const indexExists = existsSync(indexPath);
            // On a forced rebuild of an existing index, detect whether the model
            // changed so the index can be recreated.
            let forceRecreate = false;
            if (options.forceRebuild && indexExists && existsSync(dbPath)) {
                // Reuse the connection that is already open instead of opening a second one
                const storedModel = await getStoredModelInfo(db);
                if (storedModel && storedModel.modelName !== currentModel) {
                    console.log(`🔄 Model change detected: ${storedModel.modelName} → ${currentModel}`);
                    console.log(`🔄 Dimensions change: ${storedModel.dimensions} → ${modelDefaults.dimensions}`);
                    forceRecreate = true;
                }
            }
            // Handle force rebuild or create new index
            if (options.forceRebuild || !indexExists) {
                if (options.forceRebuild && indexExists) {
                    console.log('🔄 Force rebuild requested, recreating index...');
                }
                else {
                    console.log('📇 Creating new vector index...');
                }
                // Initialize with skipModelCheck and forceRecreate for rebuilds
                await indexManager.initialize(options.forceRebuild, forceRecreate);
                // Record the model used for the rebuilt index. forceRecreate is only
                // ever set under forceRebuild, so checking forceRebuild is sufficient.
                if (options.forceRebuild) {
                    await setStoredModelInfo(db, currentModel, modelDefaults.dimensions);
                    console.log(`✓ Updated stored model info: ${currentModel} (${modelDefaults.dimensions} dimensions)`);
                }
            }
            else {
                // Load existing index
                await indexManager.initialize();
            }
            console.log('✓ Vector index ready');
            // Step 5: Create IngestionPipeline with dependency injection and chunk configuration
            const chunkConfig = {
                chunkSize: effectiveChunkSize,
                chunkOverlap: effectiveChunkOverlap
            };
            const ingestionPipeline = new IngestionPipeline(embedFn, indexManager, db, chunkConfig);
            console.log('🎉 TextIngestionFactory: Ingestion pipeline initialized successfully');
            return ingestionPipeline;
        }
        catch (error) {
            // The pipeline is not handed to the caller on failure, so nobody else
            // can close this connection; release it here.
            if (db) {
                try {
                    await db.close();
                }
                catch (closeError) {
                    console.warn(`Failed to close database after initialization error: ${closeError instanceof Error ? closeError.message : 'Unknown error'}`);
                }
            }
            console.error('❌ TextIngestionFactory: Failed to create ingestion pipeline');
            throw new Error(`TextIngestionFactory.create failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }
    /**
     * Create an IngestionPipeline using the paths from configuration.
     *
     * Reads `config.db_file` and `config.index_file`, falling back to
     * './database.sqlite' and './index.bin' when those are unset or empty,
     * then delegates to {@link TextIngestionFactory.create}.
     *
     * @param options - Optional configuration overrides, forwarded unchanged to create()
     * @returns Promise resolving to the configured IngestionPipeline
     * @throws {Error} If initialization fails
     */
    static async createWithDefaults(options = {}) {
        const dbPath = config.db_file || './database.sqlite';
        const indexPath = config.index_file || './index.bin';
        return this.create(dbPath, indexPath, options);
    }
}
|
|
440
|
+
/**
|
|
441
|
+
* Convenience factory to create both search and ingestion instances
|
|
442
|
+
* Useful for applications that need both capabilities with shared configuration
|
|
443
|
+
*
|
|
444
|
+
* This factory creates a complete RAG (Retrieval-Augmented Generation) system
|
|
445
|
+
* by initializing both ingestion and search capabilities with shared resources.
|
|
446
|
+
* The ingestion pipeline is created first to handle directory creation and
|
|
447
|
+
* initial setup, then the search engine is created to use the same resources.
|
|
448
|
+
*
|
|
449
|
+
* @example
|
|
450
|
+
* ```typescript
|
|
451
|
+
* // Create complete RAG system
|
|
452
|
+
* const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
|
|
453
|
+
* './index.bin',
|
|
454
|
+
* './db.sqlite'
|
|
455
|
+
* );
|
|
456
|
+
*
|
|
457
|
+
* // First, ingest some documents
|
|
458
|
+
* await ingestionPipeline.ingestDirectory('./knowledge-base');
|
|
459
|
+
*
|
|
460
|
+
* // Then search the ingested content
|
|
461
|
+
* const results = await searchEngine.search('What is the main topic?');
|
|
462
|
+
*
|
|
463
|
+
* // Clean up both instances
|
|
464
|
+
* await Promise.all([
|
|
465
|
+
* searchEngine.cleanup(),
|
|
466
|
+
* ingestionPipeline.cleanup()
|
|
467
|
+
* ]);
|
|
468
|
+
* ```
|
|
469
|
+
*/
|
|
470
|
+
export class TextRAGFactory {
    /**
     * Create both SearchEngine and IngestionPipeline instances.
     *
     * Steps performed:
     * 1. Create the ingestion pipeline via TextIngestionFactory.create
     *    (done first because it handles directory creation).
     * 2. Create the search engine via TextSearchFactory.create against the
     *    same index and database paths.
     *
     * If step 2 fails, the pipeline from step 1 is cleaned up before the
     * error is rethrown: the caller never receives a reference to it, so
     * nobody else could release it.
     *
     * Note the argument order differs between the two factories: the
     * ingestion factory takes (dbPath, indexPath), the search factory takes
     * (indexPath, dbPath).
     *
     * @param indexPath - Path to the vector index file
     * @param dbPath - Path to the SQLite database file
     * @param searchOptions - Optional search configuration, forwarded unchanged
     * @param ingestionOptions - Optional ingestion configuration, forwarded unchanged
     * @returns Promise resolving to `{ searchEngine, ingestionPipeline }`
     * @throws {Error} If initialization of either component fails
     *
     * @example
     * ```typescript
     * const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
     *   './index.bin',
     *   './db.sqlite',
     *   { enableReranking: true, topK: 15 },      // Search options
     *   { chunkSize: 512, forceRebuild: true }    // Ingestion options
     * );
     * await ingestionPipeline.ingestDirectory('./docs');
     * const results = await searchEngine.search('machine learning');
     * ```
     */
    static async createBoth(indexPath, dbPath, searchOptions = {}, ingestionOptions = {}) {
        console.log('🏭 TextRAGFactory: Creating complete RAG system...');
        // Create ingestion pipeline first (handles directory creation)
        const ingestionPipeline = await TextIngestionFactory.create(dbPath, indexPath, ingestionOptions);
        let searchEngine;
        try {
            // Create search engine (requires existing files)
            searchEngine = await TextSearchFactory.create(indexPath, dbPath, searchOptions);
        }
        catch (error) {
            // The pipeline would otherwise be orphaned with whatever
            // resources it acquired; release it before propagating.
            if (ingestionPipeline && typeof ingestionPipeline.cleanup === 'function') {
                try {
                    await ingestionPipeline.cleanup();
                }
                catch (cleanupError) {
                    // Never let a cleanup problem mask the real failure.
                    console.warn(`TextRAGFactory: ingestion pipeline cleanup failed: ${cleanupError instanceof Error ? cleanupError.message : 'Unknown error'}`);
                }
            }
            throw error;
        }
        console.log('🎉 TextRAGFactory: Complete RAG system ready');
        return { searchEngine, ingestionPipeline };
    }
    /**
     * Create both instances using the configured paths.
     *
     * Reads `config.index_file` and `config.db_file`; when either is unset
     * or empty it falls back to './index.bin' / './database.sqlite'.
     *
     * @param searchOptions - Optional search configuration
     * @param ingestionOptions - Optional ingestion configuration
     * @returns Promise resolving to both instances
     */
    static async createBothWithDefaults(searchOptions = {}, ingestionOptions = {}) {
        const indexPath = config.index_file || './index.bin';
        const dbPath = config.db_file || './database.sqlite';
        return this.createBoth(indexPath, dbPath, searchOptions, ingestionOptions);
    }
}
|
|
530
|
+
/**
|
|
531
|
+
* Helper functions for common factory patterns and error recovery
|
|
532
|
+
*
|
|
533
|
+
* This class provides utility functions that support the main factory classes
|
|
534
|
+
* with validation, configuration recommendations, and error recovery patterns.
|
|
535
|
+
* These helpers enable more robust factory usage and better error handling.
|
|
536
|
+
*
|
|
537
|
+
* @example
|
|
538
|
+
* ```typescript
|
|
539
|
+
* // Validate files before creating search engine
|
|
540
|
+
* try {
|
|
541
|
+
* TextFactoryHelpers.validateSearchFiles('./index.bin', './db.sqlite');
|
|
542
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite');
|
|
543
|
+
* } catch (error) {
|
|
544
|
+
* console.error('Files not ready for search:', error.message);
|
|
545
|
+
* }
|
|
546
|
+
*
|
|
547
|
+
* // Get recommended configuration for different use cases
|
|
548
|
+
* const { searchOptions, ingestionOptions } = TextFactoryHelpers.getRecommendedConfig('quality');
|
|
549
|
+
* const search = await TextSearchFactory.create('./index.bin', './db.sqlite', searchOptions);
|
|
550
|
+
*
|
|
551
|
+
* // Create with automatic error recovery
|
|
552
|
+
* const search = await TextFactoryHelpers.createSearchWithFallback('./index.bin', './db.sqlite', {
|
|
553
|
+
* enableReranking: true // Will fallback to disabled if reranking fails
|
|
554
|
+
* });
|
|
555
|
+
* ```
|
|
556
|
+
*/
|
|
557
|
+
export class TextFactoryHelpers {
    /**
     * Validate that the files required for search exist.
     *
     * Checks the vector index first, then the database, and throws on the
     * first one that is missing. The error message includes the offending
     * path and suggested next steps. Returns nothing on success.
     *
     * @param indexPath - Path to vector index file
     * @param dbPath - Path to database file
     * @throws {Error} If either file doesn't exist, with resolution steps
     *
     * @example
     * ```typescript
     * try {
     *   TextFactoryHelpers.validateSearchFiles('./index.bin', './db.sqlite');
     *   console.log('Files are ready for search');
     * } catch (error) {
     *   console.error('Search files not ready:', error.message);
     * }
     * ```
     */
    static validateSearchFiles(indexPath, dbPath) {
        if (!existsSync(indexPath)) {
            throw new Error(`Vector index not found: ${indexPath}\n` +
                'Run ingestion first: raglite ingest <directory>\n' +
                'Or use: const ingestion = await IngestionFactory.create(dbPath, indexPath);\n' +
                'Or check if the path is correct.');
        }
        if (!existsSync(dbPath)) {
            throw new Error(`Database not found: ${dbPath}\n` +
                'Run ingestion first: raglite ingest <directory>\n' +
                'Or use: const ingestion = await IngestionFactory.create(dbPath, indexPath);\n' +
                'Or check if the path is correct.');
        }
    }
    /**
     * Get recommended search and ingestion options for a use case.
     *
     * Presets returned (a fresh object on every call, so callers may
     * mutate the result):
     * - 'fast':     reranking off, topK 5,  batchSize 32, chunkSize 512
     * - 'balanced': reranking on,  topK 10, batchSize 16, chunkSize 1024
     * - 'quality':  reranking on,  topK 20, batchSize 8,  chunkSize 2048
     *
     * Any other value (including undefined) yields the 'balanced' preset.
     *
     * @param useCase - One of 'fast', 'balanced' or 'quality'
     * @returns `{ searchOptions, ingestionOptions }` for the use case
     *
     * @example
     * ```typescript
     * const { searchOptions, ingestionOptions } = TextFactoryHelpers.getRecommendedConfig('quality');
     * const { searchEngine, ingestionPipeline } = await TextRAGFactory.createBoth(
     *   './index.bin',
     *   './db.sqlite',
     *   searchOptions,
     *   ingestionOptions
     * );
     * ```
     */
    static getRecommendedConfig(useCase) {
        switch (useCase) {
            case 'fast':
                return {
                    searchOptions: {
                        enableReranking: false,
                        topK: 5
                    },
                    ingestionOptions: {
                        batchSize: 32,
                        chunkSize: 512
                    }
                };
            case 'balanced':
                return {
                    searchOptions: {
                        enableReranking: true,
                        topK: 10
                    },
                    ingestionOptions: {
                        batchSize: 16,
                        chunkSize: 1024
                    }
                };
            case 'quality':
                return {
                    searchOptions: {
                        enableReranking: true,
                        topK: 20
                    },
                    ingestionOptions: {
                        batchSize: 8,
                        chunkSize: 2048
                    }
                };
            default:
                // Unknown use cases get the middle-of-the-road preset.
                return this.getRecommendedConfig('balanced');
        }
    }
    /**
     * Create a search engine, retrying once with reranking disabled.
     *
     * First calls TextSearchFactory.create with the given options. If that
     * rejects, a warning is logged and creation is retried with the same
     * options plus `enableReranking: false`. If the retry also rejects, an
     * Error is thrown whose message contains both failure messages and
     * whose `cause` is the original (first-attempt) error, so its stack
     * trace is not lost.
     *
     * @param indexPath - Path to vector index file
     * @param dbPath - Path to database file
     * @param options - Initial options to try
     * @returns Promise resolving to SearchEngine (possibly with fallback options)
     * @throws {Error} If both original and fallback creation attempts fail
     *
     * @example
     * ```typescript
     * const search = await TextFactoryHelpers.createSearchWithFallback(
     *   './index.bin',
     *   './db.sqlite',
     *   { enableReranking: true, topK: 20 }
     * );
     * const results = await search.search('query');
     * ```
     */
    static async createSearchWithFallback(indexPath, dbPath, options = {}) {
        try {
            // Try with original options
            return await TextSearchFactory.create(indexPath, dbPath, options);
        }
        catch (error) {
            console.warn(`Initial search creation failed, trying fallback options: ${error instanceof Error ? error.message : 'Unknown error'}`);
            // Try with reranking disabled as fallback; the caller's object
            // is copied, not mutated.
            const fallbackOptions = {
                ...options,
                enableReranking: false
            };
            try {
                return await TextSearchFactory.create(indexPath, dbPath, fallbackOptions);
            }
            catch (fallbackError) {
                console.error('Fallback search creation also failed');
                // Keep the first failure reachable via `cause`; the fallback
                // failure's message is already embedded in the text below.
                throw new Error(`Failed to create search engine with both original and fallback options:\n` +
                    `Original error: ${error instanceof Error ? error.message : 'Unknown error'}\n` +
                    `Fallback error: ${fallbackError instanceof Error ? fallbackError.message : 'Unknown error'}`, { cause: error });
            }
        }
    }
}
|
|
719
|
+
//# sourceMappingURL=text-factory.js.map
|