rag-lite-ts 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cli/indexer.js +25 -6
- package/dist/cli/search.js +3 -3
- package/dist/cli.js +33 -6
- package/dist/core/actionable-error-messages.js +3 -3
- package/dist/core/content-manager.d.ts +0 -8
- package/dist/core/content-manager.js +2 -30
- package/dist/core/database-connection-manager.js +10 -0
- package/dist/core/db.d.ts +0 -32
- package/dist/core/db.js +11 -68
- package/dist/core/embedder-factory.d.ts +0 -22
- package/dist/core/embedder-factory.js +8 -35
- package/dist/core/index.d.ts +3 -3
- package/dist/core/index.js +3 -3
- package/dist/core/ingestion.d.ts +1 -16
- package/dist/core/ingestion.js +1 -30
- package/dist/core/interfaces.d.ts +1 -1
- package/dist/core/interfaces.js +1 -1
- package/dist/core/model-registry.d.ts +0 -4
- package/dist/core/model-registry.js +5 -9
- package/dist/core/search.d.ts +2 -2
- package/dist/core/search.js +2 -2
- package/dist/factories/index.d.ts +11 -29
- package/dist/factories/index.js +12 -29
- package/dist/factories/ingestion-factory.d.ts +200 -0
- package/dist/factories/ingestion-factory.js +475 -0
- package/dist/{core/polymorphic-search-factory.d.ts → factories/search-factory.d.ts} +7 -7
- package/dist/{core/polymorphic-search-factory.js → factories/search-factory.js} +22 -22
- package/dist/index-manager.js +25 -14
- package/dist/index.d.ts +5 -30
- package/dist/index.js +9 -24
- package/dist/indexer.js +5 -2
- package/dist/ingestion.d.ts +2 -4
- package/dist/ingestion.js +2 -2
- package/dist/mcp-server.js +31 -25
- package/dist/search.js +2 -2
- package/dist/text/embedder.d.ts +0 -11
- package/dist/text/embedder.js +11 -22
- package/dist/text/index.d.ts +2 -2
- package/dist/text/index.js +2 -2
- package/dist/text/reranker.d.ts +0 -10
- package/dist/text/reranker.js +10 -33
- package/package.json +7 -3
- package/dist/factories/polymorphic-factory.d.ts +0 -50
- package/dist/factories/polymorphic-factory.js +0 -159
- package/dist/factories/text-factory.d.ts +0 -560
- package/dist/factories/text-factory.js +0 -968
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory functions for creating text-specific search and ingestion instances
|
|
3
|
+
* Handles complex initialization logic while providing clean API for common use cases
|
|
4
|
+
*
|
|
5
|
+
* FACTORY PATTERN BENEFITS:
|
|
6
|
+
* - Abstracts complex initialization (model loading, database setup, index initialization)
|
|
7
|
+
* - Provides simple API for common use cases while preserving access to dependency injection
|
|
8
|
+
* - Clear validation and error handling without fallback mechanisms
|
|
9
|
+
* - Supports different embedding models and configurations
|
|
10
|
+
* - Enables clean separation between simple usage and advanced customization
|
|
11
|
+
*
|
|
12
|
+
* MODE SELECTION GUIDE:
|
|
13
|
+
* - Text Mode (default): Optimized for text-only content
|
|
14
|
+
* - Uses sentence-transformer models (fast, accurate for text)
|
|
15
|
+
* - Images converted to text descriptions
|
|
16
|
+
* - Best for: document search, text clustering, semantic similarity
|
|
17
|
+
*
|
|
18
|
+
* - Multimodal Mode: Optimized for mixed text/image content
|
|
19
|
+
* - Uses CLIP models (unified embedding space)
|
|
20
|
+
* - True cross-modal search (text finds images, images find text)
|
|
21
|
+
* - Best for: image search, visual QA, multimodal retrieval
|
|
22
|
+
*
|
|
23
|
+
* USAGE PATTERNS:
|
|
24
|
+
*
|
|
25
|
+
* 1. Mode Selection:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* // Text mode (default) - optimized for text-only content
|
|
28
|
+
* const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
29
|
+
* mode: 'text',
|
|
30
|
+
* embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // Multimodal mode - enables cross-modal search
|
|
34
|
+
* const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
35
|
+
* mode: 'multimodal',
|
|
36
|
+
* embeddingModel: 'Xenova/clip-vit-base-patch32',
|
|
37
|
+
* rerankingStrategy: 'text-derived'
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import { IngestionPipeline } from '../core/ingestion.js';
|
|
42
|
+
import { IndexManager } from '../index-manager.js';
|
|
43
|
+
import { openDatabase } from '../core/db.js';
|
|
44
|
+
import { createTextEmbedFunction } from '../text/embedder.js';
|
|
45
|
+
import { config, getModelDefaults } from '../core/config.js';
|
|
46
|
+
import { existsSync } from 'fs';
|
|
47
|
+
import { dirname } from 'path';
|
|
48
|
+
import { mkdirSync } from 'fs';
|
|
49
|
+
import { ContentManager } from '../core/content-manager.js';
|
|
50
|
+
import { validateModeModelCompatibilityOrThrow } from '../core/mode-model-validator.js';
|
|
51
|
+
import { createInvalidPathError, createFactoryCreationError, createModeMismatchError } from '../core/actionable-error-messages.js';
|
|
52
|
+
/**
|
|
53
|
+
* Factory for creating text-based IngestionPipeline instances
|
|
54
|
+
* Handles model loading, database initialization, and index setup
|
|
55
|
+
*
|
|
56
|
+
* This factory abstracts the complex initialization process required for text ingestion:
|
|
57
|
+
* 1. Creates necessary directories if they don't exist
|
|
58
|
+
* 2. Validates mode-model compatibility (no fallback mechanisms)
|
|
59
|
+
* 3. Loads and validates embedding models with clear error reporting
|
|
60
|
+
* 4. Establishes database connections and initializes schema
|
|
61
|
+
* 5. Stores mode configuration in database for automatic detection
|
|
62
|
+
* 6. Creates or loads vector indexes with proper configuration
|
|
63
|
+
* 7. Creates IngestionPipeline with proper dependency injection
|
|
64
|
+
*
|
|
65
|
+
* Mode Configuration:
|
|
66
|
+
* - Text Mode (default): Uses sentence-transformer models for text-only content
|
|
67
|
+
* - Multimodal Mode: Uses CLIP models for mixed text/image content
|
|
68
|
+
* - Mode is stored in database and auto-detected during search
|
|
69
|
+
* - Clear validation prevents mode-model mismatches
|
|
70
|
+
*
|
|
71
|
+
* @example
|
|
72
|
+
* ```typescript
|
|
73
|
+
* // Basic usage
|
|
74
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
|
|
75
|
+
* await ingestion.ingestDirectory('./documents');
|
|
76
|
+
*
|
|
77
|
+
* // With custom configuration
|
|
78
|
+
* const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
|
|
79
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
80
|
+
* chunkSize: 512,
|
|
81
|
+
* chunkOverlap: 50,
|
|
82
|
+
* forceRebuild: true
|
|
83
|
+
* });
|
|
84
|
+
*
|
|
85
|
+
* // With defaults
|
|
86
|
+
* const ingestion = await IngestionFactory.createWithDefaults({
|
|
87
|
+
* batchSize: 32 // Faster processing
|
|
88
|
+
* });
|
|
89
|
+
* ```
|
|
90
|
+
*/
|
|
91
|
+
export class IngestionFactory {
    /**
     * Create an IngestionPipeline for the given database and vector index paths.
     *
     * Initialization order:
     * 1. Validate the paths and create their parent directories when missing
     * 2. Resolve the effective mode ('text' unless `options.mode` is set) and model
     * 3. Validate mode-model compatibility (throws on mismatch, no fallback)
     * 4. Build the embedding function for the mode
     * 5. Open the database, initialize the schema and store/validate the mode configuration
     * 6. Create, rebuild or load the vector index
     * 7. Prepare the content system and assemble the IngestionPipeline
     *
     * If any step after the database has been opened fails, the connection opened
     * here is closed again before the error is propagated.
     *
     * @param dbPath - Path to the SQLite database file
     * @param indexPath - Path to the vector index file
     * @param options - Optional configuration overrides
     * @param options.mode - 'text' (default) or 'multimodal'
     * @param options.embeddingModel - Override embedding model (default: `config.embedding_model`
     *   in text mode, `DEFAULT_MODELS['clip']` in multimodal mode)
     * @param options.batchSize - Override embedding batch size (default: model defaults)
     * @param options.chunkSize - Override chunk size (default: model defaults)
     * @param options.chunkOverlap - Override chunk overlap (default: model defaults)
     * @param options.rerankingStrategy - Reranking strategy stored with the mode configuration
     *   (default: 'cross-encoder')
     * @param options.forceRebuild - Force rebuild of an existing index and allow mode/model changes
     *   (default: false)
     * @param options.contentSystemConfig - Content system configuration options
     * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
     * @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
     * @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size (default: 2GB)
     * @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
     * @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
     * @returns Promise resolving to configured IngestionPipeline
     * @throws {Error} If initialization fails. Model/mode mismatch errors are re-thrown
     *   unchanged; every other failure is wrapped via createFactoryCreationError.
     *
     * @example
     * ```typescript
     * // Create ingestion pipeline with default content system
     * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
     *
     * // Create with custom content system configuration
     * const custom = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
     *   contentSystemConfig: {
     *     contentDir: './custom-content',
     *     maxFileSize: 100 * 1024 * 1024, // 100MB
     *     maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
     *     enableDeduplication: true
     *   }
     * });
     *
     * const result = await ingestion.ingestDirectory('./documents');
     * console.log(`Processed ${result.documentsProcessed} documents`);
     *
     * // Clean up when done
     * await ingestion.cleanup();
     * ```
     */
    static async create(dbPath, indexPath, options = {}) {
        // Declared outside the try block so the catch handler can release the
        // connection when a later initialization step fails.
        let db;
        try {
            console.log('🏭 IngestionFactory: Initializing text ingestion pipeline...');
            // Validate input paths
            if (!dbPath || !indexPath) {
                throw createInvalidPathError([
                    { name: 'dbPath', value: dbPath },
                    { name: 'indexPath', value: indexPath }
                ], { operationContext: 'IngestionFactory.create' });
            }
            // Ensure directories exist
            const dbDir = dirname(dbPath);
            const indexDir = dirname(indexPath);
            if (!existsSync(dbDir)) {
                console.log(`📁 Creating database directory: ${dbDir}`);
                mkdirSync(dbDir, { recursive: true });
            }
            if (!existsSync(indexDir)) {
                console.log(`📁 Creating index directory: ${indexDir}`);
                mkdirSync(indexDir, { recursive: true });
            }
            // Step 1: Determine effective mode and model
            const effectiveMode = options.mode || 'text';
            let effectiveModel;
            if (options.embeddingModel) {
                // Use explicitly provided model
                effectiveModel = options.embeddingModel;
            }
            else if (effectiveMode === 'multimodal') {
                // Loaded lazily so text-only usage never touches the model registry here
                const { DEFAULT_MODELS } = await import('../core/model-registry.js');
                effectiveModel = DEFAULT_MODELS['clip'];
                console.log(`📊 No model specified for multimodal mode, using default: ${effectiveModel}`);
            }
            else {
                effectiveModel = config.embedding_model;
            }
            // Step 2: Get model-specific defaults and merge with options
            const modelDefaults = getModelDefaults(effectiveModel);
            const effectiveBatchSize = options.batchSize ?? modelDefaults.batch_size;
            const effectiveChunkSize = options.chunkSize ?? modelDefaults.chunk_size;
            const effectiveChunkOverlap = options.chunkOverlap ?? modelDefaults.chunk_overlap;
            // Step 3: Validate mode-model compatibility at creation time
            console.log('🔍 Validating mode-model compatibility...');
            validateModeModelCompatibilityOrThrow(effectiveMode, effectiveModel);
            console.log('✓ Mode-model compatibility validated');
            // Step 4: Initialize embedding function based on mode
            let embedFn;
            if (effectiveMode === 'multimodal') {
                console.log('📊 Loading CLIP embedding model for multimodal mode...');
                const { createEmbedder } = await import('../core/embedder-factory.js');
                const clipEmbedder = await createEmbedder(effectiveModel);
                // Wrap CLIP embedder to match EmbedFunction signature
                embedFn = async (content, contentType) => {
                    if (contentType === 'image') {
                        // Use CLIP image embedding for image content
                        return await clipEmbedder.embedImage(content);
                    }
                    // Use CLIP text embedding for text content
                    return await clipEmbedder.embedText(content);
                };
                console.log('✓ CLIP embedder created for multimodal mode');
            }
            else {
                // Text mode: use sentence-transformer embedder.
                // The resolved model is passed (not the raw option) so the embedder always
                // matches the model that was validated above and is recorded in the database.
                console.log('📊 Loading text embedding model...');
                embedFn = createTextEmbedFunction(effectiveModel, effectiveBatchSize);
                console.log('✓ Text embedding function created successfully');
            }
            // Step 5: Initialize database connection and schema
            console.log('💾 Opening database connection...');
            db = await openDatabase(dbPath);
            const { initializeSchema, getSystemInfo, setSystemInfo } = await import('../core/db.js');
            await initializeSchema(db);
            console.log('✓ Database connection established');
            // Step 5.1: Store or validate the mode configuration
            await this.handleModeStorage(db, options, modelDefaults, effectiveModel);
            // Step 6: Initialize index manager
            console.log('📇 Initializing vector index...');
            const indexManager = new IndexManager(indexPath, dbPath, modelDefaults.dimensions, effectiveModel);
            const indexExists = existsSync(indexPath);
            // When forceRebuild is set and both files exist, always force recreation to
            // handle any model/dimension mismatches.
            const forceRecreate = Boolean(options.forceRebuild) && indexExists && existsSync(dbPath);
            if (forceRecreate) {
                // Logging only: report what differs from the stored configuration.
                // The already-open connection is reused instead of opening a second one.
                const systemInfo = await getSystemInfo(db);
                if (systemInfo?.modelName && systemInfo.modelName !== effectiveModel) {
                    console.log(`🔄 Model change detected: ${systemInfo.modelName} → ${effectiveModel}`);
                    console.log(`🔄 Dimensions change: ${systemInfo.modelDimensions} → ${modelDefaults.dimensions}`);
                }
                else if (systemInfo?.modelDimensions && systemInfo.modelDimensions !== modelDefaults.dimensions) {
                    console.log(`🔄 Dimension mismatch detected: ${systemInfo.modelDimensions} → ${modelDefaults.dimensions}`);
                }
            }
            if (options.forceRebuild || !indexExists) {
                if (options.forceRebuild && indexExists) {
                    console.log('🔄 Force rebuild requested, recreating index...');
                }
                else {
                    console.log('📇 Creating new vector index...');
                }
                // NOTE(review): per the original comment the arguments are
                // (skipModelCheck, forceRecreate) — confirm against IndexManager.initialize.
                await indexManager.initialize(options.forceRebuild, forceRecreate);
                // Update stored model info when rebuilding (forceRecreate implies forceRebuild)
                if (options.forceRebuild) {
                    await setSystemInfo(db, {
                        modelName: effectiveModel,
                        modelDimensions: modelDefaults.dimensions
                    });
                    console.log(`✓ Updated stored model info: ${effectiveModel} (${modelDefaults.dimensions} dimensions)`);
                }
            }
            else {
                // Load existing index
                await indexManager.initialize();
            }
            console.log('✓ Vector index ready');
            // Step 7: Create ContentManager for unified content system
            console.log('📁 Initializing content management system...');
            const contentSystemConfig = await this.validateAndPrepareContentSystemConfig(options.contentSystemConfig);
            const contentManager = new ContentManager(db, contentSystemConfig);
            console.log('✓ Content management system ready');
            // Step 8: Create IngestionPipeline with dependency injection and chunk configuration
            const chunkConfig = {
                chunkSize: effectiveChunkSize,
                chunkOverlap: effectiveChunkOverlap
            };
            const ingestionPipeline = new IngestionPipeline(embedFn, indexManager, db, chunkConfig, contentManager);
            console.log('🎉 IngestionFactory: Ingestion pipeline initialized successfully');
            return ingestionPipeline;
        }
        catch (error) {
            console.error('❌ IngestionFactory: Failed to create ingestion pipeline');
            // The pipeline never took ownership of the connection, so release it here.
            // A failure while closing must not mask the original error.
            if (db) {
                try {
                    await db.close();
                }
                catch (closeError) {
                    console.error('❌ IngestionFactory: Failed to close database connection:', closeError);
                }
            }
            // Preserve custom error messages for model mismatch and mode mismatch
            if (error instanceof Error && (error.message.includes('Model mismatch') ||
                error.message.includes('Mode mismatch') ||
                error.message.includes('--force-rebuild') ||
                error.message.includes('--rebuild-if-needed'))) {
                throw error; // Re-throw custom validation errors as-is
            }
            throw createFactoryCreationError('IngestionFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'ingestion pipeline creation' });
        }
    }
    /**
     * Create an IngestionPipeline using the paths from the loaded configuration.
     * Falls back to './database.sqlite' and './index.bin' when `config.db_file`
     * or `config.index_file` is not set.
     * @param options - Optional configuration overrides (same as `create`)
     * @returns Promise resolving to configured IngestionPipeline
     */
    static async createWithDefaults(options = {}) {
        const dbPath = config.db_file || './database.sqlite';
        const indexPath = config.index_file || './index.bin';
        return this.create(dbPath, indexPath, options);
    }
    /**
     * Handles mode storage during ingestion.
     *
     * - No stored system info: stores the requested configuration (first ingestion).
     * - Stored mode differs from the requested mode: overwrites it when
     *   `options.forceRebuild` is set, otherwise throws a mode mismatch error.
     * - Same mode but different model: overwrites it when `options.forceRebuild`
     *   is set, otherwise throws a model mismatch error with rebuild instructions.
     *
     * @param db - Open database connection
     * @param options - Options passed to `create` (reads mode, rerankingStrategy, forceRebuild)
     * @param modelDefaults - Model defaults; `dimensions` is stored with the configuration
     * @param effectiveModel - Resolved embedding model name
     * @throws {Error} On mode/model mismatch without forceRebuild, or when reading or
     *   writing system info fails (wrapped as "Mode storage failed: ...")
     * @private
     */
    static async handleModeStorage(db, options, modelDefaults, effectiveModel) {
        const { getSystemInfo } = await import('../core/db.js');
        // Determine the effective mode and reranking strategy
        const effectiveMode = options.mode || 'text';
        const effectiveRerankingStrategy = options.rerankingStrategy || 'cross-encoder';
        // Determine model type based on model name
        const modelType = effectiveModel.includes('clip') ? 'clip' : 'sentence-transformer';
        // Determine supported content types based on mode
        const supportedContentTypes = effectiveMode === 'multimodal' ? ['text', 'image'] : ['text'];
        // Single place that writes the full configuration for this call
        const storeConfiguration = () => this.updateSystemInfo(db, effectiveMode, effectiveModel, modelType, modelDefaults, effectiveRerankingStrategy, supportedContentTypes);
        try {
            // Check if system info already exists
            const existingSystemInfo = await getSystemInfo(db);
            if (!existingSystemInfo) {
                // First ingestion - create system info
                console.log(`🔧 First ingestion detected, storing system configuration...`);
                console.log(` Mode: ${effectiveMode}`);
                console.log(` Model: ${effectiveModel} (${modelType})`);
                console.log(` Dimensions: ${modelDefaults.dimensions}`);
                console.log(` Reranking: ${effectiveRerankingStrategy}`);
                console.log(` Content types: ${supportedContentTypes.join(', ')}`);
                await storeConfiguration();
                console.log('✅ System configuration stored successfully');
                return;
            }
            // Validate mode consistency for subsequent ingestions
            if (existingSystemInfo.mode !== effectiveMode) {
                console.warn(`⚠️ Mode mismatch detected!`);
                console.warn(` Database mode: ${existingSystemInfo.mode}`);
                console.warn(` Requested mode: ${effectiveMode}`);
                if (!options.forceRebuild) {
                    throw createModeMismatchError(existingSystemInfo.mode, effectiveMode, { operationContext: 'IngestionFactory.create' });
                }
                console.log('🔄 Force rebuild enabled, updating mode configuration...');
                await storeConfiguration();
                return;
            }
            if (existingSystemInfo.modelName !== effectiveModel) {
                // Model change within the same mode
                console.log(`🔄 Model change detected: ${existingSystemInfo.modelName} → ${effectiveModel}`);
                if (!options.forceRebuild) {
                    // Create a specific error message for model mismatch with rebuild suggestions
                    const errorMessage = [
                        `❌ Model mismatch: Database is configured for '${existingSystemInfo.modelName}', but '${effectiveModel}' was requested.`,
                        '',
                        '🛠️ How to fix this:',
                        ' 1. Use --force-rebuild to change models:',
                        ` raglite ingest <path> --model ${effectiveModel} --force-rebuild`,
                        '',
                        ' 2. Or use --rebuild-if-needed for automatic handling:',
                        ` raglite ingest <path> --model ${effectiveModel} --rebuild-if-needed`,
                        '',
                        ' 3. Or continue using the existing model:',
                        ` raglite ingest <path> # Uses ${existingSystemInfo.modelName}`,
                        '',
                        '🔍 Model switching requires rebuilding the vector index because different models',
                        ' produce embeddings with different dimensions and characteristics.'
                    ].join('\n');
                    throw new Error(errorMessage);
                }
                console.log('🔄 Force rebuild enabled, updating model configuration...');
                await storeConfiguration();
                return;
            }
            console.log(`✅ Mode consistency validated: ${effectiveMode} mode with ${effectiveModel}`);
        }
        catch (error) {
            if (error instanceof Error && (error.message.includes('Mode mismatch') || error.message.includes('Model mismatch'))) {
                throw error; // Re-throw validation errors with custom messages
            }
            console.error('❌ Failed to handle mode storage:', error);
            throw new Error(`Mode storage failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Writes the full system configuration to the database via `setSystemInfo`.
     * The reranking model and reranking config are always written as `undefined`,
     * and the model version is a fixed placeholder.
     * @private
     */
    static async updateSystemInfo(db, mode, modelName, modelType, modelDefaults, rerankingStrategy, supportedContentTypes) {
        const { setSystemInfo } = await import('../core/db.js');
        await setSystemInfo(db, {
            mode,
            modelName,
            modelType,
            modelDimensions: modelDefaults.dimensions,
            modelVersion: '1.0.0', // TODO: Get actual version from model
            supportedContentTypes,
            rerankingStrategy: rerankingStrategy,
            rerankingModel: undefined,
            rerankingConfig: undefined
        });
    }
    /**
     * Validates and prepares content system configuration.
     *
     * Merges the user configuration over the defaults, validates every field,
     * creates the content directory when missing and verifies it is writable.
     *
     * Size limits that are explicitly provided must be positive numbers: `0`,
     * negative values, `NaN` and non-numbers are rejected. `null`/`undefined`
     * limits are left unvalidated.
     *
     * @param userConfig - Partial content system configuration (may be undefined)
     * @returns Promise resolving to the merged configuration
     * @throws {Error} If a field is invalid or the directory cannot be created or written to
     * @private
     */
    static async validateAndPrepareContentSystemConfig(userConfig) {
        // Default configuration
        const defaultConfig = {
            contentDir: '.raglite/content',
            maxFileSize: 50 * 1024 * 1024, // 50MB
            maxContentDirSize: 2 * 1024 * 1024 * 1024, // 2GB
            enableDeduplication: true,
            enableStorageTracking: true
        };
        // Merge with user configuration. Named so that it does not shadow the
        // module-level `config` import.
        const contentConfig = { ...defaultConfig, ...userConfig };
        // Validate content directory path
        if (!contentConfig.contentDir || typeof contentConfig.contentDir !== 'string') {
            throw new Error('Content directory path must be a non-empty string');
        }
        // A provided limit must be a number strictly greater than zero.
        // `!(value > 0)` also rejects NaN, which `value <= 0` would let through.
        const assertPositiveLimit = (value, message) => {
            if (value == null) {
                return;
            }
            if (typeof value !== 'number' || !(value > 0)) {
                throw new Error(message);
            }
        };
        // Validate file size limits
        assertPositiveLimit(contentConfig.maxFileSize, 'Maximum file size must be a positive number');
        assertPositiveLimit(contentConfig.maxContentDirSize, 'Maximum content directory size must be a positive number');
        // Validate that maxFileSize is not larger than maxContentDirSize
        if (contentConfig.maxFileSize != null && contentConfig.maxContentDirSize != null &&
            contentConfig.maxFileSize > contentConfig.maxContentDirSize) {
            throw new Error('Maximum file size cannot be larger than maximum content directory size');
        }
        // Validate boolean options
        if (contentConfig.enableDeduplication !== undefined && typeof contentConfig.enableDeduplication !== 'boolean') {
            throw new Error('enableDeduplication must be a boolean value');
        }
        if (contentConfig.enableStorageTracking !== undefined && typeof contentConfig.enableStorageTracking !== 'boolean') {
            throw new Error('enableStorageTracking must be a boolean value');
        }
        // Create content directory if it doesn't exist
        try {
            const { promises: fsPromises, constants: fsConstants } = await import('fs');
            await fsPromises.mkdir(contentConfig.contentDir, { recursive: true });
            // Verify directory is writable
            await fsPromises.access(contentConfig.contentDir, fsConstants.W_OK);
            console.log(`✓ Content directory validated: ${contentConfig.contentDir}`);
        }
        catch (error) {
            throw new Error(`Failed to create or access content directory '${contentConfig.contentDir}': ${error instanceof Error ? error.message : 'Unknown error'}. Please check permissions and path validity.`);
        }
        return contentConfig;
    }
}
|
|
475
|
+
//# sourceMappingURL=ingestion-factory.js.map
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* automatically detected during search - no manual configuration needed.
|
|
13
13
|
*/
|
|
14
14
|
import '../dom-polyfills.js';
|
|
15
|
-
import { SearchEngine } from '
|
|
15
|
+
import { SearchEngine } from '../core/search.js';
|
|
16
16
|
import type { SystemInfo, ModeType } from '../types.js';
|
|
17
17
|
/**
|
|
18
18
|
* Factory for creating search engines with automatic mode detection
|
|
@@ -33,7 +33,7 @@ import type { SystemInfo, ModeType } from '../types.js';
|
|
|
33
33
|
* - True cross-modal search capabilities
|
|
34
34
|
* - Text queries find images, image queries find text
|
|
35
35
|
*/
|
|
36
|
-
export declare class
|
|
36
|
+
export declare class SearchFactory {
|
|
37
37
|
/**
|
|
38
38
|
* Create a SearchEngine with automatic mode detection and configuration
|
|
39
39
|
*
|
|
@@ -66,7 +66,7 @@ export declare class PolymorphicSearchFactory {
|
|
|
66
66
|
* @example
|
|
67
67
|
* ```typescript
|
|
68
68
|
* // Automatic mode detection and engine creation
|
|
69
|
-
* const search = await
|
|
69
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
70
70
|
*
|
|
71
71
|
* // Search works based on detected mode:
|
|
72
72
|
* // Text mode: fast text similarity search
|
|
@@ -110,7 +110,7 @@ export declare class PolymorphicSearchFactory {
|
|
|
110
110
|
}
|
|
111
111
|
/**
|
|
112
112
|
* Quick function to create a search engine with automatic mode detection
|
|
113
|
-
* Convenience wrapper around
|
|
113
|
+
* Convenience wrapper around SearchFactory.create
|
|
114
114
|
*
|
|
115
115
|
* @param indexPath - Path to the vector index file
|
|
116
116
|
* @param dbPath - Path to the database file
|
|
@@ -118,11 +118,11 @@ export declare class PolymorphicSearchFactory {
|
|
|
118
118
|
*
|
|
119
119
|
* @example
|
|
120
120
|
* ```typescript
|
|
121
|
-
* const search = await
|
|
121
|
+
* const search = await createSearchEngine('./index.bin', './db.sqlite');
|
|
122
122
|
* const results = await search.search('query');
|
|
123
123
|
* ```
|
|
124
124
|
*/
|
|
125
|
-
export declare function
|
|
125
|
+
export declare function createSearchEngine(indexPath: string, dbPath: string): Promise<SearchEngine>;
|
|
126
126
|
/**
|
|
127
127
|
* Check what mode a database is configured for
|
|
128
128
|
* Convenience function for inspecting database configuration
|
|
@@ -151,4 +151,4 @@ export declare function detectSearchEngineMode(dbPath: string): Promise<ModeType
|
|
|
151
151
|
* ```
|
|
152
152
|
*/
|
|
153
153
|
export declare function getSearchEngineInfo(dbPath: string): Promise<SystemInfo>;
|
|
154
|
-
//# sourceMappingURL=
|
|
154
|
+
//# sourceMappingURL=search-factory.d.ts.map
|
|
@@ -13,15 +13,15 @@
|
|
|
13
13
|
*/
|
|
14
14
|
// Ensure DOM polyfills are set up before any transformers.js usage
|
|
15
15
|
import '../dom-polyfills.js';
|
|
16
|
-
import { SearchEngine } from '
|
|
17
|
-
import { ModeDetectionService } from '
|
|
16
|
+
import { SearchEngine } from '../core/search.js';
|
|
17
|
+
import { ModeDetectionService } from '../core/mode-detection-service.js';
|
|
18
18
|
import { IndexManager } from '../index-manager.js';
|
|
19
|
-
import { DatabaseConnectionManager } from '
|
|
20
|
-
import { createEmbedder } from '
|
|
21
|
-
import { ContentResolver } from '
|
|
22
|
-
import { validateModeModelCompatibilityOrThrow } from '
|
|
23
|
-
import { createMissingFileError } from '
|
|
24
|
-
import { handleError, ErrorCategory, ErrorSeverity, createError } from '
|
|
19
|
+
import { DatabaseConnectionManager } from '../core/database-connection-manager.js';
|
|
20
|
+
import { createEmbedder } from '../core/embedder-factory.js';
|
|
21
|
+
import { ContentResolver } from '../core/content-resolver.js';
|
|
22
|
+
import { validateModeModelCompatibilityOrThrow } from '../core/mode-model-validator.js';
|
|
23
|
+
import { createMissingFileError } from '../core/actionable-error-messages.js';
|
|
24
|
+
import { handleError, ErrorCategory, ErrorSeverity, createError } from '../core/error-handler.js';
|
|
25
25
|
import { existsSync } from 'fs';
|
|
26
26
|
// =============================================================================
|
|
27
27
|
// POLYMORPHIC SEARCH FACTORY
|
|
@@ -45,7 +45,7 @@ import { existsSync } from 'fs';
|
|
|
45
45
|
* - True cross-modal search capabilities
|
|
46
46
|
* - Text queries find images, image queries find text
|
|
47
47
|
*/
|
|
48
|
-
export class
|
|
48
|
+
export class SearchFactory {
|
|
49
49
|
/**
|
|
50
50
|
* Create a SearchEngine with automatic mode detection and configuration
|
|
51
51
|
*
|
|
@@ -78,7 +78,7 @@ export class PolymorphicSearchFactory {
|
|
|
78
78
|
* @example
|
|
79
79
|
* ```typescript
|
|
80
80
|
* // Automatic mode detection and engine creation
|
|
81
|
-
* const search = await
|
|
81
|
+
* const search = await SearchFactory.create('./index.bin', './db.sqlite');
|
|
82
82
|
*
|
|
83
83
|
* // Search works based on detected mode:
|
|
84
84
|
* // Text mode: fast text similarity search
|
|
@@ -90,7 +90,7 @@ export class PolymorphicSearchFactory {
|
|
|
90
90
|
*/
|
|
91
91
|
static async create(indexPath, dbPath) {
|
|
92
92
|
try {
|
|
93
|
-
console.log('🎭
|
|
93
|
+
console.log('🎭 SearchFactory: Initializing search engine with mode detection...');
|
|
94
94
|
// Step 1: Validate input paths
|
|
95
95
|
if (!indexPath || !dbPath) {
|
|
96
96
|
throw createError.validation('Both indexPath and dbPath are required');
|
|
@@ -206,7 +206,7 @@ export class PolymorphicSearchFactory {
|
|
|
206
206
|
return undefined;
|
|
207
207
|
}
|
|
208
208
|
// Use lazy loading to avoid loading reranking dependencies unless needed
|
|
209
|
-
const { LazyRerankerLoader } = await import('
|
|
209
|
+
const { LazyRerankerLoader } = await import('../core/lazy-dependency-loader.js');
|
|
210
210
|
// For text mode, use cross-encoder reranking
|
|
211
211
|
if (strategy === 'cross-encoder') {
|
|
212
212
|
return LazyRerankerLoader.loadTextReranker();
|
|
@@ -228,7 +228,7 @@ export class PolymorphicSearchFactory {
|
|
|
228
228
|
return undefined;
|
|
229
229
|
}
|
|
230
230
|
// Use lazy loading to avoid loading multimodal dependencies unless needed
|
|
231
|
-
const { LazyDependencyManager } = await import('
|
|
231
|
+
const { LazyDependencyManager } = await import('../core/lazy-dependency-loader.js');
|
|
232
232
|
// Load the appropriate reranker based on strategy
|
|
233
233
|
return LazyDependencyManager.loadReranker(strategy);
|
|
234
234
|
}
|
|
@@ -246,12 +246,12 @@ export class PolymorphicSearchFactory {
|
|
|
246
246
|
static validateRequiredFiles(indexPath, dbPath) {
|
|
247
247
|
if (!existsSync(indexPath)) {
|
|
248
248
|
throw createMissingFileError(indexPath, 'index', {
|
|
249
|
-
operationContext: '
|
|
249
|
+
operationContext: 'SearchFactory.create'
|
|
250
250
|
});
|
|
251
251
|
}
|
|
252
252
|
if (!existsSync(dbPath)) {
|
|
253
253
|
throw createMissingFileError(dbPath, 'database', {
|
|
254
|
-
operationContext: '
|
|
254
|
+
operationContext: 'SearchFactory.create'
|
|
255
255
|
});
|
|
256
256
|
}
|
|
257
257
|
}
|
|
@@ -262,7 +262,7 @@ export class PolymorphicSearchFactory {
|
|
|
262
262
|
static enhanceCreationError(error, indexPath, dbPath) {
|
|
263
263
|
if (error instanceof Error) {
|
|
264
264
|
// Add context about the operation that failed
|
|
265
|
-
let enhancedMessage = `
|
|
265
|
+
let enhancedMessage = `SearchFactory.create failed: ${error.message}`;
|
|
266
266
|
// Provide specific guidance based on error type
|
|
267
267
|
if (error.message.includes('ENOENT')) {
|
|
268
268
|
enhancedMessage += '\n\n💡 Make sure both the vector index and database files exist.';
|
|
@@ -283,7 +283,7 @@ export class PolymorphicSearchFactory {
|
|
|
283
283
|
}
|
|
284
284
|
return new Error(enhancedMessage);
|
|
285
285
|
}
|
|
286
|
-
return new Error(`
|
|
286
|
+
return new Error(`SearchFactory.create failed: Unknown error`);
|
|
287
287
|
}
|
|
288
288
|
}
|
|
289
289
|
// =============================================================================
|
|
@@ -291,7 +291,7 @@ export class PolymorphicSearchFactory {
|
|
|
291
291
|
// =============================================================================
|
|
292
292
|
/**
|
|
293
293
|
* Quick function to create a search engine with automatic mode detection
|
|
294
|
-
* Convenience wrapper around
|
|
294
|
+
* Convenience wrapper around SearchFactory.create
|
|
295
295
|
*
|
|
296
296
|
* @param indexPath - Path to the vector index file
|
|
297
297
|
* @param dbPath - Path to the database file
|
|
@@ -299,12 +299,12 @@ export class PolymorphicSearchFactory {
|
|
|
299
299
|
*
|
|
300
300
|
* @example
|
|
301
301
|
* ```typescript
|
|
302
|
-
* const search = await
|
|
302
|
+
* const search = await createSearchEngine('./index.bin', './db.sqlite');
|
|
303
303
|
* const results = await search.search('query');
|
|
304
304
|
* ```
|
|
305
305
|
*/
|
|
306
|
-
export async function
|
|
307
|
-
return
|
|
306
|
+
export async function createSearchEngine(indexPath, dbPath) {
|
|
307
|
+
return SearchFactory.create(indexPath, dbPath);
|
|
308
308
|
}
|
|
309
309
|
/**
|
|
310
310
|
* Check what mode a database is configured for
|
|
@@ -341,4 +341,4 @@ export async function getSearchEngineInfo(dbPath) {
|
|
|
341
341
|
const modeService = new ModeDetectionService(dbPath);
|
|
342
342
|
return modeService.detectMode();
|
|
343
343
|
}
|
|
344
|
-
//# sourceMappingURL=
|
|
344
|
+
//# sourceMappingURL=search-factory.js.map
|