rag-lite-ts 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/core/batch-processing-optimizer.js +6 -11
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/ingestion.js +13 -3
- package/dist/core/model-registry.js +4 -4
- package/dist/core/reranking-strategies.d.ts +1 -16
- package/dist/core/reranking-strategies.js +12 -82
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +31 -32
- package/dist/dom-polyfills.js +3 -6
- package/dist/factories/index.d.ts +2 -0
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/file-processor.js +30 -102
- package/dist/index.d.ts +23 -0
- package/dist/index.js +18 -0
- package/dist/ingestion.js +18 -3
- package/dist/multimodal/clip-embedder.d.ts +18 -5
- package/dist/multimodal/clip-embedder.js +73 -26
- package/dist/search.d.ts +34 -9
- package/dist/search.js +28 -10
- package/package.json +13 -4
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Polymorphic factory for creating mode-aware search engines
|
|
3
|
+
* Automatically detects mode from database and uses appropriate embedder
|
|
4
|
+
*
|
|
5
|
+
* This factory implements the Chameleon Architecture principle:
|
|
6
|
+
* - Detects mode (text/multimodal) from database configuration
|
|
7
|
+
* - Uses appropriate embedder based on detected mode
|
|
8
|
+
* - Provides seamless polymorphic behavior without user intervention
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // Automatically detects mode and creates appropriate search engine
|
|
13
|
+
* const search = await PolymorphicSearchFactory.create('./index.bin', './db.sqlite');
|
|
14
|
+
*
|
|
15
|
+
* // Works for both text and multimodal modes
|
|
16
|
+
* const results = await search.search('query');
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
import { SearchEngine } from '../core/search.js';
|
|
20
|
+
/**
 * Optional overrides accepted by PolymorphicSearchFactory.create.
 */
export interface PolymorphicSearchOptions {
    /**
     * Whether to load the text reranker.
     *
     * Reranking is opt-in: the factory only loads the reranking model when this
     * is exactly `true`. Omitting the field (or passing `false`) leaves
     * reranking disabled.
     */
    enableReranking?: boolean;
    /**
     * Top-k results to return.
     *
     * NOTE(review): the factory's `create` implementation shipped alongside this
     * declaration does not read this field; confirm whether it is consumed
     * elsewhere before relying on it.
     */
    topK?: number;
}
|
|
26
|
+
/**
 * Builds search engines whose embedder matches the mode (text or multimodal)
 * recorded in the database, so callers do not have to pick one themselves.
 */
export declare class PolymorphicSearchFactory {
    /**
     * Build a SearchEngine configured for whatever mode the database was
     * ingested with.
     *
     * The call proceeds as follows:
     * 1. Checks that the index and database files are present
     * 2. Opens the database and reads its system configuration
     * 3. Determines the mode (text/multimodal) from that configuration
     * 4. Instantiates the embedder matching the mode
     * 5. Optionally instantiates a reranker, depending on the options
     * 6. Hands back the fully wired SearchEngine
     *
     * @param indexPath - Location of the vector index file; it must already exist
     * @param dbPath - Location of the SQLite database file; it must already exist
     * @param options - Optional configuration overrides
     * @returns A promise that resolves to the configured SearchEngine
     * @throws {Error} When a required file is missing or initialization fails
     */
    static create(indexPath: string, dbPath: string, options?: PolymorphicSearchOptions): Promise<SearchEngine>;
}
|
|
50
|
+
//# sourceMappingURL=polymorphic-factory.d.ts.map
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Polymorphic factory for creating mode-aware search engines
|
|
3
|
+
* Automatically detects mode from database and uses appropriate embedder
|
|
4
|
+
*
|
|
5
|
+
* This factory implements the Chameleon Architecture principle:
|
|
6
|
+
* - Detects mode (text/multimodal) from database configuration
|
|
7
|
+
* - Uses appropriate embedder based on detected mode
|
|
8
|
+
* - Provides seamless polymorphic behavior without user intervention
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // Automatically detects mode and creates appropriate search engine
|
|
13
|
+
* const search = await PolymorphicSearchFactory.create('./index.bin', './db.sqlite');
|
|
14
|
+
*
|
|
15
|
+
* // Works for both text and multimodal modes
|
|
16
|
+
* const results = await search.search('query');
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
import { SearchEngine } from '../core/search.js';
|
|
20
|
+
import { IndexManager } from '../index-manager.js';
|
|
21
|
+
import { openDatabase, getSystemInfo } from '../core/db.js';
|
|
22
|
+
import { createTextEmbedFunction } from '../text/embedder.js';
|
|
23
|
+
import { createTextRerankFunction } from '../text/reranker.js';
|
|
24
|
+
import { config, getModelDefaults } from '../core/config.js';
|
|
25
|
+
import { existsSync } from 'fs';
|
|
26
|
+
import { createMissingFileError, createInvalidPathError, createFactoryCreationError } from '../core/actionable-error-messages.js';
|
|
27
|
+
/**
 * Factory for creating mode-aware search engines.
 *
 * Reads the mode ('text' or 'multimodal') and embedding model from the
 * database's system info and wires the matching embedder into a SearchEngine.
 */
export class PolymorphicSearchFactory {
    /**
     * Create a SearchEngine that adapts to the mode stored in the database.
     *
     * Steps performed:
     * 1. Validate that both paths are non-empty and exist on disk
     * 2. Open the database and read system info (mode, model name, dimensions);
     *    when system info is absent or cannot be read, fall back to the
     *    configured default embedding model and stay in 'text' mode
     * 3. Build the embed function: a wrapper around the embedder from
     *    embedder-factory for 'multimodal', the text embedder otherwise
     * 4. Load the text reranker only when `options.enableReranking === true`
     * 5. Initialize the schema, the index manager and the content resolver
     * 6. Construct the SearchEngine and query its stats as a final check
     *
     * If anything fails after the database has been opened, the handle is
     * closed (best effort) before the error is rethrown, so a failed call does
     * not leave a connection behind.
     *
     * @param indexPath - Path to the vector index file (must exist)
     * @param dbPath - Path to the SQLite database file (must exist)
     * @param options - Optional overrides; only `enableReranking` is read here
     * @returns Promise resolving to the configured SearchEngine
     * @throws The error built by createFactoryCreationError. Every failure
     *   raised inside this method (including the path-validation and
     *   missing-file errors) is wrapped this way, carrying only the original
     *   error's message.
     */
    static async create(indexPath, dbPath, options = {}) {
        // Declared outside the try so the catch block can release it on failure.
        let db;
        try {
            console.log('🏭 PolymorphicSearchFactory: Initializing mode-aware search engine...');
            // Validate input paths
            if (!indexPath || !dbPath) {
                throw createInvalidPathError([
                    { name: 'indexPath', value: indexPath },
                    { name: 'dbPath', value: dbPath }
                ], { operationContext: 'PolymorphicSearchFactory.create' });
            }
            // Check if required files exist
            if (!existsSync(indexPath)) {
                throw createMissingFileError(indexPath, 'index', {
                    operationContext: 'PolymorphicSearchFactory.create'
                });
            }
            if (!existsSync(dbPath)) {
                throw createMissingFileError(dbPath, 'database', {
                    operationContext: 'PolymorphicSearchFactory.create'
                });
            }
            // Step 1: Open database and detect mode
            console.log('💾 Opening database and detecting mode...');
            db = await openDatabase(dbPath);
            // Single definition of the "no usable system info" fallback.
            const resolveDefaultModel = () => {
                const name = config.embedding_model;
                return { name, dimensions: getModelDefaults(name).dimensions };
            };
            let mode = 'text';
            let embeddingModel;
            let modelDimensions;
            try {
                const systemInfo = await getSystemInfo(db);
                if (systemInfo) {
                    // A row without a mode is treated as text mode, which is also
                    // the branch the embedder selection below would take for it.
                    mode = systemInfo.mode || 'text';
                    embeddingModel = systemInfo.modelName;
                    modelDimensions = systemInfo.modelDimensions;
                    console.log(`📊 Detected mode: ${mode}`);
                    console.log(`📊 Detected model: ${embeddingModel} (${modelDimensions} dimensions)`);
                }
                else {
                    // Fallback to default if no system info
                    ({ name: embeddingModel, dimensions: modelDimensions } = resolveDefaultModel());
                    console.log(`📊 No system info found, using default: ${embeddingModel} (${modelDimensions} dimensions)`);
                }
            }
            catch (error) {
                // Reading system info is best effort, but say why it failed instead
                // of silently switching to defaults.
                console.warn(`⚠️ Could not read system info (${error instanceof Error ? error.message : String(error)}); falling back to defaults`);
                ({ name: embeddingModel, dimensions: modelDimensions } = resolveDefaultModel());
                console.log(`📊 Using default configuration: ${embeddingModel} (${modelDimensions} dimensions)`);
            }
            // Step 2: Create appropriate embedder based on mode
            let embedFn;
            if (mode === 'multimodal') {
                console.log('📊 Loading CLIP embedder for multimodal mode...');
                const { createEmbedder } = await import('../core/embedder-factory.js');
                const clipEmbedder = await createEmbedder(embeddingModel);
                // Wrap CLIP embedder to match EmbedFunction signature:
                // images go to embedImage, everything else to embedText.
                embedFn = async (content, contentType) => {
                    if (contentType === 'image') {
                        return await clipEmbedder.embedImage(content);
                    }
                    return await clipEmbedder.embedText(content);
                };
                console.log('✓ CLIP embedder loaded for multimodal mode');
            }
            else {
                console.log('📊 Loading text embedder for text mode...');
                embedFn = createTextEmbedFunction(embeddingModel);
                console.log('✓ Text embedder loaded');
            }
            // Step 3: Initialize reranking function (optional, opt-in only)
            let rerankFn;
            if (options.enableReranking === true) {
                console.log('🔄 Loading reranking model...');
                rerankFn = createTextRerankFunction();
                // Call once with an empty candidate list so a reranker problem
                // surfaces here rather than on the first real search.
                await rerankFn('test query', []);
                console.log('✓ Reranking model loaded successfully');
            }
            else {
                console.log('🔄 Reranking disabled (local-first, fast mode)');
            }
            // Step 4: Initialize database schema
            const { initializeSchema } = await import('../core/db.js');
            await initializeSchema(db);
            console.log('✓ Database connection established');
            // Step 5: Initialize index manager
            console.log('📇 Loading vector index...');
            const indexManager = new IndexManager(indexPath, dbPath, modelDimensions, embeddingModel);
            await indexManager.initialize();
            console.log('✓ Vector index loaded successfully');
            // Step 6: Create ContentResolver
            console.log('📁 Initializing content resolver...');
            const { ContentResolver } = await import('../core/content-resolver.js');
            const contentResolver = new ContentResolver(db);
            console.log('✓ Content resolver ready');
            // Step 7: Create SearchEngine with dependency injection
            const searchEngine = new SearchEngine(embedFn, indexManager, db, rerankFn, contentResolver);
            // Step 8: Validate the setup
            const stats = await searchEngine.getStats();
            console.log(`✓ Search engine ready: ${stats.totalChunks} chunks indexed, mode: ${mode}, reranking ${stats.rerankingEnabled ? 'enabled' : 'disabled'}`);
            console.log('🎉 PolymorphicSearchFactory: Mode-aware search engine initialized successfully');
            return searchEngine;
        }
        catch (error) {
            console.error('❌ PolymorphicSearchFactory: Failed to create search engine');
            // The engine is not returned on this path, so nothing else owns the
            // connection; release it. NOTE(review): assumes the object returned by
            // openDatabase exposes close() - guarded so a missing method is a no-op.
            if (db && typeof db.close === 'function') {
                try {
                    await db.close();
                }
                catch (closeError) {
                    console.warn(`⚠️ Failed to close database after factory error: ${closeError instanceof Error ? closeError.message : String(closeError)}`);
                }
            }
            throw createFactoryCreationError('PolymorphicSearchFactory', error instanceof Error ? error.message : 'Unknown error', { operationContext: 'polymorphic search engine creation' });
        }
    }
}
|
|
159
|
+
//# sourceMappingURL=polymorphic-factory.js.map
|
package/dist/file-processor.js
CHANGED
|
@@ -346,24 +346,35 @@ function extractTitle(content, filePath) {
|
|
|
346
346
|
* Cache for image-to-text pipeline to avoid reloading
|
|
347
347
|
*/
|
|
348
348
|
let imageToTextPipeline = null;
|
|
349
|
+
let imageToTextPipelinePromise = null;
|
|
349
350
|
/**
|
|
350
|
-
* Initialize the image-to-text pipeline
|
|
351
|
+
* Initialize the image-to-text pipeline with proper async locking
|
|
351
352
|
*/
|
|
352
353
|
async function initializeImageToTextPipeline(modelName = 'Xenova/vit-gpt2-image-captioning') {
|
|
354
|
+
// Return cached pipeline if available
|
|
353
355
|
if (imageToTextPipeline) {
|
|
354
356
|
return imageToTextPipeline;
|
|
355
357
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
imageToTextPipeline = await pipeline('image-to-text', modelName);
|
|
360
|
-
console.log(`Successfully loaded image-to-text model: ${modelName}`);
|
|
361
|
-
return imageToTextPipeline;
|
|
362
|
-
}
|
|
363
|
-
catch (error) {
|
|
364
|
-
console.error(`Failed to load image-to-text model ${modelName}:`, error);
|
|
365
|
-
throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
|
|
358
|
+
// If pipeline is currently loading, wait for it
|
|
359
|
+
if (imageToTextPipelinePromise) {
|
|
360
|
+
return imageToTextPipelinePromise;
|
|
366
361
|
}
|
|
362
|
+
// Start loading pipeline
|
|
363
|
+
imageToTextPipelinePromise = (async () => {
|
|
364
|
+
try {
|
|
365
|
+
const { pipeline } = await import('@huggingface/transformers');
|
|
366
|
+
console.log(`Loading image-to-text model: ${modelName}`);
|
|
367
|
+
imageToTextPipeline = await pipeline('image-to-text', modelName);
|
|
368
|
+
console.log(`Successfully loaded image-to-text model: ${modelName}`);
|
|
369
|
+
return imageToTextPipeline;
|
|
370
|
+
}
|
|
371
|
+
catch (error) {
|
|
372
|
+
console.error(`Failed to load image-to-text model ${modelName}:`, error);
|
|
373
|
+
imageToTextPipelinePromise = null; // Reset on error so it can be retried
|
|
374
|
+
throw new Error(`Failed to initialize image-to-text pipeline: ${error instanceof Error ? error.message : String(error)}`);
|
|
375
|
+
}
|
|
376
|
+
})();
|
|
377
|
+
return imageToTextPipelinePromise;
|
|
367
378
|
}
|
|
368
379
|
/**
|
|
369
380
|
* Parse PNG image dimensions from file buffer
|
|
@@ -545,8 +556,11 @@ async function extractImageMetadata(imagePath) {
|
|
|
545
556
|
async function generateImageDescription(imagePath, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
|
|
546
557
|
try {
|
|
547
558
|
const pipeline = await initializeImageToTextPipeline(options.model);
|
|
548
|
-
//
|
|
549
|
-
const
|
|
559
|
+
// Load image using RawImage.fromURL which works with local file paths
|
|
560
|
+
const { RawImage } = await import('@huggingface/transformers');
|
|
561
|
+
const image = await RawImage.fromURL(imagePath);
|
|
562
|
+
// Generate description with loaded image
|
|
563
|
+
const result = await pipeline(image, {
|
|
550
564
|
max_length: options.maxLength || 50,
|
|
551
565
|
num_beams: 4,
|
|
552
566
|
early_stopping: true
|
|
@@ -597,93 +611,6 @@ async function generateImageDescriptionsBatch(imagePaths, options = DEFAULT_IMAG
|
|
|
597
611
|
}
|
|
598
612
|
return results;
|
|
599
613
|
}
|
|
600
|
-
/**
|
|
601
|
-
* Generate text descriptions for multiple images using optimized batch processing
|
|
602
|
-
* Uses BatchProcessingOptimizer for memory-efficient processing of large image collections
|
|
603
|
-
*/
|
|
604
|
-
async function generateImageDescriptionsBatchOptimized(imagePaths, options = DEFAULT_IMAGE_TO_TEXT_OPTIONS) {
|
|
605
|
-
// For small batches, use the existing implementation
|
|
606
|
-
if (imagePaths.length <= 10) {
|
|
607
|
-
return generateImageDescriptionsBatch(imagePaths, options);
|
|
608
|
-
}
|
|
609
|
-
try {
|
|
610
|
-
// Import batch processing optimizer
|
|
611
|
-
const { createImageBatchProcessor } = await import('./core/batch-processing-optimizer.js');
|
|
612
|
-
const batchProcessor = createImageBatchProcessor();
|
|
613
|
-
// Convert image paths to batch items
|
|
614
|
-
const batchItems = imagePaths.map(path => ({
|
|
615
|
-
content: path,
|
|
616
|
-
contentType: 'image',
|
|
617
|
-
metadata: { originalPath: path }
|
|
618
|
-
}));
|
|
619
|
-
// Create image description function
|
|
620
|
-
const imageDescriptionFunction = async (item) => {
|
|
621
|
-
try {
|
|
622
|
-
const result = await generateImageDescription(item.content, options);
|
|
623
|
-
return {
|
|
624
|
-
embedding_id: `img_desc_${Date.now()}_${Math.random()}`,
|
|
625
|
-
vector: new Float32Array([0]), // Placeholder vector
|
|
626
|
-
contentType: 'image',
|
|
627
|
-
metadata: {
|
|
628
|
-
path: item.content,
|
|
629
|
-
description: result.description,
|
|
630
|
-
confidence: result.confidence,
|
|
631
|
-
model: result.model
|
|
632
|
-
}
|
|
633
|
-
};
|
|
634
|
-
}
|
|
635
|
-
catch (error) {
|
|
636
|
-
throw new Error(`Failed to generate description for ${item.content}: ${error instanceof Error ? error.message : String(error)}`);
|
|
637
|
-
}
|
|
638
|
-
};
|
|
639
|
-
// Process with optimization and progress reporting
|
|
640
|
-
const batchResult = await batchProcessor.processBatch(batchItems, imageDescriptionFunction, (stats) => {
|
|
641
|
-
console.log(`Image description progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
|
|
642
|
-
console.log(` Memory usage: ${stats.memoryUsageMB}MB (peak: ${stats.peakMemoryUsageMB}MB)`);
|
|
643
|
-
if (stats.failedItems > 0) {
|
|
644
|
-
console.log(` Failed items: ${stats.failedItems}`);
|
|
645
|
-
}
|
|
646
|
-
});
|
|
647
|
-
// Log final statistics
|
|
648
|
-
console.log(`✓ Image description generation complete:`);
|
|
649
|
-
console.log(` Processed: ${batchResult.stats.processedItems}/${batchResult.stats.totalItems}`);
|
|
650
|
-
console.log(` Failed: ${batchResult.stats.failedItems}`);
|
|
651
|
-
console.log(` Processing time: ${Math.round(batchResult.stats.processingTimeMs / 1000)}s`);
|
|
652
|
-
console.log(` Rate: ${Math.round(batchResult.stats.itemsPerSecond)} images/sec`);
|
|
653
|
-
console.log(` Peak memory usage: ${batchResult.stats.peakMemoryUsageMB}MB`);
|
|
654
|
-
if (batchResult.stats.retryCount > 0) {
|
|
655
|
-
console.log(` Retries: ${batchResult.stats.retryCount}`);
|
|
656
|
-
}
|
|
657
|
-
// Convert results back to expected format
|
|
658
|
-
const results = [];
|
|
659
|
-
// Add successful results
|
|
660
|
-
for (const result of batchResult.results) {
|
|
661
|
-
if (result.metadata?.description) {
|
|
662
|
-
results.push({
|
|
663
|
-
path: result.metadata.path,
|
|
664
|
-
result: {
|
|
665
|
-
description: result.metadata.description,
|
|
666
|
-
confidence: result.metadata.confidence,
|
|
667
|
-
model: result.metadata.model
|
|
668
|
-
}
|
|
669
|
-
});
|
|
670
|
-
}
|
|
671
|
-
}
|
|
672
|
-
// Add failed results
|
|
673
|
-
for (const error of batchResult.errors) {
|
|
674
|
-
results.push({
|
|
675
|
-
path: error.item.content,
|
|
676
|
-
error: error.error
|
|
677
|
-
});
|
|
678
|
-
}
|
|
679
|
-
return results;
|
|
680
|
-
}
|
|
681
|
-
catch (error) {
|
|
682
|
-
console.warn(`Optimized batch processing failed, falling back to standard batch processing: ${error instanceof Error ? error.message : String(error)}`);
|
|
683
|
-
// Fall back to existing implementation
|
|
684
|
-
return generateImageDescriptionsBatch(imagePaths, options);
|
|
685
|
-
}
|
|
686
|
-
}
|
|
687
614
|
/**
|
|
688
615
|
* Process image file to extract text description and metadata
|
|
689
616
|
*/
|
|
@@ -834,8 +761,8 @@ export async function processFiles(filePaths, pathManager, imageToTextOptions) {
|
|
|
834
761
|
if (imageFiles.length > 0) {
|
|
835
762
|
console.log(`Processing ${imageFiles.length} image files with optimized batch processing`);
|
|
836
763
|
try {
|
|
837
|
-
// Use
|
|
838
|
-
const batchResults = await
|
|
764
|
+
// Use batch processing for image descriptions
|
|
765
|
+
const batchResults = await generateImageDescriptionsBatch(imageFiles, imageToTextOptions);
|
|
839
766
|
// Convert batch results to documents with metadata extraction
|
|
840
767
|
for (const batchResult of batchResults) {
|
|
841
768
|
try {
|
|
@@ -961,6 +888,7 @@ export async function cleanupImageProcessingResources() {
|
|
|
961
888
|
await imageToTextPipeline.dispose();
|
|
962
889
|
}
|
|
963
890
|
imageToTextPipeline = null;
|
|
891
|
+
imageToTextPipelinePromise = null;
|
|
964
892
|
console.log('Image-to-text pipeline cleaned up');
|
|
965
893
|
}
|
|
966
894
|
catch (error) {
|
package/dist/index.d.ts
CHANGED
|
@@ -41,8 +41,31 @@
|
|
|
41
41
|
* ```
|
|
42
42
|
*/
|
|
43
43
|
export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
|
|
44
|
+
/**
|
|
45
|
+
* @deprecated PolymorphicSearchFactory is no longer needed - SearchEngine now automatically
|
|
46
|
+
* detects mode from database and adapts accordingly (Chameleon Architecture).
|
|
47
|
+
*
|
|
48
|
+
* Migration Guide:
|
|
49
|
+
* ```typescript
|
|
50
|
+
* // Old way (deprecated):
|
|
51
|
+
* const search = await PolymorphicSearchFactory.create('./index.bin', './db.sqlite');
|
|
52
|
+
*
|
|
53
|
+
* // New way (recommended):
|
|
54
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
55
|
+
* await search.search('query'); // Mode automatically detected
|
|
56
|
+
* ```
|
|
57
|
+
*
|
|
58
|
+
* The SearchEngine constructor now uses the polymorphic factory internally,
|
|
59
|
+
* providing the same automatic mode detection without requiring explicit factory usage.
|
|
60
|
+
*/
|
|
61
|
+
export { PolymorphicSearchFactory } from './factories/index.js';
|
|
44
62
|
export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
|
|
45
63
|
export type { TextSearchOptions, TextIngestionOptions } from './factories/index.js';
|
|
64
|
+
/**
|
|
65
|
+
* @deprecated PolymorphicSearchOptions is no longer needed - use SearchEngineOptions instead.
|
|
66
|
+
* SearchEngine now automatically detects mode and adapts (Chameleon Architecture).
|
|
67
|
+
*/
|
|
68
|
+
export type { PolymorphicSearchOptions } from './factories/index.js';
|
|
46
69
|
export type { TextSearchOptions as SearchEngineOptions, TextIngestionOptions as IngestionPipelineOptions } from './factories/index.js';
|
|
47
70
|
export { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
48
71
|
export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
|
package/dist/index.js
CHANGED
|
@@ -45,6 +45,24 @@
|
|
|
45
45
|
// =============================================================================
|
|
46
46
|
// Main factory classes for simple usage
|
|
47
47
|
export { TextSearchFactory, TextIngestionFactory, TextRAGFactory, TextFactoryHelpers } from './factories/index.js';
|
|
48
|
+
/**
|
|
49
|
+
* @deprecated PolymorphicSearchFactory is no longer needed - SearchEngine now automatically
|
|
50
|
+
* detects mode from database and adapts accordingly (Chameleon Architecture).
|
|
51
|
+
*
|
|
52
|
+
* Migration Guide:
|
|
53
|
+
* ```typescript
|
|
54
|
+
* // Old way (deprecated):
|
|
55
|
+
* const search = await PolymorphicSearchFactory.create('./index.bin', './db.sqlite');
|
|
56
|
+
*
|
|
57
|
+
* // New way (recommended):
|
|
58
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
59
|
+
* await search.search('query'); // Mode automatically detected
|
|
60
|
+
* ```
|
|
61
|
+
*
|
|
62
|
+
* The SearchEngine constructor now uses the polymorphic factory internally,
|
|
63
|
+
* providing the same automatic mode detection without requiring explicit factory usage.
|
|
64
|
+
*/
|
|
65
|
+
export { PolymorphicSearchFactory } from './factories/index.js';
|
|
48
66
|
// Convenience aliases for common usage
|
|
49
67
|
export { TextSearchFactory as SearchFactory, TextIngestionFactory as IngestionFactory, TextRAGFactory as RAGFactory } from './factories/index.js';
|
|
50
68
|
// =============================================================================
|
package/dist/ingestion.js
CHANGED
|
@@ -64,7 +64,12 @@ export class IngestionPipeline {
|
|
|
64
64
|
if (!this.corePipeline) {
|
|
65
65
|
throw new Error('IngestionPipeline failed to initialize');
|
|
66
66
|
}
|
|
67
|
-
|
|
67
|
+
// Merge mode from constructor options with runtime options
|
|
68
|
+
const mergedOptions = {
|
|
69
|
+
...options,
|
|
70
|
+
mode: options?.mode || this.options.mode
|
|
71
|
+
};
|
|
72
|
+
return this.corePipeline.ingestFile(filePath, mergedOptions);
|
|
68
73
|
}
|
|
69
74
|
/**
|
|
70
75
|
* Ingest all documents in a directory
|
|
@@ -74,7 +79,12 @@ export class IngestionPipeline {
|
|
|
74
79
|
if (!this.corePipeline) {
|
|
75
80
|
throw new Error('IngestionPipeline failed to initialize');
|
|
76
81
|
}
|
|
77
|
-
|
|
82
|
+
// Merge mode from constructor options with runtime options
|
|
83
|
+
const mergedOptions = {
|
|
84
|
+
...options,
|
|
85
|
+
mode: options?.mode || this.options.mode
|
|
86
|
+
};
|
|
87
|
+
return this.corePipeline.ingestDirectory(directoryPath, mergedOptions);
|
|
78
88
|
}
|
|
79
89
|
/**
|
|
80
90
|
* Ingest content from memory buffer
|
|
@@ -95,7 +105,12 @@ export class IngestionPipeline {
|
|
|
95
105
|
if (!this.corePipeline) {
|
|
96
106
|
throw new Error('IngestionPipeline failed to initialize');
|
|
97
107
|
}
|
|
98
|
-
|
|
108
|
+
// Merge mode from constructor options with runtime options
|
|
109
|
+
const mergedOptions = {
|
|
110
|
+
...options,
|
|
111
|
+
mode: options?.mode || this.options.mode
|
|
112
|
+
};
|
|
113
|
+
return this.corePipeline.ingestFromMemory(content, metadata, mergedOptions);
|
|
99
114
|
}
|
|
100
115
|
/**
|
|
101
116
|
* Clean up resources
|
|
@@ -84,6 +84,19 @@ export declare class CLIPEmbedder extends BaseUniversalEmbedder {
|
|
|
84
84
|
* during cleanup - errors are logged but don't prevent cleanup completion.
|
|
85
85
|
*/
|
|
86
86
|
cleanup(): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Apply L2-normalization to an embedding vector
|
|
89
|
+
*
|
|
90
|
+
* L2-normalization ensures that all embeddings have unit length (magnitude = 1),
|
|
91
|
+
* which is essential for CLIP models as they were trained with normalized embeddings.
|
|
92
|
+
* This normalization makes cosine similarity calculations more reliable and ensures
|
|
93
|
+
* that vector magnitudes don't affect similarity scores.
|
|
94
|
+
*
|
|
95
|
+
* @param embedding - The embedding vector to normalize (modified in-place)
|
|
96
|
+
* @returns The normalized embedding vector (same reference as input)
|
|
97
|
+
* @private
|
|
98
|
+
*/
|
|
99
|
+
private normalizeEmbedding;
|
|
87
100
|
/**
|
|
88
101
|
* Embed text using CLIP text encoder
|
|
89
102
|
*
|
|
@@ -91,11 +104,11 @@ export declare class CLIPEmbedder extends BaseUniversalEmbedder {
|
|
|
91
104
|
* pixel_values errors. Text is tokenized with CLIP's 77 token limit and
|
|
92
105
|
* automatically truncated if necessary.
|
|
93
106
|
*
|
|
94
|
-
* Returns a 512-dimensional embedding vector in the unified CLIP
|
|
95
|
-
* which is directly comparable to image embeddings for cross-modal search.
|
|
107
|
+
* Returns a 512-dimensional L2-normalized embedding vector in the unified CLIP
|
|
108
|
+
* embedding space, which is directly comparable to image embeddings for cross-modal search.
|
|
96
109
|
*
|
|
97
110
|
* @param text - The text to embed (will be trimmed and validated)
|
|
98
|
-
* @returns EmbeddingResult with 512-dimensional vector and metadata
|
|
111
|
+
* @returns EmbeddingResult with 512-dimensional normalized vector and metadata
|
|
99
112
|
* @throws {Error} If text is empty, model not loaded, or embedding fails
|
|
100
113
|
*
|
|
101
114
|
* @example
|
|
@@ -117,10 +130,10 @@ export declare class CLIPEmbedder extends BaseUniversalEmbedder {
|
|
|
117
130
|
* - Converted to proper pixel_values format using AutoProcessor
|
|
118
131
|
* - Normalized for CLIP vision model
|
|
119
132
|
*
|
|
120
|
-
* Returns a 512-dimensional embedding vector directly comparable to text embeddings.
|
|
133
|
+
* Returns a 512-dimensional L2-normalized embedding vector directly comparable to text embeddings.
|
|
121
134
|
*
|
|
122
135
|
* @param imagePath - Local file path or URL to the image
|
|
123
|
-
* @returns EmbeddingResult with 512-dimensional vector and metadata
|
|
136
|
+
* @returns EmbeddingResult with 512-dimensional normalized vector and metadata
|
|
124
137
|
* @throws {Error} If image not found, unsupported format, or embedding fails
|
|
125
138
|
*
|
|
126
139
|
* @example
|