rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test Runner for Chameleon Error Recovery and Reliability Tests
|
|
3
|
+
* Runs the comprehensive error recovery test suite
|
|
4
|
+
*/
|
|
5
|
+
import { spawn } from 'child_process';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import { dirname } from 'path';
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = dirname(__filename);
|
|
10
|
+
async function runTests() {
|
|
11
|
+
console.log('🧪 Running Chameleon Error Recovery and Reliability Tests...\n');
|
|
12
|
+
const testFiles = [
|
|
13
|
+
'chameleon-error-recovery.test.ts',
|
|
14
|
+
'chameleon-reliability-integration.test.ts',
|
|
15
|
+
'chameleon-stress-testing.test.ts',
|
|
16
|
+
'chameleon-error-simulation.test.ts'
|
|
17
|
+
];
|
|
18
|
+
let totalTests = 0;
|
|
19
|
+
let passedTests = 0;
|
|
20
|
+
let failedTests = 0;
|
|
21
|
+
for (const testFile of testFiles) {
|
|
22
|
+
console.log(`\n📋 Running ${testFile}...`);
|
|
23
|
+
try {
|
|
24
|
+
// Build the test file first
|
|
25
|
+
const buildProcess = spawn('npx', ['tsc', '--project', 'tsconfig.test.json'], {
|
|
26
|
+
stdio: 'pipe',
|
|
27
|
+
shell: true
|
|
28
|
+
});
|
|
29
|
+
await new Promise((resolve, reject) => {
|
|
30
|
+
buildProcess.on('close', (code) => {
|
|
31
|
+
if (code === 0) {
|
|
32
|
+
resolve(code);
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
reject(new Error(`Build failed with code ${code}`));
|
|
36
|
+
}
|
|
37
|
+
});
|
|
38
|
+
});
|
|
39
|
+
// Run the compiled test
|
|
40
|
+
const testProcess = spawn('node', ['--test', `dist/${testFile.replace('.ts', '.js')}`], {
|
|
41
|
+
stdio: 'pipe',
|
|
42
|
+
shell: true
|
|
43
|
+
});
|
|
44
|
+
let output = '';
|
|
45
|
+
let errorOutput = '';
|
|
46
|
+
testProcess.stdout?.on('data', (data) => {
|
|
47
|
+
output += data.toString();
|
|
48
|
+
});
|
|
49
|
+
testProcess.stderr?.on('data', (data) => {
|
|
50
|
+
errorOutput += data.toString();
|
|
51
|
+
});
|
|
52
|
+
await new Promise((resolve) => {
|
|
53
|
+
testProcess.on('close', (code) => {
|
|
54
|
+
console.log(`Exit code: ${code}`);
|
|
55
|
+
if (output) {
|
|
56
|
+
console.log('Output:', output);
|
|
57
|
+
}
|
|
58
|
+
if (errorOutput) {
|
|
59
|
+
console.log('Errors:', errorOutput);
|
|
60
|
+
}
|
|
61
|
+
// Count tests (this is a simple approximation)
|
|
62
|
+
const testMatches = output.match(/✓|×/g);
|
|
63
|
+
const currentTests = testMatches ? testMatches.length : 0;
|
|
64
|
+
totalTests += currentTests;
|
|
65
|
+
if (code === 0) {
|
|
66
|
+
passedTests += currentTests;
|
|
67
|
+
console.log(`✅ ${testFile} completed successfully`);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
failedTests += currentTests;
|
|
71
|
+
console.log(`❌ ${testFile} failed`);
|
|
72
|
+
}
|
|
73
|
+
resolve(code);
|
|
74
|
+
});
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
catch (error) {
|
|
78
|
+
console.error(`❌ Failed to run ${testFile}:`, error instanceof Error ? error.message : String(error));
|
|
79
|
+
failedTests++;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
console.log('\n📊 Test Summary:');
|
|
83
|
+
console.log(`Total Tests: ${totalTests}`);
|
|
84
|
+
console.log(`Passed: ${passedTests}`);
|
|
85
|
+
console.log(`Failed: ${failedTests}`);
|
|
86
|
+
if (failedTests === 0) {
|
|
87
|
+
console.log('\n🎉 All error recovery tests completed!');
|
|
88
|
+
console.log('✅ System demonstrates robust error handling and recovery mechanisms');
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
console.log('\n⚠️ Some tests failed - this may be expected in test environments');
|
|
92
|
+
console.log('🔍 Review the output above for specific failure details');
|
|
93
|
+
}
|
|
94
|
+
return failedTests === 0;
|
|
95
|
+
}
|
|
96
|
+
// Run tests if this file is executed directly
|
|
97
|
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
|
98
|
+
runTests().catch(console.error);
|
|
99
|
+
}
|
|
100
|
+
export { runTests };
|
|
101
|
+
//# sourceMappingURL=run-error-recovery-tests.js.map
|
package/dist/search.d.ts
CHANGED
|
@@ -1,25 +1,44 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Public API SearchEngine - Simple constructor
|
|
2
|
+
* Public API SearchEngine - Simple constructor with Chameleon Architecture
|
|
3
3
|
*
|
|
4
|
-
* This class provides a clean, simple API
|
|
5
|
-
*
|
|
4
|
+
* This class provides a clean, simple API that automatically adapts to the mode
|
|
5
|
+
* (text or multimodal) stored in the database during ingestion. The system detects
|
|
6
|
+
* the mode and creates the appropriate embedder and reranker without user intervention.
|
|
7
|
+
*
|
|
8
|
+
* Chameleon Architecture Features:
|
|
9
|
+
* - Automatic mode detection from database configuration
|
|
10
|
+
* - Seamless switching between text and multimodal modes
|
|
11
|
+
* - Appropriate embedder selection (sentence-transformer or CLIP)
|
|
12
|
+
* - Mode-specific reranking strategies
|
|
6
13
|
*
|
|
7
14
|
* @example
|
|
8
15
|
* ```typescript
|
|
9
|
-
* // Simple usage
|
|
16
|
+
* // Simple usage - mode automatically detected from database
|
|
10
17
|
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
11
18
|
* const results = await search.search('query');
|
|
12
19
|
*
|
|
13
|
-
* //
|
|
20
|
+
* // Works for both text and multimodal databases
|
|
21
|
+
* // Text mode: uses sentence-transformer embeddings
|
|
22
|
+
* // Multimodal mode: uses CLIP embeddings for cross-modal search
|
|
23
|
+
*
|
|
24
|
+
* // With options (advanced)
|
|
14
25
|
* const search = new SearchEngine('./index.bin', './db.sqlite', {
|
|
15
|
-
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
16
26
|
* enableReranking: true
|
|
17
27
|
* });
|
|
18
28
|
* ```
|
|
19
29
|
*/
|
|
20
|
-
import { type TextSearchOptions } from './factories/index.js';
|
|
21
30
|
import type { SearchResult, SearchOptions, EmbedFunction, RerankFunction } from './core/types.js';
|
|
22
|
-
export interface SearchEngineOptions
|
|
31
|
+
export interface SearchEngineOptions {
|
|
32
|
+
/** Embedding model name override */
|
|
33
|
+
embeddingModel?: string;
|
|
34
|
+
/** Embedding batch size override */
|
|
35
|
+
batchSize?: number;
|
|
36
|
+
/** Reranking model name override */
|
|
37
|
+
rerankingModel?: string;
|
|
38
|
+
/** Whether to enable reranking (default: true) */
|
|
39
|
+
enableReranking?: boolean;
|
|
40
|
+
/** Top-k results to return (default: from config) */
|
|
41
|
+
topK?: number;
|
|
23
42
|
/** Custom embedding function (advanced usage) */
|
|
24
43
|
embedFn?: EmbedFunction;
|
|
25
44
|
/** Custom reranking function (advanced usage) */
|
|
@@ -33,13 +52,45 @@ export declare class SearchEngine {
|
|
|
33
52
|
private initPromise;
|
|
34
53
|
constructor(indexPath: string, dbPath: string, options?: SearchEngineOptions);
|
|
35
54
|
/**
|
|
36
|
-
* Initialize the search engine using
|
|
55
|
+
* Initialize the search engine using polymorphic factory or direct injection
|
|
56
|
+
*
|
|
57
|
+
* Chameleon Architecture Implementation:
|
|
58
|
+
* - Automatically detects mode from database (text or multimodal)
|
|
59
|
+
* - Creates appropriate embedder based on detected mode
|
|
60
|
+
* - Applies mode-specific reranking strategies
|
|
61
|
+
* - Provides seamless polymorphic behavior
|
|
37
62
|
*/
|
|
38
63
|
private initialize;
|
|
39
64
|
/**
|
|
40
65
|
* Perform semantic search
|
|
41
66
|
*/
|
|
42
67
|
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
68
|
+
/**
|
|
69
|
+
* Retrieve content by ID in the specified format
|
|
70
|
+
* @param contentId - Content ID to retrieve
|
|
71
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
72
|
+
* @returns Promise that resolves to content in requested format
|
|
73
|
+
*/
|
|
74
|
+
getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
|
|
75
|
+
/**
|
|
76
|
+
* Retrieve multiple content items efficiently in batch
|
|
77
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
78
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
79
|
+
* @returns Promise that resolves to array of content in requested format
|
|
80
|
+
*/
|
|
81
|
+
getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
|
|
82
|
+
/**
|
|
83
|
+
* Retrieve content metadata for result enhancement
|
|
84
|
+
* @param contentId - Content ID to get metadata for
|
|
85
|
+
* @returns Promise that resolves to content metadata
|
|
86
|
+
*/
|
|
87
|
+
getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
|
|
88
|
+
/**
|
|
89
|
+
* Verify that content exists and is accessible
|
|
90
|
+
* @param contentId - Content ID to verify
|
|
91
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
92
|
+
*/
|
|
93
|
+
verifyContentExists(contentId: string): Promise<boolean>;
|
|
43
94
|
/**
|
|
44
95
|
* Clean up resources
|
|
45
96
|
*/
|
package/dist/search.js
CHANGED
|
@@ -1,24 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Public API SearchEngine - Simple constructor
|
|
2
|
+
* Public API SearchEngine - Simple constructor with Chameleon Architecture
|
|
3
3
|
*
|
|
4
|
-
* This class provides a clean, simple API
|
|
5
|
-
*
|
|
4
|
+
* This class provides a clean, simple API that automatically adapts to the mode
|
|
5
|
+
* (text or multimodal) stored in the database during ingestion. The system detects
|
|
6
|
+
* the mode and creates the appropriate embedder and reranker without user intervention.
|
|
7
|
+
*
|
|
8
|
+
* Chameleon Architecture Features:
|
|
9
|
+
* - Automatic mode detection from database configuration
|
|
10
|
+
* - Seamless switching between text and multimodal modes
|
|
11
|
+
* - Appropriate embedder selection (sentence-transformer or CLIP)
|
|
12
|
+
* - Mode-specific reranking strategies
|
|
6
13
|
*
|
|
7
14
|
* @example
|
|
8
15
|
* ```typescript
|
|
9
|
-
* // Simple usage
|
|
16
|
+
* // Simple usage - mode automatically detected from database
|
|
10
17
|
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
11
18
|
* const results = await search.search('query');
|
|
12
19
|
*
|
|
13
|
-
* //
|
|
20
|
+
* // Works for both text and multimodal databases
|
|
21
|
+
* // Text mode: uses sentence-transformer embeddings
|
|
22
|
+
* // Multimodal mode: uses CLIP embeddings for cross-modal search
|
|
23
|
+
*
|
|
24
|
+
* // With options (advanced)
|
|
14
25
|
* const search = new SearchEngine('./index.bin', './db.sqlite', {
|
|
15
|
-
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
16
26
|
* enableReranking: true
|
|
17
27
|
* });
|
|
18
28
|
* ```
|
|
19
29
|
*/
|
|
20
30
|
import { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
21
|
-
import { TextSearchFactory } from './factories/index.js';
|
|
22
31
|
export class SearchEngine {
|
|
23
32
|
indexPath;
|
|
24
33
|
dbPath;
|
|
@@ -42,7 +51,13 @@ export class SearchEngine {
|
|
|
42
51
|
}
|
|
43
52
|
}
|
|
44
53
|
/**
|
|
45
|
-
* Initialize the search engine using
|
|
54
|
+
* Initialize the search engine using polymorphic factory or direct injection
|
|
55
|
+
*
|
|
56
|
+
* Chameleon Architecture Implementation:
|
|
57
|
+
* - Automatically detects mode from database (text or multimodal)
|
|
58
|
+
* - Creates appropriate embedder based on detected mode
|
|
59
|
+
* - Applies mode-specific reranking strategies
|
|
60
|
+
* - Provides seamless polymorphic behavior
|
|
46
61
|
*/
|
|
47
62
|
async initialize() {
|
|
48
63
|
if (this.coreEngine) {
|
|
@@ -74,12 +89,18 @@ export class SearchEngine {
|
|
|
74
89
|
const db = await openDatabase(this.dbPath);
|
|
75
90
|
const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
|
|
76
91
|
await indexManager.initialize();
|
|
92
|
+
// Create ContentResolver for unified content system
|
|
93
|
+
const { ContentResolver } = await import('./core/content-resolver.js');
|
|
94
|
+
const contentResolver = new ContentResolver(db);
|
|
77
95
|
// Create core engine with dependency injection
|
|
78
|
-
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn);
|
|
96
|
+
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
|
|
79
97
|
}
|
|
80
98
|
else {
|
|
81
|
-
// Use factory for
|
|
82
|
-
|
|
99
|
+
// Use core polymorphic factory for automatic mode detection (Chameleon Architecture)
|
|
100
|
+
// This enables SearchEngine to automatically adapt to text or multimodal mode
|
|
101
|
+
// based on the configuration stored in the database during ingestion
|
|
102
|
+
const { PolymorphicSearchFactory } = await import('./core/polymorphic-search-factory.js');
|
|
103
|
+
this.coreEngine = await PolymorphicSearchFactory.create(this.indexPath, this.dbPath);
|
|
83
104
|
}
|
|
84
105
|
})();
|
|
85
106
|
return this.initPromise;
|
|
@@ -94,6 +115,56 @@ export class SearchEngine {
|
|
|
94
115
|
}
|
|
95
116
|
return this.coreEngine.search(query, options);
|
|
96
117
|
}
|
|
118
|
+
/**
|
|
119
|
+
* Retrieve content by ID in the specified format
|
|
120
|
+
* @param contentId - Content ID to retrieve
|
|
121
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
122
|
+
* @returns Promise that resolves to content in requested format
|
|
123
|
+
*/
|
|
124
|
+
async getContent(contentId, format = 'file') {
|
|
125
|
+
await this.initialize();
|
|
126
|
+
if (!this.coreEngine) {
|
|
127
|
+
throw new Error('SearchEngine failed to initialize');
|
|
128
|
+
}
|
|
129
|
+
return this.coreEngine.getContent(contentId, format);
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Retrieve multiple content items efficiently in batch
|
|
133
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
134
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
135
|
+
* @returns Promise that resolves to array of content in requested format
|
|
136
|
+
*/
|
|
137
|
+
async getContentBatch(contentIds, format = 'file') {
|
|
138
|
+
await this.initialize();
|
|
139
|
+
if (!this.coreEngine) {
|
|
140
|
+
throw new Error('SearchEngine failed to initialize');
|
|
141
|
+
}
|
|
142
|
+
return this.coreEngine.getContentBatch(contentIds, format);
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Retrieve content metadata for result enhancement
|
|
146
|
+
* @param contentId - Content ID to get metadata for
|
|
147
|
+
* @returns Promise that resolves to content metadata
|
|
148
|
+
*/
|
|
149
|
+
async getContentMetadata(contentId) {
|
|
150
|
+
await this.initialize();
|
|
151
|
+
if (!this.coreEngine) {
|
|
152
|
+
throw new Error('SearchEngine failed to initialize');
|
|
153
|
+
}
|
|
154
|
+
return this.coreEngine.getContentMetadata(contentId);
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Verify that content exists and is accessible
|
|
158
|
+
* @param contentId - Content ID to verify
|
|
159
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
160
|
+
*/
|
|
161
|
+
async verifyContentExists(contentId) {
|
|
162
|
+
await this.initialize();
|
|
163
|
+
if (!this.coreEngine) {
|
|
164
|
+
throw new Error('SearchEngine failed to initialize');
|
|
165
|
+
}
|
|
166
|
+
return this.coreEngine.verifyContentExists(contentId);
|
|
167
|
+
}
|
|
97
168
|
/**
|
|
98
169
|
* Clean up resources
|
|
99
170
|
*/
|
package/dist/test-utils.d.ts
CHANGED
|
@@ -2,35 +2,17 @@
|
|
|
2
2
|
* Test utilities for multi-model support
|
|
3
3
|
* Provides common configurations and helpers for testing with different embedding models
|
|
4
4
|
*/
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
readonly dimensions: 768;
|
|
13
|
-
readonly chunkSize: 400;
|
|
14
|
-
readonly batchSize: 8;
|
|
15
|
-
}];
|
|
5
|
+
export interface TestModel {
|
|
6
|
+
name: string;
|
|
7
|
+
dimensions: number;
|
|
8
|
+
chunkSize: number;
|
|
9
|
+
batchSize: number;
|
|
10
|
+
}
|
|
11
|
+
export declare const TEST_MODELS: TestModel[];
|
|
16
12
|
/**
|
|
17
13
|
* Retrieve model configuration by name
|
|
18
14
|
* @param modelName - The name of the model to retrieve
|
|
19
15
|
* @returns Model configuration object or undefined if not found
|
|
20
16
|
*/
|
|
21
|
-
export declare function getTestModel(modelName: string):
|
|
22
|
-
readonly name: "sentence-transformers/all-MiniLM-L6-v2";
|
|
23
|
-
readonly dimensions: 384;
|
|
24
|
-
readonly chunkSize: 250;
|
|
25
|
-
readonly batchSize: 16;
|
|
26
|
-
} | {
|
|
27
|
-
readonly name: "Xenova/all-mpnet-base-v2";
|
|
28
|
-
readonly dimensions: 768;
|
|
29
|
-
readonly chunkSize: 400;
|
|
30
|
-
readonly batchSize: 8;
|
|
31
|
-
} | undefined;
|
|
32
|
-
/**
|
|
33
|
-
* Type for test model configuration
|
|
34
|
-
*/
|
|
35
|
-
export type TestModel = typeof TEST_MODELS[number];
|
|
17
|
+
export declare function getTestModel(modelName: string): TestModel | undefined;
|
|
36
18
|
//# sourceMappingURL=test-utils.d.ts.map
|
package/dist/text/chunker.d.ts
CHANGED
package/dist/text/embedder.js
CHANGED
|
@@ -2,6 +2,7 @@ import '../dom-polyfills.js';
|
|
|
2
2
|
import { createHash } from 'crypto';
|
|
3
3
|
import { config } from '../core/config.js';
|
|
4
4
|
import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
|
|
5
|
+
import { createModelLoadingError, createInvalidContentError, createMissingDependencyError } from '../core/actionable-error-messages.js';
|
|
5
6
|
/**
|
|
6
7
|
* List of supported embedding models
|
|
7
8
|
*/
|
|
@@ -22,8 +23,7 @@ export class EmbeddingEngine {
|
|
|
22
23
|
this.batchSize = batchSize || config.batch_size;
|
|
23
24
|
// Validate that the model is supported
|
|
24
25
|
if (!SUPPORTED_MODELS.includes(this.modelName)) {
|
|
25
|
-
throw
|
|
26
|
-
`Supported models: ${SUPPORTED_MODELS.join(', ')}`);
|
|
26
|
+
throw createModelLoadingError(this.modelName, `Model not in supported list. Supported models: ${SUPPORTED_MODELS.join(', ')}`, { operationContext: 'EmbeddingEngine constructor' });
|
|
27
27
|
}
|
|
28
28
|
console.log(`🤖 EmbeddingEngine initialized with model: ${this.modelName}, batchSize: ${this.batchSize}`);
|
|
29
29
|
}
|
|
@@ -88,7 +88,10 @@ export class EmbeddingEngine {
|
|
|
88
88
|
*/
|
|
89
89
|
async embedBatch(texts) {
|
|
90
90
|
if (!this.model) {
|
|
91
|
-
throw
|
|
91
|
+
throw createMissingDependencyError('model', 'object', {
|
|
92
|
+
operationContext: 'embedBatch',
|
|
93
|
+
includeTroubleshooting: true
|
|
94
|
+
});
|
|
92
95
|
}
|
|
93
96
|
if (texts.length === 0) {
|
|
94
97
|
return [];
|
|
@@ -123,7 +126,8 @@ export class EmbeddingEngine {
|
|
|
123
126
|
const vector = new Float32Array(embeddingData[i]);
|
|
124
127
|
results.push({
|
|
125
128
|
embedding_id,
|
|
126
|
-
vector
|
|
129
|
+
vector,
|
|
130
|
+
contentType: 'text'
|
|
127
131
|
});
|
|
128
132
|
}
|
|
129
133
|
return results;
|
|
@@ -173,7 +177,8 @@ export class EmbeddingEngine {
|
|
|
173
177
|
const vector = new Float32Array(embeddingData[0]);
|
|
174
178
|
return {
|
|
175
179
|
embedding_id,
|
|
176
|
-
vector
|
|
180
|
+
vector,
|
|
181
|
+
contentType: 'text'
|
|
177
182
|
};
|
|
178
183
|
}
|
|
179
184
|
catch (error) {
|
|
@@ -189,7 +194,9 @@ export class EmbeddingEngine {
|
|
|
189
194
|
async embedSingle(text) {
|
|
190
195
|
const results = await this.embedBatch([text]);
|
|
191
196
|
if (results.length === 0) {
|
|
192
|
-
throw
|
|
197
|
+
throw createInvalidContentError('text', 'empty', {
|
|
198
|
+
operationContext: 'embedText'
|
|
199
|
+
});
|
|
193
200
|
}
|
|
194
201
|
return results[0];
|
|
195
202
|
}
|
|
@@ -357,10 +364,10 @@ export function createTextEmbedFunction(modelName, batchSize) {
|
|
|
357
364
|
}
|
|
358
365
|
// Use the existing embedSingle method
|
|
359
366
|
const result = await engine.embedSingle(query);
|
|
360
|
-
//
|
|
367
|
+
// Ensure contentType is present (should already be included from embedSingle)
|
|
361
368
|
return {
|
|
362
369
|
...result,
|
|
363
|
-
contentType: 'text'
|
|
370
|
+
contentType: result.contentType || 'text'
|
|
364
371
|
};
|
|
365
372
|
};
|
|
366
373
|
return embedFunction;
|
package/dist/text/index.d.ts
CHANGED
|
@@ -3,5 +3,6 @@ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } fr
|
|
|
3
3
|
export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
|
|
4
4
|
export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
|
|
5
5
|
export { type ChunkConfig } from '../core/chunker.js';
|
|
6
|
+
export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
|
|
6
7
|
export * from './preprocessors/index.js';
|
|
7
8
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/text/index.js
CHANGED
|
@@ -3,6 +3,7 @@ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createT
|
|
|
3
3
|
export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
|
|
4
4
|
export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
|
|
5
5
|
export { chunkDocument } from '../core/chunker.js';
|
|
6
|
+
export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
|
|
6
7
|
// Re-export preprocessors
|
|
7
8
|
export * from './preprocessors/index.js';
|
|
8
9
|
//# sourceMappingURL=index.js.map
|
package/dist/text/reranker.d.ts
CHANGED
|
@@ -8,13 +8,12 @@ export declare class CrossEncoderReranker {
|
|
|
8
8
|
private model;
|
|
9
9
|
private tokenizer;
|
|
10
10
|
private modelName;
|
|
11
|
-
private static readonly FALLBACK_MODELS;
|
|
12
11
|
/**
|
|
13
12
|
* Ensure DOM polyfills are set up for transformers.js
|
|
14
13
|
*/
|
|
15
14
|
private ensurePolyfills;
|
|
16
15
|
/**
|
|
17
|
-
* Load the embedding model
|
|
16
|
+
* Load the embedding model
|
|
18
17
|
*/
|
|
19
18
|
loadModel(): Promise<void>;
|
|
20
19
|
/**
|
package/dist/text/reranker.js
CHANGED
|
@@ -18,12 +18,6 @@ export class CrossEncoderReranker {
|
|
|
18
18
|
model = null; // Use any to avoid complex transformers.js typing issues
|
|
19
19
|
tokenizer = null;
|
|
20
20
|
modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
|
|
21
|
-
// Alternative models in case the primary fails
|
|
22
|
-
static FALLBACK_MODELS = [
|
|
23
|
-
'Xenova/ms-marco-MiniLM-L-6-v2', // Primary - proven to work in standalone test
|
|
24
|
-
'cross-encoder/ms-marco-MiniLM-L-6-v2', // Original (may have issues)
|
|
25
|
-
'cross-encoder/ms-marco-MiniLM-L-2-v2', // Smaller original (may have issues)
|
|
26
|
-
];
|
|
27
21
|
/**
|
|
28
22
|
* Ensure DOM polyfills are set up for transformers.js
|
|
29
23
|
*/
|
|
@@ -40,54 +34,30 @@ export class CrossEncoderReranker {
|
|
|
40
34
|
}
|
|
41
35
|
}
|
|
42
36
|
/**
|
|
43
|
-
* Load the embedding model
|
|
37
|
+
* Load the embedding model
|
|
44
38
|
*/
|
|
45
39
|
async loadModel() {
|
|
46
|
-
|
|
47
|
-
if (await this.tryLoadModel(this.modelName)) {
|
|
48
|
-
return;
|
|
49
|
-
}
|
|
50
|
-
// Try fallback models if primary fails
|
|
51
|
-
console.warn(`Primary model ${this.modelName} failed, trying fallbacks...`);
|
|
52
|
-
for (const fallbackModel of CrossEncoderReranker.FALLBACK_MODELS) {
|
|
53
|
-
if (fallbackModel === this.modelName)
|
|
54
|
-
continue; // Skip already tried model
|
|
55
|
-
console.warn(`Trying fallback model: ${fallbackModel}`);
|
|
56
|
-
if (await this.tryLoadModel(fallbackModel)) {
|
|
57
|
-
this.modelName = fallbackModel;
|
|
58
|
-
return;
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
console.warn('All embedding models failed to load. Reranking will be disabled.');
|
|
62
|
-
this.model = null;
|
|
63
|
-
this.tokenizer = null;
|
|
40
|
+
await this.tryLoadModel(this.modelName);
|
|
64
41
|
}
|
|
65
42
|
/**
|
|
66
43
|
* Try to load a specific model
|
|
67
44
|
*/
|
|
68
45
|
async tryLoadModel(modelName) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
console.log(`Cross-encoder model loaded successfully: ${modelName}`);
|
|
85
|
-
return true;
|
|
86
|
-
}
|
|
87
|
-
catch (error) {
|
|
88
|
-
console.warn(`Failed to load model ${modelName}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
89
|
-
return false;
|
|
90
|
-
}
|
|
46
|
+
console.log(`Loading cross-encoder model: ${modelName}`);
|
|
47
|
+
// Ensure polyfills are set up exactly like the working standalone version
|
|
48
|
+
this.ensurePolyfills();
|
|
49
|
+
// Use the exact same approach as the working standalone test
|
|
50
|
+
const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
|
|
51
|
+
console.log('Loading model...');
|
|
52
|
+
this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
|
|
53
|
+
cache_dir: config.model_cache_path,
|
|
54
|
+
dtype: 'fp32'
|
|
55
|
+
});
|
|
56
|
+
console.log('Loading tokenizer...');
|
|
57
|
+
this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
|
|
58
|
+
cache_dir: config.model_cache_path
|
|
59
|
+
});
|
|
60
|
+
console.log(`Cross-encoder model loaded successfully: ${modelName}`);
|
|
91
61
|
}
|
|
92
62
|
/**
|
|
93
63
|
* Rerank search results using embedding similarity scoring
|