rag-lite-ts 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +606 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.js +7 -8
- package/dist/factories/index.d.ts +1 -1
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +9 -0
- package/dist/index.js +11 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +26 -0
- package/dist/search.js +54 -1
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +15 -3
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test Runner for Chameleon Error Recovery and Reliability Tests
|
|
3
|
+
* Runs the comprehensive error recovery test suite
|
|
4
|
+
*/
|
|
5
|
+
import { spawn } from 'child_process';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import { dirname } from 'path';
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = dirname(__filename);
|
|
10
|
+
/**
 * Builds the test project once, then runs every Chameleon error-recovery test
 * file with `node --test`, echoing each file's output and printing a summary.
 *
 * A file counts as failed when its process exits with a non-zero code (or is
 * killed by a signal), when it cannot be spawned, or when the build failed.
 *
 * @returns {Promise<boolean>} `true` only when no test file failed.
 */
async function runTests() {
    console.log('🧪 Running Chameleon Error Recovery and Reliability Tests...\n');
    const testFiles = [
        'chameleon-error-recovery.test.ts',
        'chameleon-reliability-integration.test.ts',
        'chameleon-stress-testing.test.ts',
        'chameleon-error-simulation.test.ts'
    ];
    // Runs a child process to completion and resolves with its exit code and
    // captured output. Both pipes are always drained so the child can never
    // stall on a full pipe buffer, and spawn failures reject instead of
    // surfacing as an unhandled 'error' event.
    const runCommand = (command, args, options = {}) => new Promise((resolve, reject) => {
        const child = spawn(command, args, { stdio: 'pipe', ...options });
        let stdout = '';
        let stderr = '';
        child.stdout?.on('data', (data) => {
            stdout += data.toString();
        });
        child.stderr?.on('data', (data) => {
            stderr += data.toString();
        });
        child.on('error', reject);
        child.on('close', (code) => resolve({ code, stdout, stderr }));
    });
    // Reads one counter ("pass", "fail") from the summary that `node --test`
    // prints: "# pass 3" with the TAP reporter, "ℹ pass 3" with the spec reporter.
    const readSummaryCount = (text, label) => {
        const match = text.match(new RegExp(`^(?:#|ℹ) ${label} (\\d+)\\s*$`, 'm'));
        return match ? Number(match[1]) : null;
    };
    let totalTests = 0;
    let passedTests = 0;
    let failedTests = 0;
    // Files that failed, tracked separately from per-test counts so that a
    // failing file whose output contains no recognisable test markers still
    // fails the run.
    let failedFiles = 0;
    // The build does not depend on the individual test file, so run it once.
    // The command is passed as a single string because it goes through the shell
    // (needed to resolve `npx` on Windows).
    let buildError = null;
    try {
        const build = await runCommand('npx tsc --project tsconfig.test.json', [], { shell: true });
        if (build.code !== 0) {
            // Surface compiler diagnostics; without them the failure is opaque.
            if (build.stdout) {
                console.log('Output:', build.stdout);
            }
            if (build.stderr) {
                console.log('Errors:', build.stderr);
            }
            buildError = new Error(`Build failed with code ${build.code}`);
        }
    }
    catch (error) {
        buildError = error;
    }
    for (const testFile of testFiles) {
        console.log(`\n📋 Running ${testFile}...`);
        try {
            if (buildError) {
                throw buildError;
            }
            // Anchor the extension swap so only the trailing ".ts" is replaced.
            const compiledPath = `dist/${testFile.replace(/\.ts$/, '.js')}`;
            // Run the compiled test with the same Node binary; no shell is needed.
            const { code, stdout: output, stderr: errorOutput } = await runCommand(process.execPath, ['--test', compiledPath]);
            console.log(`Exit code: ${code}`);
            if (output) {
                console.log('Output:', output);
            }
            if (errorOutput) {
                console.log('Errors:', errorOutput);
            }
            const passCount = readSummaryCount(output, 'pass');
            const failCount = readSummaryCount(output, 'fail');
            if (passCount !== null && failCount !== null) {
                // Prefer the runner's own summary (skipped/todo tests are not counted).
                passedTests += passCount;
                failedTests += failCount;
                totalTests += passCount + failCount;
            }
            else {
                // Fallback approximation: count result glyphs and attribute them
                // all to the file's overall outcome.
                const testMatches = output.match(/✓|×|✔|✖/g);
                const currentTests = testMatches ? testMatches.length : 0;
                totalTests += currentTests;
                if (code === 0) {
                    passedTests += currentTests;
                }
                else {
                    failedTests += currentTests;
                }
            }
            if (code === 0) {
                console.log(`✅ ${testFile} completed successfully`);
            }
            else {
                failedFiles++;
                console.log(`❌ ${testFile} failed`);
            }
        }
        catch (error) {
            console.error(`❌ Failed to run ${testFile}:`, error instanceof Error ? error.message : String(error));
            failedTests++;
            failedFiles++;
        }
    }
    const allPassed = failedFiles === 0;
    console.log('\n📊 Test Summary:');
    console.log(`Total Tests: ${totalTests}`);
    console.log(`Passed: ${passedTests}`);
    console.log(`Failed: ${failedTests}`);
    if (allPassed) {
        console.log('\n🎉 All error recovery tests completed!');
        console.log('✅ System demonstrates robust error handling and recovery mechanisms');
    }
    else {
        console.log('\n⚠️ Some tests failed - this may be expected in test environments');
        console.log('🔍 Review the output above for specific failure details');
    }
    return allPassed;
}
|
|
96
|
+
// Run tests if this file is executed directly
|
|
97
|
+
// Compare file-system paths rather than hand-built URL strings: a literal
// `file://${path}` never equals import.meta.url on Windows (drive letters,
// backslashes) or when the path contains characters that get percent-encoded.
if (process.argv[1] !== undefined && fileURLToPath(import.meta.url) === process.argv[1]) {
    runTests().catch((error) => {
        // runTests handles per-file failures itself, so a rejection here is an
        // unexpected crash: log it and make the process exit code reflect it.
        console.error(error);
        process.exitCode = 1;
    });
}
|
|
100
|
+
export { runTests };
|
|
101
|
+
//# sourceMappingURL=run-error-recovery-tests.js.map
|
package/dist/search.d.ts
CHANGED
|
@@ -40,6 +40,32 @@ export declare class SearchEngine {
|
|
|
40
40
|
* Perform semantic search
|
|
41
41
|
*/
|
|
42
42
|
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
43
|
+
/**
|
|
44
|
+
* Retrieve content by ID in the specified format
|
|
45
|
+
* @param contentId - Content ID to retrieve
|
|
46
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
47
|
+
* @returns Promise that resolves to content in requested format
|
|
48
|
+
*/
|
|
49
|
+
getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
|
|
50
|
+
/**
|
|
51
|
+
* Retrieve multiple content items efficiently in batch
|
|
52
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
53
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
54
|
+
* @returns Promise that resolves to array of content in requested format
|
|
55
|
+
*/
|
|
56
|
+
getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
|
|
57
|
+
/**
|
|
58
|
+
* Retrieve content metadata for result enhancement
|
|
59
|
+
* @param contentId - Content ID to get metadata for
|
|
60
|
+
* @returns Promise that resolves to content metadata
|
|
61
|
+
*/
|
|
62
|
+
getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
|
|
63
|
+
/**
|
|
64
|
+
* Verify that content exists and is accessible
|
|
65
|
+
* @param contentId - Content ID to verify
|
|
66
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
67
|
+
*/
|
|
68
|
+
verifyContentExists(contentId: string): Promise<boolean>;
|
|
43
69
|
/**
|
|
44
70
|
* Clean up resources
|
|
45
71
|
*/
|
package/dist/search.js
CHANGED
|
@@ -74,8 +74,11 @@ export class SearchEngine {
|
|
|
74
74
|
const db = await openDatabase(this.dbPath);
|
|
75
75
|
const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
|
|
76
76
|
await indexManager.initialize();
|
|
77
|
+
// Create ContentResolver for unified content system
|
|
78
|
+
const { ContentResolver } = await import('./core/content-resolver.js');
|
|
79
|
+
const contentResolver = new ContentResolver(db);
|
|
77
80
|
// Create core engine with dependency injection
|
|
78
|
-
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn);
|
|
81
|
+
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
|
|
79
82
|
}
|
|
80
83
|
else {
|
|
81
84
|
// Use factory for standard initialization
|
|
@@ -94,6 +97,56 @@ export class SearchEngine {
|
|
|
94
97
|
}
|
|
95
98
|
return this.coreEngine.search(query, options);
|
|
96
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* Retrieve content by ID in the specified format
|
|
102
|
+
* @param contentId - Content ID to retrieve
|
|
103
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
104
|
+
* @returns Promise that resolves to content in requested format
|
|
105
|
+
*/
|
|
106
|
+
async getContent(contentId, format = 'file') {
|
|
107
|
+
await this.initialize();
|
|
108
|
+
if (!this.coreEngine) {
|
|
109
|
+
throw new Error('SearchEngine failed to initialize');
|
|
110
|
+
}
|
|
111
|
+
return this.coreEngine.getContent(contentId, format);
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Retrieve multiple content items efficiently in batch
|
|
115
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
116
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
117
|
+
* @returns Promise that resolves to array of content in requested format
|
|
118
|
+
*/
|
|
119
|
+
async getContentBatch(contentIds, format = 'file') {
|
|
120
|
+
await this.initialize();
|
|
121
|
+
if (!this.coreEngine) {
|
|
122
|
+
throw new Error('SearchEngine failed to initialize');
|
|
123
|
+
}
|
|
124
|
+
return this.coreEngine.getContentBatch(contentIds, format);
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Retrieve content metadata for result enhancement
|
|
128
|
+
* @param contentId - Content ID to get metadata for
|
|
129
|
+
* @returns Promise that resolves to content metadata
|
|
130
|
+
*/
|
|
131
|
+
async getContentMetadata(contentId) {
|
|
132
|
+
await this.initialize();
|
|
133
|
+
if (!this.coreEngine) {
|
|
134
|
+
throw new Error('SearchEngine failed to initialize');
|
|
135
|
+
}
|
|
136
|
+
return this.coreEngine.getContentMetadata(contentId);
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Verify that content exists and is accessible
|
|
140
|
+
* @param contentId - Content ID to verify
|
|
141
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
142
|
+
*/
|
|
143
|
+
async verifyContentExists(contentId) {
|
|
144
|
+
await this.initialize();
|
|
145
|
+
if (!this.coreEngine) {
|
|
146
|
+
throw new Error('SearchEngine failed to initialize');
|
|
147
|
+
}
|
|
148
|
+
return this.coreEngine.verifyContentExists(contentId);
|
|
149
|
+
}
|
|
97
150
|
/**
|
|
98
151
|
* Clean up resources
|
|
99
152
|
*/
|
package/dist/test-utils.d.ts
CHANGED
|
@@ -2,35 +2,17 @@
|
|
|
2
2
|
* Test utilities for multi-model support
|
|
3
3
|
* Provides common configurations and helpers for testing with different embedding models
|
|
4
4
|
*/
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
readonly dimensions: 768;
|
|
13
|
-
readonly chunkSize: 400;
|
|
14
|
-
readonly batchSize: 8;
|
|
15
|
-
}];
|
|
5
|
+
export interface TestModel {
|
|
6
|
+
name: string;
|
|
7
|
+
dimensions: number;
|
|
8
|
+
chunkSize: number;
|
|
9
|
+
batchSize: number;
|
|
10
|
+
}
|
|
11
|
+
export declare const TEST_MODELS: TestModel[];
|
|
16
12
|
/**
|
|
17
13
|
* Retrieve model configuration by name
|
|
18
14
|
* @param modelName - The name of the model to retrieve
|
|
19
15
|
* @returns Model configuration object or undefined if not found
|
|
20
16
|
*/
|
|
21
|
-
export declare function getTestModel(modelName: string):
|
|
22
|
-
readonly name: "sentence-transformers/all-MiniLM-L6-v2";
|
|
23
|
-
readonly dimensions: 384;
|
|
24
|
-
readonly chunkSize: 250;
|
|
25
|
-
readonly batchSize: 16;
|
|
26
|
-
} | {
|
|
27
|
-
readonly name: "Xenova/all-mpnet-base-v2";
|
|
28
|
-
readonly dimensions: 768;
|
|
29
|
-
readonly chunkSize: 400;
|
|
30
|
-
readonly batchSize: 8;
|
|
31
|
-
} | undefined;
|
|
32
|
-
/**
|
|
33
|
-
* Type for test model configuration
|
|
34
|
-
*/
|
|
35
|
-
export type TestModel = typeof TEST_MODELS[number];
|
|
17
|
+
export declare function getTestModel(modelName: string): TestModel | undefined;
|
|
36
18
|
//# sourceMappingURL=test-utils.d.ts.map
|
package/dist/text/chunker.d.ts
CHANGED
package/dist/text/embedder.js
CHANGED
|
@@ -2,6 +2,7 @@ import '../dom-polyfills.js';
|
|
|
2
2
|
import { createHash } from 'crypto';
|
|
3
3
|
import { config } from '../core/config.js';
|
|
4
4
|
import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
|
|
5
|
+
import { createModelLoadingError, createInvalidContentError, createMissingDependencyError } from '../core/actionable-error-messages.js';
|
|
5
6
|
/**
|
|
6
7
|
* List of supported embedding models
|
|
7
8
|
*/
|
|
@@ -22,8 +23,7 @@ export class EmbeddingEngine {
|
|
|
22
23
|
this.batchSize = batchSize || config.batch_size;
|
|
23
24
|
// Validate that the model is supported
|
|
24
25
|
if (!SUPPORTED_MODELS.includes(this.modelName)) {
|
|
25
|
-
throw
|
|
26
|
-
`Supported models: ${SUPPORTED_MODELS.join(', ')}`);
|
|
26
|
+
throw createModelLoadingError(this.modelName, `Model not in supported list. Supported models: ${SUPPORTED_MODELS.join(', ')}`, { operationContext: 'EmbeddingEngine constructor' });
|
|
27
27
|
}
|
|
28
28
|
console.log(`🤖 EmbeddingEngine initialized with model: ${this.modelName}, batchSize: ${this.batchSize}`);
|
|
29
29
|
}
|
|
@@ -88,7 +88,10 @@ export class EmbeddingEngine {
|
|
|
88
88
|
*/
|
|
89
89
|
async embedBatch(texts) {
|
|
90
90
|
if (!this.model) {
|
|
91
|
-
throw
|
|
91
|
+
throw createMissingDependencyError('model', 'object', {
|
|
92
|
+
operationContext: 'embedBatch',
|
|
93
|
+
includeTroubleshooting: true
|
|
94
|
+
});
|
|
92
95
|
}
|
|
93
96
|
if (texts.length === 0) {
|
|
94
97
|
return [];
|
|
@@ -123,7 +126,8 @@ export class EmbeddingEngine {
|
|
|
123
126
|
const vector = new Float32Array(embeddingData[i]);
|
|
124
127
|
results.push({
|
|
125
128
|
embedding_id,
|
|
126
|
-
vector
|
|
129
|
+
vector,
|
|
130
|
+
contentType: 'text'
|
|
127
131
|
});
|
|
128
132
|
}
|
|
129
133
|
return results;
|
|
@@ -173,7 +177,8 @@ export class EmbeddingEngine {
|
|
|
173
177
|
const vector = new Float32Array(embeddingData[0]);
|
|
174
178
|
return {
|
|
175
179
|
embedding_id,
|
|
176
|
-
vector
|
|
180
|
+
vector,
|
|
181
|
+
contentType: 'text'
|
|
177
182
|
};
|
|
178
183
|
}
|
|
179
184
|
catch (error) {
|
|
@@ -189,7 +194,9 @@ export class EmbeddingEngine {
|
|
|
189
194
|
async embedSingle(text) {
|
|
190
195
|
const results = await this.embedBatch([text]);
|
|
191
196
|
if (results.length === 0) {
|
|
192
|
-
throw
|
|
197
|
+
throw createInvalidContentError('text', 'empty', {
|
|
198
|
+
operationContext: 'embedText'
|
|
199
|
+
});
|
|
193
200
|
}
|
|
194
201
|
return results[0];
|
|
195
202
|
}
|
|
@@ -357,10 +364,10 @@ export function createTextEmbedFunction(modelName, batchSize) {
|
|
|
357
364
|
}
|
|
358
365
|
// Use the existing embedSingle method
|
|
359
366
|
const result = await engine.embedSingle(query);
|
|
360
|
-
//
|
|
367
|
+
// Ensure contentType is present (should already be included from embedSingle)
|
|
361
368
|
return {
|
|
362
369
|
...result,
|
|
363
|
-
contentType: 'text'
|
|
370
|
+
contentType: result.contentType || 'text'
|
|
364
371
|
};
|
|
365
372
|
};
|
|
366
373
|
return embedFunction;
|
package/dist/text/index.d.ts
CHANGED
|
@@ -3,5 +3,6 @@ export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } fr
|
|
|
3
3
|
export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
|
|
4
4
|
export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
|
|
5
5
|
export { type ChunkConfig } from '../core/chunker.js';
|
|
6
|
+
export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
|
|
6
7
|
export * from './preprocessors/index.js';
|
|
7
8
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/text/index.js
CHANGED
|
@@ -3,6 +3,7 @@ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createT
|
|
|
3
3
|
export { CrossEncoderReranker, createTextRerankFunction, createTextReranker } from './reranker.js';
|
|
4
4
|
export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
|
|
5
5
|
export { chunkDocument } from '../core/chunker.js';
|
|
6
|
+
export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
|
|
6
7
|
// Re-export preprocessors
|
|
7
8
|
export * from './preprocessors/index.js';
|
|
8
9
|
//# sourceMappingURL=index.js.map
|
package/dist/text/reranker.d.ts
CHANGED
|
@@ -8,13 +8,12 @@ export declare class CrossEncoderReranker {
|
|
|
8
8
|
private model;
|
|
9
9
|
private tokenizer;
|
|
10
10
|
private modelName;
|
|
11
|
-
private static readonly FALLBACK_MODELS;
|
|
12
11
|
/**
|
|
13
12
|
* Ensure DOM polyfills are set up for transformers.js
|
|
14
13
|
*/
|
|
15
14
|
private ensurePolyfills;
|
|
16
15
|
/**
|
|
17
|
-
* Load the embedding model
|
|
16
|
+
* Load the embedding model
|
|
18
17
|
*/
|
|
19
18
|
loadModel(): Promise<void>;
|
|
20
19
|
/**
|
package/dist/text/reranker.js
CHANGED
|
@@ -18,12 +18,6 @@ export class CrossEncoderReranker {
|
|
|
18
18
|
model = null; // Use any to avoid complex transformers.js typing issues
|
|
19
19
|
tokenizer = null;
|
|
20
20
|
modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
|
|
21
|
-
// Alternative models in case the primary fails
|
|
22
|
-
static FALLBACK_MODELS = [
|
|
23
|
-
'Xenova/ms-marco-MiniLM-L-6-v2', // Primary - proven to work in standalone test
|
|
24
|
-
'cross-encoder/ms-marco-MiniLM-L-6-v2', // Original (may have issues)
|
|
25
|
-
'cross-encoder/ms-marco-MiniLM-L-2-v2', // Smaller original (may have issues)
|
|
26
|
-
];
|
|
27
21
|
/**
|
|
28
22
|
* Ensure DOM polyfills are set up for transformers.js
|
|
29
23
|
*/
|
|
@@ -40,54 +34,30 @@ export class CrossEncoderReranker {
|
|
|
40
34
|
}
|
|
41
35
|
}
|
|
42
36
|
/**
|
|
43
|
-
* Load the embedding model
|
|
37
|
+
* Load the embedding model
|
|
44
38
|
*/
|
|
45
39
|
async loadModel() {
|
|
46
|
-
|
|
47
|
-
if (await this.tryLoadModel(this.modelName)) {
|
|
48
|
-
return;
|
|
49
|
-
}
|
|
50
|
-
// Try fallback models if primary fails
|
|
51
|
-
console.warn(`Primary model ${this.modelName} failed, trying fallbacks...`);
|
|
52
|
-
for (const fallbackModel of CrossEncoderReranker.FALLBACK_MODELS) {
|
|
53
|
-
if (fallbackModel === this.modelName)
|
|
54
|
-
continue; // Skip already tried model
|
|
55
|
-
console.warn(`Trying fallback model: ${fallbackModel}`);
|
|
56
|
-
if (await this.tryLoadModel(fallbackModel)) {
|
|
57
|
-
this.modelName = fallbackModel;
|
|
58
|
-
return;
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
console.warn('All embedding models failed to load. Reranking will be disabled.');
|
|
62
|
-
this.model = null;
|
|
63
|
-
this.tokenizer = null;
|
|
40
|
+
await this.tryLoadModel(this.modelName);
|
|
64
41
|
}
|
|
65
42
|
/**
|
|
66
43
|
* Try to load a specific model
|
|
67
44
|
*/
|
|
68
45
|
async tryLoadModel(modelName) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
console.log(`Cross-encoder model loaded successfully: ${modelName}`);
|
|
85
|
-
return true;
|
|
86
|
-
}
|
|
87
|
-
catch (error) {
|
|
88
|
-
console.warn(`Failed to load model ${modelName}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
89
|
-
return false;
|
|
90
|
-
}
|
|
46
|
+
console.log(`Loading cross-encoder model: ${modelName}`);
|
|
47
|
+
// Ensure polyfills are set up exactly like the working standalone version
|
|
48
|
+
this.ensurePolyfills();
|
|
49
|
+
// Use the exact same approach as the working standalone test
|
|
50
|
+
const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
|
|
51
|
+
console.log('Loading model...');
|
|
52
|
+
this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
|
|
53
|
+
cache_dir: config.model_cache_path,
|
|
54
|
+
dtype: 'fp32'
|
|
55
|
+
});
|
|
56
|
+
console.log('Loading tokenizer...');
|
|
57
|
+
this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
|
|
58
|
+
cache_dir: config.model_cache_path
|
|
59
|
+
});
|
|
60
|
+
console.log(`Cross-encoder model loaded successfully: ${modelName}`);
|
|
91
61
|
}
|
|
92
62
|
/**
|
|
93
63
|
* Rerank search results using embedding similarity scoring
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
|
|
3
|
+
* Implements UniversalEmbedder interface for sentence-transformer models
|
|
4
|
+
* Adapts existing text embedding logic to the universal interface
|
|
5
|
+
*/
|
|
6
|
+
import '../dom-polyfills.js';
|
|
7
|
+
import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
|
|
8
|
+
import type { EmbeddingResult } from '../types.js';
|
|
9
|
+
/**
|
|
10
|
+
* Sentence transformer embedder implementation
|
|
11
|
+
* Supports sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2
|
|
12
|
+
* Ensures consistent EmbeddingResult format with contentType='text'
|
|
13
|
+
* Adapts existing EmbeddingEngine to UniversalEmbedder interface
|
|
14
|
+
*/
|
|
15
|
+
export declare class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
|
|
16
|
+
private embeddingEngine;
|
|
17
|
+
private resourceManager;
|
|
18
|
+
private embedderResourceId?;
|
|
19
|
+
private engineResourceId?;
|
|
20
|
+
constructor(modelName: string, options?: EmbedderOptions);
|
|
21
|
+
/**
|
|
22
|
+
* Load the sentence transformer model using existing EmbeddingEngine
|
|
23
|
+
*/
|
|
24
|
+
loadModel(): Promise<void>;
|
|
25
|
+
/**
|
|
26
|
+
* Clean up model resources with comprehensive disposal
|
|
27
|
+
*/
|
|
28
|
+
cleanup(): Promise<void>;
|
|
29
|
+
/**
|
|
30
|
+
* Embed text using the existing EmbeddingEngine
|
|
31
|
+
*/
|
|
32
|
+
embedText(text: string): Promise<EmbeddingResult>;
|
|
33
|
+
/**
|
|
34
|
+
* Optimized batch processing using existing EmbeddingEngine and BatchProcessingOptimizer
|
|
35
|
+
* Overrides the base implementation for better performance with progress reporting
|
|
36
|
+
*/
|
|
37
|
+
protected processBatch(batch: Array<{
|
|
38
|
+
content: string;
|
|
39
|
+
contentType: string;
|
|
40
|
+
metadata?: Record<string, any>;
|
|
41
|
+
}>): Promise<EmbeddingResult[]>;
|
|
42
|
+
/**
|
|
43
|
+
* Get model-specific information
|
|
44
|
+
*/
|
|
45
|
+
getModelInfo(): {
|
|
46
|
+
capabilities: {
|
|
47
|
+
supportsSemanticSimilarity: boolean;
|
|
48
|
+
supportsTextClassification: boolean;
|
|
49
|
+
supportsTextClustering: boolean;
|
|
50
|
+
recommendedUseCase: string;
|
|
51
|
+
supportsText: boolean;
|
|
52
|
+
supportsImages: boolean;
|
|
53
|
+
supportsBatchProcessing: boolean;
|
|
54
|
+
supportsMetadata: boolean;
|
|
55
|
+
maxBatchSize?: number;
|
|
56
|
+
maxTextLength?: number;
|
|
57
|
+
supportedImageFormats?: readonly string[];
|
|
58
|
+
supportsMultimodal?: boolean;
|
|
59
|
+
supportsCrossModalSearch?: boolean;
|
|
60
|
+
unifiedEmbeddingSpace?: boolean;
|
|
61
|
+
reliableImplementation?: boolean;
|
|
62
|
+
};
|
|
63
|
+
name: string;
|
|
64
|
+
type: import("../core/universal-embedder.js").ModelType;
|
|
65
|
+
dimensions: number;
|
|
66
|
+
version: string;
|
|
67
|
+
supportedContentTypes: readonly string[];
|
|
68
|
+
requirements: import("../types.js").ModelRequirements;
|
|
69
|
+
};
|
|
70
|
+
/**
|
|
71
|
+
* Check if the model is suitable for a specific task
|
|
72
|
+
*/
|
|
73
|
+
isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval'): boolean;
|
|
74
|
+
/**
|
|
75
|
+
* Embed document batch using existing EmbeddingEngine's optimized method
|
|
76
|
+
* This method provides compatibility with the existing document ingestion pipeline
|
|
77
|
+
*/
|
|
78
|
+
embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
|
|
79
|
+
/**
|
|
80
|
+
* Get the model version from the underlying EmbeddingEngine
|
|
81
|
+
*/
|
|
82
|
+
getModelVersion(): string;
|
|
83
|
+
/**
|
|
84
|
+
* Get the batch size from the underlying EmbeddingEngine
|
|
85
|
+
*/
|
|
86
|
+
getBatchSize(): number;
|
|
87
|
+
/**
|
|
88
|
+
* Check if the underlying EmbeddingEngine is loaded
|
|
89
|
+
*/
|
|
90
|
+
isEngineLoaded(): boolean;
|
|
91
|
+
/**
|
|
92
|
+
* Override isLoaded to check both internal state and engine state
|
|
93
|
+
*/
|
|
94
|
+
isLoaded(): boolean;
|
|
95
|
+
}
|
|
96
|
+
//# sourceMappingURL=sentence-transformer-embedder.d.ts.map
|