rag-lite-ts 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -109
- package/dist/cli/indexer.js +262 -46
- package/dist/cli/search.js +54 -32
- package/dist/cli.js +185 -28
- package/dist/config.d.ts +34 -73
- package/dist/config.js +50 -255
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/adapters.d.ts +93 -0
- package/dist/core/adapters.js +139 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/chunker.d.ts +119 -0
- package/dist/core/chunker.js +73 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.d.ts +102 -0
- package/dist/core/config.js +247 -0
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +245 -0
- package/dist/core/db.js +952 -0
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
- package/dist/{error-handler.js → core/error-handler.js} +51 -8
- package/dist/core/index.d.ts +59 -0
- package/dist/core/index.js +69 -0
- package/dist/core/ingestion.d.ts +213 -0
- package/dist/core/ingestion.js +812 -0
- package/dist/core/interfaces.d.ts +408 -0
- package/dist/core/interfaces.js +106 -0
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
- package/dist/{path-manager.js → core/path-manager.js} +5 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search-pipeline.d.ts +111 -0
- package/dist/core/search-pipeline.js +287 -0
- package/dist/core/search.d.ts +131 -0
- package/dist/core/search.js +296 -0
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +66 -0
- package/dist/core/types.js +6 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
- package/dist/{vector-index.js → core/vector-index.js} +21 -3
- package/dist/dom-polyfills.d.ts +6 -0
- package/dist/dom-polyfills.js +40 -0
- package/dist/factories/index.d.ts +43 -0
- package/dist/factories/index.js +44 -0
- package/dist/factories/text-factory.d.ts +560 -0
- package/dist/factories/text-factory.js +968 -0
- package/dist/file-processor.d.ts +90 -4
- package/dist/file-processor.js +723 -20
- package/dist/index-manager.d.ts +3 -2
- package/dist/index-manager.js +13 -11
- package/dist/index.d.ts +72 -8
- package/dist/index.js +102 -16
- package/dist/indexer.js +1 -1
- package/dist/ingestion.d.ts +44 -154
- package/dist/ingestion.js +75 -671
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1186 -79
- package/dist/multimodal/clip-embedder.d.ts +314 -0
- package/dist/multimodal/clip-embedder.js +945 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/preprocess.js +1 -1
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search-standalone.js +1 -1
- package/dist/search.d.ts +51 -69
- package/dist/search.js +117 -412
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +33 -0
- package/dist/{chunker.js → text/chunker.js} +98 -75
- package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
- package/dist/{embedder.js → text/embedder.js} +84 -10
- package/dist/text/index.d.ts +8 -0
- package/dist/text/index.js +9 -0
- package/dist/text/preprocessors/index.d.ts +17 -0
- package/dist/text/preprocessors/index.js +38 -0
- package/dist/text/preprocessors/mdx.d.ts +25 -0
- package/dist/text/preprocessors/mdx.js +101 -0
- package/dist/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/text/preprocessors/mermaid.js +330 -0
- package/dist/text/preprocessors/registry.d.ts +56 -0
- package/dist/text/preprocessors/registry.js +180 -0
- package/dist/text/reranker.d.ts +59 -0
- package/dist/{reranker.js → text/reranker.js} +138 -53
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
- package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
- package/dist/types.d.ts +40 -1
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +16 -4
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/chunker.d.ts +0 -47
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/db.d.ts +0 -90
- package/dist/db.d.ts.map +0 -1
- package/dist/db.js +0 -340
- package/dist/db.js.map +0 -1
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js.map +0 -1
- package/dist/error-handler.d.ts.map +0 -1
- package/dist/error-handler.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/path-manager.d.ts.map +0 -1
- package/dist/path-manager.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/reranker.d.ts +0 -40
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js.map +0 -1
- package/dist/resource-manager-demo.d.ts +0 -7
- package/dist/resource-manager-demo.d.ts.map +0 -1
- package/dist/resource-manager-demo.js +0 -52
- package/dist/resource-manager-demo.js.map +0 -1
- package/dist/resource-manager.d.ts +0 -129
- package/dist/resource-manager.d.ts.map +0 -1
- package/dist/resource-manager.js +0 -389
- package/dist/resource-manager.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/tokenizer.d.ts.map +0 -1
- package/dist/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/vector-index.d.ts.map +0 -1
- package/dist/vector-index.js.map +0 -1
package/dist/search.js
CHANGED
|
@@ -1,453 +1,158 @@
|
|
|
1
|
-
import { initializeEmbeddingEngine } from './embedder.js';
|
|
2
|
-
import { IndexManager } from './index-manager.js';
|
|
3
|
-
import { getChunksByEmbeddingIds, openDatabase, getStoredModelInfo } from './db.js';
|
|
4
|
-
import { CrossEncoderReranker } from './reranker.js';
|
|
5
|
-
import { config } from './config.js';
|
|
6
|
-
import { join, resolve } from 'path';
|
|
7
|
-
import { existsSync } from 'fs';
|
|
8
1
|
/**
|
|
9
|
-
*
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*/
|
|
27
|
-
function resolveSearchPaths(indexPath, dbPath) {
|
|
28
|
-
const currentDir = process.cwd();
|
|
29
|
-
return {
|
|
30
|
-
indexPath: indexPath ? resolve(indexPath) : join(currentDir, 'vector-index.bin'),
|
|
31
|
-
dbPath: dbPath ? resolve(dbPath) : join(currentDir, 'db.sqlite')
|
|
32
|
-
};
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* Search engine that provides semantic search capabilities
|
|
36
|
-
* Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
|
|
37
|
-
* Supports concurrent read operations for multiple simultaneous queries
|
|
2
|
+
* Public API SearchEngine - Simple constructor interface with internal factory usage
|
|
3
|
+
*
|
|
4
|
+
* This class provides a clean, simple API while using the new core architecture
|
|
5
|
+
* internally. It handles dependency injection automatically.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* // Simple usage
|
|
10
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite');
|
|
11
|
+
* const results = await search.search('query');
|
|
12
|
+
*
|
|
13
|
+
* // With options
|
|
14
|
+
* const search = new SearchEngine('./index.bin', './db.sqlite', {
|
|
15
|
+
* embeddingModel: 'all-MiniLM-L6-v2',
|
|
16
|
+
* enableReranking: true
|
|
17
|
+
* });
|
|
18
|
+
* ```
|
|
38
19
|
*/
|
|
20
|
+
import { SearchEngine as CoreSearchEngine } from './core/search.js';
|
|
21
|
+
import { TextSearchFactory } from './factories/index.js';
|
|
39
22
|
export class SearchEngine {
|
|
40
|
-
// Static properties for automatic resource management (Requirement 5.1, 5.2)
|
|
41
|
-
static instances = new Set();
|
|
42
|
-
static cleanupHandlersSet = false;
|
|
43
|
-
embedder = null;
|
|
44
|
-
indexManager = null;
|
|
45
|
-
dbConnection = null;
|
|
46
|
-
reranker = null;
|
|
47
|
-
isInitialized = false;
|
|
48
23
|
indexPath;
|
|
49
24
|
dbPath;
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
this.indexPath = pathConfig.indexPath;
|
|
68
|
-
this.dbPath = pathConfig.dbPath;
|
|
69
|
-
// Set up automatic cleanup on process exit (Requirement 5.5)
|
|
70
|
-
this.setupAutomaticCleanup();
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Legacy constructor for backward compatibility
|
|
74
|
-
* @deprecated Use the simple constructor new SearchEngine(indexPath?, dbPath?) instead
|
|
75
|
-
*/
|
|
76
|
-
static createWithComponents(embedder, indexManager, dbConnection, enableReranking = false) {
|
|
77
|
-
const engine = new SearchEngine();
|
|
78
|
-
engine.embedder = embedder;
|
|
79
|
-
engine.indexManager = indexManager;
|
|
80
|
-
engine.dbConnection = dbConnection;
|
|
81
|
-
engine.enableReranking = enableReranking;
|
|
82
|
-
// Initialize reranker if enabled
|
|
83
|
-
if (enableReranking) {
|
|
84
|
-
engine.reranker = new CrossEncoderReranker();
|
|
25
|
+
options;
|
|
26
|
+
coreEngine = null;
|
|
27
|
+
initPromise = null;
|
|
28
|
+
constructor(indexPath, dbPath, options = {}) {
|
|
29
|
+
this.indexPath = indexPath;
|
|
30
|
+
this.dbPath = dbPath;
|
|
31
|
+
this.options = options;
|
|
32
|
+
// Validate required parameters
|
|
33
|
+
if (!indexPath || typeof indexPath !== 'string' || indexPath.trim() === '') {
|
|
34
|
+
throw new Error('Both indexPath and dbPath are required.\n' +
|
|
35
|
+
'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
|
|
36
|
+
'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
|
|
37
|
+
}
|
|
38
|
+
if (!dbPath || typeof dbPath !== 'string' || dbPath.trim() === '') {
|
|
39
|
+
throw new Error('Both indexPath and dbPath are required.\n' +
|
|
40
|
+
'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
|
|
41
|
+
'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
|
|
85
42
|
}
|
|
86
|
-
engine.isInitialized = true;
|
|
87
|
-
return engine;
|
|
88
43
|
}
|
|
89
44
|
/**
|
|
90
|
-
*
|
|
91
|
-
* Implements lazy initialization as required by Requirements 3.5, 4.3, 5.1, 5.2
|
|
45
|
+
* Initialize the search engine using the factory or direct injection
|
|
92
46
|
*/
|
|
93
|
-
async
|
|
94
|
-
if (this.
|
|
95
|
-
return;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
console.log('Reading stored model information...');
|
|
111
|
-
const storedModelInfo = await getStoredModelInfo(this.dbConnection);
|
|
112
|
-
if (!storedModelInfo) {
|
|
113
|
-
throw this.createUserFriendlyError(new Error('No model information found in database'), 'missing_model_info');
|
|
114
|
-
}
|
|
115
|
-
// Initialize embedder with stored model info (Requirement 3.5)
|
|
116
|
-
console.log(`Loading embedding model: ${storedModelInfo.modelName}...`);
|
|
117
|
-
try {
|
|
118
|
-
this.embedder = await initializeEmbeddingEngine(storedModelInfo.modelName);
|
|
119
|
-
}
|
|
120
|
-
catch (error) {
|
|
121
|
-
throw this.createUserFriendlyError(error, 'model_loading');
|
|
122
|
-
}
|
|
123
|
-
// Initialize index manager with model compatibility validation
|
|
124
|
-
console.log('Initializing index manager...');
|
|
125
|
-
try {
|
|
126
|
-
this.indexManager = new IndexManager(this.indexPath, this.dbPath, storedModelInfo.dimensions, storedModelInfo.modelName);
|
|
127
|
-
await this.indexManager.initialize();
|
|
128
|
-
}
|
|
129
|
-
catch (error) {
|
|
130
|
-
// Check if this is a model compatibility issue
|
|
131
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
132
|
-
if (errorMessage.includes('mismatch') || errorMessage.includes('version') || errorMessage.includes('model')) {
|
|
133
|
-
throw this.createUserFriendlyError(error, 'model_compatibility');
|
|
134
|
-
}
|
|
135
|
-
throw error;
|
|
136
|
-
}
|
|
137
|
-
// Load reranker model if enabled
|
|
138
|
-
if (this.enableReranking) {
|
|
139
|
-
this.reranker = new CrossEncoderReranker();
|
|
140
|
-
console.log('Loading reranker model...');
|
|
141
|
-
try {
|
|
142
|
-
await this.reranker.loadModel();
|
|
47
|
+
async initialize() {
|
|
48
|
+
if (this.coreEngine) {
|
|
49
|
+
return; // Already initialized
|
|
50
|
+
}
|
|
51
|
+
if (this.initPromise) {
|
|
52
|
+
return this.initPromise; // Initialization in progress
|
|
53
|
+
}
|
|
54
|
+
this.initPromise = (async () => {
|
|
55
|
+
// If custom functions are provided, use direct dependency injection
|
|
56
|
+
if (this.options.embedFn || this.options.rerankFn) {
|
|
57
|
+
const { IndexManager } = await import('./index-manager.js');
|
|
58
|
+
const { openDatabase } = await import('./core/db.js');
|
|
59
|
+
const { createTextEmbedFunction } = await import('./text/embedder.js');
|
|
60
|
+
const { existsSync } = await import('fs');
|
|
61
|
+
// Validate files exist
|
|
62
|
+
if (!existsSync(this.indexPath)) {
|
|
63
|
+
throw new Error(`Vector index not found at: ${this.indexPath}`);
|
|
143
64
|
}
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
console.warn('Continuing with vector search only (reranking disabled)');
|
|
147
|
-
this.reranker = null; // Disable reranker for this session
|
|
65
|
+
if (!existsSync(this.dbPath)) {
|
|
66
|
+
throw new Error(`Database not found at: ${this.dbPath}`);
|
|
148
67
|
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
68
|
+
// Use custom embedFn or create default
|
|
69
|
+
const embedFn = this.options.embedFn || createTextEmbedFunction(this.options.embeddingModel);
|
|
70
|
+
// Get model defaults for dimensions
|
|
71
|
+
const { getModelDefaults, config } = await import('./core/config.js');
|
|
72
|
+
const modelDefaults = getModelDefaults(this.options.embeddingModel || config.embedding_model);
|
|
73
|
+
// Initialize dependencies
|
|
74
|
+
const db = await openDatabase(this.dbPath);
|
|
75
|
+
const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
|
|
76
|
+
await indexManager.initialize();
|
|
77
|
+
// Create ContentResolver for unified content system
|
|
78
|
+
const { ContentResolver } = await import('./core/content-resolver.js');
|
|
79
|
+
const contentResolver = new ContentResolver(db);
|
|
80
|
+
// Create core engine with dependency injection
|
|
81
|
+
this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
|
|
158
82
|
}
|
|
159
83
|
else {
|
|
160
|
-
|
|
84
|
+
// Use factory for standard initialization
|
|
85
|
+
this.coreEngine = await TextSearchFactory.create(this.indexPath, this.dbPath, this.options);
|
|
161
86
|
}
|
|
162
|
-
}
|
|
87
|
+
})();
|
|
88
|
+
return this.initPromise;
|
|
163
89
|
}
|
|
164
90
|
/**
|
|
165
|
-
*
|
|
166
|
-
* Implements requirement 5.3: Clear, actionable error messages with specific next steps
|
|
91
|
+
* Perform semantic search
|
|
167
92
|
*/
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
return new SearchError(`Database file not found: ${this.dbPath}`, 'DATABASE_NOT_FOUND', [
|
|
173
|
-
'Run ingestion first to create the database: pipeline.ingestDirectory("./docs/")',
|
|
174
|
-
'Check that the database path is correct',
|
|
175
|
-
'Ensure the ingestion process completed successfully'
|
|
176
|
-
]);
|
|
93
|
+
async search(query, options) {
|
|
94
|
+
await this.initialize();
|
|
95
|
+
if (!this.coreEngine) {
|
|
96
|
+
throw new Error('SearchEngine failed to initialize');
|
|
177
97
|
}
|
|
178
|
-
|
|
179
|
-
return new SearchError(`Vector index file not found: ${this.indexPath}`, 'INDEX_NOT_FOUND', [
|
|
180
|
-
'Run ingestion first to create the index: pipeline.ingestDirectory("./docs/")',
|
|
181
|
-
'Check that the index path is correct',
|
|
182
|
-
'Ensure the ingestion process completed successfully'
|
|
183
|
-
]);
|
|
184
|
-
}
|
|
185
|
-
if (context === 'missing_model_info') {
|
|
186
|
-
return new SearchError('No embedding model information found in database. The database may be from an older version or corrupted.', 'MODEL_INFO_NOT_FOUND', [
|
|
187
|
-
'Run ingestion again to store model information: pipeline.ingestDirectory("./docs/")',
|
|
188
|
-
'If the problem persists, delete the database and index files and run ingestion from scratch',
|
|
189
|
-
'Check that the database was created with a compatible version of the library'
|
|
190
|
-
]);
|
|
191
|
-
}
|
|
192
|
-
if (context === 'model_loading') {
|
|
193
|
-
return new SearchError(`Failed to load embedding model: ${errorMessage}`, 'MODEL_LOADING_FAILED', [
|
|
194
|
-
'Check that the model name is correct and supported',
|
|
195
|
-
'Ensure you have internet connection for model download',
|
|
196
|
-
'Try running ingestion again with a supported model',
|
|
197
|
-
'Check the model configuration in your setup'
|
|
198
|
-
]);
|
|
199
|
-
}
|
|
200
|
-
if (context === 'model_compatibility' || (errorMessage.includes('model') && errorMessage.includes('mismatch'))) {
|
|
201
|
-
return new SearchError(`Model compatibility issue detected: ${errorMessage}`, 'MODEL_COMPATIBILITY', [
|
|
202
|
-
'The stored model information doesn\'t match the current configuration',
|
|
203
|
-
'Run pipeline.rebuildIndex() to rebuild with the current model',
|
|
204
|
-
'Or ensure you\'re using the same model that was used during ingestion',
|
|
205
|
-
'Check that the index and database files are from the same ingestion run'
|
|
206
|
-
]);
|
|
207
|
-
}
|
|
208
|
-
if (errorMessage.includes('ENOENT') || errorMessage.includes('no such file')) {
|
|
209
|
-
return new SearchError(`Required files not found: ${errorMessage}`, 'FILES_NOT_FOUND', [
|
|
210
|
-
'Run ingestion first to create the required files',
|
|
211
|
-
'Check that the file paths are correct',
|
|
212
|
-
'Ensure you have read permissions for the files'
|
|
213
|
-
]);
|
|
214
|
-
}
|
|
215
|
-
if (errorMessage.includes('EACCES') || errorMessage.includes('permission denied')) {
|
|
216
|
-
return new SearchError(`Permission denied: ${errorMessage}`, 'PERMISSION_DENIED', [
|
|
217
|
-
'Check that you have read permissions for the database and index files',
|
|
218
|
-
'Ensure the files are not locked by another process',
|
|
219
|
-
'Try running with appropriate permissions'
|
|
220
|
-
]);
|
|
221
|
-
}
|
|
222
|
-
if (errorMessage.includes('database') || errorMessage.includes('sqlite')) {
|
|
223
|
-
return new SearchError(`Database error: ${errorMessage}`, 'DATABASE_ERROR', [
|
|
224
|
-
'Check that the database file is not corrupted',
|
|
225
|
-
'Ensure no other processes are using the database',
|
|
226
|
-
'Try recreating the database by running ingestion again'
|
|
227
|
-
]);
|
|
228
|
-
}
|
|
229
|
-
// Generic error with basic suggestions
|
|
230
|
-
return new SearchError(`Search engine ${context} failed: ${errorMessage}`, 'GENERAL_ERROR', [
|
|
231
|
-
'Check the error message above for specific details',
|
|
232
|
-
'Ensure all required files exist and are accessible',
|
|
233
|
-
'Try running ingestion first if you haven\'t already',
|
|
234
|
-
'Contact support if the issue persists'
|
|
235
|
-
]);
|
|
98
|
+
return this.coreEngine.search(query, options);
|
|
236
99
|
}
|
|
237
100
|
/**
|
|
238
|
-
*
|
|
239
|
-
*
|
|
101
|
+
* Retrieve content by ID in the specified format
|
|
102
|
+
* @param contentId - Content ID to retrieve
|
|
103
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
104
|
+
* @returns Promise that resolves to content in requested format
|
|
240
105
|
*/
|
|
241
|
-
async
|
|
242
|
-
await this.
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
* Perform semantic search on the indexed documents (matches README API)
|
|
246
|
-
* Automatically initializes resources on first use (Requirements 4.1, 4.2, 4.4, 4.5)
|
|
247
|
-
* Supports concurrent read operations for multiple simultaneous queries
|
|
248
|
-
* @param query - Search query string
|
|
249
|
-
* @param options - Search options including top_k and rerank settings
|
|
250
|
-
* @returns Promise resolving to array of search results
|
|
251
|
-
*/
|
|
252
|
-
async search(query, options = {}) {
|
|
253
|
-
// Automatic initialization on first use (Requirement 4.1, 4.2)
|
|
254
|
-
await this.ensureInitialized();
|
|
255
|
-
if (!query || query.trim().length === 0) {
|
|
256
|
-
return [];
|
|
257
|
-
}
|
|
258
|
-
const startTime = performance.now();
|
|
259
|
-
const topK = options.top_k || config.top_k || 10;
|
|
260
|
-
const shouldRerank = options.rerank !== undefined ? options.rerank : config.rerank_enabled;
|
|
261
|
-
try {
|
|
262
|
-
// Ensure all components are initialized
|
|
263
|
-
if (!this.embedder || !this.indexManager || !this.dbConnection) {
|
|
264
|
-
throw new Error('Search engine components not properly initialized');
|
|
265
|
-
}
|
|
266
|
-
// Step 1: Build query embedding using same model as document chunks
|
|
267
|
-
const embeddingStartTime = performance.now();
|
|
268
|
-
const queryEmbedding = await this.embedder.embedSingle(query);
|
|
269
|
-
const embeddingTime = performance.now() - embeddingStartTime;
|
|
270
|
-
// Step 2: Search using IndexManager (which handles hash mapping properly)
|
|
271
|
-
const searchStartTime = performance.now();
|
|
272
|
-
let searchResult;
|
|
273
|
-
try {
|
|
274
|
-
searchResult = this.indexManager.search(queryEmbedding.vector, topK);
|
|
275
|
-
}
|
|
276
|
-
catch (error) {
|
|
277
|
-
if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
|
|
278
|
-
console.warn(`Hash mapping issue detected: ${error.message}`);
|
|
279
|
-
console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
|
|
280
|
-
return [];
|
|
281
|
-
}
|
|
282
|
-
throw error;
|
|
283
|
-
}
|
|
284
|
-
const vectorSearchTime = performance.now() - searchStartTime;
|
|
285
|
-
if (searchResult.embeddingIds.length === 0) {
|
|
286
|
-
const totalTime = performance.now() - startTime;
|
|
287
|
-
console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
|
|
288
|
-
return [];
|
|
289
|
-
}
|
|
290
|
-
// Step 3: Retrieve chunks from database using embedding IDs
|
|
291
|
-
const retrievalStartTime = performance.now();
|
|
292
|
-
const chunks = await getChunksByEmbeddingIds(this.dbConnection, searchResult.embeddingIds);
|
|
293
|
-
const retrievalTime = performance.now() - retrievalStartTime;
|
|
294
|
-
// Step 4: Format results as JSON with text, score, and document metadata
|
|
295
|
-
let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
|
|
296
|
-
// Step 5: Optional reranking with cross-encoder when enabled
|
|
297
|
-
let rerankTime = 0;
|
|
298
|
-
if (shouldRerank && this.reranker && this.reranker.isLoaded() && results.length > 1) {
|
|
299
|
-
try {
|
|
300
|
-
const rerankStartTime = performance.now();
|
|
301
|
-
results = await this.reranker.rerank(query, results);
|
|
302
|
-
rerankTime = performance.now() - rerankStartTime;
|
|
303
|
-
}
|
|
304
|
-
catch (error) {
|
|
305
|
-
// Fallback to vector search results and log the error
|
|
306
|
-
console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
const totalTime = performance.now() - startTime;
|
|
310
|
-
// Measure latency without premature optimization - just log for monitoring
|
|
311
|
-
console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
|
|
312
|
-
`(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
|
|
313
|
-
`retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
|
|
314
|
-
return results;
|
|
315
|
-
}
|
|
316
|
-
catch (error) {
|
|
317
|
-
throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
106
|
+
async getContent(contentId, format = 'file') {
|
|
107
|
+
await this.initialize();
|
|
108
|
+
if (!this.coreEngine) {
|
|
109
|
+
throw new Error('SearchEngine failed to initialize');
|
|
318
110
|
}
|
|
111
|
+
return this.coreEngine.getContent(contentId, format);
|
|
319
112
|
}
|
|
320
113
|
/**
|
|
321
|
-
*
|
|
322
|
-
* @param
|
|
323
|
-
* @param
|
|
324
|
-
* @
|
|
325
|
-
* @returns Formatted search results
|
|
114
|
+
* Retrieve multiple content items efficiently in batch
|
|
115
|
+
* @param contentIds - Array of content IDs to retrieve
|
|
116
|
+
* @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
|
|
117
|
+
* @returns Promise that resolves to array of content in requested format
|
|
326
118
|
*/
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
chunks.forEach(chunk => {
|
|
332
|
-
chunkMap.set(chunk.embedding_id, chunk);
|
|
333
|
-
});
|
|
334
|
-
// Build results in the order of search results
|
|
335
|
-
for (let i = 0; i < embeddingIds.length; i++) {
|
|
336
|
-
const embeddingId = embeddingIds[i];
|
|
337
|
-
const chunk = chunkMap.get(embeddingId);
|
|
338
|
-
if (chunk) {
|
|
339
|
-
// Convert cosine distance to similarity score (1 - distance)
|
|
340
|
-
// hnswlib-wasm returns cosine distance, we want similarity
|
|
341
|
-
const score = Math.max(0, 1 - distances[i]);
|
|
342
|
-
results.push({
|
|
343
|
-
text: chunk.text,
|
|
344
|
-
score: score,
|
|
345
|
-
document: {
|
|
346
|
-
id: chunk.document_id,
|
|
347
|
-
source: chunk.document_source,
|
|
348
|
-
title: chunk.document_title
|
|
349
|
-
}
|
|
350
|
-
});
|
|
351
|
-
}
|
|
119
|
+
async getContentBatch(contentIds, format = 'file') {
|
|
120
|
+
await this.initialize();
|
|
121
|
+
if (!this.coreEngine) {
|
|
122
|
+
throw new Error('SearchEngine failed to initialize');
|
|
352
123
|
}
|
|
353
|
-
return
|
|
124
|
+
return this.coreEngine.getContentBatch(contentIds, format);
|
|
354
125
|
}
|
|
355
126
|
/**
|
|
356
|
-
*
|
|
357
|
-
* @
|
|
127
|
+
* Retrieve content metadata for result enhancement
|
|
128
|
+
* @param contentId - Content ID to get metadata for
|
|
129
|
+
* @returns Promise that resolves to content metadata
|
|
358
130
|
*/
|
|
359
|
-
async
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
indexSize: 0,
|
|
364
|
-
rerankingEnabled: false,
|
|
365
|
-
isInitialized: false
|
|
366
|
-
};
|
|
131
|
+
async getContentMetadata(contentId) {
|
|
132
|
+
await this.initialize();
|
|
133
|
+
if (!this.coreEngine) {
|
|
134
|
+
throw new Error('SearchEngine failed to initialize');
|
|
367
135
|
}
|
|
368
|
-
|
|
369
|
-
return {
|
|
370
|
-
totalChunks: indexStats.totalVectors,
|
|
371
|
-
indexSize: indexStats.totalVectors,
|
|
372
|
-
rerankingEnabled: this.reranker !== null && this.reranker.isLoaded(),
|
|
373
|
-
isInitialized: this.isInitialized
|
|
374
|
-
};
|
|
136
|
+
return this.coreEngine.getContentMetadata(contentId);
|
|
375
137
|
}
|
|
376
138
|
/**
|
|
377
|
-
*
|
|
139
|
+
* Verify that content exists and is accessible
|
|
140
|
+
* @param contentId - Content ID to verify
|
|
141
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
378
142
|
*/
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (!SearchEngine.cleanupHandlersSet) {
|
|
384
|
-
SearchEngine.cleanupHandlersSet = true;
|
|
385
|
-
const cleanupAll = async () => {
|
|
386
|
-
const instances = Array.from(SearchEngine.instances);
|
|
387
|
-
await Promise.all(instances.map(instance => instance.cleanup()));
|
|
388
|
-
};
|
|
389
|
-
// Handle various exit scenarios
|
|
390
|
-
process.on('exit', () => {
|
|
391
|
-
// Synchronous cleanup for exit event
|
|
392
|
-
for (const instance of SearchEngine.instances) {
|
|
393
|
-
try {
|
|
394
|
-
if (instance.dbConnection) {
|
|
395
|
-
// Synchronous close for exit handler
|
|
396
|
-
instance.dbConnection = null;
|
|
397
|
-
}
|
|
398
|
-
if (instance.indexManager) {
|
|
399
|
-
instance.indexManager = null;
|
|
400
|
-
}
|
|
401
|
-
instance.embedder = null;
|
|
402
|
-
instance.reranker = null;
|
|
403
|
-
instance.isInitialized = false;
|
|
404
|
-
}
|
|
405
|
-
catch (error) {
|
|
406
|
-
// Silent cleanup on exit
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
});
|
|
410
|
-
process.on('SIGINT', async () => {
|
|
411
|
-
await cleanupAll();
|
|
412
|
-
process.exit(0);
|
|
413
|
-
});
|
|
414
|
-
process.on('SIGTERM', async () => {
|
|
415
|
-
await cleanupAll();
|
|
416
|
-
process.exit(0);
|
|
417
|
-
});
|
|
418
|
-
process.on('uncaughtException', async (error) => {
|
|
419
|
-
console.error('Uncaught exception:', error);
|
|
420
|
-
await cleanupAll();
|
|
421
|
-
process.exit(1);
|
|
422
|
-
});
|
|
423
|
-
process.on('unhandledRejection', async (reason) => {
|
|
424
|
-
console.error('Unhandled rejection:', reason);
|
|
425
|
-
await cleanupAll();
|
|
426
|
-
process.exit(1);
|
|
427
|
-
});
|
|
143
|
+
async verifyContentExists(contentId) {
|
|
144
|
+
await this.initialize();
|
|
145
|
+
if (!this.coreEngine) {
|
|
146
|
+
throw new Error('SearchEngine failed to initialize');
|
|
428
147
|
}
|
|
148
|
+
return this.coreEngine.verifyContentExists(contentId);
|
|
429
149
|
}
|
|
430
150
|
/**
|
|
431
|
-
* Clean up resources
|
|
151
|
+
* Clean up resources
|
|
432
152
|
*/
|
|
433
153
|
async cleanup() {
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
await this.dbConnection.close();
|
|
437
|
-
this.dbConnection = null;
|
|
438
|
-
}
|
|
439
|
-
if (this.indexManager) {
|
|
440
|
-
await this.indexManager.close();
|
|
441
|
-
this.indexManager = null;
|
|
442
|
-
}
|
|
443
|
-
this.embedder = null;
|
|
444
|
-
this.reranker = null;
|
|
445
|
-
this.isInitialized = false;
|
|
446
|
-
// Remove from instances set
|
|
447
|
-
SearchEngine.instances.delete(this);
|
|
448
|
-
}
|
|
449
|
-
catch (error) {
|
|
450
|
-
console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
|
|
154
|
+
if (this.coreEngine) {
|
|
155
|
+
await this.coreEngine.cleanup();
|
|
451
156
|
}
|
|
452
157
|
}
|
|
453
158
|
}
|
package/dist/test-utils.d.ts
CHANGED
|
@@ -2,35 +2,17 @@
|
|
|
2
2
|
* Test utilities for multi-model support
|
|
3
3
|
* Provides common configurations and helpers for testing with different embedding models
|
|
4
4
|
*/
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
readonly dimensions: 768;
|
|
13
|
-
readonly chunkSize: 400;
|
|
14
|
-
readonly batchSize: 8;
|
|
15
|
-
}];
|
|
5
|
+
export interface TestModel {
|
|
6
|
+
name: string;
|
|
7
|
+
dimensions: number;
|
|
8
|
+
chunkSize: number;
|
|
9
|
+
batchSize: number;
|
|
10
|
+
}
|
|
11
|
+
export declare const TEST_MODELS: TestModel[];
|
|
16
12
|
/**
|
|
17
13
|
* Retrieve model configuration by name
|
|
18
14
|
* @param modelName - The name of the model to retrieve
|
|
19
15
|
* @returns Model configuration object or undefined if not found
|
|
20
16
|
*/
|
|
21
|
-
export declare function getTestModel(modelName: string):
|
|
22
|
-
readonly name: "sentence-transformers/all-MiniLM-L6-v2";
|
|
23
|
-
readonly dimensions: 384;
|
|
24
|
-
readonly chunkSize: 250;
|
|
25
|
-
readonly batchSize: 16;
|
|
26
|
-
} | {
|
|
27
|
-
readonly name: "Xenova/all-mpnet-base-v2";
|
|
28
|
-
readonly dimensions: 768;
|
|
29
|
-
readonly chunkSize: 400;
|
|
30
|
-
readonly batchSize: 8;
|
|
31
|
-
} | undefined;
|
|
32
|
-
/**
|
|
33
|
-
* Type for test model configuration
|
|
34
|
-
*/
|
|
35
|
-
export type TestModel = typeof TEST_MODELS[number];
|
|
17
|
+
export declare function getTestModel(modelName: string): TestModel | undefined;
|
|
36
18
|
//# sourceMappingURL=test-utils.d.ts.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-specific chunking implementation
|
|
3
|
+
* Implements the ChunkingStrategy interface for text content
|
|
4
|
+
*/
|
|
5
|
+
import '../dom-polyfills.js';
|
|
6
|
+
import { ChunkingStrategy, GenericDocument, GenericChunk, ChunkConfig } from '../core/chunker.js';
|
|
7
|
+
/**
|
|
8
|
+
* Document interface for text chunking
|
|
9
|
+
*/
|
|
10
|
+
export interface Document {
|
|
11
|
+
source: string;
|
|
12
|
+
title: string;
|
|
13
|
+
content: string;
|
|
14
|
+
metadata?: Record<string, any>;
|
|
15
|
+
}
|
|
16
|
+
export interface Chunk {
|
|
17
|
+
text: string;
|
|
18
|
+
chunkIndex: number;
|
|
19
|
+
tokenCount: number;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Text chunking strategy implementation
|
|
23
|
+
*/
|
|
24
|
+
export declare class TextChunkingStrategy implements ChunkingStrategy {
|
|
25
|
+
appliesTo(contentType: string): boolean;
|
|
26
|
+
chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Text document chunking function
|
|
30
|
+
* Converts between text-specific and generic interfaces
|
|
31
|
+
*/
|
|
32
|
+
export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
|
|
33
|
+
//# sourceMappingURL=chunker.d.ts.map
|