rag-lite-ts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +240 -0
- package/dist/api-errors.d.ts +90 -0
- package/dist/api-errors.d.ts.map +1 -0
- package/dist/api-errors.js +320 -0
- package/dist/api-errors.js.map +1 -0
- package/dist/chunker.d.ts +47 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +256 -0
- package/dist/chunker.js.map +1 -0
- package/dist/cli/indexer.d.ts +11 -0
- package/dist/cli/indexer.d.ts.map +1 -0
- package/dist/cli/indexer.js +272 -0
- package/dist/cli/indexer.js.map +1 -0
- package/dist/cli/search.d.ts +7 -0
- package/dist/cli/search.d.ts.map +1 -0
- package/dist/cli/search.js +206 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +362 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +90 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +281 -0
- package/dist/config.js.map +1 -0
- package/dist/db.d.ts +90 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +340 -0
- package/dist/db.js.map +1 -0
- package/dist/embedder.d.ts +101 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +323 -0
- package/dist/embedder.js.map +1 -0
- package/dist/error-handler.d.ts +91 -0
- package/dist/error-handler.d.ts.map +1 -0
- package/dist/error-handler.js +196 -0
- package/dist/error-handler.js.map +1 -0
- package/dist/file-processor.d.ts +59 -0
- package/dist/file-processor.d.ts.map +1 -0
- package/dist/file-processor.js +312 -0
- package/dist/file-processor.js.map +1 -0
- package/dist/index-manager.d.ts +99 -0
- package/dist/index-manager.d.ts.map +1 -0
- package/dist/index-manager.js +444 -0
- package/dist/index-manager.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +7 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +51 -0
- package/dist/indexer.js.map +1 -0
- package/dist/ingestion.d.ts +175 -0
- package/dist/ingestion.d.ts.map +1 -0
- package/dist/ingestion.js +705 -0
- package/dist/ingestion.js.map +1 -0
- package/dist/mcp-server.d.ts +14 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +680 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/path-manager.d.ts +42 -0
- package/dist/path-manager.d.ts.map +1 -0
- package/dist/path-manager.js +66 -0
- package/dist/path-manager.js.map +1 -0
- package/dist/preprocess.d.ts +19 -0
- package/dist/preprocess.d.ts.map +1 -0
- package/dist/preprocess.js +203 -0
- package/dist/preprocess.js.map +1 -0
- package/dist/preprocessors/index.d.ts +17 -0
- package/dist/preprocessors/index.d.ts.map +1 -0
- package/dist/preprocessors/index.js +38 -0
- package/dist/preprocessors/index.js.map +1 -0
- package/dist/preprocessors/mdx.d.ts +25 -0
- package/dist/preprocessors/mdx.d.ts.map +1 -0
- package/dist/preprocessors/mdx.js +101 -0
- package/dist/preprocessors/mdx.js.map +1 -0
- package/dist/preprocessors/mermaid.d.ts +68 -0
- package/dist/preprocessors/mermaid.d.ts.map +1 -0
- package/dist/preprocessors/mermaid.js +329 -0
- package/dist/preprocessors/mermaid.js.map +1 -0
- package/dist/preprocessors/registry.d.ts +56 -0
- package/dist/preprocessors/registry.d.ts.map +1 -0
- package/dist/preprocessors/registry.js +179 -0
- package/dist/preprocessors/registry.js.map +1 -0
- package/dist/reranker.d.ts +40 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +212 -0
- package/dist/reranker.js.map +1 -0
- package/dist/resource-manager-demo.d.ts +7 -0
- package/dist/resource-manager-demo.d.ts.map +1 -0
- package/dist/resource-manager-demo.js +52 -0
- package/dist/resource-manager-demo.js.map +1 -0
- package/dist/resource-manager.d.ts +129 -0
- package/dist/resource-manager.d.ts.map +1 -0
- package/dist/resource-manager.js +389 -0
- package/dist/resource-manager.js.map +1 -0
- package/dist/search-standalone.d.ts +7 -0
- package/dist/search-standalone.d.ts.map +1 -0
- package/dist/search-standalone.js +117 -0
- package/dist/search-standalone.js.map +1 -0
- package/dist/search.d.ts +92 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +454 -0
- package/dist/search.js.map +1 -0
- package/dist/test-utils.d.ts +36 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +27 -0
- package/dist/test-utils.js.map +1 -0
- package/dist/tokenizer.d.ts +21 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +59 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/types.d.ts +44 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/vector-index.d.ts +64 -0
- package/dist/vector-index.d.ts.map +1 -0
- package/dist/vector-index.js +308 -0
- package/dist/vector-index.js.map +1 -0
- package/package.json +80 -0
package/dist/search.js
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
import { initializeEmbeddingEngine } from './embedder.js';
|
|
2
|
+
import { IndexManager } from './index-manager.js';
|
|
3
|
+
import { getChunksByEmbeddingIds, openDatabase, getStoredModelInfo } from './db.js';
|
|
4
|
+
import { CrossEncoderReranker } from './reranker.js';
|
|
5
|
+
import { config } from './config.js';
|
|
6
|
+
import { join, resolve } from 'path';
|
|
7
|
+
import { existsSync } from 'fs';
|
|
8
|
+
/**
 * User-friendly error class with actionable suggestions.
 *
 * Carries a machine-readable `code` and a list of human-readable
 * `suggestions` alongside the regular error message.
 */
export class SearchError extends Error {
    code;
    suggestions;
    /**
     * @param message - Human-readable description of the failure
     * @param code - Machine-readable identifier for the failure category
     * @param suggestions - Next steps for the user; defaults to an empty
     *   list so consumers can always iterate over `error.suggestions`
     */
    constructor(message, code, suggestions = []) {
        super(message);
        this.code = code;
        this.suggestions = suggestions;
        this.name = 'SearchError';
    }
}
|
|
21
|
+
/**
 * Works out the absolute locations of the vector index and the database.
 *
 * A provided (truthy) path is resolved to an absolute path; a missing one
 * falls back to a default file name inside the current working directory.
 *
 * @param indexPath - Path to vector index file (optional)
 * @param dbPath - Path to database file (optional)
 * @returns Object with the resolved `indexPath` and `dbPath`
 */
function resolveSearchPaths(indexPath, dbPath) {
    const workingDir = process.cwd();
    const resolvedIndexPath = indexPath
        ? resolve(indexPath)
        : join(workingDir, 'vector-index.bin');
    const resolvedDbPath = dbPath
        ? resolve(dbPath)
        : join(workingDir, 'db.sqlite');
    return { indexPath: resolvedIndexPath, dbPath: resolvedDbPath };
}
|
|
34
|
+
/**
 * Search engine that provides semantic search capabilities.
 *
 * Implements the core search pipeline:
 * query embedding → vector search → metadata retrieval → optional reranking.
 *
 * Resources (database connection, embedder, index manager, reranker) are
 * created lazily on first use. Concurrent first-use calls share a single
 * initialization, so only one set of resources is ever opened.
 */
export class SearchEngine {
    // Static properties for automatic resource management (Requirement 5.1, 5.2)
    // Engines currently tracked for cleanup when the process terminates.
    static instances = new Set();
    // Ensures the process-level handlers are registered only once.
    static cleanupHandlersSet = false;
    embedder = null;
    indexManager = null;
    dbConnection = null;
    reranker = null;
    isInitialized = false;
    indexPath;
    dbPath;
    // NOTE(review): nothing in this class sets this to true except
    // createWithComponents(), so engines built with the plain constructor
    // never load a reranker — confirm whether that is intended.
    enableReranking = false;
    // In-flight initialization shared by concurrent callers; null when idle.
    initPromise = null;

    /**
     * Creates a new SearchEngine with simplified constructor.
     * No resources are opened here; they are initialized on first use
     * (Requirement 3.5).
     *
     * @param indexPath - Path to vector index file (defaults to './vector-index.bin')
     * @param dbPath - Path to database file (defaults to './db.sqlite')
     * @throws {Error} If a provided path is not a non-empty string
     */
    constructor(indexPath, dbPath) {
        // Validate parameters
        if (indexPath !== undefined && (typeof indexPath !== 'string' || indexPath.trim() === '')) {
            throw new Error('indexPath must be a non-empty string when provided');
        }
        if (dbPath !== undefined && (typeof dbPath !== 'string' || dbPath.trim() === '')) {
            throw new Error('dbPath must be a non-empty string when provided');
        }
        // Resolve paths automatically
        const pathConfig = resolveSearchPaths(indexPath, dbPath);
        this.indexPath = pathConfig.indexPath;
        this.dbPath = pathConfig.dbPath;
        // Set up automatic cleanup on process exit (Requirement 5.5)
        this.setupAutomaticCleanup();
    }

    /**
     * Legacy constructor for backward compatibility.
     * Builds an engine around already-created components and marks it as
     * initialized, so no lazy initialization will run for it.
     *
     * @deprecated Use the simple constructor new SearchEngine(indexPath?, dbPath?) instead
     * @param embedder - Embedding engine used for queries
     * @param indexManager - Index manager used for vector search
     * @param dbConnection - Open database connection
     * @param enableReranking - Whether to attach a cross-encoder reranker
     * @returns The assembled engine
     */
    static createWithComponents(embedder, indexManager, dbConnection, enableReranking = false) {
        const engine = new SearchEngine();
        engine.embedder = embedder;
        engine.indexManager = indexManager;
        engine.dbConnection = dbConnection;
        engine.enableReranking = enableReranking;
        // Initialize reranker if enabled.
        // NOTE(review): loadModel() is not called here, while search() only
        // reranks when reranker.isLoaded() is true — confirm the reranker
        // loads itself elsewhere, otherwise reranking never runs on this path.
        if (enableReranking) {
            engine.reranker = new CrossEncoderReranker();
        }
        engine.isInitialized = true;
        return engine;
    }

    /**
     * Automatically initialize resources on first use with user-friendly error handling.
     * Implements lazy initialization as required by Requirements 3.5, 4.3, 5.1, 5.2.
     *
     * Callers that arrive while an initialization is already running wait for
     * that same run instead of starting a second one, which would otherwise
     * open a second database connection and index and leak the first.
     *
     * @throws {SearchError} When initialization fails
     */
    async ensureInitialized() {
        if (this.isInitialized) {
            return;
        }
        if (!this.initPromise) {
            // Clear the slot once settled so a failed attempt can be retried.
            this.initPromise = this.initializeResources().finally(() => {
                this.initPromise = null;
            });
        }
        await this.initPromise;
    }

    /**
     * Performs the actual one-time setup: database, stored model info,
     * embedder, index manager and (when enabled) the reranker.
     * Internal helper for ensureInitialized(); call that method instead.
     *
     * @throws {SearchError} Describing which step failed and how to fix it
     */
    async initializeResources() {
        // Check if required files exist first (before any initialization attempts)
        if (!existsSync(this.dbPath)) {
            throw this.createUserFriendlyError(new Error(`Database file not found: ${this.dbPath}`), 'missing_database');
        }
        if (!existsSync(this.indexPath)) {
            throw this.createUserFriendlyError(new Error(`Vector index file not found: ${this.indexPath}`), 'missing_index');
        }
        try {
            console.log('Initializing search engine...');
            // Initialize database connection
            console.log('Opening database connection...');
            this.dbConnection = await openDatabase(this.dbPath);
            // Read stored model info from database (Requirement 4.3)
            console.log('Reading stored model information...');
            const storedModelInfo = await getStoredModelInfo(this.dbConnection);
            if (!storedModelInfo) {
                throw this.createUserFriendlyError(new Error('No model information found in database'), 'missing_model_info');
            }
            // Initialize embedder with stored model info (Requirement 3.5)
            console.log(`Loading embedding model: ${storedModelInfo.modelName}...`);
            try {
                this.embedder = await initializeEmbeddingEngine(storedModelInfo.modelName);
            }
            catch (error) {
                throw this.createUserFriendlyError(error, 'model_loading');
            }
            // Initialize index manager with model compatibility validation
            console.log('Initializing index manager...');
            try {
                this.indexManager = new IndexManager(this.indexPath, this.dbPath, storedModelInfo.dimensions, storedModelInfo.modelName);
                await this.indexManager.initialize();
            }
            catch (error) {
                // Check if this is a model compatibility issue
                const errorMessage = error instanceof Error ? error.message : String(error);
                if (errorMessage.includes('mismatch') || errorMessage.includes('version') || errorMessage.includes('model')) {
                    throw this.createUserFriendlyError(error, 'model_compatibility');
                }
                throw error;
            }
            // Load reranker model if enabled
            if (this.enableReranking) {
                this.reranker = new CrossEncoderReranker();
                console.log('Loading reranker model...');
                try {
                    await this.reranker.loadModel();
                }
                catch (error) {
                    // A reranker failure is not fatal: fall back to plain vector search.
                    console.warn(`Reranker initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
                    console.warn('Continuing with vector search only (reranking disabled)');
                    this.reranker = null; // Disable reranker for this session
                }
            }
            // cleanup() removes the instance from the tracked set; register it
            // again so an engine re-initialized after cleanup() or after a
            // failed attempt is still released on process exit.
            SearchEngine.instances.add(this);
            this.isInitialized = true;
            const stats = await this.indexManager.getStats();
            console.log(`Search engine initialized with ${stats.totalVectors} chunks${this.reranker && this.reranker.isLoaded() ? ' and reranking enabled' : ''}`);
        }
        catch (error) {
            // Release whatever was opened before the failure.
            await this.cleanup();
            if (error instanceof SearchError) {
                throw error;
            }
            throw this.createUserFriendlyError(error, 'initialization');
        }
    }

    /**
     * Create user-friendly error messages with actionable suggestions.
     * Implements requirement 5.3: Clear, actionable error messages with specific next steps.
     *
     * The explicit `context` values are checked first; otherwise the error
     * message text is inspected for well-known substrings.
     *
     * @param error - The original error (or any thrown value)
     * @param context - Name of the failing step, e.g. 'missing_database'
     * @returns A SearchError with a code and suggestions
     */
    createUserFriendlyError(error, context) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        // Handle common error scenarios with specific guidance
        if (context === 'missing_database') {
            return new SearchError(`Database file not found: ${this.dbPath}`, 'DATABASE_NOT_FOUND', [
                'Run ingestion first to create the database: pipeline.ingestDirectory("./docs/")',
                'Check that the database path is correct',
                'Ensure the ingestion process completed successfully'
            ]);
        }
        if (context === 'missing_index') {
            return new SearchError(`Vector index file not found: ${this.indexPath}`, 'INDEX_NOT_FOUND', [
                'Run ingestion first to create the index: pipeline.ingestDirectory("./docs/")',
                'Check that the index path is correct',
                'Ensure the ingestion process completed successfully'
            ]);
        }
        if (context === 'missing_model_info') {
            return new SearchError('No embedding model information found in database. The database may be from an older version or corrupted.', 'MODEL_INFO_NOT_FOUND', [
                'Run ingestion again to store model information: pipeline.ingestDirectory("./docs/")',
                'If the problem persists, delete the database and index files and run ingestion from scratch',
                'Check that the database was created with a compatible version of the library'
            ]);
        }
        if (context === 'model_loading') {
            return new SearchError(`Failed to load embedding model: ${errorMessage}`, 'MODEL_LOADING_FAILED', [
                'Check that the model name is correct and supported',
                'Ensure you have internet connection for model download',
                'Try running ingestion again with a supported model',
                'Check the model configuration in your setup'
            ]);
        }
        if (context === 'model_compatibility' || (errorMessage.includes('model') && errorMessage.includes('mismatch'))) {
            return new SearchError(`Model compatibility issue detected: ${errorMessage}`, 'MODEL_COMPATIBILITY', [
                'The stored model information doesn\'t match the current configuration',
                'Run pipeline.rebuildIndex() to rebuild with the current model',
                'Or ensure you\'re using the same model that was used during ingestion',
                'Check that the index and database files are from the same ingestion run'
            ]);
        }
        if (errorMessage.includes('ENOENT') || errorMessage.includes('no such file')) {
            return new SearchError(`Required files not found: ${errorMessage}`, 'FILES_NOT_FOUND', [
                'Run ingestion first to create the required files',
                'Check that the file paths are correct',
                'Ensure you have read permissions for the files'
            ]);
        }
        if (errorMessage.includes('EACCES') || errorMessage.includes('permission denied')) {
            return new SearchError(`Permission denied: ${errorMessage}`, 'PERMISSION_DENIED', [
                'Check that you have read permissions for the database and index files',
                'Ensure the files are not locked by another process',
                'Try running with appropriate permissions'
            ]);
        }
        if (errorMessage.includes('database') || errorMessage.includes('sqlite')) {
            return new SearchError(`Database error: ${errorMessage}`, 'DATABASE_ERROR', [
                'Check that the database file is not corrupted',
                'Ensure no other processes are using the database',
                'Try recreating the database by running ingestion again'
            ]);
        }
        // Generic error with basic suggestions
        return new SearchError(`Search engine ${context} failed: ${errorMessage}`, 'GENERAL_ERROR', [
            'Check the error message above for specific details',
            'Ensure all required files exist and are accessible',
            'Try running ingestion first if you haven\'t already',
            'Contact support if the issue persists'
        ]);
    }

    /**
     * Initialize the search engine (public method for backward compatibility).
     * Sets up database, index manager, and embedding engine.
     */
    async initialize() {
        await this.ensureInitialized();
    }

    /**
     * Perform semantic search on the indexed documents (matches README API).
     * Automatically initializes resources on first use (Requirements 4.1, 4.2, 4.4, 4.5).
     *
     * @param query - Search query string; an empty or whitespace-only query yields []
     * @param options - Search options including top_k and rerank settings;
     *   missing values fall back to the module-level `config`
     * @returns Promise resolving to array of search results
     * @throws {SearchError} When lazy initialization fails
     * @throws {Error} 'Search failed: …' for failures during the search itself;
     *   the original error is available as `cause`
     */
    async search(query, options = {}) {
        // Automatic initialization on first use (Requirement 4.1, 4.2)
        await this.ensureInitialized();
        if (!query || query.trim().length === 0) {
            return [];
        }
        const startTime = performance.now();
        const topK = options.top_k || config.top_k || 10;
        const shouldRerank = options.rerank !== undefined ? options.rerank : config.rerank_enabled;
        try {
            // Ensure all components are initialized
            if (!this.embedder || !this.indexManager || !this.dbConnection) {
                throw new Error('Search engine components not properly initialized');
            }
            // Step 1: Build query embedding using same model as document chunks
            const embeddingStartTime = performance.now();
            const queryEmbedding = await this.embedder.embedSingle(query);
            const embeddingTime = performance.now() - embeddingStartTime;
            // Step 2: Search using IndexManager (which handles hash mapping properly)
            const searchStartTime = performance.now();
            let searchResult;
            try {
                searchResult = this.indexManager.search(queryEmbedding.vector, topK);
            }
            catch (error) {
                // A missing hash mapping is reported as "no results" plus a
                // warning instead of failing the whole query.
                if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
                    console.warn(`Hash mapping issue detected: ${error.message}`);
                    console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
                    return [];
                }
                throw error;
            }
            const vectorSearchTime = performance.now() - searchStartTime;
            if (searchResult.embeddingIds.length === 0) {
                const totalTime = performance.now() - startTime;
                console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
                return [];
            }
            // Step 3: Retrieve chunks from database using embedding IDs
            const retrievalStartTime = performance.now();
            const chunks = await getChunksByEmbeddingIds(this.dbConnection, searchResult.embeddingIds);
            const retrievalTime = performance.now() - retrievalStartTime;
            // Step 4: Format results as JSON with text, score, and document metadata
            let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
            // Step 5: Optional reranking with cross-encoder when enabled
            let rerankTime = 0;
            if (shouldRerank && this.reranker && this.reranker.isLoaded() && results.length > 1) {
                try {
                    const rerankStartTime = performance.now();
                    results = await this.reranker.rerank(query, results);
                    rerankTime = performance.now() - rerankStartTime;
                }
                catch (error) {
                    // Fallback to vector search results and log the error
                    console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
                }
            }
            const totalTime = performance.now() - startTime;
            // Measure latency without premature optimization - just log for monitoring
            console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
                `(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
                `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
            return results;
        }
        catch (error) {
            // Keep the original error (and its stack) reachable through `cause`.
            throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`, { cause: error });
        }
    }

    /**
     * Format search results with proper structure.
     * Results follow the order of `embeddingIds`; IDs without a matching
     * chunk are skipped.
     *
     * @param chunks - Database chunks with metadata
     * @param distances - Similarity distances from vector search, parallel to `embeddingIds`
     * @param embeddingIds - Embedding IDs in search result order
     * @returns Formatted search results ({ text, score, document })
     */
    formatSearchResults(chunks, distances, embeddingIds) {
        const results = [];
        // Create a map for quick chunk lookup by embedding_id
        const chunkMap = new Map();
        chunks.forEach(chunk => {
            chunkMap.set(chunk.embedding_id, chunk);
        });
        // Build results in the order of search results
        for (let i = 0; i < embeddingIds.length; i++) {
            const embeddingId = embeddingIds[i];
            const chunk = chunkMap.get(embeddingId);
            if (chunk) {
                // Convert cosine distance to similarity score (1 - distance)
                // hnswlib-wasm returns cosine distance, we want similarity
                // Clamped at 0 so distances above 1 never give a negative score.
                const score = Math.max(0, 1 - distances[i]);
                results.push({
                    text: chunk.text,
                    score: score,
                    document: {
                        id: chunk.document_id,
                        source: chunk.document_source,
                        title: chunk.document_title
                    }
                });
            }
        }
        return results;
    }

    /**
     * Get search engine statistics.
     * Does not trigger initialization: an uninitialized engine reports zeros.
     *
     * @returns Object with current search engine stats
     */
    async getStats() {
        if (!this.isInitialized) {
            return {
                totalChunks: 0,
                indexSize: 0,
                rerankingEnabled: false,
                isInitialized: false
            };
        }
        const indexStats = await this.indexManager.getStats();
        return {
            totalChunks: indexStats.totalVectors,
            indexSize: indexStats.totalVectors,
            rerankingEnabled: this.reranker !== null && this.reranker.isLoaded(),
            isInitialized: this.isInitialized
        };
    }

    /**
     * Set up automatic cleanup on process exit (Requirement 5.5).
     *
     * Registers this instance for cleanup and, the first time any engine is
     * created, installs process-wide handlers. Note that the signal and
     * error handlers end the process via process.exit() after cleanup.
     */
    setupAutomaticCleanup() {
        // Track this instance for cleanup
        SearchEngine.instances.add(this);
        // Set up process exit handlers only once
        if (!SearchEngine.cleanupHandlersSet) {
            SearchEngine.cleanupHandlersSet = true;
            const cleanupAll = async () => {
                const instances = Array.from(SearchEngine.instances);
                await Promise.all(instances.map(instance => instance.cleanup()));
            };
            // Handle various exit scenarios
            process.on('exit', () => {
                // 'exit' listeners must be synchronous, so this only drops
                // references; no close() call is made here.
                for (const instance of SearchEngine.instances) {
                    try {
                        if (instance.dbConnection) {
                            instance.dbConnection = null;
                        }
                        if (instance.indexManager) {
                            instance.indexManager = null;
                        }
                        instance.embedder = null;
                        instance.reranker = null;
                        instance.isInitialized = false;
                    }
                    catch (error) {
                        // Silent cleanup on exit
                    }
                }
            });
            process.on('SIGINT', async () => {
                await cleanupAll();
                process.exit(0);
            });
            process.on('SIGTERM', async () => {
                await cleanupAll();
                process.exit(0);
            });
            process.on('uncaughtException', async (error) => {
                console.error('Uncaught exception:', error);
                await cleanupAll();
                process.exit(1);
            });
            process.on('unhandledRejection', async (reason) => {
                console.error('Unhandled rejection:', reason);
                await cleanupAll();
                process.exit(1);
            });
        }
    }

    /**
     * Clean up resources (Requirement 5.5).
     *
     * State is reset before anything is closed, and the database connection
     * and index manager are closed independently, so a failure to close one
     * neither skips the other nor leaves the engine half-initialized.
     * Close failures are logged, never thrown.
     */
    async cleanup() {
        const dbConnection = this.dbConnection;
        const indexManager = this.indexManager;
        // Detach first: the engine is consistently "uninitialized" from here
        // on, and a repeated cleanup() will not close the same resource twice.
        this.dbConnection = null;
        this.indexManager = null;
        this.embedder = null;
        this.reranker = null;
        this.isInitialized = false;
        // Remove from instances set
        SearchEngine.instances.delete(this);
        for (const resource of [dbConnection, indexManager]) {
            if (!resource) {
                continue;
            }
            try {
                await resource.close();
            }
            catch (error) {
                console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
            }
        }
    }
}
|
|
454
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,yBAAyB,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAsB,uBAAuB,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AACxG,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,EAAE,MAAM,EAAoB,MAAM,aAAa,CAAC;AACvD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAEhC;;GAEG;AACH,MAAM,OAAO,WAAY,SAAQ,KAAK;IAG3B;IACA;IAHT,YACE,OAAe,EACR,IAAY,EACZ,WAAqB;QAE5B,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,SAAI,GAAJ,IAAI,CAAQ;QACZ,gBAAW,GAAX,WAAW,CAAU;QAG5B,IAAI,CAAC,IAAI,GAAG,aAAa,CAAC;IAC5B,CAAC;CACF;AAUD;;;;;GAKG;AACH,SAAS,kBAAkB,CAAC,SAAkB,EAAE,MAAe;IAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAEjC,OAAO;QACL,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,kBAAkB,CAAC;QAChF,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC;KACjE,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,OAAO,YAAY;IACvB,6EAA6E;IACrE,MAAM,CAAC,SAAS,GAAG,IAAI,GAAG,EAAgB,CAAC;IAC3C,MAAM,CAAC,kBAAkB,GAAG,KAAK,CAAC;IAElC,QAAQ,GAA2B,IAAI,CAAC;IACxC,YAAY,GAAwB,IAAI,CAAC;IACzC,YAAY,GAA8B,IAAI,CAAC;IAC/C,QAAQ,GAAgC,IAAI,CAAC;IAC7C,aAAa,GAAY,KAAK,CAAC;IAC/B,SAAS,CAAS;IAClB,MAAM,CAAS;IACf,eAAe,GAAY,KAAK,CAAC;IAEzC;;;;;OAKG;IACH,YAAY,SAAkB,EAAE,MAAe;QAC7C,sBAAsB;QACtB,IAAI,SAAS,KAAK,SAAS,IAAI,CAAC,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YAC1F,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACxE,CAAC;QAED,IAAI,MAAM,KAAK,SAAS,IAAI,CAAC,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YACjF,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QAED,8BAA8B;QAC9B,MAAM,UAAU,GAAG,kBAAkB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QACzD,IAAI,CAAC,SAAS,GAAG,UAAU,CAAC,SAAS,CAAC;QACtC,IAAI,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;QAEhC,6DAA6D;QAC7D,IAAI,CAAC,qBAAqB,EAAE,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,oBAAoB,CACzB,QAAyB,EACzB,YAA0B,EAC1B,YAAgC,EAChC,kBAA2B,KAAK;QAEhC,MAAM,
MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;QAClC,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC3B,MAAM,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,MAAM,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,MAAM,CAAC,eAAe,GAAG,eAAe,CAAC;QAEzC,iCAAiC;QACjC,IAAI,eAAe,EAAE,CAAC;YACpB,MAAM,CAAC,QAAQ,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAC/C,CAAC;QAED,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,OAAO;QACT,CAAC;QAED,2EAA2E;QAC3E,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,CAAC,uBAAuB,CAChC,IAAI,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,EAAE,CAAC,EACpD,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,MAAM,IAAI,CAAC,uBAAuB,CAChC,IAAI,KAAK,CAAC,gCAAgC,IAAI,CAAC,SAAS,EAAE,CAAC,EAC3D,eAAe,CAChB,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;YAE7C,iCAAiC;YACjC,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;YAC9C,IAAI,CAAC,YAAY,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEpD,yDAAyD;YACzD,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YACnD,MAAM,eAAe,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAEpE,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,MAAM,IAAI,CAAC,uBAAuB,CAChC,IAAI,KAAK,CAAC,wCAAwC,CAAC,EACnD,oBAAoB,CACrB,CAAC;YACJ,CAAC;YAED,+DAA+D;YAC/D,OAAO,CAAC,GAAG,CAAC,4BAA4B,eAAe,CAAC,SAAS,KAAK,CAAC,CAAC;YACxE,IAAI,CAAC;gBACH,IAAI,CAAC,QAAQ,GAAG,MAAM,yBAAyB,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;YAC7E,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,CAAC,uBAAuB,CAChC,KAAK,EACL,eAAe,CAChB,CAAC;YACJ,CAAC;YAED,+DAA+D;YAC/D,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;YAC7C,IAAI,CAAC;gBACH,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAClC,IAAI,CAAC,SAAS,EACd,IAAI,CAAC,MAAM,EACX,eAAe,CAAC,UAAU,EAC1B,eAAe,CAAC,SAAS,CAC1B,CAAC;gBACF,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,CAAC;YACvC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,+CAA+C;gBAC/C,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC5E,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC5
G,MAAM,IAAI,CAAC,uBAAuB,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;gBACnE,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YAED,iCAAiC;YACjC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBACzB,IAAI,CAAC,QAAQ,GAAG,IAAI,oBAAoB,EAAE,CAAC;gBAC3C,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;gBACzC,IAAI,CAAC;oBACH,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC;gBAClC,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,IAAI,CAAC,mCAAmC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;oBAC5G,OAAO,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;oBACxE,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,CAAC,oCAAoC;gBAC5D,CAAC;YACH,CAAC;YAED,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;YAC1B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC;YACjD,OAAO,CAAC,GAAG,CAAC,kCAAkC,KAAK,CAAC,YAAY,UAAU,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEzJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrB,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;gBACjC,MAAM,KAAK,CAAC;YACd,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,CAAC,uBAAuB,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;YAC9D,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,uBAAuB,CAAC,KAAc,EAAE,OAAe;QAC7D,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAE5E,uDAAuD;QACvD,IAAI,OAAO,KAAK,kBAAkB,EAAE,CAAC;YACnC,OAAO,IAAI,WAAW,CACpB,4BAA4B,IAAI,CAAC,MAAM,EAAE,EACzC,oBAAoB,EACpB;gBACE,iFAAiF;gBACjF,yCAAyC;gBACzC,qDAAqD;aACtD,CACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,KAAK,eAAe,EAAE,CAAC;YAChC,OAAO,IAAI,WAAW,CACpB,gCAAgC,IAAI,CAAC,SAAS,EAAE,EAChD,iBAAiB,EACjB;gBACE,8EAA8E;gBAC9E,sCAAsC;gBACtC,qDAAqD;aACtD,CACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,KAAK,oBAAoB,EAAE,CAAC;YACrC,OAAO,IAAI,WAAW,CACpB,2GAA2G,EAC3G,sBAAsB,EACtB;gBACE,qFAAqF;gBACrF,6FAA6F;gBAC7F,8EAA8E;aAC/E,CACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,KAAK,eAAe,EAAE,CAAC;YAChC,OAAO,IAAI,WAAW,CACpB,mCAAmC,YAAY,EAAE,EACjD,sBAAsB,EACtB;gBACE,oDAAoD;gBACpD,wDAAwD;gBACxD,oDAAoD;gBACpD,6CAA6C;aAC9C,CACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,KAAK,qBAAqB,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,YAAY,CAAC,
QAAQ,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;YAC/G,OAAO,IAAI,WAAW,CACpB,uCAAuC,YAAY,EAAE,EACrD,qBAAqB,EACrB;gBACE,uEAAuE;gBACvE,+DAA+D;gBAC/D,uEAAuE;gBACvE,yEAAyE;aAC1E,CACF,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YAC7E,OAAO,IAAI,WAAW,CACpB,6BAA6B,YAAY,EAAE,EAC3C,iBAAiB,EACjB;gBACE,kDAAkD;gBAClD,uCAAuC;gBACvC,gDAAgD;aACjD,CACF,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;YAClF,OAAO,IAAI,WAAW,CACpB,sBAAsB,YAAY,EAAE,EACpC,mBAAmB,EACnB;gBACE,uEAAuE;gBACvE,oDAAoD;gBACpD,0CAA0C;aAC3C,CACF,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YACzE,OAAO,IAAI,WAAW,CACpB,mBAAmB,YAAY,EAAE,EACjC,gBAAgB,EAChB;gBACE,+CAA+C;gBAC/C,kDAAkD;gBAClD,wDAAwD;aACzD,CACF,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,OAAO,IAAI,WAAW,CACpB,iBAAiB,OAAO,YAAY,YAAY,EAAE,EAClD,eAAe,EACf;YACE,oDAAoD;YACpD,oDAAoD;YACpD,qDAAqD;YACrD,uCAAuC;SACxC,CACF,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;IACjC,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,+DAA+D;QAC/D,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC;QAE3F,IAAI,CAAC;YACH,wCAAwC;YACxC,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;gBAC/D,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;YACvE,CAAC;YAED,oEAAoE;YACpE,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;YAC9D,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,0EAA0E;YAC1E,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC;YACjB,IAAI,CAAC;gBACH,YAAY,GAAG,IAAI,CAAC,YAA
Y,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACvE,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC,EAAE,CAAC;oBACvF,OAAO,CAAC,IAAI,CAAC,gCAAgC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;oBAC9D,OAAO,CAAC,IAAI,CAAC,4FAA4F,CAAC,CAAC;oBAC3G,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;YAE7D,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3C,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,+BAA+B,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;gBAC5E,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,4DAA4D;YAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,YAAY,CAAC,CAAC;YAC3F,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,kBAAkB,CAAC;YAE7D,yEAAyE;YACzE,IAAI,OAAO,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,YAAY,CAAC,SAAS,EAAE,YAAY,CAAC,YAAY,CAAC,CAAC;YAElG,6DAA6D;YAC7D,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,IAAI,YAAY,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpF,IAAI,CAAC;oBACH,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;oBAC1C,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;oBACrD,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;gBACnD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,sDAAsD;oBACtD,OAAO,CAAC,IAAI,CAAC,kDAAkD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;gBAC7H,CAAC;YACH,CAAC;YAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEhD,2EAA2E;YAC3E,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,MAAM,eAAe,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACrF,WAAW,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;gBACnF,cAAc,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAE9G,OAAO,OAAO,CAAC;QAEjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC
,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAID;;;;;;OAMG;IACK,mBAAmB,CACzB,MAAa,EACb,SAAmB,EACnB,YAAsB;QAEtB,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,sDAAsD;QACtD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAExC,IAAI,KAAK,EAAE,CAAC;gBACV,6DAA6D;gBAC7D,2DAA2D;gBAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE5C,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,KAAK,EAAE,KAAK;oBACZ,QAAQ,EAAE;wBACR,EAAE,EAAE,KAAK,CAAC,WAAW;wBACrB,MAAM,EAAE,KAAK,CAAC,eAAe;wBAC7B,KAAK,EAAE,KAAK,CAAC,cAAc;qBAC5B;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ;QAMZ,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACxB,OAAO;gBACL,WAAW,EAAE,CAAC;gBACd,SAAS,EAAE,CAAC;gBACZ,gBAAgB,EAAE,KAAK;gBACvB,aAAa,EAAE,KAAK;aACrB,CAAC;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,YAAa,CAAC,QAAQ,EAAE,CAAC;QACvD,OAAO;YACL,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,SAAS,EAAE,UAAU,CAAC,YAAY;YAClC,gBAAgB,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE;YACpE,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,qBAAqB;QAC3B,kCAAkC;QAClC,YAAY,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAEjC,yCAAyC;QACzC,IAAI,CAAC,YAAY,CAAC,kBAAkB,EAAE,CAAC;YACrC,YAAY,CAAC,kBAAkB,GAAG,IAAI,CAAC;YAEvC,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;gBAC5B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;gBACrD,MAAM,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YACnE,CAAC,CAAC;YAEF,gCAAgC;YAChC,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;gBACtB,qCAAqC;gBACrC,KAAK,MAAM,QAAQ,IAAI,YAAY,CAAC,SAAS,EAAE,CAAC;oBAC9C,IAAI,CAAC;wBACH,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;4BAC1B,qCAAqC;4BACrC,QAAQ,CAAC,
YAAY,GAAG,IAAI,CAAC;wBAC/B,CAAC;wBACD,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;4BAC1B,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC;wBAC/B,CAAC;wBACD,QAAQ,CAAC,QAAQ,GAAG,IAAI,CAAC;wBACzB,QAAQ,CAAC,QAAQ,GAAG,IAAI,CAAC;wBACzB,QAAQ,CAAC,aAAa,GAAG,KAAK,CAAC;oBACjC,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,yBAAyB;oBAC3B,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;gBAC9B,MAAM,UAAU,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;gBAC/B,MAAM,UAAU,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE;gBAC9C,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,KAAK,CAAC,CAAC;gBAC5C,MAAM,UAAU,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;gBAChD,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;gBAC9C,MAAM,UAAU,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACtB,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,CAAC;gBAChC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;YAC3B,CAAC;YAED,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACtB,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,CAAC;gBAChC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;YAC3B,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;YACrB,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;YAE3B,4BAA4B;YAC5B,YAAY,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9G,CAAC;IACH,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test utilities for multi-model support
|
|
3
|
+
* Provides common configurations and helpers for testing with different embedding models
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Readonly two-element tuple describing the embedding-model fixtures.
 * Every field is a literal type, so each entry's exact values are part of the type.
 */
export declare const TEST_MODELS: readonly [
    {
        /** Model identifier of the first fixture */
        readonly name: "sentence-transformers/all-MiniLM-L6-v2";
        readonly dimensions: 384;
        readonly chunkSize: 250;
        readonly batchSize: 16;
    },
    {
        /** Model identifier of the second fixture */
        readonly name: "Xenova/all-mpnet-base-v2";
        readonly dimensions: 768;
        readonly chunkSize: 400;
        readonly batchSize: 8;
    }
];
|
|
16
|
+
/**
 * Look up one of the TEST_MODELS entries by its model name
 * @param modelName - Name to match against each entry's `name` field
 * @returns The matching entry, or undefined when no entry carries that name
 */
export declare function getTestModel(modelName: string): TestModel | undefined;
|
|
32
|
+
/**
|
|
33
|
+
* Type for test model configuration
|
|
34
|
+
*/
|
|
35
|
+
export type TestModel = typeof TEST_MODELS[number]; // union of the element types of the TEST_MODELS tuple
|
|
36
|
+
//# sourceMappingURL=test-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-utils.d.ts","sourceRoot":"","sources":["../src/test-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,eAAO,MAAM,WAAW;;;;;;;;;;EAad,CAAC;AAEX;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM;;;;;;;;;;cAE7C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test utilities for multi-model support
|
|
3
|
+
* Provides common configurations and helpers for testing with different embedding models
|
|
4
|
+
*/
|
|
5
|
+
// Embedding-model fixtures shared by the test helpers.
// The array and every entry are frozen so the runtime value honours the
// `readonly` tuple type published in the declaration file: an accidental
// mutation now throws (modules run in strict mode) instead of silently
// changing the fixture for every other importer.
export const TEST_MODELS = Object.freeze([
    Object.freeze({
        name: 'sentence-transformers/all-MiniLM-L6-v2',
        dimensions: 384,
        chunkSize: 250,
        batchSize: 16,
    }),
    Object.freeze({
        name: 'Xenova/all-mpnet-base-v2',
        dimensions: 768,
        chunkSize: 400,
        batchSize: 8,
    }),
]);
|
|
19
|
+
/**
 * Look up a TEST_MODELS entry by model name
 * @param modelName - Name compared with strict equality against each entry's `name`
 * @returns The first entry whose name matches, or undefined when none does
 */
export function getTestModel(modelName) {
    for (const candidate of TEST_MODELS) {
        if (candidate.name === modelName) {
            return candidate;
        }
    }
    return undefined;
}
|
|
27
|
+
//# sourceMappingURL=test-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-utils.js","sourceRoot":"","sources":["../src/test-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB;QACE,IAAI,EAAE,wCAAwC;QAC9C,UAAU,EAAE,GAAG;QACf,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,EAAE;KACd;IACD;QACE,IAAI,EAAE,0BAA0B;QAChC,UAAU,EAAE,GAAG;QACf,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;KACb;CACO,CAAC;AAEX;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,SAAiB;IAC5C,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;AACrD,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { PreTrainedTokenizer } from '@huggingface/transformers';
|
|
2
|
+
/**
|
|
3
|
+
* Count tokens in a text string using the MiniLM-L6-v2 tokenizer
|
|
4
|
+
* This ensures token counts match exactly with the embedding model
|
|
5
|
+
*
|
|
6
|
+
* @param text - Text to count tokens for
|
|
7
|
+
* @returns Number of tokens in the text
|
|
8
|
+
* @throws {Error} If tokenizer fails to initialize or tokenize
|
|
9
|
+
*/
|
|
10
|
+
export declare function countTokens(text: string): Promise<number>; // resolves to 0 for empty or non-string input
|
|
11
|
+
/**
|
|
12
|
+
* Get the tokenizer instance (for testing purposes)
|
|
13
|
+
* @internal
|
|
14
|
+
*/
|
|
15
|
+
export declare function getTokenizer(): Promise<PreTrainedTokenizer>; // loads the tokenizer on first use, then resolves to the cached instance
|
|
16
|
+
/**
|
|
17
|
+
* Reset the tokenizer instance (for testing purposes)
|
|
18
|
+
* @internal
|
|
19
|
+
*/
|
|
20
|
+
export declare function resetTokenizer(): void; // sets the cached tokenizer back to null
|
|
21
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AA0BrE;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAY/D;AAED;;;GAGG;AACH,wBAAsB,YAAY,IAAI,OAAO,CAAC,mBAAmB,CAAC,CAEjE;AAED;;;GAGG;AACH,wBAAgB,cAAc,IAAI,IAAI,CAErC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { AutoTokenizer } from '@huggingface/transformers';
|
|
2
|
+
/**
|
|
3
|
+
* Tokenizer instance for consistent token counting
|
|
4
|
+
* Uses the same tokenizer as the embedding model (MiniLM-L6-v2)
|
|
5
|
+
*/
|
|
6
|
+
let tokenizer = null; // stays null until initializeTokenizer() loads it; resetTokenizer() sets it back to null
|
|
7
|
+
/**
 * Lazily load the tokenizer used for token counting.
 *
 * The first successful call loads 'sentence-transformers/all-MiniLM-L6-v2'
 * through AutoTokenizer.from_pretrained and stores it in the module-level
 * `tokenizer`; later calls return that cached instance without reloading.
 * A failed load leaves the cache empty, so the next call tries again.
 *
 * @returns The cached tokenizer instance
 * @throws {Error} "Failed to initialize tokenizer: ..." when loading fails;
 *   the underlying failure is attached as `cause`
 */
async function initializeTokenizer() {
    if (tokenizer) {
        return tokenizer;
    }
    try {
        // The model id is hard-coded here.
        // NOTE(review): assumed to match the embedding model in use - confirm
        // when a different embedding model is configured.
        tokenizer = await AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2');
        return tokenizer;
    }
    catch (error) {
        // Keep the original failure reachable via `cause` (its stack would
        // otherwise be lost) and stringify non-Error throwables instead of
        // collapsing them to a generic "Unknown error".
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to initialize tokenizer: ${reason}`, { cause: error });
    }
}
|
|
24
|
+
/**
 * Count the tokens the shared tokenizer produces for a piece of text.
 *
 * @param text - Text to count tokens for
 * @returns Number of tokens in the text; 0 when `text` is empty or not a string
 * @throws {Error} "Failed to count tokens: ..." when the tokenizer cannot be
 *   initialized or encoding fails; the underlying failure is attached as `cause`
 */
export async function countTokens(text) {
    // Empty and non-string input short-circuits before the tokenizer is loaded.
    if (!text || typeof text !== 'string') {
        return 0;
    }
    try {
        const tokenizerInstance = await initializeTokenizer();
        const tokens = await tokenizerInstance.encode(text);
        return tokens.length;
    }
    catch (error) {
        // Keep the original failure reachable via `cause` and stringify
        // non-Error throwables instead of reporting a generic "Unknown error".
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to count tokens: ${reason}`, { cause: error });
    }
}
|
|
45
|
+
/**
 * Hand out the shared tokenizer, loading it first when it is not cached yet
 * (exposed for tests)
 * @internal
 */
export async function getTokenizer() {
    const instance = await initializeTokenizer();
    return instance;
}
|
|
52
|
+
/**
 * Drop the cached tokenizer (exposed for tests)
 * @internal
 */
export function resetTokenizer() {
    // With the cache cleared, the next initializeTokenizer() call loads afresh.
    tokenizer = null;
}
|
|
59
|
+
//# sourceMappingURL=tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAG1D;;;GAGG;AACH,IAAI,SAAS,GAA+B,IAAI,CAAC;AAEjD;;;GAGG;AACH,KAAK,UAAU,mBAAmB;IAChC,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC;QACH,iEAAiE;QACjE,SAAS,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,wCAAwC,CAAC,CAAC;QAC1F,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,mCAAmC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACjH,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAY;IAC5C,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,mBAAmB,EAAE,CAAC;QACtD,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACpD,OAAO,MAAM,CAAC,MAAM,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2BAA2B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACzG,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,OAAO,MAAM,mBAAmB,EAAE,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc;IAC5B,SAAS,GAAG,IAAI,CAAC;AACnB,CAAC"}
|