rag-lite-ts 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +240 -0
  3. package/dist/api-errors.d.ts +90 -0
  4. package/dist/api-errors.d.ts.map +1 -0
  5. package/dist/api-errors.js +320 -0
  6. package/dist/api-errors.js.map +1 -0
  7. package/dist/chunker.d.ts +47 -0
  8. package/dist/chunker.d.ts.map +1 -0
  9. package/dist/chunker.js +256 -0
  10. package/dist/chunker.js.map +1 -0
  11. package/dist/cli/indexer.d.ts +11 -0
  12. package/dist/cli/indexer.d.ts.map +1 -0
  13. package/dist/cli/indexer.js +272 -0
  14. package/dist/cli/indexer.js.map +1 -0
  15. package/dist/cli/search.d.ts +7 -0
  16. package/dist/cli/search.d.ts.map +1 -0
  17. package/dist/cli/search.js +206 -0
  18. package/dist/cli/search.js.map +1 -0
  19. package/dist/cli.d.ts +3 -0
  20. package/dist/cli.d.ts.map +1 -0
  21. package/dist/cli.js +362 -0
  22. package/dist/cli.js.map +1 -0
  23. package/dist/config.d.ts +90 -0
  24. package/dist/config.d.ts.map +1 -0
  25. package/dist/config.js +281 -0
  26. package/dist/config.js.map +1 -0
  27. package/dist/db.d.ts +90 -0
  28. package/dist/db.d.ts.map +1 -0
  29. package/dist/db.js +340 -0
  30. package/dist/db.js.map +1 -0
  31. package/dist/embedder.d.ts +101 -0
  32. package/dist/embedder.d.ts.map +1 -0
  33. package/dist/embedder.js +323 -0
  34. package/dist/embedder.js.map +1 -0
  35. package/dist/error-handler.d.ts +91 -0
  36. package/dist/error-handler.d.ts.map +1 -0
  37. package/dist/error-handler.js +196 -0
  38. package/dist/error-handler.js.map +1 -0
  39. package/dist/file-processor.d.ts +59 -0
  40. package/dist/file-processor.d.ts.map +1 -0
  41. package/dist/file-processor.js +312 -0
  42. package/dist/file-processor.js.map +1 -0
  43. package/dist/index-manager.d.ts +99 -0
  44. package/dist/index-manager.d.ts.map +1 -0
  45. package/dist/index-manager.js +444 -0
  46. package/dist/index-manager.js.map +1 -0
  47. package/dist/index.d.ts +13 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +21 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/indexer.d.ts +7 -0
  52. package/dist/indexer.d.ts.map +1 -0
  53. package/dist/indexer.js +51 -0
  54. package/dist/indexer.js.map +1 -0
  55. package/dist/ingestion.d.ts +175 -0
  56. package/dist/ingestion.d.ts.map +1 -0
  57. package/dist/ingestion.js +705 -0
  58. package/dist/ingestion.js.map +1 -0
  59. package/dist/mcp-server.d.ts +14 -0
  60. package/dist/mcp-server.d.ts.map +1 -0
  61. package/dist/mcp-server.js +680 -0
  62. package/dist/mcp-server.js.map +1 -0
  63. package/dist/path-manager.d.ts +42 -0
  64. package/dist/path-manager.d.ts.map +1 -0
  65. package/dist/path-manager.js +66 -0
  66. package/dist/path-manager.js.map +1 -0
  67. package/dist/preprocess.d.ts +19 -0
  68. package/dist/preprocess.d.ts.map +1 -0
  69. package/dist/preprocess.js +203 -0
  70. package/dist/preprocess.js.map +1 -0
  71. package/dist/preprocessors/index.d.ts +17 -0
  72. package/dist/preprocessors/index.d.ts.map +1 -0
  73. package/dist/preprocessors/index.js +38 -0
  74. package/dist/preprocessors/index.js.map +1 -0
  75. package/dist/preprocessors/mdx.d.ts +25 -0
  76. package/dist/preprocessors/mdx.d.ts.map +1 -0
  77. package/dist/preprocessors/mdx.js +101 -0
  78. package/dist/preprocessors/mdx.js.map +1 -0
  79. package/dist/preprocessors/mermaid.d.ts +68 -0
  80. package/dist/preprocessors/mermaid.d.ts.map +1 -0
  81. package/dist/preprocessors/mermaid.js +329 -0
  82. package/dist/preprocessors/mermaid.js.map +1 -0
  83. package/dist/preprocessors/registry.d.ts +56 -0
  84. package/dist/preprocessors/registry.d.ts.map +1 -0
  85. package/dist/preprocessors/registry.js +179 -0
  86. package/dist/preprocessors/registry.js.map +1 -0
  87. package/dist/reranker.d.ts +40 -0
  88. package/dist/reranker.d.ts.map +1 -0
  89. package/dist/reranker.js +212 -0
  90. package/dist/reranker.js.map +1 -0
  91. package/dist/resource-manager-demo.d.ts +7 -0
  92. package/dist/resource-manager-demo.d.ts.map +1 -0
  93. package/dist/resource-manager-demo.js +52 -0
  94. package/dist/resource-manager-demo.js.map +1 -0
  95. package/dist/resource-manager.d.ts +129 -0
  96. package/dist/resource-manager.d.ts.map +1 -0
  97. package/dist/resource-manager.js +389 -0
  98. package/dist/resource-manager.js.map +1 -0
  99. package/dist/search-standalone.d.ts +7 -0
  100. package/dist/search-standalone.d.ts.map +1 -0
  101. package/dist/search-standalone.js +117 -0
  102. package/dist/search-standalone.js.map +1 -0
  103. package/dist/search.d.ts +92 -0
  104. package/dist/search.d.ts.map +1 -0
  105. package/dist/search.js +454 -0
  106. package/dist/search.js.map +1 -0
  107. package/dist/test-utils.d.ts +36 -0
  108. package/dist/test-utils.d.ts.map +1 -0
  109. package/dist/test-utils.js +27 -0
  110. package/dist/test-utils.js.map +1 -0
  111. package/dist/tokenizer.d.ts +21 -0
  112. package/dist/tokenizer.d.ts.map +1 -0
  113. package/dist/tokenizer.js +59 -0
  114. package/dist/tokenizer.js.map +1 -0
  115. package/dist/types.d.ts +44 -0
  116. package/dist/types.d.ts.map +1 -0
  117. package/dist/types.js +3 -0
  118. package/dist/types.js.map +1 -0
  119. package/dist/vector-index.d.ts +64 -0
  120. package/dist/vector-index.d.ts.map +1 -0
  121. package/dist/vector-index.js +308 -0
  122. package/dist/vector-index.js.map +1 -0
  123. package/package.json +80 -0
package/dist/db.js ADDED
@@ -0,0 +1,340 @@
1
+ import sqlite3 from 'sqlite3';
2
+ import { promisify } from 'util';
3
+ import { handleError, ErrorSeverity, createError } from './error-handler.js';
4
/**
 * Opens a SQLite database and wraps the handle in a promise-based connection.
 *
 * Once the handle is open, foreign key enforcement is switched on and the
 * promise resolves with an object exposing the raw handle plus promisified
 * `run`, `get`, `all` and `close` helpers.
 *
 * @param dbPath - Path to the SQLite database file
 * @returns Promise that resolves to `{ db, run, get, all, close }`. It rejects
 *          with an error built by `createError.database` when the open
 *          callback reports a failure or the foreign-key PRAGMA fails.
 */
export function openDatabase(dbPath) {
    return new Promise((resolve, reject) => {
        const db = new sqlite3.Database(dbPath, (err) => {
            if (err) {
                const errorMsg = `Failed to open database at ${dbPath}: ${err.message}`;
                // Categorize database errors for better handling
                if (err.message.includes('ENOENT')) {
                    handleError(createError.fileSystem(`Database file not found: ${dbPath}. It will be created automatically.`), 'Database Connection', { severity: ErrorSeverity.INFO });
                    // The open callback reported an error, so this handle never
                    // opened. Settle the promise here rather than issuing the
                    // PRAGMA below against an unusable handle.
                    reject(createError.database(errorMsg));
                }
                else if (err.message.includes('EACCES') || err.message.includes('permission')) {
                    reject(createError.database(`Permission denied accessing database: ${dbPath}. Check file permissions.`));
                }
                else if (err.message.includes('SQLITE_CORRUPT')) {
                    reject(createError.database(`Database file is corrupted: ${dbPath}. Try running 'raglite rebuild'.`));
                }
                else {
                    reject(createError.database(errorMsg));
                }
                return;
            }
            // Enable foreign key constraints
            db.run('PRAGMA foreign_keys = ON', (pragmaErr) => {
                if (pragmaErr) {
                    reject(createError.database(`Failed to enable foreign keys: ${pragmaErr.message}`));
                    return;
                }
                const connection = {
                    db,
                    // `run` is wrapped by hand: the callback must be a regular
                    // function because the resolved value is its `this`
                    // (callers read `lastID` from it).
                    run: (sql, params) => {
                        return new Promise((resolveRun, rejectRun) => {
                            db.run(sql, params || [], function (runErr) {
                                if (runErr) {
                                    // Enhance SQLite error messages
                                    rejectRun(enhanceSQLiteError(runErr, sql));
                                }
                                else {
                                    resolveRun(this);
                                }
                            });
                        });
                    },
                    get: promisify(db.get.bind(db)),
                    all: promisify(db.all.bind(db)),
                    close: promisify(db.close.bind(db))
                };
                resolve(connection);
            });
        });
    });
}
63
+ /**
64
+ * Enhance SQLite error messages with more context
65
+ */
66
+ function enhanceSQLiteError(error, sql) {
67
+ let enhancedMessage = error.message;
68
+ if (error.message.includes('SQLITE_BUSY')) {
69
+ enhancedMessage = 'Database is locked by another process. Ensure no other RAG-lite instances are running.';
70
+ }
71
+ else if (error.message.includes('SQLITE_FULL')) {
72
+ enhancedMessage = 'Database disk is full. Free up disk space and try again.';
73
+ }
74
+ else if (error.message.includes('SQLITE_CORRUPT')) {
75
+ enhancedMessage = 'Database file is corrupted. Try running "raglite rebuild" to recreate it.';
76
+ }
77
+ else if (error.message.includes('UNIQUE constraint failed')) {
78
+ enhancedMessage = `Duplicate entry detected: ${error.message}. This item may already exist.`;
79
+ }
80
+ else if (error.message.includes('FOREIGN KEY constraint failed')) {
81
+ enhancedMessage = `Foreign key constraint violation: ${error.message}. Referenced record may not exist.`;
82
+ }
83
+ if (sql && sql.length < 200) {
84
+ enhancedMessage += `\nSQL: ${sql}`;
85
+ }
86
+ return new Error(enhancedMessage);
87
+ }
88
+ /**
89
+ * Initializes the database schema with all required tables and indexes
90
+ * @param connection - Database connection object
91
+ */
92
+ export async function initializeSchema(connection) {
93
+ try {
94
+ // Create documents table
95
+ await connection.run(`
96
+ CREATE TABLE IF NOT EXISTS documents (
97
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
98
+ source TEXT NOT NULL UNIQUE,
99
+ title TEXT NOT NULL,
100
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
101
+ )
102
+ `);
103
+ // Create chunks table with foreign key relationship
104
+ await connection.run(`
105
+ CREATE TABLE IF NOT EXISTS chunks (
106
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
107
+ embedding_id TEXT NOT NULL UNIQUE,
108
+ document_id INTEGER NOT NULL,
109
+ text TEXT NOT NULL,
110
+ chunk_index INTEGER NOT NULL,
111
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
112
+ FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
113
+ )
114
+ `);
115
+ // Create system_info table for model version tracking
116
+ await connection.run(`
117
+ CREATE TABLE IF NOT EXISTS system_info (
118
+ id INTEGER PRIMARY KEY CHECK (id = 1),
119
+ model_version TEXT NOT NULL,
120
+ model_name TEXT,
121
+ model_dimensions INTEGER,
122
+ updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
123
+ )
124
+ `);
125
+ // Add model tracking columns if they don't exist (migration)
126
+ try {
127
+ await connection.run(`ALTER TABLE system_info ADD COLUMN model_name TEXT`);
128
+ }
129
+ catch (error) {
130
+ // Column already exists, ignore error
131
+ if (error instanceof Error && !error.message.includes('duplicate column name')) {
132
+ throw error;
133
+ }
134
+ }
135
+ try {
136
+ await connection.run(`ALTER TABLE system_info ADD COLUMN model_dimensions INTEGER`);
137
+ }
138
+ catch (error) {
139
+ // Column already exists, ignore error
140
+ if (error instanceof Error && !error.message.includes('duplicate column name')) {
141
+ throw error;
142
+ }
143
+ }
144
+ // Create indexes for performance
145
+ await connection.run(`
146
+ CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)
147
+ `);
148
+ await connection.run(`
149
+ CREATE INDEX IF NOT EXISTS idx_chunks_embedding_id ON chunks(embedding_id)
150
+ `);
151
+ await connection.run(`
152
+ CREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source)
153
+ `);
154
+ console.log('Database schema initialized successfully');
155
+ }
156
+ catch (error) {
157
+ throw new Error(`Failed to initialize database schema: ${error instanceof Error ? error.message : 'Unknown error'}`);
158
+ }
159
+ }
160
+ /**
161
+ * Inserts a new document into the database
162
+ * @param connection - Database connection object
163
+ * @param source - Source path of the document
164
+ * @param title - Title of the document
165
+ * @returns Promise that resolves to the document ID
166
+ */
167
+ export async function insertDocument(connection, source, title) {
168
+ try {
169
+ const result = await connection.run('INSERT INTO documents (source, title) VALUES (?, ?)', [source, title]);
170
+ if (typeof result.lastID !== 'number' || result.lastID <= 0) {
171
+ throw new Error('Failed to get document ID after insertion');
172
+ }
173
+ return result.lastID;
174
+ }
175
+ catch (error) {
176
+ if (error instanceof Error && error.message.includes('UNIQUE constraint failed')) {
177
+ throw new Error(`Document with source '${source}' already exists`);
178
+ }
179
+ throw new Error(`Failed to insert document: ${error instanceof Error ? error.message : 'Unknown error'}`);
180
+ }
181
+ }
182
+ /**
183
+ * Inserts or updates a chunk in the database (upsert operation)
184
+ * @param connection - Database connection object
185
+ * @param embeddingId - Unique embedding ID for the chunk
186
+ * @param documentId - ID of the parent document
187
+ * @param text - Text content of the chunk
188
+ * @param chunkIndex - Index of the chunk within the document
189
+ */
190
+ export async function insertChunk(connection, embeddingId, documentId, text, chunkIndex) {
191
+ try {
192
+ // Use INSERT OR REPLACE to handle duplicates gracefully
193
+ await connection.run('INSERT OR REPLACE INTO chunks (embedding_id, document_id, text, chunk_index) VALUES (?, ?, ?, ?)', [embeddingId, documentId, text, chunkIndex]);
194
+ }
195
+ catch (error) {
196
+ if (error instanceof Error && error.message.includes('FOREIGN KEY constraint failed')) {
197
+ throw new Error(`Document with ID ${documentId} does not exist`);
198
+ }
199
+ throw new Error(`Failed to insert/update chunk: ${error instanceof Error ? error.message : 'Unknown error'}`);
200
+ }
201
+ }
202
+ /**
203
+ * Inserts a new document or returns existing document ID if it already exists
204
+ * @param connection - Database connection object
205
+ * @param source - Source path of the document
206
+ * @param title - Title of the document
207
+ * @returns Promise that resolves to the document ID
208
+ */
209
+ export async function upsertDocument(connection, source, title) {
210
+ try {
211
+ // First try to get existing document
212
+ const existing = await connection.get('SELECT id FROM documents WHERE source = ?', [source]);
213
+ if (existing) {
214
+ return existing.id;
215
+ }
216
+ // Insert new document if it doesn't exist
217
+ const result = await connection.run('INSERT INTO documents (source, title) VALUES (?, ?)', [source, title]);
218
+ if (typeof result.lastID !== 'number' || result.lastID <= 0) {
219
+ throw new Error('Failed to get document ID after insertion');
220
+ }
221
+ return result.lastID;
222
+ }
223
+ catch (error) {
224
+ throw new Error(`Failed to upsert document: ${error instanceof Error ? error.message : 'Unknown error'}`);
225
+ }
226
+ }
227
+ /**
228
+ * Retrieves chunks by their embedding IDs with document metadata
229
+ * @param connection - Database connection object
230
+ * @param embeddingIds - Array of embedding IDs to retrieve
231
+ * @returns Promise that resolves to an array of chunk results with document metadata
232
+ */
233
+ export async function getChunksByEmbeddingIds(connection, embeddingIds) {
234
+ if (embeddingIds.length === 0) {
235
+ return [];
236
+ }
237
+ try {
238
+ const placeholders = embeddingIds.map(() => '?').join(',');
239
+ const sql = `
240
+ SELECT
241
+ c.id,
242
+ c.embedding_id,
243
+ c.document_id,
244
+ c.text,
245
+ c.chunk_index,
246
+ c.created_at,
247
+ d.source as document_source,
248
+ d.title as document_title
249
+ FROM chunks c
250
+ JOIN documents d ON c.document_id = d.id
251
+ WHERE c.embedding_id IN (${placeholders})
252
+ ORDER BY c.chunk_index
253
+ `;
254
+ const results = await connection.all(sql, embeddingIds);
255
+ return results;
256
+ }
257
+ catch (error) {
258
+ throw new Error(`Failed to retrieve chunks: ${error instanceof Error ? error.message : 'Unknown error'}`);
259
+ }
260
+ }
261
+ /**
262
+ * Gets the current model version from system_info table
263
+ * @param connection - Database connection object
264
+ * @returns Promise that resolves to the model version string or null if not set
265
+ */
266
+ export async function getModelVersion(connection) {
267
+ try {
268
+ const result = await connection.get('SELECT model_version FROM system_info WHERE id = 1');
269
+ return result ? result.model_version : null;
270
+ }
271
+ catch (error) {
272
+ throw new Error(`Failed to get model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
273
+ }
274
+ }
275
+ /**
276
+ * Sets the model version in system_info table
277
+ * @param connection - Database connection object
278
+ * @param modelVersion - Model version string to store
279
+ */
280
+ export async function setModelVersion(connection, modelVersion) {
281
+ try {
282
+ // Check if there's already a row
283
+ const existing = await connection.get('SELECT model_name, model_dimensions FROM system_info WHERE id = 1');
284
+ if (existing) {
285
+ // Update only the model_version field, preserve existing model info
286
+ await connection.run('UPDATE system_info SET model_version = ?, updated_at = CURRENT_TIMESTAMP WHERE id = 1', [modelVersion]);
287
+ }
288
+ else {
289
+ // Insert new row with just model_version
290
+ await connection.run('INSERT INTO system_info (id, model_version, updated_at) VALUES (1, ?, CURRENT_TIMESTAMP)', [modelVersion]);
291
+ }
292
+ }
293
+ catch (error) {
294
+ throw new Error(`Failed to set model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
295
+ }
296
+ }
297
+ /**
298
+ * Gets the stored model information from system_info table
299
+ * @param connection - Database connection object
300
+ * @returns Promise that resolves to model info object or null if not set
301
+ */
302
+ export async function getStoredModelInfo(connection) {
303
+ try {
304
+ const result = await connection.get('SELECT model_name, model_dimensions FROM system_info WHERE id = 1');
305
+ if (!result || !result.model_name || !result.model_dimensions) {
306
+ return null;
307
+ }
308
+ return {
309
+ modelName: result.model_name,
310
+ dimensions: result.model_dimensions
311
+ };
312
+ }
313
+ catch (error) {
314
+ throw new Error(`Failed to get stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
315
+ }
316
+ }
317
+ /**
318
+ * Sets the model information in system_info table
319
+ * @param connection - Database connection object
320
+ * @param modelName - Name of the embedding model
321
+ * @param dimensions - Number of dimensions for the model
322
+ */
323
+ export async function setStoredModelInfo(connection, modelName, dimensions) {
324
+ try {
325
+ // Check if there's already a row
326
+ const existing = await connection.get('SELECT model_version FROM system_info WHERE id = 1');
327
+ if (existing) {
328
+ // Update only the model info fields, preserve existing model_version
329
+ await connection.run('UPDATE system_info SET model_name = ?, model_dimensions = ?, updated_at = CURRENT_TIMESTAMP WHERE id = 1', [modelName, dimensions]);
330
+ }
331
+ else {
332
+ // Insert new row with placeholder model_version (will be updated by setModelVersion)
333
+ await connection.run('INSERT INTO system_info (id, model_version, model_name, model_dimensions, updated_at) VALUES (1, "", ?, ?, CURRENT_TIMESTAMP)', [modelName, dimensions]);
334
+ }
335
+ }
336
+ catch (error) {
337
+ throw new Error(`Failed to set stored model info: ${error instanceof Error ? error.message : 'Unknown error'}`);
338
+ }
339
+ }
340
+ //# sourceMappingURL=db.js.map
package/dist/db.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"db.js","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,EAAE,WAAW,EAAiB,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAsB5F;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,MAAc;IACzC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,EAAE,GAAG,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,EAAE;YAC9C,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,QAAQ,GAAG,8BAA8B,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC;gBAExE,iDAAiD;gBACjD,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACnC,WAAW,CACT,WAAW,CAAC,UAAU,CAAC,4BAA4B,MAAM,qCAAqC,CAAC,EAC/F,qBAAqB,EACrB,EAAE,QAAQ,EAAE,aAAa,CAAC,IAAI,EAAE,CACjC,CAAC;gBACJ,CAAC;qBAAM,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;oBAChF,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,yCAAyC,MAAM,2BAA2B,CAAC,CAAC,CAAC;oBACzG,OAAO;gBACT,CAAC;qBAAM,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;oBAClD,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,+BAA+B,MAAM,kCAAkC,CAAC,CAAC,CAAC;oBACtG,OAAO;gBACT,CAAC;qBAAM,CAAC;oBACN,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;oBACvC,OAAO;gBACT,CAAC;YACH,CAAC;YAED,iCAAiC;YACjC,EAAE,CAAC,GAAG,CAAC,0BAA0B,EAAE,CAAC,GAAG,EAAE,EAAE;gBACzC,IAAI,GAAG,EAAE,CAAC;oBACR,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,kCAAkC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;oBAC9E,OAAO;gBACT,CAAC;gBAED,4EAA4E;gBAC5E,MAAM,UAAU,GAAuB;oBACrC,EAAE;oBACF,GAAG,EAAE,CAAC,GAAW,EAAE,MAAc,EAAE,EAAE;wBACnC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;4BACrC,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,IAAI,EAAE,EAAE,UAAS,GAAG;gCACpC,IAAI,GAAG,EAAE,CAAC;oCACR,gCAAgC;oCAChC,MAAM,aAAa,GAAG,kBAAkB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;oCACnD,MAAM,CAAC,aAAa,CAAC,CAAC;gCACxB,CAAC;qCAAM,CAAC;oCACN,OAAO,CAAC,IAAI,CAAC,CAAC;gCAChB,CAAC;4BACH,CAAC,CAAC,CAAC;wBACL,CAAC,CAAC,CAAC;oBACL,CAAC;oBACD,GAAG,EAAE,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAC/B,GAAG,EAAE,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAC/B,KAAK,EAAE,SAAS,CA
AC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;iBACpC,CAAC;gBAEF,OAAO,CAAC,UAAU,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,KAAY,EAAE,GAAY;IACpD,IAAI,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC;IAEpC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1C,eAAe,GAAG,wFAAwF,CAAC;IAC7G,CAAC;SAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QACjD,eAAe,GAAG,0DAA0D,CAAC;IAC/E,CAAC;SAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;QACpD,eAAe,GAAG,2EAA2E,CAAC;IAChG,CAAC;SAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAC,EAAE,CAAC;QAC9D,eAAe,GAAG,6BAA6B,KAAK,CAAC,OAAO,gCAAgC,CAAC;IAC/F,CAAC;SAAM,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,+BAA+B,CAAC,EAAE,CAAC;QACnE,eAAe,GAAG,qCAAqC,KAAK,CAAC,OAAO,oCAAoC,CAAC;IAC3G,CAAC;IAED,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QAC5B,eAAe,IAAI,UAAU,GAAG,EAAE,CAAC;IACrC,CAAC;IAED,OAAO,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;AACpC,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,UAA8B;IACnE,IAAI,CAAC;QACH,yBAAyB;QACzB,MAAM,UAAU,CAAC,GAAG,CAAC;;;;;;;KAOpB,CAAC,CAAC;QAEH,oDAAoD;QACpD,MAAM,UAAU,CAAC,GAAG,CAAC;;;;;;;;;;KAUpB,CAAC,CAAC;QAEH,sDAAsD;QACtD,MAAM,UAAU,CAAC,GAAG,CAAC;;;;;;;;KAQpB,CAAC,CAAC;QAEH,6DAA6D;QAC7D,IAAI,CAAC;YACH,MAAM,UAAU,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QAC7E,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,sCAAsC;YACtC,IAAI,KAAK,YAAY,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;gBAC/E,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,CAAC,GAAG,CAAC,6DAA6D,CAAC,CAAC;QACtF,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,sCAAsC;YACtC,IAAI,KAAK,YAAY,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;gBAC/E,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,MAAM,UAAU,CAAC,GAAG,CAAC;;KAEpB,CAAC,CAAC;QAEH,MAAM,UAAU,CAAC,GAAG,CAAC;;KAEpB,CAAC,CAAC;QAEH,MAAM,UAAU,CAAC,GAAG,CAAC;;KAEpB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC;IAC1D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,YAAY,KAAK,CAAC,CAA
C,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACvH,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,UAA8B,EAC9B,MAAc,EACd,KAAa;IAEb,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CACjC,qDAAqD,EACrD,CAAC,MAAM,EAAE,KAAK,CAAC,CAChB,CAAC;QAEF,IAAI,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC5D,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAC,EAAE,CAAC;YACjF,MAAM,IAAI,KAAK,CAAC,yBAAyB,MAAM,kBAAkB,CAAC,CAAC;QACrE,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC5G,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAA8B,EAC9B,WAAmB,EACnB,UAAkB,EAClB,IAAY,EACZ,UAAkB;IAElB,IAAI,CAAC;QACH,wDAAwD;QACxD,MAAM,UAAU,CAAC,GAAG,CAClB,kGAAkG,EAClG,CAAC,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,CAAC,CAC5C,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,+BAA+B,CAAC,EAAE,CAAC;YACtF,MAAM,IAAI,KAAK,CAAC,oBAAoB,UAAU,iBAAiB,CAAC,CAAC;QACnE,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAChH,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,UAA8B,EAC9B,MAAc,EACd,KAAa;IAEb,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,GAAG,CACnC,2CAA2C,EAC3C,CAAC,MAAM,CAAC,CACT,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,QAAQ,CAAC,EAAE,CAAC;QACrB,CAAC;QAED,0CAA0C;QAC1C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CACjC,qDAAqD,EACrD,CAAC,MAAM,EAAE,KAAK,CAAC,CAChB,CAAC;QAEF,IAAI,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC5D,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC5G,CAAC;AACH,CAAC;AAED;;;
;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,UAA8B,EAC9B,YAAsB;IAEtB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,GAAG,GAAG;;;;;;;;;;;;iCAYiB,YAAY;;KAExC,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;QACxD,OAAO,OAAwB,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC5G,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,UAA8B;IAClE,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QAC1F,OAAO,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,gCAAgC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC9G,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,UAA8B,EAAE,YAAoB;IACxF,IAAI,CAAC;QACH,iCAAiC;QACjC,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,mEAAmE,CAAC,CAAC;QAE3G,IAAI,QAAQ,EAAE,CAAC;YACb,oEAAoE;YACpE,MAAM,UAAU,CAAC,GAAG,CAClB,uFAAuF,EACvF,CAAC,YAAY,CAAC,CACf,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,yCAAyC;YACzC,MAAM,UAAU,CAAC,GAAG,CAClB,0FAA0F,EAC1F,CAAC,YAAY,CAAC,CACf,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,gCAAgC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC9G,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,UAA8B;IAIrE,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CACjC,mEAAmE,CACpE,CAAC;QAEF,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC9D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,SAAS,EAAE,MAAM,CAAC,UAAU;YAC5B,UAAU,EAAE,MAAM,CAAC,gBAAgB;SACpC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAClH,CAAC;AACH,CAAC;AAED;;;;;GAKG;AA
CH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,UAA8B,EAC9B,SAAiB,EACjB,UAAkB;IAElB,IAAI,CAAC;QACH,iCAAiC;QACjC,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QAE5F,IAAI,QAAQ,EAAE,CAAC;YACb,qEAAqE;YACrE,MAAM,UAAU,CAAC,GAAG,CAClB,0GAA0G,EAC1G,CAAC,SAAS,EAAE,UAAU,CAAC,CACxB,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,qFAAqF;YACrF,MAAM,UAAU,CAAC,GAAG,CAClB,+HAA+H,EAC/H,CAAC,SAAS,EAAE,UAAU,CAAC,CACxB,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,oCAAoC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAClH,CAAC;AACH,CAAC"}
@@ -0,0 +1,101 @@
1
+ import type { EmbeddingResult } from './types.js';
2
/**
 * Engine that produces text embeddings using transformers.js.
 */
export declare class EmbeddingEngine {
    private model;
    private modelVersion;
    private readonly modelName;
    private readonly batchSize;
    constructor(modelName?: string, batchSize?: number);
    /**
     * Loads the embedding model.
     * @throws {Error} When the model cannot be loaded
     */
    loadModel(): Promise<void>;
    /**
     * Embeds a list of texts.
     * @param texts - Text strings to embed
     * @returns Promise resolving to the embedding results
     */
    embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
    /**
     * Handles one batch, with error handling for individual chunks.
     * @param batch - Text strings belonging to this batch
     * @param startIndex - Position of the batch's first item in the original array
     * @returns Promise resolving to the embedding results
     */
    private processBatchWithErrorHandling;
    /**
     * Processes chunks one at a time; used as the fallback when a batch fails.
     */
    private fallbackToIndividualProcessing;
    /**
     * Embeds one chunk, with error handling.
     * @param text - Text to embed
     * @param index - Index of the chunk
     * @returns Promise resolving to the embedding result, or null on failure
     */
    private processSingleChunk;
    /**
     * Embeds a single text.
     * @param text - Text string to embed
     * @returns Promise resolving to the embedding result
     */
    embedSingle(text: string): Promise<EmbeddingResult>;
    /**
     * Embeds document chunks in batches with progress logging; intended for
     * ingesting large documents.
     * @param chunks - Text chunks taken from documents
     * @returns Promise resolving to the embedding results
     */
    embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
    /**
     * Returns the identifier of the current model version.
     * @returns Model version string
     */
    getModelVersion(): string;
    /**
     * Reports whether the model has been loaded.
     * @returns True if the model is loaded
     */
    isLoaded(): boolean;
    /**
     * Returns the model name.
     * @returns Model name string
     */
    getModelName(): string;
    /**
     * Returns the batch size.
     * @returns Batch size number
     */
    getBatchSize(): number;
    /**
     * Derives a deterministic model version identifier from the model name
     * and configuration, so versioning stays consistent.
     * @returns Model version string
     */
    private generateModelVersion;
    /**
     * Derives a deterministic embedding ID for a text chunk.
     * @param text - The text content
     * @param index - Index within the batch
     * @returns Deterministic embedding ID
     */
    private generateEmbeddingId;
}
87
/**
 * Returns the singleton embedding engine instance.
 * @param modelName - Model name override (optional)
 * @param batchSize - Batch size override (optional)
 * @returns The EmbeddingEngine instance
 */
export declare function getEmbeddingEngine(modelName?: string, batchSize?: number): EmbeddingEngine;
94
/**
 * Initializes the embedding engine and loads its model.
 * @param modelName - Model name override (optional)
 * @param batchSize - Batch size override (optional)
 * @returns Promise resolving to the loaded embedding engine
 */
export declare function initializeEmbeddingEngine(modelName?: string, batchSize?: number): Promise<EmbeddingEngine>;
101
+ //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAUlD;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,YAAY,CAAuB;IAC3C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM;IAelD;;;OAGG;IACG,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IA6ChC;;;;OAIG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAqB7D;;;;;OAKG;YACW,6BAA6B;IAmC3C;;OAEG;YACW,8BAA8B;IAgC5C;;;;;OAKG;YACW,kBAAkB;IAqBhC;;;;OAIG;IACG,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAQzD;;;;;OAKG;IACG,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IA+CtE;;;OAGG;IACH,eAAe,IAAI,MAAM;IAOzB;;;OAGG;IACH,QAAQ,IAAI,OAAO;IAInB;;;OAGG;IACH,YAAY,IAAI,MAAM;IAItB;;;OAGG;IACH,YAAY,IAAI,MAAM;IAItB;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IAa5B;;;;;OAKG;IACH,OAAO,CAAC,mBAAmB;CAM5B;AAQD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,eAAe,CAS1F;AAED;;;;;GAKG;AACH,wBAAsB,yBAAyB,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAQhH"}