@yamo/memory-mesh 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +80 -0
- package/bin/memory_mesh.js +69 -0
- package/bin/scrubber.js +81 -0
- package/index.d.ts +111 -0
- package/lib/adapters/index.js +3 -0
- package/lib/embeddings/factory.js +150 -0
- package/lib/embeddings/index.js +2 -0
- package/lib/embeddings/service.js +586 -0
- package/lib/index.js +18 -0
- package/lib/lancedb/client.js +631 -0
- package/lib/lancedb/config.js +215 -0
- package/lib/lancedb/errors.js +144 -0
- package/lib/lancedb/index.js +4 -0
- package/lib/lancedb/schema.js +197 -0
- package/lib/memory/index.js +3 -0
- package/lib/memory/memory-context-manager.js +388 -0
- package/lib/memory/memory-mesh.js +910 -0
- package/lib/memory/memory-translator.js +130 -0
- package/lib/memory/migrate-memory.js +227 -0
- package/lib/memory/migrate-to-v2.js +120 -0
- package/lib/memory/scorer.js +85 -0
- package/lib/memory/vector-memory.js +364 -0
- package/lib/privacy/audit-logger.js +176 -0
- package/lib/privacy/dlp-redactor.js +72 -0
- package/lib/privacy/index.js +10 -0
- package/lib/reporting/skill-report-generator.js +283 -0
- package/lib/scrubber/.gitkeep +1 -0
- package/lib/scrubber/config/defaults.js +62 -0
- package/lib/scrubber/errors/scrubber-error.js +43 -0
- package/lib/scrubber/index.js +25 -0
- package/lib/scrubber/scrubber.js +130 -0
- package/lib/scrubber/stages/chunker.js +103 -0
- package/lib/scrubber/stages/metadata-annotator.js +74 -0
- package/lib/scrubber/stages/normalizer.js +59 -0
- package/lib/scrubber/stages/semantic-filter.js +61 -0
- package/lib/scrubber/stages/structural-cleaner.js +82 -0
- package/lib/scrubber/stages/validator.js +66 -0
- package/lib/scrubber/telemetry.js +66 -0
- package/lib/scrubber/utils/hash.js +39 -0
- package/lib/scrubber/utils/html-parser.js +45 -0
- package/lib/scrubber/utils/pattern-matcher.js +63 -0
- package/lib/scrubber/utils/token-counter.js +31 -0
- package/lib/search/filter.js +275 -0
- package/lib/search/hybrid.js +137 -0
- package/lib/search/index.js +3 -0
- package/lib/search/pattern-miner.js +160 -0
- package/lib/utils/error-sanitizer.js +84 -0
- package/lib/utils/handoff-validator.js +85 -0
- package/lib/utils/index.js +4 -0
- package/lib/utils/spinner.js +190 -0
- package/lib/utils/streaming-client.js +128 -0
- package/package.json +39 -0
- package/skills/SKILL.md +462 -0
- package/skills/skill-scrubber.yamo +41 -0
|
@@ -0,0 +1,631 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB Client Wrapper
|
|
3
|
+
*
|
|
4
|
+
* A comprehensive wrapper around LanceDB JavaScript SDK providing:
|
|
5
|
+
* - Connection management with pooling and retries
|
|
6
|
+
* - CRUD operations for memory entries
|
|
7
|
+
* - Vector similarity search with filtering
|
|
8
|
+
* - Database statistics and monitoring
|
|
9
|
+
*
|
|
10
|
+
* @class LanceDBClient
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import lancedb from "@lancedb/lancedb";
|
|
14
|
+
import fs from "fs";
|
|
15
|
+
import path from "path";
|
|
16
|
+
import { createMemoryTableWithDimension, DEFAULT_VECTOR_DIMENSION } from "./schema.js";
|
|
17
|
+
import { StorageError, QueryError, ConfigurationError } from "./errors.js";
|
|
18
|
+
|
|
19
|
+
/**
 * LanceDB client wrapper class.
 *
 * Owns a single database connection and a single table handle, connects
 * lazily on first use, validates records/vectors before they are written,
 * and retries operations whose error message looks transient.
 */
class LanceDBClient {
  /**
   * Create a new LanceDBClient instance.
   *
   * @param {Object} [config={}] - Configuration object
   * @param {string} [config.uri] - Database URI (default: LANCEDB_URI env var or './data/lancedb')
   * @param {string} [config.tableName] - Table name (default: LANCEDB_MEMORY_TABLE env var or 'memory_entries')
   * @param {number} [config.maxRetries] - Maximum attempts per operation (default: 3)
   * @param {number} [config.retryDelay] - Base delay between retries in ms (default: 1000; an explicit 0 disables waiting)
   * @param {number} [config.vectorDimension] - Vector dimension for embeddings (default: DEFAULT_VECTOR_DIMENSION)
   */
  constructor(config = {}) {
    const settings = config ?? {};
    const { retryDelay } = settings;

    this.uri = settings.uri || process.env.LANCEDB_URI || './data/lancedb';
    this.tableName = settings.tableName || process.env.LANCEDB_MEMORY_TABLE || 'memory_entries';
    this.maxRetries = settings.maxRetries || 3;
    // `||` would silently turn an explicit 0 ("do not wait") into 1000.
    this.retryDelay = retryDelay === 0 ? 0 : (retryDelay || 1000);
    this.vectorDimension = settings.vectorDimension || DEFAULT_VECTOR_DIMENSION;

    // Connection state
    this.db = null;
    this.table = null;
    this.isConnected = false;
  }

  /**
   * Connect to LanceDB and initialize the table.
   *
   * For local paths the parent directory of the database is created first.
   * URIs that carry a scheme (e.g. "s3://", "db://") are handed to LanceDB
   * untouched, because resolving them as filesystem paths would create stray
   * local directories.
   *
   * @returns {Promise<void>}
   * @throws {StorageError} If connection fails after all attempts
   */
  async connect() {
    if (this.isConnected) {
      return; // Already connected
    }

    let lastError = null;

    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        if (this._isLocalUri()) {
          // recursive mkdir is a no-op when the directory already exists
          fs.mkdirSync(path.dirname(path.resolve(this.uri)), { recursive: true });
        }

        this.db = await lancedb.connect(this.uri);

        // Creates the table if it doesn't exist, opens it if it does
        this.table = await createMemoryTableWithDimension(this.db, this.tableName, this.vectorDimension);

        this.isConnected = true;
        return;
      } catch (error) {
        lastError = error;

        if (attempt < this.maxRetries) {
          // Linear backoff between connection attempts
          await this._sleep(this.retryDelay * attempt);
        }
      }
    }

    // All attempts failed
    const errorMessage = lastError instanceof Error ? lastError.message : String(lastError);
    throw new StorageError(
      `Failed to connect to LanceDB after ${this.maxRetries} attempts: ${errorMessage}`,
      { uri: this.uri, tableName: this.tableName, originalError: lastError }
    );
  }

  /**
   * Disconnect from LanceDB.
   *
   * Resets the connection state and, when the underlying handles expose a
   * `close()` method, calls it so their resources are released. Failures
   * while closing are logged and never thrown.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    const handles = [this.table, this.db];

    // Reset state first so the client is reusable even if close() misbehaves.
    this.db = null;
    this.table = null;
    this.isConnected = false;

    for (const handle of handles) {
      if (handle && typeof handle.close === 'function') {
        try {
          await handle.close();
        } catch (error) {
          const message = error instanceof Error ? error.message : String(error);
          console.warn(`[LanceDBClient] Error while closing LanceDB handle: ${message}`);
        }
      }
    }
  }

  /**
   * Add a single memory entry.
   *
   * @param {Object} data - Entry data
   * @param {string} data.id - Unique identifier
   * @param {Array<number>} data.vector - Embedding vector (length must equal the configured vectorDimension)
   * @param {string} data.content - Text content
   * @param {string} [data.metadata] - JSON string metadata
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the record is invalid or the add operation fails
   * @throws {QueryError} If the vector is invalid
   */
  async add(data) {
    if (!this.isConnected) {
      await this.connect();
    }

    this._validateRecord(data);

    return await this._retryOperation(async () => {
      const table = this._requireTable();
      const now = new Date();

      await table.add([{ ...data, created_at: now, updated_at: now }]);

      return { id: data.id, success: true };
    });
  }

  /**
   * Add multiple memory entries in one batch. All records share the same
   * created_at/updated_at timestamp.
   *
   * @param {Array<Object>} records - Array of entry data objects
   * @returns {Promise<Object>} `{ count, success: true }`
   * @throws {StorageError} If `records` is not a non-empty array, a record is invalid, or the add fails
   * @throws {QueryError} If a vector is invalid
   */
  async addBatch(records) {
    if (!this.isConnected) {
      await this.connect();
    }

    if (!Array.isArray(records) || records.length === 0) {
      throw new StorageError('Records must be a non-empty array');
    }

    // Validate everything up front so a bad record never causes a partial write
    for (const record of records) {
      this._validateRecord(record);
    }

    return await this._retryOperation(async () => {
      const table = this._requireTable();
      const now = new Date();

      await table.add(records.map((record) => ({ ...record, created_at: now, updated_at: now })));

      return { count: records.length, success: true };
    });
  }

  /**
   * Search for similar vectors.
   *
   * @param {Array<number>} vector - Query vector (length must equal the configured vectorDimension)
   * @param {Object} [options] - Search options
   * @param {number} [options.limit=10] - Maximum number of results
   * @param {string} [options.metric] - Accepted for API compatibility; this method
   *                          does not apply it to the query.
   * @param {number} [options.nprobes=20] - Number of IVF partitions to search (best effort)
   * @param {string} [options.filter] - Filter expression (e.g., "content == 'value'").
   *                          Filters work on top-level schema fields only; metadata is
   *                          stored as a JSON string and cannot be filtered directly.
   * @returns {Promise<Array<Object>>} Results as `{ id, content, metadata, score, created_at }`,
   *                          where `score` is the `_distance` value reported for the row
   * @throws {QueryError} If the vector is invalid
   * @throws {StorageError} If the table is not initialized
   */
  async search(vector, options = {}) {
    if (!this.isConnected) {
      await this.connect();
    }

    this._validateVector(vector);

    const { limit = 10, nprobes = 20, filter = null } = options ?? {};

    return await this._retryOperation(async () => {
      let query = this._requireTable().search(vector);

      if (nprobes && typeof nprobes === 'number') {
        try {
          // @ts-ignore - nprobes might not exist on all query types or versions
          query = query.nprobes(nprobes);
        } catch (e) {
          // Deliberate best effort: nprobes is not supported by every LanceDB
          // version/configuration, so the search proceeds without it.
        }
      }

      if (filter) {
        query = query.where(filter);
      }

      // @ts-ignore - execute() is protected in types but public in JS implementation
      const batches = await query.limit(limit).execute();
      const hits = [];

      for await (const batch of batches) {
        for (const row of batch.toArray()) {
          hits.push({
            id: row.id,
            content: row.content,
            metadata: this._parseMetadata(row.metadata),
            // @ts-ignore - _distance is internal property
            score: row._distance,
            created_at: row.created_at
          });
        }
      }

      return hits;
    });
  }

  /**
   * Get a record by ID.
   *
   * @param {string} id - Record ID
   * @returns {Promise<Object|null>} Record object or null if not found
   * @throws {StorageError} If the table is not initialized
   */
  async getById(id) {
    if (!this.isConnected) {
      await this.connect();
    }

    return await this._retryOperation(async () => {
      // Plain filter query; no vector search involved
      const batches = await this._requireTable().query()
        .where(this._idFilter(id))
        // @ts-ignore
        .execute();

      const rows = [];
      for await (const batch of batches) {
        rows.push(...batch.toArray());
      }

      if (rows.length === 0) {
        return null;
      }

      const [record] = rows;
      return {
        id: record.id,
        vector: record.vector,
        content: record.content,
        metadata: this._parseMetadata(record.metadata),
        created_at: record.created_at,
        updated_at: record.updated_at
      };
    });
  }

  /**
   * Get all records from the table.
   *
   * @param {Object} [options] - Options
   * @param {number} [options.limit] - Optional maximum number of records
   * @returns {Promise<Array<Object>>} Array of records
   * @throws {StorageError} If the table is not initialized
   */
  async getAll(options = {}) {
    if (!this.isConnected) {
      await this.connect();
    }

    const { limit } = options ?? {};

    return await this._retryOperation(async () => {
      let query = this._requireTable().query();

      if (limit) {
        query = query.limit(limit);
      }

      // @ts-ignore
      const batches = await query.execute();
      const records = [];

      for await (const batch of batches) {
        for (const row of batch.toArray()) {
          records.push({
            id: row.id,
            content: row.content,
            metadata: this._parseMetadata(row.metadata),
            vector: row.vector,
            created_at: row.created_at,
            updated_at: row.updated_at
          });
        }
      }

      return records;
    });
  }

  /**
   * Delete a record by ID.
   *
   * @param {string} id - Record ID to delete
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the table is not initialized
   */
  async delete(id) {
    if (!this.isConnected) {
      await this.connect();
    }

    return await this._retryOperation(async () => {
      await this._requireTable().delete(this._idFilter(id));

      return { id, success: true };
    });
  }

  /**
   * Update an existing record. `updated_at` is always set to the current time.
   *
   * @param {string} id - Record ID to update
   * @param {Object} data - Updated data fields
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the table is not initialized
   */
  async update(id, data) {
    if (!this.isConnected) {
      await this.connect();
    }

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      // Update API expects filter and values separately
      await table.update({
        where: this._idFilter(id),
        values: { ...data, updated_at: new Date() }
      });

      return { id, success: true };
    });
  }

  /**
   * Get database statistics.
   *
   * The row count comes from `table.countRows()` when the table exposes it,
   * then `table.count()`, and only as a last resort from scanning every
   * batch. If counting throws, `count` is reported as -1 (unknown).
   *
   * @returns {Promise<Object>} `{ tableName, uri, count, isConnected }`
   * @throws {StorageError} If the table is not initialized
   */
  async getStats() {
    if (!this.isConnected) {
      await this.connect();
    }

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      let count = 0;
      try {
        // @ts-ignore
        if (typeof table.countRows === 'function') {
          // @ts-ignore
          count = await table.countRows();
          // @ts-ignore
        } else if (typeof table.count === 'function') {
          // @ts-ignore
          count = await table.count();
        } else {
          // Fallback: stream the table and add up batch sizes
          // @ts-ignore
          const batches = await table.query().execute();
          for await (const batch of batches) {
            count += batch.numRows;
          }
        }
      } catch (countError) {
        // Counting is informational only; report "unknown" rather than failing
        count = -1;
      }

      return {
        tableName: this.tableName,
        uri: this.uri,
        count,
        isConnected: this.isConnected
      };
    });
  }

  /**
   * Return the open table handle.
   * @private
   * @returns {Object} The table handle
   * @throws {StorageError} If the table has not been initialized
   */
  _requireTable() {
    if (!this.table) {
      throw new StorageError('Table not initialized');
    }
    return this.table;
  }

  /**
   * Build the `id == '...'` filter for a record ID. Single quotes are
   * doubled so an ID containing `'` cannot terminate the string literal and
   * change the meaning of the filter.
   * @private
   * @param {string} id - Record ID
   * @returns {string} Filter expression
   */
  _idFilter(id) {
    return `id == '${String(id).replace(/'/g, "''")}'`;
  }

  /**
   * Decode the JSON-string metadata column of a row.
   * @private
   * @param {string|null|undefined} raw - Stored metadata value
   * @returns {*} Parsed metadata, or null when the stored value is empty
   * @throws {SyntaxError} If the stored value is not valid JSON
   */
  _parseMetadata(raw) {
    return raw ? JSON.parse(raw) : null;
  }

  /**
   * Tell whether `this.uri` is a plain filesystem path, i.e. it does not
   * start with a `scheme://` prefix.
   * @private
   * @returns {boolean} True for local paths
   */
  _isLocalUri() {
    return !/^[a-z][a-z0-9+.-]*:\/\//i.test(String(this.uri));
  }

  /**
   * Validate a record object.
   * @private
   * @param {Object} record - Record to validate
   * @throws {StorageError} If the record is not an object or lacks id/content/vector
   * @throws {QueryError} If the vector is invalid
   */
  _validateRecord(record) {
    if (!record || typeof record !== 'object') {
      throw new StorageError('Record must be an object');
    }

    if (!record.id) {
      throw new StorageError('Record must have an id field');
    }

    if (!record.content) {
      throw new StorageError('Record must have a content field');
    }

    if (!record.vector) {
      throw new StorageError('Record must have a vector field');
    }

    this._validateVector(record.vector);
  }

  /**
   * Validate a vector array against the dimension this client was
   * configured with (the same dimension the table is created with).
   * @private
   * @param {Array<number>} vector - Vector to validate
   * @throws {QueryError} If validation fails
   */
  _validateVector(vector) {
    if (!Array.isArray(vector)) {
      throw new QueryError('Vector must be an array');
    }

    const expectedDim = this.vectorDimension;

    if (vector.length !== expectedDim) {
      throw new QueryError(
        `Vector must have ${expectedDim} dimensions, got ${vector.length}`
      );
    }

    // Validate all elements are numbers
    for (let i = 0; i < vector.length; i++) {
      if (typeof vector[i] !== 'number' || Number.isNaN(vector[i])) {
        throw new QueryError(`Vector element ${i} is not a valid number`);
      }
    }
  }

  /**
   * Sleep for a specified duration.
   * @private
   * @param {number} ms - Milliseconds to sleep
   * @returns {Promise<void>}
   */
  _sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Check if an error is retryable (transient network/connection issues).
   * The decision is based purely on the lower-cased error message.
   * @private
   * @param {Error} error - Error to check
   * @returns {boolean} True if error is retryable
   */
  _isRetryableError(error) {
    if (!error || !error.message) return false;

    const message = error.message.toLowerCase();

    const retryablePatterns = [
      // Network-related errors
      'econnreset',        // Connection reset by peer
      'etimedout',         // Operation timed out
      'enotfound',         // DNS resolution failed
      'econnrefused',      // Connection refused
      'enetunreach',       // Network unreachable
      'ehostunreach',      // Host unreachable
      'socket hang up',    // Socket closed unexpectedly
      'network error',     // Generic network error
      'failed to fetch',   // Fetch/network failure
      'timeout',           // Timeout occurred
      // LanceDB errors that may be transient
      'connection',
      'database closed',
      'table not found',
      'lock',
      'busy',
      'temporary'
    ];

    if (retryablePatterns.some((pattern) => message.includes(pattern))) {
      return true;
    }

    // 5xx HTTP status codes. Word boundaries keep unrelated numbers such as
    // "1500" or "got 512 rows"... "5120" from being mistaken for a status.
    return /\b5\d{2}\b/.test(message);
  }

  /**
   * Retry an operation with exponential backoff and jitter.
   * Non-retryable errors are rethrown immediately.
   * @private
   * @param {Function} operation - Async function to retry
   * @param {number} [maxRetries] - Maximum attempts (default: this.maxRetries; at least 1)
   * @param {number} [baseDelay] - Base delay in ms (default: this.retryDelay)
   * @returns {Promise<*>} Result of the operation
   * @throws {Error} The last error if all attempts fail
   */
  async _retryOperation(operation, maxRetries, baseDelay) {
    // Always run the operation at least once; a zero budget used to skip it
    // entirely and throw `null`.
    const max = Math.max(1, maxRetries ?? this.maxRetries);
    const delay = baseDelay ?? this.retryDelay;
    let lastError = null;

    for (let attempt = 1; attempt <= max; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = error;

        // Give up on non-retryable errors and once the budget is exhausted
        // @ts-ignore - check error type
        if (!this._isRetryableError(error) || attempt === max) {
          throw error;
        }

        // Exponential backoff (delay, 2*delay, 4*delay, ...)
        const backoffMs = delay * Math.pow(2, attempt - 1);

        // Add jitter (0-25% of delay) to prevent thundering herd
        const jitterMs = backoffMs * Math.random() * 0.25;

        const message = error instanceof Error ? error.message : String(error);
        console.warn(
          `[LanceDBClient] Retryable error on attempt ${attempt}/${max}: ${message}. ` +
          `Retrying in ${Math.round((backoffMs + jitterMs))}ms...`
        );

        await this._sleep(backoffMs + jitterMs);
      }
    }

    // Should not reach here, but just in case
    throw lastError;
  }
}
|
|
629
|
+
|
|
630
|
+
export { LanceDBClient };
|
|
631
|
+
export default LanceDBClient;
|