@yamo/memory-mesh 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +80 -0
  3. package/bin/memory_mesh.js +69 -0
  4. package/bin/scrubber.js +81 -0
  5. package/index.d.ts +111 -0
  6. package/lib/adapters/index.js +3 -0
  7. package/lib/embeddings/factory.js +150 -0
  8. package/lib/embeddings/index.js +2 -0
  9. package/lib/embeddings/service.js +586 -0
  10. package/lib/index.js +18 -0
  11. package/lib/lancedb/client.js +631 -0
  12. package/lib/lancedb/config.js +215 -0
  13. package/lib/lancedb/errors.js +144 -0
  14. package/lib/lancedb/index.js +4 -0
  15. package/lib/lancedb/schema.js +197 -0
  16. package/lib/memory/index.js +3 -0
  17. package/lib/memory/memory-context-manager.js +388 -0
  18. package/lib/memory/memory-mesh.js +910 -0
  19. package/lib/memory/memory-translator.js +130 -0
  20. package/lib/memory/migrate-memory.js +227 -0
  21. package/lib/memory/migrate-to-v2.js +120 -0
  22. package/lib/memory/scorer.js +85 -0
  23. package/lib/memory/vector-memory.js +364 -0
  24. package/lib/privacy/audit-logger.js +176 -0
  25. package/lib/privacy/dlp-redactor.js +72 -0
  26. package/lib/privacy/index.js +10 -0
  27. package/lib/reporting/skill-report-generator.js +283 -0
  28. package/lib/scrubber/.gitkeep +1 -0
  29. package/lib/scrubber/config/defaults.js +62 -0
  30. package/lib/scrubber/errors/scrubber-error.js +43 -0
  31. package/lib/scrubber/index.js +25 -0
  32. package/lib/scrubber/scrubber.js +130 -0
  33. package/lib/scrubber/stages/chunker.js +103 -0
  34. package/lib/scrubber/stages/metadata-annotator.js +74 -0
  35. package/lib/scrubber/stages/normalizer.js +59 -0
  36. package/lib/scrubber/stages/semantic-filter.js +61 -0
  37. package/lib/scrubber/stages/structural-cleaner.js +82 -0
  38. package/lib/scrubber/stages/validator.js +66 -0
  39. package/lib/scrubber/telemetry.js +66 -0
  40. package/lib/scrubber/utils/hash.js +39 -0
  41. package/lib/scrubber/utils/html-parser.js +45 -0
  42. package/lib/scrubber/utils/pattern-matcher.js +63 -0
  43. package/lib/scrubber/utils/token-counter.js +31 -0
  44. package/lib/search/filter.js +275 -0
  45. package/lib/search/hybrid.js +137 -0
  46. package/lib/search/index.js +3 -0
  47. package/lib/search/pattern-miner.js +160 -0
  48. package/lib/utils/error-sanitizer.js +84 -0
  49. package/lib/utils/handoff-validator.js +85 -0
  50. package/lib/utils/index.js +4 -0
  51. package/lib/utils/spinner.js +190 -0
  52. package/lib/utils/streaming-client.js +128 -0
  53. package/package.json +39 -0
  54. package/skills/SKILL.md +462 -0
  55. package/skills/skill-scrubber.yamo +41 -0
@@ -0,0 +1,631 @@
1
+ /**
2
+ * LanceDB Client Wrapper
3
+ *
4
+ * A comprehensive wrapper around LanceDB JavaScript SDK providing:
5
+ * - Connection management with lazy connect and retry/backoff
6
+ * - CRUD operations for memory entries
7
+ * - Vector similarity search with filtering
8
+ * - Database statistics and monitoring
9
+ *
10
+ * @class LanceDBClient
11
+ */
12
+
13
+ import lancedb from "@lancedb/lancedb";
14
+ import fs from "fs";
15
+ import path from "path";
16
+ import { createMemoryTableWithDimension, DEFAULT_VECTOR_DIMENSION } from "./schema.js";
17
+ import { StorageError, QueryError, ConfigurationError } from "./errors.js";
18
+
19
/**
 * LanceDB client wrapper.
 *
 * Owns a lazily opened database connection and a single table handle, and
 * exposes CRUD, vector search and basic statistics on top of them. Every
 * public data method connects on demand and runs its table call through
 * `_retryOperation`, which retries only errors that `_isRetryableError`
 * classifies as transient.
 */
class LanceDBClient {
  /**
   * Create a new LanceDBClient instance. No I/O happens here; the connection
   * is opened by `connect()` or by the first data method that needs it.
   *
   * @param {Object} [config={}] - Configuration object
   * @param {string} [config.uri] - Database URI (default: LANCEDB_URI env var or './data/lancedb')
   * @param {string} [config.tableName] - Table name (default: LANCEDB_MEMORY_TABLE env var or 'memory_entries')
   * @param {number} [config.maxRetries] - Maximum attempts per operation (default: 3)
   * @param {number} [config.retryDelay] - Base delay between retries in ms (default: 1000; 0 is honoured)
   * @param {number} [config.vectorDimension] - Embedding dimension (default: DEFAULT_VECTOR_DIMENSION)
   */
  constructor(config = {}) {
    const options = config ?? {};

    this.uri = options.uri || process.env.LANCEDB_URI || './data/lancedb';
    this.tableName = options.tableName || process.env.LANCEDB_MEMORY_TABLE || 'memory_entries';
    // A retry budget of 0 is meaningless (nothing would ever run), so falsy values use the default.
    this.maxRetries = options.maxRetries || 3;
    // `??` rather than `||`: an explicit 0 ms delay is a legitimate setting.
    this.retryDelay = options.retryDelay ?? 1000;
    this.vectorDimension = options.vectorDimension || DEFAULT_VECTOR_DIMENSION;

    // Connection state
    this.db = null;
    this.table = null;
    this.isConnected = false;
  }

  /**
   * Connect to LanceDB and initialize the table.
   *
   * For filesystem URIs the parent directory of the database path is created
   * when missing. URL-style URIs (`scheme://...`) are passed to LanceDB
   * untouched so that no stray local directories are created for them.
   * Failed attempts are retried with a linearly growing delay.
   *
   * @returns {Promise<void>}
   * @throws {StorageError} If connection fails after all attempts
   */
  async connect() {
    if (this.isConnected) {
      return; // Already connected
    }

    const attempts = Math.max(1, this.maxRetries);
    let lastError = null;

    for (let attempt = 1; attempt <= attempts; attempt++) {
      try {
        const isUrlStyleUri = /^[a-z][a-z0-9+.-]*:\/\//i.test(String(this.uri));
        if (!isUrlStyleUri) {
          const dbDir = path.dirname(path.resolve(this.uri));
          if (!fs.existsSync(dbDir)) {
            fs.mkdirSync(dbDir, { recursive: true });
          }
        }

        this.db = await lancedb.connect(this.uri);

        // Creates the table if it doesn't exist, opens it if it does
        this.table = await createMemoryTableWithDimension(this.db, this.tableName, this.vectorDimension);

        this.isConnected = true;
        return;
      } catch (error) {
        lastError = error;

        if (attempt < attempts) {
          await this._sleep(this.retryDelay * attempt);
        }
      }
    }

    const errorMessage = lastError instanceof Error ? lastError.message : String(lastError);
    throw new StorageError(
      `Failed to connect to LanceDB after ${attempts} attempts: ${errorMessage}`,
      { uri: this.uri, tableName: this.tableName, originalError: lastError }
    );
  }

  /**
   * Disconnect from LanceDB.
   *
   * Clears the connection state and, when the underlying handles expose a
   * `close()` method, calls it so native resources are released. Close
   * failures are logged and otherwise ignored: the client is already
   * considered disconnected at that point.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    const handles = [this.table, this.db];

    this.db = null;
    this.table = null;
    this.isConnected = false;

    for (const handle of handles) {
      if (handle && typeof handle.close === 'function') {
        try {
          await handle.close();
        } catch (closeError) {
          const message = closeError instanceof Error ? closeError.message : String(closeError);
          console.warn(`[LanceDBClient] Error while closing handle: ${message}`);
        }
      }
    }
  }

  /**
   * Add a single memory entry. `created_at` and `updated_at` are set to the
   * same timestamp.
   *
   * @param {Object} data - Entry data
   * @param {string} data.id - Unique identifier
   * @param {Array<number>} data.vector - Embedding vector (`vectorDimension` elements)
   * @param {string} data.content - Text content
   * @param {string} [data.metadata] - JSON string metadata
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the record is malformed or the table is unavailable
   * @throws {QueryError} If the vector is malformed
   */
  async add(data) {
    await this._ensureConnected();
    this._validateRecord(data);

    return await this._retryOperation(async () => {
      const table = this._requireTable();
      const now = new Date();

      await table.add([{ ...data, created_at: now, updated_at: now }]);

      return {
        id: data.id,
        success: true
      };
    });
  }

  /**
   * Add multiple memory entries in one call. All records share one timestamp.
   *
   * @param {Array<Object>} records - Array of entry data objects (same shape as `add`)
   * @returns {Promise<Object>} `{ count, success: true }`
   * @throws {StorageError} If `records` is not a non-empty array or a record is malformed
   * @throws {QueryError} If a vector is malformed
   */
  async addBatch(records) {
    await this._ensureConnected();

    if (!Array.isArray(records) || records.length === 0) {
      throw new StorageError('Records must be a non-empty array');
    }

    for (const record of records) {
      this._validateRecord(record);
    }

    return await this._retryOperation(async () => {
      const table = this._requireTable();
      const now = new Date();

      await table.add(records.map((record) => ({
        ...record,
        created_at: now,
        updated_at: now
      })));

      return {
        count: records.length,
        success: true
      };
    });
  }

  /**
   * Search for similar vectors.
   *
   * @param {Array<number>} vector - Query vector (`vectorDimension` elements)
   * @param {Object} [options] - Search options
   * @param {number} [options.limit=10] - Maximum number of results
   * @param {string} [options.metric] - Accepted for compatibility but NOT applied
   *   to the query; the table/index default distance is used.
   *   NOTE(review): decide whether this should be forwarded to LanceDB.
   * @param {number} [options.nprobes=20] - Number of IVF partitions to search (best effort)
   * @param {string} [options.filter] - SQL filter expression (e.g., "content == 'value'").
   *   Filters work on top-level schema fields only; `metadata` is stored as a
   *   JSON string and cannot be filtered on directly.
   * @returns {Promise<Array<Object>>} Rows as `{ id, content, metadata, score, created_at }`,
   *   where `score` is the row's `_distance` value as reported by LanceDB
   * @throws {QueryError} If the query vector is malformed
   * @throws {Error} Any error from the underlying query once retries are exhausted
   */
  async search(vector, options = {}) {
    await this._ensureConnected();
    this._validateVector(vector);

    const { limit = 10, nprobes = 20, filter = null } = options ?? {};

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      let query = table.search(vector);

      if (nprobes && typeof nprobes === 'number') {
        try {
          // @ts-ignore - nprobes might not exist on all query types or versions
          query = query.nprobes(nprobes);
        } catch (e) {
          // Deliberate best effort: nprobes only applies to IVF-indexed vector
          // queries and is not available in every LanceDB version.
        }
      }

      if (filter) {
        query = query.where(filter);
      }

      // @ts-ignore - execute() is protected in types but public in JS implementation or types are wrong
      const batches = await query.limit(limit).execute();
      const rows = await this._collectRows(batches);

      return rows.map((row) => ({
        id: row.id,
        content: row.content,
        metadata: this._parseMetadata(row.metadata),
        // @ts-ignore - _distance is internal property
        score: row._distance,
        created_at: row.created_at
      }));
    });
  }

  /**
   * Get a record by ID.
   *
   * @param {string} id - Record ID
   * @returns {Promise<Object|null>} Record object or null if not found
   * @throws {Error} Any error from the underlying query once retries are exhausted
   */
  async getById(id) {
    await this._ensureConnected();

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      // Plain filter query; no vector search involved
      const batches = await table.query()
        .where(this._idFilter(id))
        // @ts-ignore
        .execute();

      const rows = await this._collectRows(batches);
      if (rows.length === 0) {
        return null;
      }

      const record = rows[0];
      return {
        id: record.id,
        vector: record.vector,
        content: record.content,
        metadata: this._parseMetadata(record.metadata),
        created_at: record.created_at,
        updated_at: record.updated_at
      };
    });
  }

  /**
   * Get all records from the table.
   *
   * @param {Object} [options] - Options
   * @param {number} [options.limit] - Optional maximum number of records
   * @returns {Promise<Array<Object>>} Array of records
   */
  async getAll(options = {}) {
    await this._ensureConnected();

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      let query = table.query();
      if (options?.limit) {
        query = query.limit(options.limit);
      }

      // @ts-ignore
      const batches = await query.execute();
      const rows = await this._collectRows(batches);

      return rows.map((row) => ({
        id: row.id,
        content: row.content,
        metadata: this._parseMetadata(row.metadata),
        vector: row.vector,
        created_at: row.created_at,
        updated_at: row.updated_at
      }));
    });
  }

  /**
   * Delete a record by ID.
   *
   * @param {string} id - Record ID to delete
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the table is unavailable
   */
  async delete(id) {
    await this._ensureConnected();

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      await table.delete(this._idFilter(id));

      return {
        id,
        success: true
      };
    });
  }

  /**
   * Update an existing record. `updated_at` is always refreshed.
   *
   * @param {string} id - Record ID to update
   * @param {Object} data - Updated data fields
   * @returns {Promise<Object>} `{ id, success: true }`
   * @throws {StorageError} If the table is unavailable
   */
  async update(id, data) {
    await this._ensureConnected();

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      // Update API expects filter and values separately
      await table.update({
        where: this._idFilter(id),
        values: { ...data, updated_at: new Date() }
      });

      return {
        id,
        success: true
      };
    });
  }

  /**
   * Get database statistics.
   *
   * The row count comes from `table.countRows()` when available, then from a
   * legacy `table.count()`, and finally from scanning the table and summing
   * batch sizes. If counting fails altogether, `count` is reported as -1.
   *
   * @returns {Promise<Object>} `{ tableName, uri, count, isConnected }`
   */
  async getStats() {
    await this._ensureConnected();

    return await this._retryOperation(async () => {
      const table = this._requireTable();

      let count = 0;
      try {
        // @ts-ignore - method availability depends on the LanceDB SDK version
        if (typeof table.countRows === 'function') {
          // @ts-ignore
          count = await table.countRows();
        // @ts-ignore
        } else if (typeof table.count === 'function') {
          // @ts-ignore
          count = await table.count();
        } else {
          // Last resort: scan the table and add up the batch sizes
          // @ts-ignore
          const batches = await table.query().execute();
          for await (const batch of batches) {
            count += batch.numRows;
          }
        }
      } catch (countError) {
        // Statistics are best effort: report "unknown" instead of failing
        count = -1;
      }

      return {
        tableName: this.tableName,
        uri: this.uri,
        count,
        isConnected: this.isConnected
      };
    });
  }

  /**
   * Open the connection if it is not already open.
   * @private
   * @returns {Promise<void>}
   */
  async _ensureConnected() {
    if (!this.isConnected) {
      await this.connect();
    }
  }

  /**
   * Return the table handle or fail when it has not been initialized.
   * @private
   * @returns {Object} The table handle
   * @throws {StorageError} If the table is not initialized
   */
  _requireTable() {
    if (!this.table) {
      throw new StorageError('Table not initialized');
    }
    return this.table;
  }

  /**
   * Build an equality predicate on `id`, doubling single quotes so the value
   * cannot terminate the SQL string literal early.
   * @private
   * @param {string} id - Record ID
   * @returns {string} Filter expression
   */
  _idFilter(id) {
    const escaped = String(id).replace(/'/g, "''");
    return `id == '${escaped}'`;
  }

  /**
   * Drain an async iterable of record batches into a flat array of rows.
   * @private
   * @param {AsyncIterable<Object>} batches - Batches exposing `toArray()`
   * @returns {Promise<Array<Object>>} All rows in iteration order
   */
  async _collectRows(batches) {
    const rows = [];
    for await (const batch of batches) {
      for (const row of batch.toArray()) {
        rows.push(row);
      }
    }
    return rows;
  }

  /**
   * Decode the stored metadata column.
   * @private
   * @param {*} raw - Stored metadata value
   * @returns {*} Parsed JSON for strings, null for empty values, the value itself otherwise
   * @throws {SyntaxError} If a metadata string is not valid JSON
   */
  _parseMetadata(raw) {
    if (!raw) {
      return null;
    }
    return typeof raw === 'string' ? JSON.parse(raw) : raw;
  }

  /**
   * Validate a record object.
   * @private
   * @param {Object} record - Record to validate
   * @throws {StorageError} If a required field is missing
   * @throws {QueryError} If the vector is malformed
   */
  _validateRecord(record) {
    if (!record || typeof record !== 'object') {
      throw new StorageError('Record must be an object');
    }

    if (!record.id) {
      throw new StorageError('Record must have an id field');
    }

    if (!record.content) {
      throw new StorageError('Record must have a content field');
    }

    if (!record.vector) {
      throw new StorageError('Record must have a vector field');
    }

    this._validateVector(record.vector);
  }

  /**
   * Validate a vector array against the configured dimension.
   * @private
   * @param {Array<number>} vector - Vector to validate
   * @throws {QueryError} If validation fails
   */
  _validateVector(vector) {
    if (!Array.isArray(vector)) {
      throw new QueryError('Vector must be an array');
    }

    // Must agree with the dimension the table was created with; 384 is only
    // a fallback for instances that never had a dimension assigned.
    const expectedDim = this.vectorDimension ?? 384;

    if (vector.length !== expectedDim) {
      throw new QueryError(
        `Vector must have ${expectedDim} dimensions, got ${vector.length}`
      );
    }

    for (let i = 0; i < vector.length; i++) {
      if (typeof vector[i] !== 'number' || Number.isNaN(vector[i])) {
        throw new QueryError(`Vector element ${i} is not a valid number`);
      }
    }
  }

  /**
   * Sleep for a specified duration.
   * @private
   * @param {number} ms - Milliseconds to sleep
   * @returns {Promise<void>}
   */
  _sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Check if an error looks transient, based on its message text.
   * @private
   * @param {Error} error - Error to check
   * @returns {boolean} True if error is retryable
   */
  _isRetryableError(error) {
    if (!error || !error.message) return false;

    const message = String(error.message).toLowerCase();

    const retryablePatterns = [
      // Network-related errors
      'econnreset',       // Connection reset by peer
      'etimedout',        // Operation timed out
      'enotfound',        // DNS resolution failed
      'econnrefused',     // Connection refused
      'enetunreach',      // Network unreachable
      'ehostunreach',     // Host unreachable
      'socket hang up',   // Socket closed unexpectedly
      'network error',    // Generic network error
      'failed to fetch',  // Fetch/network failure
      'timeout',          // Timeout occurred
      // LanceDB errors that may be transient
      'connection',
      'database closed',
      'table not found',
      'lock',
      'busy',
      'temporary'
    ];

    if (retryablePatterns.some(pattern => message.includes(pattern))) {
      return true;
    }

    // 5xx HTTP status as a standalone number. Word boundaries keep longer
    // digit runs (row counts, dimensions such as 1536) from matching.
    return /\b5\d{2}\b/.test(message);
  }

  /**
   * Run an operation, retrying transient failures with exponential backoff
   * and up to 25% random jitter. The operation always runs at least once.
   * @private
   * @param {Function} operation - Async function to retry
   * @param {number} [maxRetries] - Maximum attempts (default: this.maxRetries)
   * @param {number} [baseDelay] - Base delay in ms (default: this.retryDelay)
   * @returns {Promise<*>} Result of the operation
   * @throws {Error} The first non-retryable error, or the last error once attempts run out
   */
  async _retryOperation(operation, maxRetries, baseDelay) {
    const max = Math.max(1, maxRetries ?? this.maxRetries);
    const delay = baseDelay ?? this.retryDelay;
    let lastError = null;

    for (let attempt = 1; attempt <= max; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = error;

        // Give up on non-retryable errors and on the final attempt
        // @ts-ignore - check error type
        if (!this._isRetryableError(error) || attempt === max) {
          throw error;
        }

        // Exponential backoff (delay, 2x, 4x, ...) plus jitter to prevent thundering herd
        const backoffMs = delay * Math.pow(2, attempt - 1);
        const jitterMs = backoffMs * Math.random() * 0.25;

        const message = error instanceof Error ? error.message : String(error);
        console.warn(
          `[LanceDBClient] Retryable error on attempt ${attempt}/${max}: ${message}. ` +
          `Retrying in ${Math.round((backoffMs + jitterMs))}ms...`
        );

        await this._sleep(backoffMs + jitterMs);
      }
    }

    // Unreachable: the loop above always returns or throws
    throw lastError;
  }
}
629
+
630
+ export { LanceDBClient };
631
+ export default LanceDBClient;