@yamo/memory-mesh 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +9 -3
  2. package/bin/memory_mesh.js +95 -8
  3. package/lib/llm/client.d.ts +23 -48
  4. package/lib/llm/client.js +1 -0
  5. package/lib/llm/client.ts +298 -377
  6. package/lib/llm/index.js +1 -0
  7. package/lib/llm/index.ts +1 -2
  8. package/lib/memory/adapters/client.d.ts +22 -85
  9. package/lib/memory/adapters/client.js +1 -0
  10. package/lib/memory/adapters/client.ts +474 -633
  11. package/lib/memory/adapters/config.d.ts +82 -89
  12. package/lib/memory/adapters/config.js +1 -0
  13. package/lib/memory/adapters/config.ts +156 -225
  14. package/lib/memory/adapters/errors.d.ts +28 -20
  15. package/lib/memory/adapters/errors.js +1 -0
  16. package/lib/memory/adapters/errors.ts +83 -120
  17. package/lib/memory/context-manager.d.ts +15 -18
  18. package/lib/memory/context-manager.js +1 -0
  19. package/lib/memory/context-manager.ts +314 -401
  20. package/lib/memory/embeddings/factory.d.ts +18 -20
  21. package/lib/memory/embeddings/factory.js +1 -0
  22. package/lib/memory/embeddings/factory.ts +130 -173
  23. package/lib/memory/embeddings/index.js +1 -0
  24. package/lib/memory/embeddings/index.ts +1 -0
  25. package/lib/memory/embeddings/service.d.ts +36 -66
  26. package/lib/memory/embeddings/service.js +1 -0
  27. package/lib/memory/embeddings/service.ts +479 -616
  28. package/lib/memory/index.d.ts +2 -2
  29. package/lib/memory/index.js +1 -0
  30. package/lib/memory/index.ts +3 -13
  31. package/lib/memory/memory-mesh.d.ts +151 -93
  32. package/lib/memory/memory-mesh.js +1 -0
  33. package/lib/memory/memory-mesh.ts +1406 -1692
  34. package/lib/memory/memory-translator.d.ts +1 -6
  35. package/lib/memory/memory-translator.js +1 -0
  36. package/lib/memory/memory-translator.ts +96 -128
  37. package/lib/memory/schema.d.ts +29 -10
  38. package/lib/memory/schema.js +1 -0
  39. package/lib/memory/schema.ts +102 -185
  40. package/lib/memory/scorer.d.ts +3 -4
  41. package/lib/memory/scorer.js +1 -0
  42. package/lib/memory/scorer.ts +69 -86
  43. package/lib/memory/search/index.js +1 -0
  44. package/lib/memory/search/index.ts +1 -0
  45. package/lib/memory/search/keyword-search.d.ts +10 -26
  46. package/lib/memory/search/keyword-search.js +1 -0
  47. package/lib/memory/search/keyword-search.ts +123 -161
  48. package/lib/scrubber/config/defaults.d.ts +39 -46
  49. package/lib/scrubber/config/defaults.js +1 -0
  50. package/lib/scrubber/config/defaults.ts +50 -112
  51. package/lib/scrubber/errors/scrubber-error.d.ts +22 -0
  52. package/lib/scrubber/errors/scrubber-error.js +39 -0
  53. package/lib/scrubber/errors/scrubber-error.ts +44 -0
  54. package/lib/scrubber/index.d.ts +0 -1
  55. package/lib/scrubber/index.js +1 -0
  56. package/lib/scrubber/index.ts +1 -2
  57. package/lib/scrubber/scrubber.d.ts +14 -31
  58. package/lib/scrubber/scrubber.js +1 -0
  59. package/lib/scrubber/scrubber.ts +93 -152
  60. package/lib/scrubber/stages/chunker.d.ts +22 -10
  61. package/lib/scrubber/stages/chunker.js +86 -0
  62. package/lib/scrubber/stages/chunker.ts +104 -0
  63. package/lib/scrubber/stages/metadata-annotator.d.ts +14 -15
  64. package/lib/scrubber/stages/metadata-annotator.js +64 -0
  65. package/lib/scrubber/stages/metadata-annotator.ts +75 -0
  66. package/lib/scrubber/stages/normalizer.d.ts +13 -10
  67. package/lib/scrubber/stages/normalizer.js +51 -0
  68. package/lib/scrubber/stages/normalizer.ts +60 -0
  69. package/lib/scrubber/stages/semantic-filter.d.ts +13 -10
  70. package/lib/scrubber/stages/semantic-filter.js +51 -0
  71. package/lib/scrubber/stages/semantic-filter.ts +62 -0
  72. package/lib/scrubber/stages/structural-cleaner.d.ts +15 -10
  73. package/lib/scrubber/stages/structural-cleaner.js +73 -0
  74. package/lib/scrubber/stages/structural-cleaner.ts +83 -0
  75. package/lib/scrubber/stages/validator.d.ts +14 -15
  76. package/lib/scrubber/stages/validator.js +56 -0
  77. package/lib/scrubber/stages/validator.ts +67 -0
  78. package/lib/scrubber/telemetry.d.ts +20 -27
  79. package/lib/scrubber/telemetry.js +1 -0
  80. package/lib/scrubber/telemetry.ts +53 -90
  81. package/lib/scrubber/utils/hash.d.ts +14 -0
  82. package/lib/scrubber/utils/hash.js +37 -0
  83. package/lib/scrubber/utils/hash.ts +40 -0
  84. package/lib/scrubber/utils/html-parser.d.ts +14 -0
  85. package/lib/scrubber/utils/html-parser.js +38 -0
  86. package/lib/scrubber/utils/html-parser.ts +46 -0
  87. package/lib/scrubber/utils/pattern-matcher.d.ts +12 -0
  88. package/lib/scrubber/utils/pattern-matcher.js +54 -0
  89. package/lib/scrubber/utils/pattern-matcher.ts +64 -0
  90. package/lib/scrubber/utils/token-counter.d.ts +18 -0
  91. package/lib/scrubber/utils/token-counter.js +30 -0
  92. package/lib/scrubber/utils/token-counter.ts +32 -0
  93. package/lib/utils/logger.d.ts +1 -11
  94. package/lib/utils/logger.js +1 -0
  95. package/lib/utils/logger.ts +43 -63
  96. package/lib/utils/skill-metadata.d.ts +6 -14
  97. package/lib/utils/skill-metadata.js +1 -0
  98. package/lib/utils/skill-metadata.ts +89 -103
  99. package/lib/yamo/emitter.d.ts +8 -35
  100. package/lib/yamo/emitter.js +1 -0
  101. package/lib/yamo/emitter.ts +77 -155
  102. package/lib/yamo/index.d.ts +14 -0
  103. package/lib/yamo/index.js +14 -0
  104. package/lib/yamo/index.ts +16 -0
  105. package/lib/yamo/schema.d.ts +8 -10
  106. package/lib/yamo/schema.js +1 -0
  107. package/lib/yamo/schema.ts +82 -114
  108. package/package.json +5 -2
@@ -1,3 +1,4 @@
1
+ // @ts-nocheck
1
2
  /**
2
3
  * LanceDB Client Wrapper
3
4
  *
@@ -9,670 +10,510 @@
9
10
  *
10
11
  * @class LanceDBClient
11
12
  */
12
-
13
13
  import * as lancedb from "@lancedb/lancedb";
14
14
  import fs from "fs";
15
15
  import path from "path";
16
- import {
17
- createMemoryTableWithDimension,
18
- DEFAULT_VECTOR_DIMENSION,
19
- } from "../schema.js";
16
+ import { createMemoryTableWithDimension, DEFAULT_VECTOR_DIMENSION, } from "../schema.js";
20
17
  import { StorageError, QueryError } from "./errors.js";
21
18
  import { createLogger } from "../../utils/logger.js";
22
-
23
19
  const logger = createLogger("lancedb-client");
24
-
25
- /**
26
- * LanceDB driver interface for dependency injection/testing
27
- */
28
- export interface LanceDBDriver {
29
- connect(uri: string): Promise<lancedb.Connection>;
30
- }
31
-
32
- export interface ClientConfig {
33
- uri?: string;
34
- tableName?: string;
35
- maxRetries?: number;
36
- retryDelay?: number;
37
- vectorDimension?: number;
38
- driver?: LanceDBDriver; // For testing injection
39
- }
40
-
41
- export interface MemoryEntry {
42
- id: string;
43
- vector: number[];
44
- content: string;
45
- metadata?: string | Record<string, any> | null;
46
- created_at?: Date | string;
47
- updated_at?: Date | string;
48
- }
49
-
50
- export interface SearchResult extends MemoryEntry {
51
- score?: number;
52
- }
53
-
54
- export interface SearchOptions {
55
- limit?: number;
56
- metric?: string;
57
- nprobes?: number;
58
- filter?: string | null;
59
- }
60
-
61
- export interface Stats {
62
- tableName: string;
63
- uri: string;
64
- count: number;
65
- isConnected: boolean;
66
- }
67
-
68
20
  /**
69
21
  * LanceDB Client wrapper class
70
22
  */
71
23
  export class LanceDBClient {
72
- uri: string;
73
- tableName: string;
74
- maxRetries: number;
75
- retryDelay: number;
76
- vectorDimension: number;
77
- driver: LanceDBDriver;
78
- db: lancedb.Connection | null;
79
- table: lancedb.Table | null;
80
- isConnected: boolean;
81
- private tempDir?: string; // Track temp dirs for cleanup
82
-
83
- /**
84
- * Create a new LanceDBClient instance
85
- * @param {Object} [config={}] - Configuration object
86
- */
87
- constructor(config: ClientConfig = {}) {
88
- this.uri =
89
- (config && config.uri) || process.env.LANCEDB_URI || "./data/lancedb";
90
- this.tableName =
91
- (config && config.tableName) ||
92
- process.env.LANCEDB_MEMORY_TABLE ||
93
- "memory_entries";
94
- this.maxRetries = (config && config.maxRetries) || 3;
95
- this.retryDelay = (config && config.retryDelay) || 1000;
96
- this.vectorDimension =
97
- (config && config.vectorDimension) || DEFAULT_VECTOR_DIMENSION;
98
- this.driver = (config && config.driver) || lancedb;
99
-
100
- // Connection state
101
- this.db = null;
102
- this.table = null;
103
- this.isConnected = false;
104
- }
105
-
106
- /**
107
- * Connect to LanceDB and initialize table
108
- * Creates the database directory and table if they don't exist
109
- * @returns {Promise<void>}
110
- * @throws {StorageError} If connection fails after retries
111
- */
112
- async connect(): Promise<void> {
113
- if (this.isConnected) {
114
- return; // Already connected
24
+ uri;
25
+ tableName;
26
+ maxRetries;
27
+ retryDelay;
28
+ vectorDimension;
29
+ driver;
30
+ db;
31
+ table;
32
+ isConnected;
33
+ tempDir; // Track temp dirs for cleanup
34
+ /**
35
+ * Create a new LanceDBClient instance
36
+ * @param {Object} [config={}] - Configuration object
37
+ */
38
+ constructor(config = {}) {
39
+ this.uri =
40
+ (config && config.uri) || process.env.LANCEDB_URI || "./data/lancedb";
41
+ this.tableName =
42
+ (config && config.tableName) ||
43
+ process.env.LANCEDB_MEMORY_TABLE ||
44
+ "memory_entries";
45
+ this.maxRetries = (config && config.maxRetries) || 3;
46
+ this.retryDelay = (config && config.retryDelay) || 1000;
47
+ this.vectorDimension =
48
+ (config && config.vectorDimension) || DEFAULT_VECTOR_DIMENSION;
49
+ this.driver = (config && config.driver) || lancedb;
50
+ // Connection state
51
+ this.db = null;
52
+ this.table = null;
53
+ this.isConnected = false;
115
54
  }
116
-
117
- let lastError: unknown = null;
118
-
119
- for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
120
- try {
121
- // Handle :memory: specially - LanceDB doesn't support true in-memory DBs
122
- // Use OS temp directory for isolation
123
- let dbPath = this.uri;
124
- if (this.uri === ":memory:") {
125
- const os = await import("os");
126
- const crypto = await import("crypto");
127
- const randomId = crypto.randomBytes(8).toString("hex");
128
- dbPath = path.join(os.tmpdir(), `yamo-memory-${randomId}`);
129
- this.tempDir = dbPath; // Track for cleanup
55
+ /**
56
+ * Connect to LanceDB and initialize table
57
+ * Creates the database directory and table if they don't exist
58
+ * @returns {Promise<void>}
59
+ * @throws {StorageError} If connection fails after retries
60
+ */
61
+ async connect() {
62
+ if (this.isConnected) {
63
+ return; // Already connected
130
64
  }
131
-
132
- // Ensure database directory exists
133
- const resolvedPath = path.resolve(dbPath);
134
- const dbDir = path.dirname(resolvedPath);
135
-
136
- if (!fs.existsSync(dbDir)) {
137
- fs.mkdirSync(dbDir, { recursive: true });
65
+ let lastError = null;
66
+ for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
67
+ try {
68
+ // Handle :memory: specially - LanceDB doesn't support true in-memory DBs
69
+ // Use OS temp directory for isolation
70
+ let dbPath = this.uri;
71
+ if (this.uri === ":memory:") {
72
+ const os = await import("os");
73
+ const crypto = await import("crypto");
74
+ const randomId = crypto.randomBytes(8).toString("hex");
75
+ dbPath = path.join(os.tmpdir(), `yamo-memory-${randomId}`);
76
+ this.tempDir = dbPath; // Track for cleanup
77
+ }
78
+ // Ensure database directory exists
79
+ const resolvedPath = path.resolve(dbPath);
80
+ const dbDir = path.dirname(resolvedPath);
81
+ if (!fs.existsSync(dbDir)) {
82
+ fs.mkdirSync(dbDir, { recursive: true });
83
+ }
84
+ // Connect to database
85
+ this.db = await this.driver.connect(dbPath);
86
+ // Initialize table with dynamic dimension (creates if doesn't exist, opens if it does)
87
+ if (this.db) {
88
+ this.table = await createMemoryTableWithDimension(this.db, this.tableName, this.vectorDimension);
89
+ }
90
+ this.isConnected = true;
91
+ return;
92
+ }
93
+ catch (error) {
94
+ lastError = error;
95
+ const msg = error.message.toLowerCase();
96
+ // Specific check for locking/busy errors
97
+ if (msg.includes("busy") ||
98
+ msg.includes("locked") ||
99
+ msg.includes("resource temporarily unavailable")) {
100
+ logger.warn({ attempt, maxRetries: this.maxRetries, uri: this.uri }, "Database is locked by another process, retrying");
101
+ await this._sleep(this.retryDelay * attempt + Math.random() * 1000);
102
+ continue;
103
+ }
104
+ if (attempt < this.maxRetries) {
105
+ // Wait before retrying for other errors
106
+ await this._sleep(this.retryDelay * attempt);
107
+ }
108
+ }
138
109
  }
139
-
140
- // Connect to database
141
- this.db = await this.driver.connect(dbPath);
142
-
143
- // Initialize table with dynamic dimension (creates if doesn't exist, opens if it does)
144
- if (this.db) {
145
- this.table = await createMemoryTableWithDimension(
146
- this.db,
147
- this.tableName,
148
- this.vectorDimension,
149
- );
150
- }
151
-
152
- this.isConnected = true;
153
- return;
154
- } catch (error: any) {
155
- lastError = error;
156
- const msg = error.message.toLowerCase();
157
-
158
- // Specific check for locking/busy errors
159
- if (
160
- msg.includes("busy") ||
161
- msg.includes("locked") ||
162
- msg.includes("resource temporarily unavailable")
163
- ) {
164
- logger.warn(
165
- { attempt, maxRetries: this.maxRetries, uri: this.uri },
166
- "Database is locked by another process, retrying",
167
- );
168
- await this._sleep(this.retryDelay * attempt + Math.random() * 1000);
169
- continue;
170
- }
171
-
172
- if (attempt < this.maxRetries) {
173
- // Wait before retrying for other errors
174
- await this._sleep(this.retryDelay * attempt);
175
- }
176
- }
177
- }
178
-
179
- // All retries failed
180
- const errorMessage =
181
- lastError instanceof Error ? lastError.message : String(lastError);
182
- throw new StorageError(
183
- `Failed to connect to LanceDB after ${this.maxRetries} attempts: ${errorMessage}`,
184
- { uri: this.uri, tableName: this.tableName, originalError: lastError },
185
- );
186
- }
187
-
188
- /**
189
- * Disconnect from LanceDB
190
- * @returns {Promise<void>}
191
- */
192
- disconnect(): void {
193
- this.db = null;
194
- this.table = null;
195
- this.isConnected = false;
196
-
197
- // Clean up temp directory if we created one for :memory:
198
- if (this.tempDir && fs.existsSync(this.tempDir)) {
199
- try {
200
- fs.rmSync(this.tempDir, { recursive: true, force: true });
201
- } catch (_e) {
202
- // Best-effort cleanup, ignore errors
203
- }
204
- this.tempDir = undefined;
205
- }
206
- }
207
-
208
- /**
209
- * Add a single memory entry
210
- * @param {Object} data - Entry data
211
- * @returns {Promise<Object>} Result with id and success status
212
- * @throws {StorageError} If add operation fails
213
- */
214
- async add(data: MemoryEntry): Promise<{ id: string; success: boolean }> {
215
- if (!this.isConnected) {
216
- await this.connect();
217
- }
218
-
219
- this._validateRecord(data);
220
-
221
- return this._retryOperation(async () => {
222
- const record = {
223
- ...data,
224
- created_at: new Date(),
225
- updated_at: new Date(),
226
- };
227
-
228
- if (!this.table) {
229
- throw new StorageError("Table not initialized");
230
- }
231
-
232
- await this.table.add([record]);
233
-
234
- return {
235
- id: data.id,
236
- success: true,
237
- };
238
- });
239
- }
240
-
241
- /**
242
- * Add multiple memory entries in batch
243
- * @param {Array<Object>} records - Array of entry data objects
244
- * @returns {Promise<Object>} Result with count of added records
245
- * @throws {StorageError} If batch add fails
246
- */
247
- async addBatch(
248
- records: MemoryEntry[],
249
- ): Promise<{ count: number; success: boolean }> {
250
- if (!this.isConnected) {
251
- await this.connect();
252
- }
253
-
254
- if (!Array.isArray(records) || records.length === 0) {
255
- throw new StorageError("Records must be a non-empty array");
256
- }
257
-
258
- // Validate all records
259
- records.forEach((record) => this._validateRecord(record));
260
-
261
- return this._retryOperation(async () => {
262
- const now = new Date();
263
- const recordsWithTimestamps = records.map((record) => ({
264
- ...record,
265
- created_at: now,
266
- updated_at: now,
267
- }));
268
-
269
- if (!this.table) {
270
- throw new StorageError("Table not initialized");
271
- }
272
-
273
- await this.table.add(recordsWithTimestamps);
274
-
275
- return {
276
- count: records.length,
277
- success: true,
278
- };
279
- });
280
- }
281
-
282
- /**
283
- * Search for similar vectors
284
- * @param {Array<number>} vector - Query vector (384 dimensions)
285
- * @param {Object} options - Search options
286
- * @returns {Promise<Array<Object>>} Array of search results with scores
287
- * @throws {QueryError} If search fails
288
- */
289
- async search(
290
- vector: number[],
291
- options: SearchOptions = {},
292
- ): Promise<SearchResult[]> {
293
- if (!this.isConnected) {
294
- await this.connect();
110
+ // All retries failed
111
+ const errorMessage = lastError instanceof Error ? lastError.message : String(lastError);
112
+ throw new StorageError(`Failed to connect to LanceDB after ${this.maxRetries} attempts: ${errorMessage}`, { uri: this.uri, tableName: this.tableName, originalError: lastError });
295
113
  }
296
-
297
- this._validateVector(vector);
298
-
299
- const { limit = 10, nprobes = 20, filter = null } = options;
300
-
301
- return this._retryOperation(async () => {
302
- if (!this.table) {
303
- throw new StorageError("Table not initialized");
304
- }
305
-
306
- // Build the search query with all applicable options
307
- let query: any = this.table.search(vector);
308
-
309
- // Apply nprobes for IVF index (if supported)
310
- if (nprobes && typeof nprobes === "number") {
311
- try {
312
- query = query.nprobes(nprobes);
313
- } catch (_e) {
314
- // ignore
114
+ /**
115
+ * Disconnect from LanceDB
116
+ * @returns {Promise<void>}
117
+ */
118
+ disconnect() {
119
+ this.db = null;
120
+ this.table = null;
121
+ this.isConnected = false;
122
+ // Clean up temp directory if we created one for :memory:
123
+ if (this.tempDir && fs.existsSync(this.tempDir)) {
124
+ try {
125
+ fs.rmSync(this.tempDir, { recursive: true, force: true });
126
+ }
127
+ catch (_e) {
128
+ // Best-effort cleanup, ignore errors
129
+ }
130
+ this.tempDir = undefined;
315
131
  }
316
- }
317
-
318
- // Apply filter if provided
319
- if (filter) {
320
- query = query.where(filter);
321
- }
322
-
323
- // Execute search with limit
324
- const resultsArray = await query.limit(limit).toArray();
325
-
326
- return resultsArray.map((row: any) => ({
327
- id: row.id,
328
- content: row.content,
329
- metadata: row.metadata ? JSON.parse(row.metadata) : null,
330
- // _distance is internal LanceDB property
331
- score: row._distance,
332
- created_at: row.created_at,
333
- vector: row.vector, // Include vector if returned
334
- }));
335
- });
336
- }
337
-
338
- /**
339
- * Get a record by ID
340
- * @param {string} id - Record ID
341
- * @returns {Promise<Object|null>} Record object or null if not found
342
- * @throws {QueryError} If query fails
343
- */
344
- async getById(id: string): Promise<MemoryEntry | null> {
345
- if (!this.isConnected) {
346
- await this.connect();
347
132
  }
348
-
349
- return this._retryOperation(async () => {
350
- if (!this.table) {
351
- throw new StorageError("Table not initialized");
352
- }
353
-
354
- // Use a simple filter query instead of search
355
- const resultsArray = await this.table
356
- .query()
357
- .where(`id == '${this._sanitizeId(id)}'`)
358
- .toArray();
359
-
360
- if (resultsArray.length === 0) {
361
- return null;
362
- }
363
-
364
- const record = resultsArray[0];
365
- return {
366
- id: record.id as string,
367
- vector: record.vector as number[],
368
- content: record.content as string,
369
- metadata: record.metadata
370
- ? JSON.parse(record.metadata as string)
371
- : null,
372
- created_at: record.created_at,
373
- updated_at: record.updated_at,
374
- };
375
- });
376
- }
377
-
378
- /**
379
- * Get all records from the database
380
- * @param {Object} options - Options
381
- * @returns {Promise<Array<Object>>} Array of all records
382
- */
383
- async getAll(options: { limit?: number } = {}): Promise<MemoryEntry[]> {
384
- if (!this.isConnected) {
385
- await this.connect();
133
+ /**
134
+ * Add a single memory entry
135
+ * @param {Object} data - Entry data
136
+ * @returns {Promise<Object>} Result with id and success status
137
+ * @throws {StorageError} If add operation fails
138
+ */
139
+ async add(data) {
140
+ if (!this.isConnected) {
141
+ await this.connect();
142
+ }
143
+ this._validateRecord(data);
144
+ return this._retryOperation(async () => {
145
+ const record = {
146
+ ...data,
147
+ created_at: new Date(),
148
+ updated_at: new Date(),
149
+ };
150
+ if (!this.table) {
151
+ throw new StorageError("Table not initialized");
152
+ }
153
+ await this.table.add([record]);
154
+ return {
155
+ id: data.id,
156
+ success: true,
157
+ };
158
+ });
386
159
  }
387
-
388
- return this._retryOperation(async () => {
389
- if (!this.table) {
390
- throw new StorageError("Table not initialized");
391
- }
392
-
393
- let query = this.table.query();
394
-
395
- if (options.limit) {
396
- query = query.limit(options.limit);
397
- }
398
-
399
- const resultsArray = await query.toArray();
400
-
401
- return resultsArray.map((row: any) => ({
402
- id: row.id,
403
- content: row.content,
404
- metadata: row.metadata ? JSON.parse(row.metadata) : null,
405
- vector: row.vector,
406
- created_at: row.created_at,
407
- updated_at: row.updated_at,
408
- }));
409
- });
410
- }
411
-
412
- /**
413
- * Delete a record by ID
414
- * @param {string} id - Record ID to delete
415
- * @returns {Promise<Object>} Result with success status
416
- * @throws {StorageError} If delete fails
417
- */
418
- async delete(id: string): Promise<{ id: string; success: boolean }> {
419
- if (!this.isConnected) {
420
- await this.connect();
160
+ /**
161
+ * Add multiple memory entries in batch
162
+ * @param {Array<Object>} records - Array of entry data objects
163
+ * @returns {Promise<Object>} Result with count of added records
164
+ * @throws {StorageError} If batch add fails
165
+ */
166
+ async addBatch(records) {
167
+ if (!this.isConnected) {
168
+ await this.connect();
169
+ }
170
+ if (!Array.isArray(records) || records.length === 0) {
171
+ throw new StorageError("Records must be a non-empty array");
172
+ }
173
+ // Validate all records
174
+ records.forEach((record) => this._validateRecord(record));
175
+ return this._retryOperation(async () => {
176
+ const now = new Date();
177
+ const recordsWithTimestamps = records.map((record) => ({
178
+ ...record,
179
+ created_at: now,
180
+ updated_at: now,
181
+ }));
182
+ if (!this.table) {
183
+ throw new StorageError("Table not initialized");
184
+ }
185
+ await this.table.add(recordsWithTimestamps);
186
+ return {
187
+ count: records.length,
188
+ success: true,
189
+ };
190
+ });
421
191
  }
422
-
423
- return this._retryOperation(async () => {
424
- if (!this.table) {
425
- throw new StorageError("Table not initialized");
426
- }
427
-
428
- await this.table.delete(`id == '${this._sanitizeId(id)}'`);
429
-
430
- return {
431
- id,
432
- success: true,
433
- };
434
- });
435
- }
436
-
437
- /**
438
- * Update an existing record
439
- * @param {string} id - Record ID to update
440
- * @param {Object} data - Updated data fields
441
- * @returns {Promise<Object>} Result with success status
442
- * @throws {StorageError} If update fails
443
- */
444
- async update(
445
- id: string,
446
- data: Partial<MemoryEntry>,
447
- ): Promise<{ id: string; success: boolean }> {
448
- if (!this.isConnected) {
449
- await this.connect();
192
+ /**
193
+ * Search for similar vectors
194
+ * @param {Array<number>} vector - Query vector (384 dimensions)
195
+ * @param {Object} options - Search options
196
+ * @returns {Promise<Array<Object>>} Array of search results with scores
197
+ * @throws {QueryError} If search fails
198
+ */
199
+ async search(vector, options = {}) {
200
+ if (!this.isConnected) {
201
+ await this.connect();
202
+ }
203
+ this._validateVector(vector);
204
+ const { limit = 10, nprobes = 20, filter = null } = options;
205
+ return this._retryOperation(async () => {
206
+ if (!this.table) {
207
+ throw new StorageError("Table not initialized");
208
+ }
209
+ // Build the search query with all applicable options
210
+ let query = this.table.search(vector);
211
+ // Apply nprobes for IVF index (if supported)
212
+ if (nprobes && typeof nprobes === "number") {
213
+ try {
214
+ query = query.nprobes(nprobes);
215
+ }
216
+ catch (_e) {
217
+ // ignore
218
+ }
219
+ }
220
+ // Apply filter if provided
221
+ if (filter) {
222
+ query = query.where(filter);
223
+ }
224
+ // Execute search with limit
225
+ const resultsArray = await query.limit(limit).toArray();
226
+ return resultsArray.map((row) => ({
227
+ id: row.id,
228
+ content: row.content,
229
+ metadata: row.metadata ? JSON.parse(row.metadata) : null,
230
+ // _distance is internal LanceDB property
231
+ score: row._distance,
232
+ created_at: row.created_at,
233
+ vector: row.vector, // Include vector if returned
234
+ }));
235
+ });
450
236
  }
451
-
452
- return this._retryOperation(async () => {
453
- const updateData = {
454
- ...data,
455
- updated_at: new Date(),
456
- };
457
-
458
- if (!this.table) {
459
- throw new StorageError("Table not initialized");
460
- }
461
-
462
- // Update API expects filter and values separately
463
- await this.table.update({
464
- where: `id == '${this._sanitizeId(id)}'`,
465
- values: updateData,
466
- } as any);
467
-
468
- return {
469
- id,
470
- success: true,
471
- };
472
- });
473
- }
474
-
475
- /**
476
- * Get database statistics
477
- * @returns {Promise<Object>} Statistics including count, size, etc.
478
- * @throws {QueryError} If stats query fails
479
- */
480
- async getStats(): Promise<Stats> {
481
- if (!this.isConnected) {
482
- await this.connect();
237
+ /**
238
+ * Get a record by ID
239
+ * @param {string} id - Record ID
240
+ * @returns {Promise<Object|null>} Record object or null if not found
241
+ * @throws {QueryError} If query fails
242
+ */
243
+ async getById(id) {
244
+ if (!this.isConnected) {
245
+ await this.connect();
246
+ }
247
+ return this._retryOperation(async () => {
248
+ if (!this.table) {
249
+ throw new StorageError("Table not initialized");
250
+ }
251
+ // Use a simple filter query instead of search
252
+ const resultsArray = await this.table
253
+ .query()
254
+ .where(`id == '${this._sanitizeId(id)}'`)
255
+ .toArray();
256
+ if (resultsArray.length === 0) {
257
+ return null;
258
+ }
259
+ const record = resultsArray[0];
260
+ return {
261
+ id: record.id,
262
+ vector: record.vector,
263
+ content: record.content,
264
+ metadata: record.metadata
265
+ ? JSON.parse(record.metadata)
266
+ : null,
267
+ created_at: record.created_at,
268
+ updated_at: record.updated_at,
269
+ };
270
+ });
483
271
  }
484
-
485
- return this._retryOperation(async () => {
486
- if (!this.table) {
487
- throw new StorageError("Table not initialized");
488
- }
489
-
490
- let count = 0;
491
- try {
492
- if (typeof (this.table as any).count === "function") {
493
- count = await (this.table as any).count();
494
- } else {
495
- // Fallback: use a limited query to avoid loading all records
496
- const countResults = await (this.table as any).query().execute();
497
- for await (const batch of countResults) {
498
- count += batch.numRows;
499
- }
272
+ /**
273
+ * Get all records from the database
274
+ * @param {Object} options - Options
275
+ * @returns {Promise<Array<Object>>} Array of all records
276
+ */
277
+ async getAll(options = {}) {
278
+ if (!this.isConnected) {
279
+ await this.connect();
500
280
  }
501
- } catch (_countError) {
502
- count = -1;
503
- }
504
-
505
- return {
506
- tableName: this.tableName,
507
- uri: this.uri,
508
- count: count,
509
- isConnected: this.isConnected,
510
- };
511
- });
512
- }
513
-
514
- /**
515
- * Sanitize an ID to prevent SQL injection
516
- * Removes any characters that aren't alphanumeric, underscore, or hyphen
517
- * @private
518
- */
519
- _sanitizeId(id: string): string {
520
- // Remove any characters that aren't alphanumeric, underscore, or hyphen
521
- // This prevents SQL injection via raw string interpolation in queries
522
- return id.replace(/[^a-zA-Z0-9_-]/g, "");
523
- }
524
-
525
- /**
526
- * Validate a record object
527
- * @private
528
- */
529
- _validateRecord(record: any): void {
530
- if (!record || typeof record !== "object") {
531
- throw new StorageError("Record must be an object");
281
+ return this._retryOperation(async () => {
282
+ if (!this.table) {
283
+ throw new StorageError("Table not initialized");
284
+ }
285
+ let query = this.table.query();
286
+ if (options.limit) {
287
+ query = query.limit(options.limit);
288
+ }
289
+ const resultsArray = await query.toArray();
290
+ return resultsArray.map((row) => ({
291
+ id: row.id,
292
+ content: row.content,
293
+ metadata: row.metadata ? JSON.parse(row.metadata) : null,
294
+ vector: row.vector,
295
+ created_at: row.created_at,
296
+ updated_at: row.updated_at,
297
+ }));
298
+ });
532
299
  }
533
-
534
- if (!record.id) {
535
- throw new StorageError("Record must have an id field");
300
+ /**
301
+ * Delete a record by ID
302
+ * @param {string} id - Record ID to delete
303
+ * @returns {Promise<Object>} Result with success status
304
+ * @throws {StorageError} If delete fails
305
+ */
306
+ async delete(id) {
307
+ if (!this.isConnected) {
308
+ await this.connect();
309
+ }
310
+ return this._retryOperation(async () => {
311
+ if (!this.table) {
312
+ throw new StorageError("Table not initialized");
313
+ }
314
+ await this.table.delete(`id == '${this._sanitizeId(id)}'`);
315
+ return {
316
+ id,
317
+ success: true,
318
+ };
319
+ });
536
320
  }
537
-
538
- if (!record.content) {
539
- throw new StorageError("Record must have a content field");
321
+ /**
322
+ * Update an existing record
323
+ * @param {string} id - Record ID to update
324
+ * @param {Object} data - Updated data fields
325
+ * @returns {Promise<Object>} Result with success status
326
+ * @throws {StorageError} If update fails
327
+ */
328
+ async update(id, data) {
329
+ if (!this.isConnected) {
330
+ await this.connect();
331
+ }
332
+ return this._retryOperation(async () => {
333
+ const updateData = {
334
+ ...data,
335
+ updated_at: new Date(),
336
+ };
337
+ if (!this.table) {
338
+ throw new StorageError("Table not initialized");
339
+ }
340
+ // Update API expects filter and values separately
341
+ await this.table.update({
342
+ where: `id == '${this._sanitizeId(id)}'`,
343
+ values: updateData,
344
+ });
345
+ return {
346
+ id,
347
+ success: true,
348
+ };
349
+ });
540
350
  }
541
-
542
- if (!record.vector) {
543
- throw new StorageError("Record must have a vector field");
351
+ /**
352
+ * Get database statistics
353
+ * @returns {Promise<Object>} Statistics including count, size, etc.
354
+ * @throws {QueryError} If stats query fails
355
+ */
356
+ async getStats() {
357
+ if (!this.isConnected) {
358
+ await this.connect();
359
+ }
360
+ return this._retryOperation(async () => {
361
+ if (!this.table) {
362
+ throw new StorageError("Table not initialized");
363
+ }
364
+ let count = 0;
365
+ try {
366
+ if (typeof this.table.count === "function") {
367
+ count = await this.table.count();
368
+ }
369
+ else {
370
+ // Fallback: use a limited query to avoid loading all records
371
+ const countResults = await this.table.query().execute();
372
+ for await (const batch of countResults) {
373
+ count += batch.numRows;
374
+ }
375
+ }
376
+ }
377
+ catch (_countError) {
378
+ count = -1;
379
+ }
380
+ return {
381
+ tableName: this.tableName,
382
+ uri: this.uri,
383
+ count: count,
384
+ isConnected: this.isConnected,
385
+ };
386
+ });
544
387
  }
545
-
546
- this._validateVector(record.vector);
547
- }
548
-
549
- /**
550
- * Validate a vector array
551
- * @private
552
- */
553
- _validateVector(vector: any): void {
554
- if (!Array.isArray(vector)) {
555
- throw new QueryError("Vector must be an array");
388
+ /**
389
+ * Sanitize an ID to prevent SQL injection
390
+ * Removes any characters that aren't alphanumeric, underscore, or hyphen
391
+ * @private
392
+ */
393
+ _sanitizeId(id) {
394
+ // Remove any characters that aren't alphanumeric, underscore, or hyphen
395
+ // This prevents SQL injection via raw string interpolation in queries
396
+ return id.replace(/[^a-zA-Z0-9_-]/g, "");
556
397
  }
557
-
558
- // Expected dimension for all-MiniLM-L6-v2 model
559
- // This should ideally match this.vectorDimension
560
- // But keeping as is to match original logic or update to use this.vectorDimension
561
- const expectedDim = this.vectorDimension || 384;
562
-
563
- if (vector.length !== expectedDim) {
564
- // Loose validation for now as different models have different dims
565
- // throw new QueryError(`Vector must have ${expectedDim} dimensions, got ${vector.length}`);
398
+ /**
399
+ * Validate a record object
400
+ * @private
401
+ */
402
+ _validateRecord(record) {
403
+ if (!record || typeof record !== "object") {
404
+ throw new StorageError("Record must be an object");
405
+ }
406
+ if (!record.id) {
407
+ throw new StorageError("Record must have an id field");
408
+ }
409
+ if (!record.content) {
410
+ throw new StorageError("Record must have a content field");
411
+ }
412
+ if (!record.vector) {
413
+ throw new StorageError("Record must have a vector field");
414
+ }
415
+ this._validateVector(record.vector);
566
416
  }
567
-
568
- // Validate all elements are numbers
569
- for (let i = 0; i < vector.length; i++) {
570
- if (typeof vector[i] !== "number" || isNaN(vector[i])) {
571
- throw new QueryError(`Vector element ${i} is not a valid number`);
572
- }
417
+ /**
418
+ * Validate a vector array
419
+ * @private
420
+ */
421
+ _validateVector(vector) {
422
+ if (!Array.isArray(vector)) {
423
+ throw new QueryError("Vector must be an array");
424
+ }
425
+ // Expected dimension for all-MiniLM-L6-v2 model
426
+ // This should ideally match this.vectorDimension
427
+ // But keeping as is to match original logic or update to use this.vectorDimension
428
+ const expectedDim = this.vectorDimension || 384;
429
+ if (vector.length !== expectedDim) {
430
+ // Loose validation for now as different models have different dims
431
+ // throw new QueryError(`Vector must have ${expectedDim} dimensions, got ${vector.length}`);
432
+ }
433
+ // Validate all elements are numbers
434
+ for (let i = 0; i < vector.length; i++) {
435
+ if (typeof vector[i] !== "number" || isNaN(vector[i])) {
436
+ throw new QueryError(`Vector element ${i} is not a valid number`);
437
+ }
438
+ }
573
439
  }
574
- }
575
-
576
- /**
577
- * Sleep for a specified duration
578
- * @private
579
- */
580
- _sleep(ms: number): Promise<void> {
581
- return new Promise((resolve) => setTimeout(resolve, ms));
582
- }
583
-
584
- /**
585
- * Check if an error is retryable (transient network/connection issues)
586
- * @private
587
- */
588
- _isRetryableError(error: any): boolean {
589
- if (!error || !error.message) {
590
- return false;
440
+ /**
441
+ * Sleep for a specified duration
442
+ * @private
443
+ */
444
+ _sleep(ms) {
445
+ return new Promise((resolve) => setTimeout(resolve, ms));
591
446
  }
592
-
593
- const message = error.message.toLowerCase();
594
-
595
- // Network-related errors
596
- const retryablePatterns = [
597
- "econnreset", // Connection reset by peer
598
- "etimedout", // Operation timed out
599
- "enotfound", // DNS resolution failed
600
- "econnrefused", // Connection refused
601
- "enetunreach", // Network unreachable
602
- "ehostunreach", // Host unreachable
603
- "socket hang up", // Socket closed unexpectedly
604
- "network error", // Generic network error
605
- "failed to fetch", // Fetch/network failure
606
- "timeout", // Timeout occurred
607
- ];
608
-
609
- // Check for network patterns
610
- const hasNetworkPattern = retryablePatterns.some((pattern) =>
611
- message.includes(pattern),
612
- );
613
-
614
- // Check for 5xx HTTP errors (server-side errors that may be transient)
615
- const hasServerError = /5\d{2}/.test(message);
616
-
617
- // Check for specific LanceDB/lancedb errors that may be transient
618
- const lancedbRetryable = [
619
- "connection",
620
- "database closed",
621
- "table not found",
622
- "lock",
623
- "busy",
624
- "temporary",
625
- ].some((pattern) => message.includes(pattern));
626
-
627
- return hasNetworkPattern || hasServerError || lancedbRetryable;
628
- }
629
-
630
- /**
631
- * Retry an operation with exponential backoff
632
- * @private
633
- */
634
- async _retryOperation<T>(
635
- operation: () => Promise<T>,
636
- maxRetries?: number,
637
- baseDelay?: number,
638
- ): Promise<T> {
639
- const max = maxRetries ?? this.maxRetries;
640
- const delay = baseDelay ?? this.retryDelay;
641
- let lastError: any = null;
642
-
643
- for (let attempt = 1; attempt <= max; attempt++) {
644
- try {
645
- return await operation();
646
- } catch (error) {
647
- lastError = error;
648
-
649
- if (!this._isRetryableError(error)) {
650
- throw error;
447
+ /**
448
+ * Check if an error is retryable (transient network/connection issues)
449
+ * @private
450
+ */
451
+ _isRetryableError(error) {
452
+ if (!error || !error.message) {
453
+ return false;
651
454
  }
652
-
653
- if (attempt === max) {
654
- throw error;
455
+ const message = error.message.toLowerCase();
456
+ // Network-related errors
457
+ const retryablePatterns = [
458
+ "econnreset", // Connection reset by peer
459
+ "etimedout", // Operation timed out
460
+ "enotfound", // DNS resolution failed
461
+ "econnrefused", // Connection refused
462
+ "enetunreach", // Network unreachable
463
+ "ehostunreach", // Host unreachable
464
+ "socket hang up", // Socket closed unexpectedly
465
+ "network error", // Generic network error
466
+ "failed to fetch", // Fetch/network failure
467
+ "timeout", // Timeout occurred
468
+ ];
469
+ // Check for network patterns
470
+ const hasNetworkPattern = retryablePatterns.some((pattern) => message.includes(pattern));
471
+ // Check for 5xx HTTP errors (server-side errors that may be transient)
472
+ const hasServerError = /5\d{2}/.test(message);
473
+ // Check for specific LanceDB/lancedb errors that may be transient
474
+ const lancedbRetryable = [
475
+ "connection",
476
+ "database closed",
477
+ "table not found",
478
+ "lock",
479
+ "busy",
480
+ "temporary",
481
+ ].some((pattern) => message.includes(pattern));
482
+ return hasNetworkPattern || hasServerError || lancedbRetryable;
483
+ }
484
+ /**
485
+ * Retry an operation with exponential backoff
486
+ * @private
487
+ */
488
+ async _retryOperation(operation, maxRetries, baseDelay) {
489
+ const max = maxRetries ?? this.maxRetries;
490
+ const delay = baseDelay ?? this.retryDelay;
491
+ let lastError = null;
492
+ for (let attempt = 1; attempt <= max; attempt++) {
493
+ try {
494
+ return await operation();
495
+ }
496
+ catch (error) {
497
+ lastError = error;
498
+ if (!this._isRetryableError(error)) {
499
+ throw error;
500
+ }
501
+ if (attempt === max) {
502
+ throw error;
503
+ }
504
+ const backoffMs = delay * Math.pow(2, attempt - 1);
505
+ const jitterMs = backoffMs * Math.random() * 0.25;
506
+ const message = error instanceof Error ? error.message : String(error);
507
+ logger.debug({
508
+ attempt,
509
+ max,
510
+ message,
511
+ retryDelayMs: Math.round(backoffMs + jitterMs),
512
+ }, "Retryable error, retrying");
513
+ await this._sleep(backoffMs + jitterMs);
514
+ }
655
515
  }
656
-
657
- const backoffMs = delay * Math.pow(2, attempt - 1);
658
- const jitterMs = backoffMs * Math.random() * 0.25;
659
-
660
- const message = error instanceof Error ? error.message : String(error);
661
- logger.debug(
662
- {
663
- attempt,
664
- max,
665
- message,
666
- retryDelayMs: Math.round(backoffMs + jitterMs),
667
- },
668
- "Retryable error, retrying",
669
- );
670
-
671
- await this._sleep(backoffMs + jitterMs);
672
- }
516
+ throw lastError;
673
517
  }
674
- throw lastError;
675
- }
676
518
  }
677
-
678
519
  export default LanceDBClient;