@yamo/memory-mesh 2.3.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/memory_mesh.js +1 -1
- package/lib/llm/client.d.ts +111 -0
- package/lib/llm/client.js +299 -357
- package/lib/llm/client.ts +413 -0
- package/lib/llm/index.d.ts +17 -0
- package/lib/llm/index.js +15 -8
- package/lib/llm/index.ts +19 -0
- package/lib/memory/adapters/client.d.ts +183 -0
- package/lib/memory/adapters/client.js +518 -0
- package/lib/memory/adapters/client.ts +678 -0
- package/lib/memory/adapters/config.d.ts +137 -0
- package/lib/memory/adapters/config.js +189 -0
- package/lib/memory/adapters/config.ts +259 -0
- package/lib/memory/adapters/errors.d.ts +76 -0
- package/lib/memory/adapters/errors.js +128 -0
- package/lib/memory/adapters/errors.ts +166 -0
- package/lib/memory/context-manager.d.ts +44 -0
- package/lib/memory/context-manager.js +344 -0
- package/lib/memory/context-manager.ts +432 -0
- package/lib/memory/embeddings/factory.d.ts +59 -0
- package/lib/memory/embeddings/factory.js +148 -0
- package/lib/{embeddings/factory.js → memory/embeddings/factory.ts} +69 -28
- package/lib/memory/embeddings/index.d.ts +2 -0
- package/lib/memory/embeddings/index.js +2 -0
- package/lib/memory/embeddings/index.ts +2 -0
- package/lib/memory/embeddings/service.d.ts +164 -0
- package/lib/memory/embeddings/service.js +515 -0
- package/lib/{embeddings/service.js → memory/embeddings/service.ts} +223 -156
- package/lib/memory/index.d.ts +9 -0
- package/lib/memory/index.js +9 -1
- package/lib/memory/index.ts +20 -0
- package/lib/memory/memory-mesh.d.ts +274 -0
- package/lib/memory/memory-mesh.js +1445 -1189
- package/lib/memory/memory-mesh.ts +1803 -0
- package/lib/memory/memory-translator.d.ts +19 -0
- package/lib/memory/memory-translator.js +125 -0
- package/lib/memory/memory-translator.ts +158 -0
- package/lib/memory/schema.d.ts +111 -0
- package/lib/memory/schema.js +183 -0
- package/lib/memory/schema.ts +267 -0
- package/lib/memory/scorer.d.ts +26 -0
- package/lib/memory/scorer.js +77 -0
- package/lib/memory/scorer.ts +95 -0
- package/lib/memory/search/index.d.ts +1 -0
- package/lib/memory/search/index.js +1 -0
- package/lib/memory/search/index.ts +1 -0
- package/lib/memory/search/keyword-search.d.ts +62 -0
- package/lib/memory/search/keyword-search.js +135 -0
- package/lib/{search/keyword-search.js → memory/search/keyword-search.ts} +66 -36
- package/lib/scrubber/config/defaults.d.ts +53 -0
- package/lib/scrubber/config/defaults.js +49 -57
- package/lib/scrubber/config/defaults.ts +117 -0
- package/lib/scrubber/index.d.ts +6 -0
- package/lib/scrubber/index.js +3 -23
- package/lib/scrubber/index.ts +7 -0
- package/lib/scrubber/scrubber.d.ts +61 -0
- package/lib/scrubber/scrubber.js +99 -121
- package/lib/scrubber/scrubber.ts +168 -0
- package/lib/scrubber/stages/chunker.d.ts +13 -0
- package/lib/scrubber/stages/metadata-annotator.d.ts +18 -0
- package/lib/scrubber/stages/normalizer.d.ts +13 -0
- package/lib/scrubber/stages/semantic-filter.d.ts +13 -0
- package/lib/scrubber/stages/structural-cleaner.d.ts +13 -0
- package/lib/scrubber/stages/validator.d.ts +18 -0
- package/lib/scrubber/telemetry.d.ts +36 -0
- package/lib/scrubber/telemetry.js +53 -58
- package/lib/scrubber/telemetry.ts +99 -0
- package/lib/utils/logger.d.ts +29 -0
- package/lib/utils/logger.js +64 -0
- package/lib/utils/logger.ts +85 -0
- package/lib/utils/skill-metadata.d.ts +32 -0
- package/lib/utils/skill-metadata.js +132 -0
- package/lib/utils/skill-metadata.ts +147 -0
- package/lib/yamo/emitter.d.ts +73 -0
- package/lib/yamo/emitter.js +78 -143
- package/lib/yamo/emitter.ts +249 -0
- package/lib/yamo/schema.d.ts +58 -0
- package/lib/yamo/schema.js +81 -108
- package/lib/yamo/schema.ts +165 -0
- package/package.json +11 -8
- package/index.d.ts +0 -111
- package/lib/embeddings/index.js +0 -2
- package/lib/index.js +0 -6
- package/lib/lancedb/client.js +0 -633
- package/lib/lancedb/config.js +0 -215
- package/lib/lancedb/errors.js +0 -144
- package/lib/lancedb/index.js +0 -4
- package/lib/lancedb/schema.js +0 -197
- package/lib/scrubber/errors/scrubber-error.js +0 -43
- package/lib/scrubber/stages/chunker.js +0 -103
- package/lib/scrubber/stages/metadata-annotator.js +0 -74
- package/lib/scrubber/stages/normalizer.js +0 -59
- package/lib/scrubber/stages/semantic-filter.js +0 -61
- package/lib/scrubber/stages/structural-cleaner.js +0 -82
- package/lib/scrubber/stages/validator.js +0 -66
- package/lib/scrubber/utils/hash.js +0 -39
- package/lib/scrubber/utils/html-parser.js +0 -45
- package/lib/scrubber/utils/pattern-matcher.js +0 -63
- package/lib/scrubber/utils/token-counter.js +0 -31
- package/lib/search/index.js +0 -1
- package/lib/utils/index.js +0 -1
- package/lib/yamo/index.js +0 -15
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB Client Wrapper
|
|
3
|
+
*
|
|
4
|
+
* A comprehensive wrapper around LanceDB JavaScript SDK providing:
|
|
5
|
+
* - Connection management (one lazily opened connection) with retries
|
|
6
|
+
* - CRUD operations for memory entries
|
|
7
|
+
* - Vector similarity search with filtering
|
|
8
|
+
* - Database statistics and monitoring
|
|
9
|
+
*
|
|
10
|
+
* @class LanceDBClient
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
14
|
+
import fs from "fs";
|
|
15
|
+
import path from "path";
|
|
16
|
+
import {
|
|
17
|
+
createMemoryTableWithDimension,
|
|
18
|
+
DEFAULT_VECTOR_DIMENSION,
|
|
19
|
+
} from "../schema.js";
|
|
20
|
+
import { StorageError, QueryError } from "./errors.js";
|
|
21
|
+
import { createLogger } from "../../utils/logger.js";
|
|
22
|
+
|
|
23
|
+
// Module-level logger shared by everything in this file; created under the name "lancedb-client".
const logger = createLogger("lancedb-client");
|
|
24
|
+
|
|
25
|
+
/**
 * Minimal contract LanceDBClient needs from a LanceDB driver.
 *
 * The client only ever calls `connect(uri)` on it, which makes it easy to
 * inject a substitute (for example in tests) through `ClientConfig.driver`.
 */
export interface LanceDBDriver {
  /** Opens a connection for the given database URI or path. */
  connect(uri: string): Promise<lancedb.Connection>;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Options accepted by the LanceDBClient constructor. Every field is optional.
 */
export interface ClientConfig {
  /** Database location; falls back to `LANCEDB_URI`, then "./data/lancedb". */
  uri?: string;
  /** Table to open or create; falls back to `LANCEDB_MEMORY_TABLE`, then "memory_entries". */
  tableName?: string;
  /** Number of attempts for connecting and for retried operations (default 3). */
  maxRetries?: number;
  /** Base delay between attempts, in milliseconds (default 1000). */
  retryDelay?: number;
  /** Vector dimension handed to table creation (default `DEFAULT_VECTOR_DIMENSION`). */
  vectorDimension?: number;
  /** Replacement driver, intended for test injection. */
  driver?: LanceDBDriver;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Shape of a memory record passed to and returned from LanceDBClient.
 */
export interface MemoryEntry {
  /** Record identifier. */
  id: string;
  /** Embedding vector stored with the record. */
  vector: Array<number>;
  /** Text content of the record. */
  content: string;
  /** Optional metadata: a string, a plain object, or null. */
  metadata?: null | string | Record<string, any>;
  /** Creation timestamp. */
  created_at?: string | Date;
  /** Last-modification timestamp. */
  updated_at?: string | Date;
}
|
|
49
|
+
|
|
50
|
+
/**
 * A MemoryEntry returned from a vector search.
 */
export interface SearchResult extends MemoryEntry {
  /** Value copied from the row's `_distance` field by `LanceDBClient.search`. */
  score?: number;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Tuning knobs for `LanceDBClient.search`.
 */
export interface SearchOptions {
  /** Maximum number of rows to return (search defaults to 10). */
  limit?: number;
  /**
   * Distance metric name.
   * NOTE(review): the visible `search()` implementation does not read this
   * option — confirm whether it is meant to be applied.
   */
  metric?: string;
  /** Value passed to the query's `nprobes()` (search defaults to 20). */
  nprobes?: number;
  /** Filter expression passed to the query's `where()`; null or empty means no filter. */
  filter?: null | string;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Snapshot returned by `LanceDBClient.getStats`.
 */
export interface Stats {
  /** Name of the table the client operates on. */
  tableName: string;
  /** Database URI the client was configured with. */
  uri: string;
  /** Number of rows in the table, or -1 when counting failed. */
  count: number;
  /** Whether the client considered itself connected when the snapshot was taken. */
  isConnected: boolean;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Wrapper around one LanceDB connection and one memory table.
 *
 * Every public data method connects lazily on first use and runs its table
 * call through `_retryOperation`, so errors that `_isRetryableError`
 * classifies as transient are retried with exponential backoff.
 */
export class LanceDBClient {
  /** Database location as configured (may be the special value ":memory:"). */
  uri: string;
  /** Name of the table opened or created by `connect()`. */
  tableName: string;
  /** Maximum number of attempts for connecting and for retried operations (always >= 1). */
  maxRetries: number;
  /** Base delay between attempts, in milliseconds (may be 0). */
  retryDelay: number;
  /** Vector dimension passed to table creation. */
  vectorDimension: number;
  /** Driver used to open the connection (the lancedb module unless injected). */
  driver: LanceDBDriver;
  /** Open connection, or null while disconnected. */
  db: lancedb.Connection | null;
  /** Open table handle, or null while disconnected. */
  table: lancedb.Table | null;
  /** True once `connect()` has completed successfully. */
  isConnected: boolean;
  /** Temp directory backing a ":memory:" database; removed by `disconnect()`. */
  private tempDir?: string;

  /**
   * Create a new LanceDBClient instance. No I/O happens here; the connection
   * is opened by `connect()` or lazily by the first data method.
   *
   * @param config - Optional settings; missing values fall back to the
   *   environment (`LANCEDB_URI`, `LANCEDB_MEMORY_TABLE`) and then to defaults.
   */
  constructor(config: ClientConfig = {}) {
    // Tolerate an explicit null from untyped callers, as the original guards did.
    const cfg: ClientConfig = config ?? {};

    this.uri = cfg.uri || process.env.LANCEDB_URI || "./data/lancedb";
    this.tableName =
      cfg.tableName || process.env.LANCEDB_MEMORY_TABLE || "memory_entries";

    // At least one attempt is required, otherwise connect() could never run.
    const retries = cfg.maxRetries;
    this.maxRetries =
      typeof retries === "number" && retries >= 1 ? Math.floor(retries) : 3;

    // A delay of 0 is a legitimate setting (e.g. in tests) and must not be
    // replaced by the default, so do not use `||` here.
    const delay = cfg.retryDelay;
    this.retryDelay =
      typeof delay === "number" && Number.isFinite(delay) && delay >= 0
        ? delay
        : 1000;

    this.vectorDimension = cfg.vectorDimension || DEFAULT_VECTOR_DIMENSION;
    this.driver = cfg.driver || lancedb;

    // Connection state
    this.db = null;
    this.table = null;
    this.isConnected = false;
  }

  /**
   * Connect to LanceDB and initialize the table.
   *
   * For local paths the parent directory is created when missing. The special
   * URI ":memory:" is mapped to a random directory under the OS temp dir
   * (LanceDB doesn't support true in-memory DBs); that directory is reused
   * across retry attempts and removed by `disconnect()`.
   *
   * @returns Resolves once the table is open; a no-op when already connected.
   * @throws {StorageError} If every attempt fails.
   */
  async connect(): Promise<void> {
    if (this.isConnected) {
      return; // Already connected
    }

    let lastError: unknown = null;

    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        let dbPath = this.uri;
        if (this.uri === ":memory:") {
          if (!this.tempDir) {
            const os = await import("os");
            const crypto = await import("crypto");
            const randomId = crypto.randomBytes(8).toString("hex");
            this.tempDir = path.join(os.tmpdir(), `yamo-memory-${randomId}`);
          }
          dbPath = this.tempDir;
        }

        // Only local paths need a parent directory; resolving a scheme URI
        // such as "s3://bucket/db" would create a junk directory under cwd.
        if (!this._isRemoteUri(dbPath)) {
          const dbDir = path.dirname(path.resolve(dbPath));
          if (!fs.existsSync(dbDir)) {
            fs.mkdirSync(dbDir, { recursive: true });
          }
        }

        const db = await this.driver.connect(dbPath);

        // Creates the table if it doesn't exist, opens it if it does.
        let table: lancedb.Table | null = null;
        if (db) {
          table = await createMemoryTableWithDimension(
            db,
            this.tableName,
            this.vectorDimension,
          );
        }

        // Commit state only after both steps succeeded.
        this.db = db;
        this.table = table;
        this.isConnected = true;
        return;
      } catch (error: unknown) {
        lastError = error;

        if (attempt >= this.maxRetries) {
          break; // Nothing left to retry; don't sleep before failing.
        }

        const msg = this._errorMessage(error).toLowerCase();
        const isLocked =
          msg.includes("busy") ||
          msg.includes("locked") ||
          msg.includes("resource temporarily unavailable");

        if (isLocked) {
          logger.warn(
            { attempt, maxRetries: this.maxRetries, uri: this.uri },
            "Database is locked by another process, retrying",
          );
          // Extra random jitter so competing processes don't retry in lockstep.
          await this._sleep(this.retryDelay * attempt + Math.random() * 1000);
        } else {
          await this._sleep(this.retryDelay * attempt);
        }
      }
    }

    // All retries failed
    throw new StorageError(
      `Failed to connect to LanceDB after ${this.maxRetries} attempts: ${this._errorMessage(lastError)}`,
      { uri: this.uri, tableName: this.tableName, originalError: lastError },
    );
  }

  /**
   * Disconnect from LanceDB.
   *
   * Synchronous: drops the connection and table references and, for a
   * ":memory:" database, removes the temp directory on a best-effort basis.
   *
   * NOTE(review): the handles are only dereferenced, not explicitly closed —
   * confirm whether the driver needs an explicit close to release resources.
   */
  disconnect(): void {
    this.db = null;
    this.table = null;
    this.isConnected = false;

    const tempDir = this.tempDir;
    this.tempDir = undefined;

    if (tempDir && fs.existsSync(tempDir)) {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (_e) {
        // Best-effort cleanup, ignore errors
      }
    }
  }

  /**
   * Add a single memory entry.
   *
   * @param data - Entry to store; `created_at`/`updated_at` are overwritten
   *   with the current time.
   * @returns The entry id and a success flag.
   * @throws {StorageError} If the record is invalid or the table is not initialized.
   * @throws {QueryError} If the vector is not an array of valid numbers.
   */
  async add(data: MemoryEntry): Promise<{ id: string; success: boolean }> {
    if (!this.isConnected) {
      await this.connect();
    }

    this._validateRecord(data);

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      await this.table.add([this._toStorageRecord(data, new Date())]);

      return { id: data.id, success: true };
    });
  }

  /**
   * Add multiple memory entries in one table call.
   *
   * @param records - Non-empty array of entries; all share one timestamp.
   * @returns The number of records submitted and a success flag.
   * @throws {StorageError} If `records` is empty/not an array, a record is
   *   invalid, or the table is not initialized.
   * @throws {QueryError} If a vector is not an array of valid numbers.
   */
  async addBatch(
    records: MemoryEntry[],
  ): Promise<{ count: number; success: boolean }> {
    if (!this.isConnected) {
      await this.connect();
    }

    if (!Array.isArray(records) || records.length === 0) {
      throw new StorageError("Records must be a non-empty array");
    }

    for (const record of records) {
      this._validateRecord(record);
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      const now = new Date();
      await this.table.add(
        records.map((record) => this._toStorageRecord(record, now)),
      );

      return { count: records.length, success: true };
    });
  }

  /**
   * Search for similar vectors.
   *
   * @param vector - Query vector; its length is not enforced here.
   * @param options - `limit` (default 10), `nprobes` (default 20) and an
   *   optional `filter` expression. NOTE(review): `options.metric` is
   *   currently not applied to the query.
   * @returns Matching rows; `score` carries the row's `_distance` value.
   * @throws {QueryError} If the vector is not an array of valid numbers.
   * @throws {StorageError} If the table is not initialized.
   */
  async search(
    vector: number[],
    options: SearchOptions = {},
  ): Promise<SearchResult[]> {
    if (!this.isConnected) {
      await this.connect();
    }

    this._validateVector(vector);

    const { limit = 10, nprobes = 20, filter = null } = options;

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      // The builder type depends on the query kind, so it is kept untyped here.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      let query: any = this.table.search(vector);

      // nprobes only applies to some query/index kinds; skip it when rejected.
      if (nprobes && typeof nprobes === "number") {
        try {
          query = query.nprobes(nprobes);
        } catch (_e) {
          // ignore
        }
      }

      if (filter) {
        query = query.where(filter);
      }

      const rows = await query.limit(limit).toArray();

      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      return rows.map((row: any) => ({
        ...this._rowToEntry(row),
        // _distance is internal LanceDB property
        score: row._distance,
      }));
    });
  }

  /**
   * Get a record by ID.
   *
   * @param id - Record ID (passed through `_sanitizeId` before querying).
   * @returns The record, or null if no row matches.
   * @throws {StorageError} If the table is not initialized.
   */
  async getById(id: string): Promise<MemoryEntry | null> {
    if (!this.isConnected) {
      await this.connect();
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      // Plain filter query; only the first match is used, so fetch one row.
      const rows = await this.table
        .query()
        .where(`id == '${this._sanitizeId(id)}'`)
        .limit(1)
        .toArray();

      return rows.length === 0 ? null : this._rowToEntry(rows[0]);
    });
  }

  /**
   * Get all records from the table.
   *
   * @param options - Optional `limit` on the number of rows returned.
   * @returns The rows mapped to memory entries.
   * @throws {StorageError} If the table is not initialized.
   */
  async getAll(options: { limit?: number } = {}): Promise<MemoryEntry[]> {
    if (!this.isConnected) {
      await this.connect();
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      let query = this.table.query();
      if (options.limit) {
        query = query.limit(options.limit);
      }

      const rows = await query.toArray();
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      return rows.map((row: any) => this._rowToEntry(row));
    });
  }

  /**
   * Delete a record by ID.
   *
   * @param id - Record ID (passed through `_sanitizeId` before use).
   * @returns The id as given and a success flag; success is reported even
   *   when no row matched.
   * @throws {StorageError} If the table is not initialized.
   */
  async delete(id: string): Promise<{ id: string; success: boolean }> {
    if (!this.isConnected) {
      await this.connect();
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      await this.table.delete(`id == '${this._sanitizeId(id)}'`);

      return { id, success: true };
    });
  }

  /**
   * Update an existing record.
   *
   * @param id - Record ID (passed through `_sanitizeId` before use).
   * @param data - Fields to change; `updated_at` is always set to now.
   *   Undefined fields are skipped and object metadata is stored as JSON.
   * @returns The id as given and a success flag; success is reported even
   *   when no row matched.
   * @throws {StorageError} If the table is not initialized.
   */
  async update(
    id: string,
    data: Partial<MemoryEntry>,
  ): Promise<{ id: string; success: boolean }> {
    if (!this.isConnected) {
      await this.connect();
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      const values: Record<string, unknown> = {};
      for (const [key, value] of Object.entries(data ?? {})) {
        if (value !== undefined) {
          values[key] = value;
        }
      }
      if (values.metadata !== null && typeof values.metadata === "object") {
        // Reads JSON-parse this column, so objects are written as JSON text.
        values.metadata = JSON.stringify(values.metadata);
      }
      values.updated_at = new Date();

      // Update API expects filter and values separately
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      await this.table.update({
        where: `id == '${this._sanitizeId(id)}'`,
        values,
      } as any);

      return { id, success: true };
    });
  }

  /**
   * Get database statistics.
   *
   * @returns Table name, URI, row count and connection flag. `count` is -1
   *   when the row count could not be determined.
   * @throws {StorageError} If the table is not initialized.
   */
  async getStats(): Promise<Stats> {
    if (!this.isConnected) {
      await this.connect();
    }

    return this._retryOperation(async () => {
      if (!this.table) {
        throw new StorageError("Table not initialized");
      }

      // Methods are probed at runtime because injected drivers may differ.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const table = this.table as any;
      let count = 0;

      try {
        if (typeof table.countRows === "function") {
          count = await table.countRows();
        } else if (typeof table.count === "function") {
          count = await table.count();
        } else {
          // Last resort: stream every batch and sum the row counts. This
          // scans the whole table, so it is only used when no counter exists.
          const batches = await table.query().execute();
          for await (const batch of batches) {
            count += batch.numRows;
          }
        }
      } catch (countError: unknown) {
        logger.debug(
          { message: this._errorMessage(countError) },
          "Failed to count rows",
        );
        count = -1;
      }

      return {
        tableName: this.tableName,
        uri: this.uri,
        count,
        isConnected: this.isConnected,
      };
    });
  }

  /**
   * Sanitize an ID to prevent SQL injection.
   * Removes any characters that aren't alphanumeric, underscore, or hyphen,
   * because the result is interpolated into filter strings.
   *
   * NOTE(review): stripping is lossy — ids that differ only in removed
   * characters (e.g. "a.b" and "ab") end up addressing the same filter, while
   * `add()` stores ids unchanged. Confirm ids are restricted to this alphabet.
   * @private
   */
  _sanitizeId(id: string): string {
    // String() keeps a non-string id from untyped callers from crashing here.
    return String(id).replace(/[^a-zA-Z0-9_-]/g, "");
  }

  /**
   * Validate a record object: it must be an object with truthy `id`,
   * `content` and `vector` fields, and the vector must pass `_validateVector`.
   *
   * @throws {StorageError} If a required field is missing.
   * @throws {QueryError} If the vector is invalid.
   * @private
   */
  _validateRecord(record: unknown): void {
    if (!record || typeof record !== "object") {
      throw new StorageError("Record must be an object");
    }

    const candidate = record as Record<string, unknown>;

    if (!candidate.id) {
      throw new StorageError("Record must have an id field");
    }

    if (!candidate.content) {
      throw new StorageError("Record must have a content field");
    }

    if (!candidate.vector) {
      throw new StorageError("Record must have a vector field");
    }

    this._validateVector(candidate.vector);
  }

  /**
   * Validate a vector: it must be an array whose elements are all non-NaN
   * numbers. The length is intentionally not compared with
   * `this.vectorDimension`, since different embedding models have different
   * dimensions.
   *
   * @throws {QueryError} If the value is not an array or contains an invalid element.
   * @private
   */
  _validateVector(vector: unknown): void {
    if (!Array.isArray(vector)) {
      throw new QueryError("Vector must be an array");
    }

    vector.forEach((element: unknown, i: number) => {
      if (typeof element !== "number" || Number.isNaN(element)) {
        throw new QueryError(`Vector element ${i} is not a valid number`);
      }
    });
  }

  /**
   * Sleep for a specified duration.
   *
   * @param ms - Delay in milliseconds.
   * @private
   */
  _sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Check if an error is retryable (transient network/connection issues),
   * judged purely from its lower-cased message text.
   *
   * @returns False for values without a non-empty string `message`.
   * @private
   */
  _isRetryableError(error: unknown): boolean {
    const raw =
      typeof error === "object" && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    if (typeof raw !== "string" || raw.length === 0) {
      return false;
    }

    const message = raw.toLowerCase();

    // Network-related errors
    const networkPatterns = [
      "econnreset", // Connection reset by peer
      "etimedout", // Operation timed out
      "enotfound", // DNS resolution failed
      "econnrefused", // Connection refused
      "enetunreach", // Network unreachable
      "ehostunreach", // Host unreachable
      "socket hang up", // Socket closed unexpectedly
      "network error", // Generic network error
      "failed to fetch", // Fetch/network failure
      "timeout", // Timeout occurred
    ];

    // LanceDB-side conditions that may be transient
    const lancedbPatterns = [
      "connection",
      "database closed",
      "table not found",
      "lock",
      "busy",
      "temporary",
    ];

    if (
      networkPatterns.some((p) => message.includes(p)) ||
      lancedbPatterns.some((p) => message.includes(p))
    ) {
      return true;
    }

    // 5xx status codes. Word boundaries keep digits inside larger numbers
    // (e.g. "1536 dimensions", "port 5432") from being read as a status.
    return /\b5\d{2}\b/.test(message);
  }

  /**
   * Retry an operation with exponential backoff plus up to 25% jitter.
   *
   * @param operation - Async function to run.
   * @param maxRetries - Attempt limit (defaults to `this.maxRetries`); values
   *   below 1 are treated as 1 so the operation always runs at least once.
   * @param baseDelay - Base delay in ms (defaults to `this.retryDelay`).
   * @returns The operation's result.
   * @throws The operation's own error when it is not retryable or the
   *   attempts are exhausted.
   * @private
   */
  async _retryOperation<T>(
    operation: () => Promise<T>,
    maxRetries?: number,
    baseDelay?: number,
  ): Promise<T> {
    const max = Math.max(1, Math.floor(maxRetries ?? this.maxRetries) || 1);
    const delay = baseDelay ?? this.retryDelay;

    for (let attempt = 1; ; attempt++) {
      try {
        return await operation();
      } catch (error: unknown) {
        if (attempt >= max || !this._isRetryableError(error)) {
          throw error;
        }

        const backoffMs = delay * Math.pow(2, attempt - 1);
        const jitterMs = backoffMs * Math.random() * 0.25;

        logger.debug(
          {
            attempt,
            max,
            message: this._errorMessage(error),
            retryDelayMs: Math.round(backoffMs + jitterMs),
          },
          "Retryable error, retrying",
        );

        await this._sleep(backoffMs + jitterMs);
      }
    }
  }

  /**
   * True when the location starts with a URI scheme ("s3://", "gs://", ...).
   * The scheme must be at least two characters so a Windows drive letter is
   * never mistaken for one.
   * @private
   */
  _isRemoteUri(location: string): boolean {
    return /^[a-z][a-z0-9+.-]+:\/\//i.test(location);
  }

  /**
   * Extract a human-readable message from any thrown value.
   * @private
   */
  _errorMessage(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === "object" && error !== null) {
      const message = (error as { message?: unknown }).message;
      if (typeof message === "string") {
        return message;
      }
    }
    return String(error);
  }

  /**
   * Decode a stored metadata value. Falsy values become null; strings are
   * JSON-parsed, falling back to the raw string when they are not valid JSON
   * so a single malformed row cannot fail a whole read.
   * @private
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  _parseMetadata(raw: unknown): string | Record<string, any> | null {
    if (!raw) {
      return null;
    }
    if (typeof raw !== "string") {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      return raw as Record<string, any>;
    }
    try {
      return JSON.parse(raw);
    } catch (_e) {
      return raw;
    }
  }

  /**
   * Map a raw table row to a MemoryEntry, decoding its metadata.
   * @private
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  _rowToEntry(row: any): MemoryEntry {
    return {
      id: row.id as string,
      vector: row.vector as number[],
      content: row.content as string,
      metadata: this._parseMetadata(row.metadata),
      created_at: row.created_at,
      updated_at: row.updated_at,
    };
  }

  /**
   * Build the row written to the table: the entry's own fields, object
   * metadata serialized to JSON (reads JSON-parse this column), and both
   * timestamps set to the same instant.
   * @private
   */
  _toStorageRecord(entry: MemoryEntry, now: Date): Record<string, unknown> {
    const row: Record<string, unknown> = {
      ...entry,
      created_at: now,
      updated_at: now,
    };
    if (entry.metadata !== null && typeof entry.metadata === "object") {
      row.metadata = JSON.stringify(entry.metadata);
    }
    return row;
  }
}
|
|
677
|
+
|
|
678
|
+
// Also expose the client as the module's default export (it is a named export as well).
export default LanceDBClient;
|