memory-lancedb-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/store.ts ADDED
@@ -0,0 +1,567 @@
1
+ /**
2
+ * LanceDB Storage Layer with Multi-Scope Support
3
+ */
4
+
5
+ import type * as LanceDB from "@lancedb/lancedb";
6
+ import { randomUUID } from "node:crypto";
7
+
8
+ // ============================================================================
9
+ // Types
10
+ // ============================================================================
11
+
12
/**
 * One persisted memory record, as written to and read from the memories table.
 */
export interface MemoryEntry {
  /** Identifier of the record; `MemoryStore.store()` assigns a random UUID. */
  id: string;

  /** The text content of the memory. */
  text: string;

  /** Vector stored with the text; `importEntry()` requires its length to equal `StoreConfig.vectorDim`. */
  vector: number[];

  /** Classification of the memory. */
  category:
    | "preference"
    | "fact"
    | "decision"
    | "entity"
    | "other";

  /** Scope the memory belongs to; rows read back without a scope are reported as "global". */
  scope: string;

  /** Importance weight of the memory. */
  importance: number;

  /** Creation time; `MemoryStore.store()` fills it with `Date.now()`. */
  timestamp: number;

  /** JSON string for extensible metadata. */
  metadata?: string;
}
22
+
23
/**
 * A search hit: the matched record together with its relevance score.
 */
export interface MemorySearchResult {
  /** The memory that matched the query. */
  entry: MemoryEntry;

  /** Relevance score computed by the search method that produced this hit. */
  score: number;
}
27
+
28
/**
 * Settings needed to construct a `MemoryStore`.
 */
export interface StoreConfig {
  /** Location handed to `lancedb.connect()`. */
  dbPath: string;

  /** Number of components every stored vector must have. */
  vectorDim: number;
}
32
+
33
+ // ============================================================================
34
+ // LanceDB Dynamic Import
35
+ // ============================================================================
36
+
37
/** In-flight or completed dynamic import of LanceDB; `null` until first use or after a failed load. */
let lancedbImportPromise: Promise<typeof import("@lancedb/lancedb")> | null = null;

/**
 * Lazily loads the `@lancedb/lancedb` module, sharing a single dynamic import
 * between all callers.
 *
 * A failed load is not cached: the stored promise is cleared so that a later
 * call attempts the import again instead of replaying the old rejection.
 *
 * @returns The loaded LanceDB module namespace.
 * @throws Error prefixed with "memory-lancedb-pro: failed to load LanceDB."
 *         (the underlying failure is attached as `cause`).
 */
export const loadLanceDB = async (): Promise<typeof import("@lancedb/lancedb")> => {
  if (!lancedbImportPromise) {
    lancedbImportPromise = import("@lancedb/lancedb");
  }
  const pending = lancedbImportPromise;

  try {
    return await pending;
  } catch (err) {
    // Only clear the cache if nobody has already replaced it with a newer attempt.
    if (lancedbImportPromise === pending) {
      lancedbImportPromise = null;
    }
    throw new Error(`memory-lancedb-pro: failed to load LanceDB. ${String(err)}`, { cause: err });
  }
};
49
+
50
+ // ============================================================================
51
+ // Utility Functions
52
+ // ============================================================================
53
+
54
/**
 * Rounds `value` down to an integer and confines it to `[min, max]`.
 *
 * @param value Number to clamp; NaN and ±Infinity yield `min`.
 * @param min Lower bound.
 * @param max Upper bound.
 * @returns The floored, bounded value.
 */
function clampInt(value: number, min: number, max: number): number {
  if (Number.isFinite(value)) {
    const floored = Math.floor(value);
    const lowerBounded = Math.max(min, floored);
    return Math.min(max, lowerBounded);
  }
  return min;
}
58
+
59
/**
 * Makes a string safe to embed between single quotes in a SQL filter by
 * doubling every single-quote character.
 *
 * @param value Raw text.
 * @returns The text with each `'` replaced by `''`.
 */
function escapeSqlLiteral(value: string): string {
  const pieces = value.split("'");
  return pieces.join("''");
}
62
+
63
+ // ============================================================================
64
+ // Memory Store
65
+ // ============================================================================
66
+
67
const TABLE_NAME = "memories";

/** Loosely typed row as produced by LanceDB `toArray()`; fields are cast where they are read. */
type RawRow = Record<string, unknown>;

/** Full UUID in 8-4-4-4-12 hex groups, matched case-insensitively. */
const FULL_ID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/** Dash-free ID prefix of at least 8 hex characters, matched case-insensitively. */
const ID_PREFIX_PATTERN = /^[0-9a-f]{8,}$/i;

/** Columns fetched by `list()`; the vector column is deliberately left out. */
const LIST_COLUMNS = ["id", "text", "category", "scope", "importance", "timestamp", "metadata"];

/**
 * Storage layer over a single LanceDB table of memories, with optional
 * per-scope filtering on every read and write path.
 *
 * The connection and table are opened lazily on the first operation. Rows that
 * come back without a `scope` value are reported as scope "global".
 */
export class MemoryStore {
  private db: LanceDB.Connection | null = null;
  private table: LanceDB.Table | null = null;
  private initPromise: Promise<void> | null = null;
  private ftsIndexCreated = false;

  constructor(private readonly config: StoreConfig) {}

  /** The database location this store was configured with. */
  get dbPath(): string {
    return this.config.dbPath;
  }

  /** Whether initialization found or created a full-text index (required by `bm25Search`). */
  get hasFtsSupport(): boolean {
    return this.ftsIndexCreated;
  }

  // --------------------------------------------------------------------------
  // Initialization
  // --------------------------------------------------------------------------

  /**
   * Runs initialization once and shares the in-flight promise between
   * concurrent callers. A failed attempt clears the cached promise so the next
   * call retries.
   */
  private async ensureInitialized(): Promise<void> {
    if (this.table) {
      return;
    }
    if (this.initPromise) {
      return this.initPromise;
    }

    this.initPromise = this.doInitialize().catch((err) => {
      this.initPromise = null;
      throw err;
    });
    return this.initPromise;
  }

  /** Waits for initialization and returns the opened table. */
  private async openedTable(): Promise<LanceDB.Table> {
    await this.ensureInitialized();
    if (!this.table) {
      throw new Error("memory-lancedb-pro: store initialization did not produce a table");
    }
    return this.table;
  }

  /**
   * Connects, opens (or creates) the table, checks that an existing table
   * matches the configured vector dimension, and tries to set up the FTS index.
   *
   * @throws Error when the table's vectors have a different length than `config.vectorDim`.
   */
  private async doInitialize(): Promise<void> {
    const lancedb = await loadLanceDB();
    const db = await lancedb.connect(this.config.dbPath);
    const table = await this.openOrCreateTable(db);

    // One sample row serves both the legacy-schema check and the dimension check.
    const sample: RawRow[] = await table.query().limit(1).toArray();
    const first = sample[0];
    if (first) {
      if (!("scope" in first)) {
        // Nothing is migrated here; readers simply default a missing scope to "global".
        console.warn('Existing table has no scope column; rows without a scope are treated as "global"');
      }

      // LanceDB returns Arrow Vector objects, not plain JS arrays, so
      // Array.isArray() would be false here; rely on .length instead.
      const existingDim = (first.vector as { length?: number } | null | undefined)?.length;
      if (existingDim && existingDim !== this.config.vectorDim) {
        throw new Error(
          `Vector dimension mismatch: table=${existingDim}, config=${this.config.vectorDim}. Create a new table/dbPath or set matching embedding.dimensions.`
        );
      }
    }

    // Create FTS index for BM25 search (graceful fallback if unavailable)
    try {
      await this.createFtsIndex(table);
      this.ftsIndexCreated = true;
    } catch (err) {
      console.warn("Failed to create FTS index, falling back to vector-only search:", err);
      this.ftsIndexCreated = false;
    }

    this.db = db;
    this.table = table;
  }

  /**
   * Opens the memories table, creating it when opening fails.
   *
   * Creation seeds the table with one placeholder row (to establish the
   * schema) and deletes it again. If creation reports that the table already
   * exists — i.e. it appeared between the failed open and the create — the
   * table is simply opened.
   */
  private async openOrCreateTable(db: LanceDB.Connection): Promise<LanceDB.Table> {
    try {
      return await db.openTable(TABLE_NAME);
    } catch {
      // Any open failure is treated as "table does not exist yet".
    }

    const schemaEntry: MemoryEntry = {
      id: "__schema__",
      text: "",
      vector: Array.from({ length: this.config.vectorDim }, () => 0),
      category: "other",
      scope: "global",
      importance: 0,
      timestamp: 0,
      metadata: "{}",
    };

    try {
      const created = await db.createTable(TABLE_NAME, [schemaEntry]);
      await created.delete('id = "__schema__"');
      return created;
    } catch (createErr) {
      if (String(createErr).includes("already exists")) {
        return db.openTable(TABLE_NAME);
      }
      throw createErr;
    }
  }

  /**
   * Creates a full-text index on the `text` column unless one is already listed.
   *
   * @throws Error prefixed with "FTS index creation failed:" (original error attached as `cause`).
   */
  private async createFtsIndex(table: LanceDB.Table): Promise<void> {
    try {
      const indices = await table.listIndices();
      const hasFtsIndex = indices?.some((idx: any) =>
        idx.indexType === "FTS" || idx.columns?.includes("text")
      );

      if (!hasFtsIndex) {
        // LanceDB @lancedb/lancedb >=0.26: use Index.fts() config
        const lancedb = await loadLanceDB();
        await table.createIndex("text", {
          config: (lancedb as any).Index.fts(),
        });
      }
    } catch (err) {
      throw new Error(
        `FTS index creation failed: ${err instanceof Error ? err.message : String(err)}`,
        { cause: err }
      );
    }
  }

  // --------------------------------------------------------------------------
  // Shared helpers
  // --------------------------------------------------------------------------

  /**
   * Builds a parenthesized SQL condition matching any of `scopes`.
   *
   * @param scopes Allowed scopes; `undefined` or empty means "no restriction".
   * @param includeUnscoped When true, rows whose scope is NULL also match.
   * @returns The condition, or `null` when there is nothing to restrict.
   */
  private scopeClause(scopes: readonly string[] | undefined, includeUnscoped = true): string | null {
    if (!scopes || scopes.length === 0) {
      return null;
    }
    const anyOf = scopes.map((scope) => `scope = '${escapeSqlLiteral(scope)}'`).join(" OR ");
    return includeUnscoped ? `((${anyOf}) OR scope IS NULL)` : `(${anyOf})`;
  }

  /** Scope of a raw row, defaulting to "global" when the value is missing. */
  private rowScope(row: RawRow): string {
    return (row.scope as string | null | undefined) ?? "global";
  }

  /** True when no scope restriction applies or `scope` is one of the allowed scopes. */
  private scopeAllowed(scope: string, scopes: readonly string[] | undefined): boolean {
    return !scopes || scopes.length === 0 || scopes.includes(scope);
  }

  /**
   * Converts a raw row into a `MemoryEntry`.
   *
   * The vector is copied into a plain array because LanceDB hands back Arrow
   * Vector objects, which fail `Array.isArray()` checks such as the one in
   * `importEntry()`.
   */
  private toEntry(row: RawRow, includeVector = true): MemoryEntry {
    const rawVector = row.vector as Iterable<number> | null | undefined;
    return {
      id: row.id as string,
      text: row.text as string,
      vector: includeVector && rawVector ? Array.from(rawVector) : [],
      category: row.category as MemoryEntry["category"],
      scope: this.rowScope(row),
      importance: row.importance as number,
      timestamp: row.timestamp as number,
      metadata: (row.metadata as string) || "{}",
    };
  }

  /**
   * Finds the row addressed by a full UUID or by a hex prefix of 8+ characters.
   *
   * The prefix match is evaluated by the database over the whole table, so it
   * does not depend on how many rows the table holds.
   *
   * @param columns Columns to fetch; all columns when omitted.
   * @returns Zero or one row.
   * @throws Error when `id` is neither a UUID nor a valid prefix, or when a
   *         prefix matches more than one row.
   */
  private async findByIdOrPrefix(table: LanceDB.Table, id: string, columns?: string[]): Promise<RawRow[]> {
    const isFullId = FULL_ID_PATTERN.test(id);
    if (!isFullId && !ID_PREFIX_PATTERN.test(id)) {
      throw new Error(`Invalid memory ID format: ${id}`);
    }

    // Both patterns admit only hex digits and dashes, so the value carries no
    // LIKE wildcards; it is still escaped for consistency with other filters.
    const safeId = escapeSqlLiteral(id);
    let query = table.query();
    if (columns) {
      query = query.select(columns);
    }

    if (isFullId) {
      return query.where(`id = '${safeId}'`).limit(1).toArray();
    }

    const fetched: RawRow[] = await query.where(`id LIKE '${safeId}%'`).toArray();
    const matches = fetched.filter((row) => String(row.id).startsWith(id));
    if (matches.length > 1) {
      throw new Error(`Ambiguous prefix "${id}" matches ${matches.length} memories. Use a longer prefix or full ID.`);
    }
    return matches;
  }

  // --------------------------------------------------------------------------
  // Writes
  // --------------------------------------------------------------------------

  /**
   * Stores a new memory, assigning a fresh UUID and the current time.
   *
   * @returns The entry as written (metadata defaults to "{}").
   */
  async store(entry: Omit<MemoryEntry, "id" | "timestamp">): Promise<MemoryEntry> {
    const table = await this.openedTable();

    const fullEntry: MemoryEntry = {
      ...entry,
      id: randomUUID(),
      timestamp: Date.now(),
      metadata: entry.metadata || "{}",
    };

    await table.add([fullEntry]);
    return fullEntry;
  }

  /**
   * Import a pre-built entry while preserving its id/timestamp.
   * Used for re-embedding / migration / A/B testing across embedding models.
   * Intentionally separate from `store()` to keep normal writes simple.
   *
   * @throws Error when the id is missing or the vector is not an array of
   *         exactly `config.vectorDim` elements.
   */
  async importEntry(entry: MemoryEntry): Promise<MemoryEntry> {
    const table = await this.openedTable();

    if (!entry.id || typeof entry.id !== "string") {
      throw new Error("importEntry requires a stable id");
    }

    const vector = entry.vector || [];
    if (!Array.isArray(vector) || vector.length !== this.config.vectorDim) {
      throw new Error(
        `Vector dimension mismatch: expected ${this.config.vectorDim}, got ${Array.isArray(vector) ? vector.length : 'non-array'}`
      );
    }

    const full: MemoryEntry = {
      ...entry,
      scope: entry.scope || "global",
      importance: Number.isFinite(entry.importance) ? entry.importance : 0.7,
      timestamp: Number.isFinite(entry.timestamp) ? entry.timestamp : Date.now(),
      metadata: entry.metadata || "{}",
    };

    await table.add([full]);
    return full;
  }

  /**
   * Updates fields of an existing memory addressed by full UUID or 8+ hex
   * character prefix. Scope, id and timestamp are preserved.
   *
   * The row is rewritten as delete + add. To avoid losing the memory, a
   * replacement vector is validated before anything is deleted, and if the
   * add fails the original row is written back.
   *
   * @returns The updated entry, or `null` when no row matches.
   * @throws Error on an invalid/ambiguous id, when the row is outside
   *         `scopeFilter`, or when `updates.vector` has the wrong length.
   */
  async update(
    id: string,
    updates: { text?: string; vector?: number[]; importance?: number; category?: MemoryEntry["category"]; metadata?: string },
    scopeFilter?: string[]
  ): Promise<MemoryEntry | null> {
    const table = await this.openedTable();

    const [row] = await this.findByIdOrPrefix(table, id);
    if (!row) return null;

    const current = this.toEntry(row);

    // Check scope permissions
    if (!this.scopeAllowed(current.scope, scopeFilter)) {
      throw new Error(`Memory ${id} is outside accessible scopes`);
    }

    // Validate before the destructive step: a bad vector must not cost us the row.
    if (updates.vector != null && updates.vector.length !== this.config.vectorDim) {
      throw new Error(
        `Vector dimension mismatch: expected ${this.config.vectorDim}, got ${updates.vector.length}`
      );
    }

    const updated: MemoryEntry = {
      ...current,
      text: updates.text ?? current.text,
      vector: updates.vector ?? current.vector,
      category: updates.category ?? current.category,
      importance: updates.importance ?? current.importance,
      metadata: updates.metadata ?? current.metadata,
    };

    // Rewrite the row as delete + re-add.
    await table.delete(`id = '${escapeSqlLiteral(current.id)}'`);
    try {
      await table.add([updated]);
    } catch (err) {
      // Best effort: put the previous version back so the memory is not lost.
      try {
        await table.add([current]);
      } catch (restoreErr) {
        console.warn(`Failed to restore memory ${current.id} after a failed update:`, restoreErr);
      }
      throw err;
    }

    return updated;
  }

  /**
   * Deletes the memory addressed by full UUID or 8+ hex character prefix.
   *
   * @returns `true` when a row was deleted, `false` when nothing matched.
   * @throws Error on an invalid/ambiguous id or when the row is outside `scopeFilter`.
   */
  async delete(id: string, scopeFilter?: string[]): Promise<boolean> {
    const table = await this.openedTable();

    const [match] = await this.findByIdOrPrefix(table, id, ["id", "scope"]);
    if (!match) {
      return false;
    }

    const resolvedId = match.id as string;

    // Check scope permissions
    if (!this.scopeAllowed(this.rowScope(match), scopeFilter)) {
      throw new Error(`Memory ${resolvedId} is outside accessible scopes`);
    }

    await table.delete(`id = '${escapeSqlLiteral(resolvedId)}'`);
    return true;
  }

  /**
   * Deletes every memory in the given scopes and/or older than a timestamp.
   *
   * Unlike the read paths, rows with a NULL scope are not matched by the scope
   * condition. A `beforeTimestamp` of `0` is honored as a real bound rather
   * than being treated as "not provided".
   *
   * @param scopeFilter Scopes to delete from; may be empty when `beforeTimestamp` is given.
   * @param beforeTimestamp Delete only rows with `timestamp` strictly below this value.
   * @returns Number of rows that matched the filter before deletion.
   * @throws Error when no filter is given or `beforeTimestamp` is not a finite number.
   */
  async bulkDelete(scopeFilter: string[], beforeTimestamp?: number): Promise<number> {
    const table = await this.openedTable();

    const conditions: string[] = [];

    const scopeCondition = this.scopeClause(scopeFilter, false);
    if (scopeCondition) {
      conditions.push(scopeCondition);
    }

    if (beforeTimestamp != null) {
      // NaN/Infinity must never silently drop the time bound of a destructive delete.
      if (!Number.isFinite(beforeTimestamp)) {
        throw new Error(`Invalid beforeTimestamp: ${beforeTimestamp}`);
      }
      conditions.push(`timestamp < ${beforeTimestamp}`);
    }

    if (conditions.length === 0) {
      throw new Error("Bulk delete requires at least scope or timestamp filter for safety");
    }

    const whereClause = conditions.join(" AND ");

    // Count in the database instead of materializing rows (and their vectors).
    const deleteCount = await table.countRows(whereClause);
    if (deleteCount > 0) {
      await table.delete(whereClause);
    }

    return deleteCount;
  }

  // --------------------------------------------------------------------------
  // Reads
  // --------------------------------------------------------------------------

  /** Reports whether a row with exactly this id exists. */
  async hasId(id: string): Promise<boolean> {
    const table = await this.openedTable();
    const safeId = escapeSqlLiteral(id);
    const res = await table.query().select(["id"]).where(`id = '${safeId}'`).limit(1).toArray();
    return res.length > 0;
  }

  /**
   * Nearest-neighbour search.
   *
   * Scores are `1 / (1 + distance)`. Up to ten times `limit` rows (max 200)
   * are fetched so that score and scope filtering still leave enough hits.
   *
   * @param vector Query vector.
   * @param limit Maximum hits to return; clamped to 1..20.
   * @param minScore Hits scoring below this are dropped.
   * @param scopeFilter When non-empty, only rows in these scopes are returned.
   */
  async vectorSearch(vector: number[], limit = 5, minScore = 0.3, scopeFilter?: string[]): Promise<MemorySearchResult[]> {
    const table = await this.openedTable();

    const safeLimit = clampInt(limit, 1, 20);
    const fetchLimit = Math.min(safeLimit * 10, 200); // Over-fetch for scope filtering

    let query = table.vectorSearch(vector).limit(fetchLimit);

    const scopeCondition = this.scopeClause(scopeFilter); // NULL scope allowed for backward compatibility
    if (scopeCondition) {
      query = query.where(scopeCondition);
    }

    const rows: RawRow[] = await query.toArray();
    const hits: MemorySearchResult[] = [];

    for (const row of rows) {
      const distance = (row._distance as number | null | undefined) ?? 0;
      const score = 1 / (1 + distance);
      if (score < minScore) continue;

      // Double-check scope filter in application layer
      if (!this.scopeAllowed(this.rowScope(row), scopeFilter)) continue;

      hits.push({ entry: this.toEntry(row), score });
      if (hits.length >= safeLimit) break;
    }

    return hits;
  }

  /**
   * Full-text (BM25) search over the `text` column.
   *
   * Returns an empty list when no FTS index is available or when the search
   * itself fails, so callers can fall back to vector-only results.
   *
   * @param query Search text.
   * @param limit Maximum rows requested from the index; clamped to 1..20.
   * @param scopeFilter When non-empty, only rows in these scopes are returned.
   */
  async bm25Search(query: string, limit = 5, scopeFilter?: string[]): Promise<MemorySearchResult[]> {
    const table = await this.openedTable();

    if (!this.ftsIndexCreated) {
      return []; // Fallback to vector-only if FTS unavailable
    }

    const safeLimit = clampInt(limit, 1, 20);

    try {
      // Use FTS query type explicitly
      let searchQuery = table.search(query, "fts").limit(safeLimit);

      const scopeCondition = this.scopeClause(scopeFilter);
      if (scopeCondition) {
        searchQuery = searchQuery.where(scopeCondition);
      }

      const rows: RawRow[] = await searchQuery.toArray();
      const hits: MemorySearchResult[] = [];

      for (const row of rows) {
        // Double-check scope filter in application layer
        if (!this.scopeAllowed(this.rowScope(row), scopeFilter)) continue;

        // LanceDB FTS _score is raw BM25 (unbounded). Normalize with sigmoid.
        const rawScore = typeof row._score === "number" ? row._score : 0;
        const normalizedScore = rawScore > 0 ? 1 / (1 + Math.exp(-rawScore / 5)) : 0.5;

        hits.push({ entry: this.toEntry(row), score: normalizedScore });
      }

      return hits;
    } catch (err) {
      console.warn("BM25 search failed, falling back to empty results:", err);
      return [];
    }
  }

  /**
   * Lists memories newest-first, optionally restricted by scope and category.
   *
   * All matching rows are fetched (without vectors) and sorted in the
   * application so paging is consistent across the full result set.
   *
   * @param limit Page size; negative or NaN values yield an empty page.
   * @param offset Rows to skip; negative or NaN values are treated as 0.
   * @returns Entries with `vector` set to an empty array.
   */
  async list(scopeFilter?: string[], category?: string, limit = 20, offset = 0): Promise<MemoryEntry[]> {
    const table = await this.openedTable();

    const conditions: string[] = [];

    const scopeCondition = this.scopeClause(scopeFilter);
    if (scopeCondition) {
      conditions.push(scopeCondition);
    }
    if (category) {
      conditions.push(`category = '${escapeSqlLiteral(category)}'`);
    }

    let query = table.query();
    if (conditions.length > 0) {
      query = query.where(conditions.join(" AND "));
    }

    const rows: RawRow[] = await query.select(LIST_COLUMNS).toArray();

    // Negative values would make Array.prototype.slice count from the end.
    const start = Math.max(0, Math.floor(offset) || 0);
    const count = Math.max(0, Math.floor(limit) || 0);

    return rows
      .map((row) => this.toEntry(row, false)) // Don't include vectors in list results for performance
      .sort((a, b) => (b.timestamp || 0) - (a.timestamp || 0))
      .slice(start, start + count);
  }

  /**
   * Counts memories in total, per scope and per category, optionally
   * restricted to the given scopes.
   */
  async stats(scopeFilter?: string[]): Promise<{
    totalCount: number;
    scopeCounts: Record<string, number>;
    categoryCounts: Record<string, number>
  }> {
    const table = await this.openedTable();

    let query = table.query();
    const scopeCondition = this.scopeClause(scopeFilter);
    if (scopeCondition) {
      query = query.where(scopeCondition);
    }

    const rows: RawRow[] = await query.select(["scope", "category"]).toArray();

    const scopeCounts: Record<string, number> = {};
    const categoryCounts: Record<string, number> = {};

    for (const row of rows) {
      const scope = this.rowScope(row);
      const category = row.category as string;

      scopeCounts[scope] = (scopeCounts[scope] || 0) + 1;
      categoryCounts[category] = (categoryCounts[category] || 0) + 1;
    }

    return {
      totalCount: rows.length,
      scopeCounts,
      categoryCounts,
    };
  }
}