@pi-unipi/memory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/storage.ts ADDED
@@ -0,0 +1,716 @@
1
+ /**
2
+ * @unipi/memory — Storage layer
3
+ *
4
+ * Two-tier storage: SQLite + sqlite-vec for vector search,
5
+ * markdown files for human-readable memory.
6
+ */
7
+
8
+ import Database from "better-sqlite3";
9
+ import * as sqliteVec from "sqlite-vec";
10
+ import * as yaml from "js-yaml";
11
+ import * as fs from "node:fs";
12
+ import * as path from "node:path";
13
+ import * as os from "node:os";
14
+ import { randomUUID } from "node:crypto";
15
+
16
/**
 * A single memory entry as exchanged between callers and the storage layer.
 */
export interface MemoryRecord {
  /** Unique identifier; the storage classes derive it from the title when empty. */
  id: string;
  /** Human-readable title. */
  title: string;
  /** Markdown body of the memory. */
  content: string;
  /** Free-form labels attached to the memory. */
  tags: Array<string>;
  /** Name of the project the memory belongs to. */
  project: string;
  /** Category of the memory. */
  type: "preference" | "decision" | "pattern" | "summary";
  /** Creation timestamp (the storage classes write ISO-8601 strings). */
  created: string;
  /** Last-update timestamp (the storage classes write ISO-8601 strings). */
  updated: string;
  /** Optional embedding vector used for vector search. */
  embedding?: Float32Array | null;
}
28
+
29
/**
 * One hit returned by a search: the matched record, its relevance score and
 * a short excerpt of the record content.
 */
export interface SearchResult {
  /** The matched memory. */
  record: MemoryRecord;
  /** Relevance score; higher is better. */
  score: number;
  /** Excerpt of the record content shown alongside the hit. */
  snippet: string;
}
35
+
36
/**
 * YAML frontmatter block stored at the top of each memory markdown file.
 */
interface MemoryFrontmatter {
  /** Title of the memory. */
  title: string;
  /** Labels attached to the memory. */
  tags: Array<string>;
  /** Project the memory belongs to. */
  project: string;
  /** Creation timestamp. */
  created: string;
  /** Last-update timestamp. */
  updated: string;
  /** Memory category, kept as a plain string in the file. */
  type: string;
}
45
+
46
/** File name of the SQLite database kept inside each project directory. */
const MEMORY_DB_NAME = "memory.db";

/** Number of float components declared for the `memories_vec` embedding column. */
const MEMORY_EMBEDDING_DIM = 384;
48
+
49
/**
 * Resolve the root directory that holds every project's memory
 * (`~/.unipi/memory`).
 *
 * @returns The path built from the current user's home directory.
 */
export function getMemoryBaseDir(): string {
  const segments = [os.homedir(), ".unipi", "memory"];
  return path.join(...segments);
}
55
+
56
/**
 * Resolve the memory directory of one project.
 *
 * @param projectName Directory name of the project under the memory base dir.
 * @returns The base memory directory joined with `projectName`.
 */
export function getProjectDir(projectName: string): string {
  const baseDir = getMemoryBaseDir();
  return path.join(baseDir, projectName);
}
62
+
63
/**
 * List every project directory found directly under the memory base directory.
 *
 * Entries that cannot be stat'ed (for example a dangling symlink, or an entry
 * removed while the listing is in progress) are skipped instead of aborting
 * the whole scan.
 *
 * @returns One `{ name, dir }` pair per sub-directory; an empty array when the
 *   base directory does not exist.
 */
export function getAllProjectDirs(): Array<{ name: string; dir: string }> {
  const base = getMemoryBaseDir();
  if (!fs.existsSync(base)) return [];

  const projects: Array<{ name: string; dir: string }> = [];
  for (const name of fs.readdirSync(base)) {
    const dir = path.join(base, name);
    try {
      // statSync follows symlinks, so a link to a directory still counts.
      if (fs.statSync(dir).isDirectory()) {
        projects.push({ name, dir });
      }
    } catch {
      // Unreadable or vanished entry: not a usable project directory.
    }
  }
  return projects;
}
80
+
81
/**
 * Turn an arbitrary path or name into a safe project name.
 *
 * Every run of characters outside `[a-zA-Z0-9]` (underscores included) is
 * collapsed into a single `_`.
 *
 * @param cwd Path or name to sanitize.
 * @returns The sanitized name.
 */
export function sanitizeProjectName(cwd: string): string {
  const unsafeRun = /[^a-zA-Z0-9]+/g;
  return cwd.replace(unsafeRun, "_");
}
88
+
89
/**
 * Derive the project name from a working directory.
 *
 * Only the final path segment is used, passed through
 * `sanitizeProjectName`.
 *
 * @param cwd Working directory path.
 * @returns The sanitized last segment of `cwd`.
 */
export function getProjectName(cwd: string): string {
  return sanitizeProjectName(path.basename(cwd));
}
98
+
99
/**
 * Read a memory markdown file from disk and parse it.
 *
 * @param filePath Path of the markdown file.
 * @returns The parsed record, or `null` when the file cannot be read or
 *   parsing fails for any reason.
 */
export function parseMemoryFile(filePath: string): MemoryRecord | null {
  let parsed: MemoryRecord | null;
  try {
    const raw = fs.readFileSync(filePath, "utf-8");
    parsed = parseMemoryContent(raw);
  } catch {
    // Any read or parse error is reported as "no record".
    parsed = null;
  }
  return parsed;
}
110
+
111
/**
 * Parse memory content: a YAML frontmatter block delimited by `---` lines,
 * followed by a markdown body.
 *
 * Accepts LF and CRLF line endings, an optional leading BOM, and a file that
 * ends right after the closing `---`. The returned record always has an empty
 * `id`; callers assign one.
 *
 * @param content Full text of a memory file.
 * @returns The parsed record, or `null` when there is no frontmatter block,
 *   the YAML is invalid, or the frontmatter is not a mapping.
 */
export function parseMemoryContent(content: string): MemoryRecord | null {
  const match = content.match(
    /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/
  );
  if (!match) return null;

  const frontmatterStr = match[1] ?? "";
  const body = match[2] ?? "";

  // NOTE(review): yaml.load is applied to file content; confirm the js-yaml
  // version in use defaults to a safe schema.
  let loaded: unknown;
  try {
    loaded = yaml.load(frontmatterStr);
  } catch {
    return null;
  }
  // Empty or scalar frontmatter cannot describe a memory.
  if (typeof loaded !== "object" || loaded === null || Array.isArray(loaded)) {
    return null;
  }
  const frontmatter = loaded as Partial<Record<keyof MemoryFrontmatter, unknown>>;

  // YAML turns unquoted timestamps into Date objects and bare numbers into
  // numbers; normalise everything back to the strings MemoryRecord promises.
  const asText = (value: unknown): string => {
    if (typeof value === "string") return value;
    if (value instanceof Date) return value.toISOString();
    if (value === undefined || value === null) return "";
    return String(value);
  };

  let tags: string[] = [];
  if (Array.isArray(frontmatter.tags)) {
    tags = frontmatter.tags.map((tag) => asText(tag));
  } else if (typeof frontmatter.tags === "string" && frontmatter.tags.length > 0) {
    // A single scalar tag is treated as a one-element list.
    tags = [frontmatter.tags];
  }

  return {
    id: "",
    title: asText(frontmatter.title),
    content: body.trim(),
    tags,
    project: asText(frontmatter.project),
    type: (asText(frontmatter.type) || "summary") as MemoryRecord["type"],
    created: asText(frontmatter.created),
    updated: asText(frontmatter.updated),
  };
}
132
+
133
/**
 * Serialize a memory record to a markdown file with YAML frontmatter.
 *
 * The parent directory is created when it is missing; an existing file at
 * `filePath` is overwritten.
 *
 * @param filePath Destination path of the markdown file.
 * @param record Record to serialize (the embedding is not written).
 */
export function writeMemoryFile(filePath: string, record: MemoryRecord): void {
  const { title, tags, project, created, updated, type } = record;
  // Key order here is the order the keys appear in the written file.
  const frontmatter: MemoryFrontmatter = { title, tags, project, created, updated, type };

  const header = yaml.dump(frontmatter, { lineWidth: -1 });
  const content = "---\n" + header + "---\n\n" + record.content + "\n";

  // Make sure the target directory is there before writing.
  const parentDir = path.dirname(filePath);
  const parentMissing = !fs.existsSync(parentDir);
  if (parentMissing) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(filePath, content, "utf-8");
}
160
+
161
/** Shape of a row read from the `memories` table. */
interface MemoryRow {
  id: string;
  title: string;
  content: string;
  tags: string | null;
  project: string;
  type: MemoryRecord["type"];
  created: string;
  updated: string;
  embedding: Buffer | null;
}

/**
 * MemoryStorage — manages SQLite + markdown storage for a single project.
 *
 * Each record lives in three places: a row in the `memories` table, an
 * optional vector in the `memories_vec` virtual table (sqlite-vec), and a
 * `<id>.md` file in the project directory.
 *
 * Vectors are keyed by a numeric hash of the record id (`idToRowid`). The hash
 * space is 1,000,000 values, so two ids can collide; in that case the most
 * recently stored vector wins and a vector hit resolves to one of the
 * colliding records.
 */
export class MemoryStorage {
  private db: Database.Database | null = null;
  private projectName: string;
  private scopeDir: string;
  /** True once the `memories_vec` table is known to be usable. */
  private vecReady = false;

  /**
   * @param projectName Project whose directory under the memory base dir is used.
   */
  constructor(projectName: string) {
    this.projectName = projectName;
    this.scopeDir = getProjectDir(projectName);
  }

  /**
   * Initialize the storage: create the directory, open the database, load
   * sqlite-vec and create the tables. Calling it again on an already
   * initialized instance is a no-op.
   *
   * @throws When the database cannot be opened or the base table cannot be
   *   created. A missing sqlite-vec extension only disables vector search.
   */
  init(): void {
    // A second init() must not leak the first connection.
    if (this.db) return;

    fs.mkdirSync(this.scopeDir, { recursive: true });

    const dbPath = path.join(this.scopeDir, MEMORY_DB_NAME);
    const db = new Database(dbPath);

    try {
      // Enable WAL mode for concurrent reads
      db.pragma("journal_mode = WAL");

      try {
        sqliteVec.load(db);
      } catch (err) {
        console.warn("[unipi/memory] Failed to load sqlite-vec, fuzzy-only mode:", err);
      }

      db.exec(`
        CREATE TABLE IF NOT EXISTS memories (
          id TEXT PRIMARY KEY,
          title TEXT NOT NULL,
          content TEXT NOT NULL,
          tags TEXT,
          project TEXT,
          type TEXT,
          created TEXT,
          updated TEXT,
          embedding BLOB
        )
      `);

      try {
        db.exec(`
          CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec USING vec0(embedding float[${MEMORY_EMBEDDING_DIM}])
        `);
        this.vecReady = true;
      } catch {
        // sqlite-vec is not available: vector operations are skipped.
        this.vecReady = false;
      }
    } catch (err) {
      db.close();
      throw err;
    }

    this.db = db;
  }

  /**
   * Close the database connection. Safe to call when not initialized.
   */
  close(): void {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
    this.vecReady = false;
  }

  /**
   * Store or update a memory record.
   *
   * The passed record is updated in place: a missing `id` is derived from the
   * title, `created` is filled in when empty, `updated` is always refreshed and
   * a missing `project` defaults to this storage's project.
   *
   * @param record Record to persist.
   * @throws When the storage is not initialized or the id would place the
   *   markdown file outside the project directory.
   */
  store(record: MemoryRecord): void {
    const db = this.requireDb();

    // Generate ID from title if not provided
    if (!record.id) {
      record.id = record.title.toLowerCase().replace(/[^a-z0-9]+/g, "_");
    }

    const now = new Date().toISOString();
    if (!record.created) record.created = now;
    record.updated = now;

    if (!record.project) record.project = this.projectName;

    // Validate the file target before touching the database so a rejected id
    // leaves no partial state behind.
    const mdPath = this.markdownPath(record.id);
    if (mdPath === null) {
      throw new Error(`Invalid memory id: ${record.id}`);
    }

    const embeddingBuf = record.embedding
      ? MemoryStorage.encodeEmbedding(record.embedding)
      : null;

    db.prepare(`
      INSERT OR REPLACE INTO memories (id, title, content, tags, project, type, created, updated, embedding)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      record.id,
      record.title,
      record.content,
      JSON.stringify(record.tags),
      record.project,
      record.type,
      record.created,
      record.updated,
      embeddingBuf
    );

    if (this.vecReady) {
      const rowid = BigInt(this.idToRowid(record.id));

      // Always drop the previous vector: a record re-stored without an
      // embedding must not keep matching on its old one.
      try {
        db.prepare("DELETE FROM memories_vec WHERE rowid = ?").run(rowid);
      } catch {
        // Nothing to delete.
      }

      if (embeddingBuf) {
        try {
          db.prepare("INSERT INTO memories_vec(rowid, embedding) VALUES (?, ?)").run(
            rowid,
            embeddingBuf
          );
        } catch (err) {
          console.warn("[unipi/memory] Failed to insert vector:", err);
        }
      }
    }

    writeMemoryFile(mdPath, record);
  }

  /**
   * Get a memory record by ID.
   *
   * @returns The record, or `null` when no row has that id.
   * @throws When the storage is not initialized.
   */
  getById(id: string): MemoryRecord | null {
    const db = this.requireDb();
    const row = db.prepare("SELECT * FROM memories WHERE id = ?").get(id) as
      | MemoryRow
      | undefined;
    return row ? MemoryStorage.rowToRecord(row) : null;
  }

  /**
   * Get a memory record by title: exact match first, then case-insensitive.
   *
   * @returns The record, or `null` when no title matches.
   * @throws When the storage is not initialized.
   */
  getByTitle(title: string): MemoryRecord | null {
    const db = this.requireDb();

    const exact = db.prepare("SELECT * FROM memories WHERE title = ?").get(title) as
      | MemoryRow
      | undefined;
    if (exact) return MemoryStorage.rowToRecord(exact);

    const loose = db
      .prepare("SELECT * FROM memories WHERE LOWER(title) = LOWER(?)")
      .get(title) as MemoryRow | undefined;
    return loose ? MemoryStorage.rowToRecord(loose) : null;
  }

  /**
   * List all memories (id, title and type only), most recently updated first.
   *
   * @throws When the storage is not initialized.
   */
  listAll(): Array<{ id: string; title: string; type: string }> {
    const db = this.requireDb();
    const rows = db
      .prepare("SELECT id, title, type FROM memories ORDER BY updated DESC")
      .all() as Array<{ id: string; title: string; type: string }>;
    return rows.map((r) => ({ id: r.id, title: r.title, type: r.type }));
  }

  /**
   * Delete a memory by ID from the vector table, the `memories` table and the
   * markdown file.
   *
   * @returns `true` when a row was removed from the `memories` table.
   * @throws When the storage is not initialized.
   */
  delete(id: string): boolean {
    const db = this.requireDb();

    if (this.vecReady) {
      try {
        db.prepare("DELETE FROM memories_vec WHERE rowid = ?").run(BigInt(this.idToRowid(id)));
      } catch {
        // Best effort: a missing vector is not an error.
      }
    }

    const result = db.prepare("DELETE FROM memories WHERE id = ?").run(id);

    // Never touch files outside the project directory.
    const mdPath = this.markdownPath(id);
    if (mdPath !== null) {
      try {
        if (fs.existsSync(mdPath)) {
          fs.unlinkSync(mdPath);
        }
      } catch {
        // Best effort: the database row is the source of truth.
      }
    }

    return result.changes > 0;
  }

  /**
   * Delete a memory by title (same matching rules as `getByTitle`).
   *
   * @returns `true` when a matching record was found and deleted.
   */
  deleteByTitle(title: string): boolean {
    const record = this.getByTitle(title);
    if (!record) return false;
    return this.delete(record.id);
  }

  /**
   * Search memories using a hybrid approach: nearest-neighbour vector search
   * (when an embedding is given and sqlite-vec is available) combined with a
   * per-word LIKE search over title and content.
   *
   * A query without any non-whitespace character skips the text search, so
   * only vector hits (if any) are returned.
   *
   * @param query Free-text query.
   * @param limit Maximum number of results.
   * @param embedding Optional query embedding for vector search.
   * @returns Results sorted by descending score.
   * @throws When the storage is not initialized.
   */
  search(query: string, limit = 10, embedding?: Float32Array | null): SearchResult[] {
    const db = this.requireDb();
    const results: Map<string, SearchResult> = new Map();

    // 1. Vector search
    if (embedding && this.vecReady) {
      try {
        const vecRows = db
          .prepare(
            `SELECT rowid, distance FROM memories_vec
             WHERE embedding MATCH ?
             ORDER BY distance
             LIMIT ?`
          )
          .all(MemoryStorage.encodeEmbedding(embedding), limit * 2) as Array<{
          rowid: number | bigint;
          distance: number;
        }>;

        if (vecRows.length > 0) {
          const idByRowid = this.buildRowidIndex();
          for (const vr of vecRows) {
            const memoryId = idByRowid.get(Number(vr.rowid));
            if (memoryId === undefined) continue;
            const record = this.getById(memoryId);
            if (!record) continue;
            const score = 1 - Math.min(vr.distance, 1); // Normalize to 0-1
            const snippet = this.extractSnippet(record.content, query);
            results.set(record.id, { record, score, snippet });
          }
        }
      } catch (err) {
        // Vector search failed, continue with the text search.
        console.warn("[unipi/memory] Vector search failed:", err);
      }
    }

    // 2. Text search: every word must appear in the title or the content.
    const queryWords = query.toLowerCase().split(/\s+/).filter((w) => w.length > 0);

    if (queryWords.length > 0) {
      const wordConditions = queryWords
        .map(() => "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')")
        .join(" AND ");
      const phrasePattern = `%${MemoryStorage.escapeLike(query)}%`;

      const fuzzyRows = db
        .prepare(
          `SELECT *,
             (CASE WHEN LOWER(title) LIKE LOWER(?) ESCAPE '\\' THEN 1 ELSE 0 END) AS title_match,
             (CASE WHEN LOWER(content) LIKE LOWER(?) ESCAPE '\\' THEN 1 ELSE 0 END) AS content_match
           FROM memories
           WHERE ${wordConditions}
           ORDER BY title_match DESC, content_match DESC, updated DESC
           LIMIT ?`
        )
        .all(
          phrasePattern,
          phrasePattern,
          ...queryWords.flatMap((w) => {
            const pattern = `%${MemoryStorage.escapeLike(w)}%`;
            return [pattern, pattern];
          }),
          limit * 2
        ) as Array<MemoryRow & { title_match: number; content_match: number }>;

      for (const fr of fuzzyRows) {
        const fuzzyScore = fr.title_match * 0.7 + fr.content_match * 0.3;
        const existing = results.get(fr.id);
        if (existing) {
          // Boost score if found in both vector and text search
          existing.score = Math.min(existing.score + fuzzyScore * 0.3, 1);
          continue;
        }
        const record = MemoryStorage.rowToRecord(fr);
        results.set(fr.id, {
          record,
          score: fuzzyScore,
          snippet: this.extractSnippet(record.content, query),
        });
      }
    }

    // 3. Sort by score and return top results
    return Array.from(results.values())
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  /**
   * Get the underlying database for advanced queries.
   *
   * @returns The open connection, or `null` when not initialized.
   */
  getDb(): Database.Database | null {
    return this.db;
  }

  /**
   * Get the directory that holds this project's database and markdown files.
   */
  getScopeDir(): string {
    return this.scopeDir;
  }

  /** Return the open connection or throw the "not initialized" error. */
  private requireDb(): Database.Database {
    if (!this.db) throw new Error("Storage not initialized");
    return this.db;
  }

  /**
   * Path of the markdown file for `id`, or `null` when the id would resolve
   * outside the project directory (for example an id containing `..`).
   */
  private markdownPath(id: string): string | null {
    const candidate = path.join(this.scopeDir, `${id}.md`);
    const rel = path.relative(this.scopeDir, candidate);
    const escapes =
      rel === "" || rel === ".." || rel.startsWith(".." + path.sep) || path.isAbsolute(rel);
    return escapes ? null : candidate;
  }

  /** Convert a database row into a MemoryRecord. */
  private static rowToRecord(row: MemoryRow): MemoryRecord {
    return {
      id: row.id,
      title: row.title,
      content: row.content,
      tags: MemoryStorage.parseTags(row.tags),
      project: row.project,
      type: row.type,
      created: row.created,
      updated: row.updated,
      embedding: MemoryStorage.decodeEmbedding(row.embedding),
    };
  }

  /** Parse the JSON tag list of a row; malformed or missing data yields []. */
  private static parseTags(raw: string | null): string[] {
    if (!raw) return [];
    try {
      const parsed: unknown = JSON.parse(raw);
      return Array.isArray(parsed) ? parsed.map((tag) => String(tag)) : [];
    } catch {
      return [];
    }
  }

  /**
   * Serialize exactly the bytes the Float32Array views; the array may be a
   * window onto a larger ArrayBuffer.
   */
  private static encodeEmbedding(embedding: Float32Array): Buffer {
    return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  }

  /**
   * Rebuild a Float32Array from a BLOB. The bytes are copied first because a
   * Buffer may sit at an arbitrary, possibly unaligned, offset inside a shared
   * ArrayBuffer.
   */
  private static decodeEmbedding(blob: Buffer | null): Float32Array | null {
    if (!blob) return null;
    const bytes = Uint8Array.from(blob);
    return new Float32Array(bytes.buffer, 0, Math.floor(bytes.byteLength / 4));
  }

  /** Backslash-escape LIKE wildcards so user text is matched literally. */
  private static escapeLike(text: string): string {
    return text.replace(/[\\%_]/g, "\\$&");
  }

  /**
   * Extract a snippet around the first match of the whole query, falling back
   * to the first query word that occurs, and finally to the start of the
   * content.
   */
  private extractSnippet(content: string, query: string, chars = 100): string {
    const haystack = content.toLowerCase();
    const phrase = query.toLowerCase();
    const candidates = [phrase, ...phrase.split(/\s+/)].filter((term) => term.length > 0);

    let idx = -1;
    let matchLength = 0;
    for (const term of candidates) {
      const at = haystack.indexOf(term);
      if (at !== -1) {
        idx = at;
        matchLength = term.length;
        break;
      }
    }

    if (idx === -1) {
      // No match, return beginning
      return content.slice(0, chars) + (content.length > chars ? "..." : "");
    }

    const half = Math.floor(chars / 2);
    const start = Math.max(0, idx - half);
    const end = Math.min(content.length, idx + matchLength + half);
    let snippet = content.slice(start, end);

    if (start > 0) snippet = "..." + snippet;
    if (end < content.length) snippet = snippet + "...";

    return snippet;
  }

  /**
   * Convert a string ID to the numeric rowid used in `memories_vec`:
   * a 32-bit multiply-by-31 string hash, made non-negative and reduced
   * modulo 1,000,000.
   */
  private idToRowid(id: string): number {
    let hash = 0;
    for (let i = 0; i < id.length; i++) {
      hash = ((hash << 5) - hash + id.charCodeAt(i)) | 0;
    }
    return Math.abs(hash) % 1_000_000;
  }

  /**
   * Build the inverse of `idToRowid` for every record that has an embedding.
   * On a hash collision the first id encountered is kept.
   */
  private buildRowidIndex(): Map<number, string> {
    const index = new Map<number, string>();
    if (!this.db) return index;

    const rows = this.db
      .prepare("SELECT id FROM memories WHERE embedding IS NOT NULL")
      .all() as Array<{ id: string }>;
    for (const { id } of rows) {
      const rowid = this.idToRowid(id);
      if (!index.has(rowid)) index.set(rowid, id);
    }
    return index;
  }
}
545
+
546
/**
 * Search across ALL project directories that contain a memory database.
 *
 * Each project is searched with the text query only (no embedding) and the
 * per-project hits are merged. Projects whose database cannot be opened or
 * queried are skipped.
 *
 * @param query Free-text query.
 * @param limit Maximum number of results, applied per project and to the
 *   merged list.
 * @returns Merged results sorted by descending score.
 */
export function searchAllProjects(
  query: string,
  limit = 10
): SearchResult[] {
  const allResults: SearchResult[] = [];

  for (const { name: projectName, dir } of getAllProjectDirs()) {
    const dbPath = path.join(dir, MEMORY_DB_NAME);
    if (!fs.existsSync(dbPath)) continue;

    const storage = new MemoryStorage(projectName);
    try {
      storage.init();
      allResults.push(...storage.search(query, limit));
    } catch {
      // Skip projects with corrupted DB
    } finally {
      // Release the connection even when init() or search() threw.
      try {
        storage.close();
      } catch {
        // Nothing more can be done for this project.
      }
    }
  }

  // Sort by score and return top results
  return allResults
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
577
+
578
/**
 * List memories from ALL project directories that contain a memory database.
 *
 * Projects whose database cannot be opened or queried are skipped.
 *
 * @returns One entry per memory, tagged with the name of the project
 *   directory it was read from.
 */
export function listAllProjects(): Array<{
  project: string;
  id: string;
  title: string;
  type: string;
}> {
  const allMemories: Array<{
    project: string;
    id: string;
    title: string;
    type: string;
  }> = [];

  for (const { name: projectName, dir } of getAllProjectDirs()) {
    const dbPath = path.join(dir, MEMORY_DB_NAME);
    if (!fs.existsSync(dbPath)) continue;

    const storage = new MemoryStorage(projectName);
    try {
      storage.init();
      for (const m of storage.listAll()) {
        allMemories.push({
          project: projectName,
          id: m.id,
          title: m.title,
          type: m.type,
        });
      }
    } catch {
      // Skip projects with corrupted DB
    } finally {
      // Release the connection even when init() or listAll() threw.
      try {
        storage.close();
      } catch {
        // Nothing more can be done for this project.
      }
    }
  }

  return allMemories;
}
620
+
621
/**
 * In-memory storage fallback when SQLite is unavailable.
 *
 * Offers the same methods as the SQLite-backed storage, without vector search
 * and without persistence. Text search follows the same rules: every query
 * word must occur in the title or the content, and the score is 0.7 for a
 * whole-query match in the title plus 0.3 for one in the content.
 */
export class InMemoryStorage {
  private records: Map<string, MemoryRecord> = new Map();
  private projectName: string;
  // Accepted for constructor compatibility; not read anywhere in this class.
  private globalScope: boolean;

  /**
   * @param projectName Default project assigned to records stored without one.
   * @param globalScope Stored but currently unused.
   */
  constructor(projectName: string, globalScope = false) {
    this.projectName = projectName;
    this.globalScope = globalScope;
  }

  /**
   * Store or update a record. The passed record is updated in place (id,
   * timestamps, project) and kept by reference.
   */
  store(record: MemoryRecord): void {
    if (!record.id) {
      record.id = record.title.toLowerCase().replace(/[^a-z0-9]+/g, "_");
    }
    const now = new Date().toISOString();
    if (!record.created) record.created = now;
    record.updated = now;
    if (!record.project) record.project = this.projectName;
    this.records.set(record.id, record);
  }

  /** Get a record by id, or `null` when absent. */
  getById(id: string): MemoryRecord | null {
    return this.records.get(id) ?? null;
  }

  /** Get the first record whose title matches case-insensitively, or `null`. */
  getByTitle(title: string): MemoryRecord | null {
    const wanted = title.toLowerCase();
    for (const record of this.records.values()) {
      if (record.title.toLowerCase() === wanted) {
        return record;
      }
    }
    return null;
  }

  /** List id, title and type of every record, most recently updated first. */
  listAll(): Array<{ id: string; title: string; type: string }> {
    return Array.from(this.records.values())
      // ISO-8601 timestamps order correctly as plain strings; ties keep
      // insertion order because Array.prototype.sort is stable.
      .sort((a, b) => (a.updated < b.updated ? 1 : a.updated > b.updated ? -1 : 0))
      .map((r) => ({ id: r.id, title: r.title, type: r.type }));
  }

  /** Delete a record by id; returns `true` when it existed. */
  delete(id: string): boolean {
    return this.records.delete(id);
  }

  /** Delete the record matching `title` (see `getByTitle`). */
  deleteByTitle(title: string): boolean {
    const record = this.getByTitle(title);
    if (!record) return false;
    return this.delete(record.id);
  }

  /**
   * Text search over title and content.
   *
   * @param query Free-text query; a query without any non-whitespace
   *   character yields no results.
   * @param limit Maximum number of results.
   * @returns Results sorted by descending score.
   */
  search(query: string, limit = 10): SearchResult[] {
    const phrase = query.toLowerCase();
    const words = phrase.split(/\s+/).filter((w) => w.length > 0);
    if (words.length === 0) return [];

    const results: SearchResult[] = [];
    for (const record of this.records.values()) {
      const title = record.title.toLowerCase();
      const body = record.content.toLowerCase();

      // Every word must occur somewhere in the record.
      if (!words.every((w) => title.includes(w) || body.includes(w))) continue;

      // Whole-query matches drive the score; title weighs more than content.
      const score = (title.includes(phrase) ? 1 : 0) * 0.7 + (body.includes(phrase) ? 1 : 0) * 0.3;
      const snippet = this.extractSnippet(record.content, query);
      results.push({ record, score, snippet });
    }

    return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }

  /** No-op: there is nothing to release for in-memory storage. */
  close(): void {
    // No-op for in-memory
  }

  /**
   * Extract a snippet around the first match of the whole query, falling back
   * to the first query word that occurs, and finally to the start of the
   * content.
   */
  private extractSnippet(content: string, query: string, chars = 100): string {
    const haystack = content.toLowerCase();
    const phrase = query.toLowerCase();
    const candidates = [phrase, ...phrase.split(/\s+/)].filter((term) => term.length > 0);

    let idx = -1;
    let matchLength = 0;
    for (const term of candidates) {
      const at = haystack.indexOf(term);
      if (at !== -1) {
        idx = at;
        matchLength = term.length;
        break;
      }
    }

    if (idx === -1) {
      return content.slice(0, chars) + (content.length > chars ? "..." : "");
    }

    const half = Math.floor(chars / 2);
    const start = Math.max(0, idx - half);
    const end = Math.min(content.length, idx + matchLength + half);
    let snippet = content.slice(start, end);

    if (start > 0) snippet = "..." + snippet;
    if (end < content.length) snippet = snippet + "...";

    return snippet;
  }
}