@hbarefoot/engram 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,626 @@
1
+ import Database from 'better-sqlite3';
2
+ import { generateId } from '../utils/id.js';
3
+ import * as logger from '../utils/logger.js';
4
+
/**
 * Cosine-similarity cut-offs used when deciding how to treat a new memory
 * that resembles an existing one.
 */
const MERGE_THRESHOLD = 0.92; // At or above this (and below the duplicate cut-off) - merge
const DUPLICATE_THRESHOLD = 0.95; // At or above this - nearly identical, reject
10
+
/**
 * Open (or create) the SQLite database and bring its schema up to date.
 *
 * Enables WAL journaling and foreign-key enforcement, then runs the
 * migrations. If configuring or migrating the database throws, the freshly
 * opened handle is closed before the error is re-thrown, so a failed
 * initialization does not leave an open connection behind.
 *
 * @param {string} dbPath - Path to SQLite database file
 * @returns {Database} SQLite database instance
 * @throws {Error} Any error raised while opening, configuring or migrating
 */
export function initDatabase(dbPath) {
  logger.info('Initializing database', { path: dbPath });

  const db = new Database(dbPath);

  try {
    // Set pragmas for performance and safety
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');

    // Run migrations
    runMigrations(db);
  } catch (error) {
    // The caller never receives this handle, so nobody else could close it.
    db.close();
    throw error;
  }

  logger.info('Database initialized successfully');
  return db;
}
31
+
/**
 * Run database migrations.
 *
 * Every statement is idempotent (`IF NOT EXISTS`), so this can be executed on
 * each start-up. The one non-idempotent step - adding `feedback_score` to a
 * `memories` table created before that column existed - is guarded by an
 * explicit column check instead of a catch-all, so genuine failures
 * (I/O errors, locked database, ...) are no longer silently ignored.
 *
 * @param {Database} db - SQLite database instance
 * @throws {Error} Any error raised by the database while applying a migration
 */
function runMigrations(db) {
  // Core memories table
  db.exec(`
    CREATE TABLE IF NOT EXISTS memories (
      id TEXT PRIMARY KEY,
      content TEXT NOT NULL,
      entity TEXT,
      category TEXT NOT NULL DEFAULT 'fact',
      confidence REAL NOT NULL DEFAULT 0.8,
      embedding BLOB,
      source TEXT DEFAULT 'manual',
      namespace TEXT DEFAULT 'default',
      tags TEXT DEFAULT '[]',
      created_at INTEGER NOT NULL,
      updated_at INTEGER NOT NULL,
      last_accessed INTEGER,
      access_count INTEGER DEFAULT 0,
      decay_rate REAL DEFAULT 0.01,
      feedback_score REAL DEFAULT 0.0
    );
  `);

  // Memory feedback table for confidence adjustments
  db.exec(`
    CREATE TABLE IF NOT EXISTS memory_feedback (
      id TEXT PRIMARY KEY,
      memory_id TEXT NOT NULL,
      helpful INTEGER NOT NULL,
      context TEXT,
      created_at INTEGER NOT NULL,
      FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
    );
  `);

  // Add feedback_score column if it doesn't exist (migration for existing databases).
  // table_info yields one row per column; only ALTER when the column is really missing.
  const memoryColumns = db.pragma('table_info(memories)');
  const hasFeedbackScore = memoryColumns.some((column) => column.name === 'feedback_score');
  if (!hasFeedbackScore) {
    db.exec(`ALTER TABLE memories ADD COLUMN feedback_score REAL DEFAULT 0.0`);
  }

  // Full-text search index (external-content table backed by memories)
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
      content,
      entity,
      tags,
      content='memories',
      content_rowid='rowid'
    );
  `);

  // Triggers to keep FTS in sync
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
      INSERT INTO memories_fts(rowid, content, entity, tags)
      VALUES (new.rowid, new.content, new.entity, new.tags);
    END;
  `);

  db.exec(`
    CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
      INSERT INTO memories_fts(memories_fts, rowid, content, entity, tags)
      VALUES ('delete', old.rowid, old.content, old.entity, old.tags);
    END;
  `);

  // An update is mirrored as "delete the old entry, insert the new one".
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN
      INSERT INTO memories_fts(memories_fts, rowid, content, entity, tags)
      VALUES ('delete', old.rowid, old.content, old.entity, old.tags);
      INSERT INTO memories_fts(rowid, content, entity, tags)
      VALUES (new.rowid, new.content, new.entity, new.tags);
    END;
  `);

  // Indexes for common queries
  db.exec(`
    CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category);
    CREATE INDEX IF NOT EXISTS idx_memories_entity ON memories(entity);
    CREATE INDEX IF NOT EXISTS idx_memories_namespace ON memories(namespace);
    CREATE INDEX IF NOT EXISTS idx_memories_confidence ON memories(confidence);
    CREATE INDEX IF NOT EXISTS idx_memories_created_at ON memories(created_at);
    CREATE INDEX IF NOT EXISTS idx_memories_last_accessed ON memories(last_accessed);
    CREATE INDEX IF NOT EXISTS idx_memories_feedback_score ON memories(feedback_score);
    CREATE INDEX IF NOT EXISTS idx_memories_namespace_created ON memories(namespace, created_at);
    CREATE INDEX IF NOT EXISTS idx_memory_feedback_memory_id ON memory_feedback(memory_id);
  `);

  // Metadata table for system state
  db.exec(`
    CREATE TABLE IF NOT EXISTS meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);

  logger.debug('Database migrations completed');
}
135
+
/**
 * Look for an existing memory whose embedding is close enough to a new one
 * to count as a merge candidate or an outright duplicate.
 *
 * Only the single closest memory at or above MERGE_THRESHOLD is reported.
 *
 * @param {Database} db - SQLite database instance
 * @param {Float32Array} embedding - Embedding of new content
 * @param {string} namespace - Namespace to search in
 * @returns {Object|null} `{ memory, similarity, isDuplicate, shouldMerge }`
 *   for the closest match, or null when there is no embedding or no match
 */
export function checkDuplicate(db, embedding, namespace) {
  if (!embedding) {
    return null;
  }

  // Candidates are all memories with an embedding in this namespace.
  const candidates = getMemoriesWithEmbeddings(db, namespace);

  let closest = null;
  let closestScore = 0;

  candidates.forEach((candidate) => {
    if (!candidate.embedding) {
      return;
    }

    const score = calculateCosineSimilarity(embedding, candidate.embedding);
    if (score >= MERGE_THRESHOLD && score > closestScore) {
      closest = candidate;
      closestScore = score;
    }
  });

  if (closest === null) {
    return null;
  }

  const isDuplicate = closestScore >= DUPLICATE_THRESHOLD;
  const shouldMerge = closestScore >= MERGE_THRESHOLD && !isDuplicate;

  return {
    memory: closest,
    similarity: closestScore,
    isDuplicate,
    shouldMerge
  };
}
180
+
/**
 * Calculate cosine similarity between two embeddings.
 *
 * The raw cosine is clamped to [0, 1], so vectors pointing in opposite
 * directions score 0. Vectors of different length score 0. When the cosine
 * is undefined (an empty or all-zero vector, or NaN components) the function
 * returns 0 instead of letting NaN leak to callers.
 *
 * @param {Float32Array} a - First embedding
 * @param {Float32Array} b - Second embedding
 * @returns {number} Similarity score (0-1)
 */
function calculateCosineSimilarity(a, b) {
  if (a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

  // 0 / 0 (zero-norm or empty vectors) and NaN inputs have no defined angle.
  if (Number.isNaN(similarity)) {
    return 0;
  }

  return Math.max(0, Math.min(1, similarity));
}
205
+
/**
 * Store a new memory with deduplication check.
 *
 * Unless `options.force` is set, a memory that carries an embedding is first
 * compared against existing memories in the same namespace:
 *   - near-identical match  -> nothing is written, status 'duplicate'
 *   - similar match         -> the existing memory is updated, status 'merged'
 *   - no match              -> a new memory is created, status 'created'
 *
 * When merging, the longer content wins if the new content is longer,
 * otherwise the new content is appended; tags are unioned; the higher
 * confidence is kept; the new embedding replaces the old one. An explicit
 * confidence of 0 is respected - only a missing confidence falls back to
 * 0.8, matching createMemory.
 *
 * @param {Database} db - SQLite database instance
 * @param {Object} memory - Memory object
 * @param {string} memory.content - Memory content
 * @param {string} [memory.category='fact'] - Memory category
 * @param {string} [memory.entity] - Entity this memory is about
 * @param {number} [memory.confidence=0.8] - Confidence score (0-1)
 * @param {Float32Array} [memory.embedding] - Embedding vector
 * @param {string} [memory.source='manual'] - Source of memory
 * @param {string} [memory.namespace='default'] - Namespace
 * @param {string[]} [memory.tags=[]] - Tags
 * @param {number} [memory.decay_rate=0.01] - Decay rate
 * @param {Object} [options] - Behaviour switches
 * @param {boolean} [options.force=false] - Bypass deduplication check
 * @returns {Object} Result object with status, id, and message
 */
export function createMemoryWithDedup(db, memory, options = {}) {
  const { force = false } = options;
  const namespace = memory.namespace || 'default';

  // Only a missing confidence gets the default; 0 is a legitimate value.
  const confidenceOrDefault = (value) =>
    (value === undefined || value === null ? 0.8 : value);

  // Check for duplicates if not forced and embedding is available
  if (!force && memory.embedding) {
    const duplicateCheck = checkDuplicate(db, memory.embedding, namespace);

    if (duplicateCheck) {
      if (duplicateCheck.isDuplicate) {
        // Nearly identical - reject
        logger.info('Duplicate memory detected, rejecting', {
          existingId: duplicateCheck.memory.id,
          similarity: duplicateCheck.similarity
        });

        return {
          status: 'duplicate',
          id: duplicateCheck.memory.id,
          message: `Similar memory already exists: ${duplicateCheck.memory.id.substring(0, 8)}`,
          similarity: duplicateCheck.similarity,
          existingContent: duplicateCheck.memory.content
        };
      }

      if (duplicateCheck.shouldMerge) {
        // Similar but potentially adds info - merge
        logger.info('Similar memory found, merging', {
          existingId: duplicateCheck.memory.id,
          similarity: duplicateCheck.similarity
        });

        // Merge: take the new content if it is longer, otherwise append it
        const existingMemory = duplicateCheck.memory;
        const newContent = memory.content.length > existingMemory.content.length
          ? memory.content
          : `${existingMemory.content} ${memory.content}`.trim();

        // Merge tags (union, existing tags first)
        const existingTags = existingMemory.tags || [];
        const newTags = memory.tags || [];
        const mergedTags = [...new Set([...existingTags, ...newTags])];

        // Use higher confidence
        const mergedConfidence = Math.max(
          confidenceOrDefault(existingMemory.confidence),
          confidenceOrDefault(memory.confidence)
        );

        // Update the existing memory
        const updated = updateMemory(db, existingMemory.id, {
          content: newContent,
          tags: mergedTags,
          confidence: mergedConfidence,
          embedding: memory.embedding // Use newer embedding
        });

        return {
          status: 'merged',
          id: existingMemory.id,
          message: `Memory merged with existing: ${existingMemory.id.substring(0, 8)}`,
          similarity: duplicateCheck.similarity,
          memory: updated
        };
      }
    }
  }

  // No duplicate found or force=true, create new memory
  const created = createMemory(db, memory);

  return {
    status: 'created',
    id: created.id,
    message: 'Memory stored successfully',
    memory: created
  };
}
300
+
/**
 * Store a new memory (basic version without deduplication).
 *
 * Tags are stored as a JSON string and the embedding as a BLOB holding the
 * raw bytes of the Float32Array. Only the bytes the typed array actually
 * covers are written, so an embedding that is a view into a larger buffer
 * (e.g. produced by `subarray`) is persisted correctly.
 *
 * @param {Database} db - SQLite database instance
 * @param {Object} memory - Memory object
 * @param {string} memory.content - Memory content
 * @param {string} [memory.category='fact'] - Memory category
 * @param {string} [memory.entity] - Entity this memory is about
 * @param {number} [memory.confidence=0.8] - Confidence score (0-1)
 * @param {Float32Array} [memory.embedding] - Embedding vector
 * @param {string} [memory.source='manual'] - Source of memory
 * @param {string} [memory.namespace='default'] - Namespace
 * @param {string[]} [memory.tags=[]] - Tags
 * @param {number} [memory.decay_rate=0.01] - Decay rate
 * @returns {Object} Stored memory with ID
 */
export function createMemory(db, memory) {
  const id = generateId();
  const now = Date.now();

  const tags = JSON.stringify(memory.tags || []);

  // Respect byteOffset/byteLength: `.buffer` alone may be larger than the view.
  const embeddingBuffer = memory.embedding
    ? Buffer.from(
      memory.embedding.buffer,
      memory.embedding.byteOffset,
      memory.embedding.byteLength
    )
    : null;

  const stmt = db.prepare(`
    INSERT INTO memories (
      id, content, entity, category, confidence, embedding,
      source, namespace, tags, created_at, updated_at, decay_rate
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  stmt.run(
    id,
    memory.content,
    memory.entity || null,
    memory.category || 'fact',
    // 0 is a valid confidence / decay rate, so only `undefined` gets the default
    memory.confidence !== undefined ? memory.confidence : 0.8,
    embeddingBuffer,
    memory.source || 'manual',
    memory.namespace || 'default',
    tags,
    now,
    now,
    memory.decay_rate !== undefined ? memory.decay_rate : 0.01
  );

  logger.debug('Memory created', { id, category: memory.category });

  return getMemory(db, id);
}
351
+
/**
 * Fetch a single memory by its primary key.
 * @param {Database} db - SQLite database instance
 * @param {string} id - Memory ID
 * @returns {Object|null} The deserialized memory, or null when no row has that ID
 */
export function getMemory(db, id) {
  const record = db.prepare('SELECT * FROM memories WHERE id = ?').get(id);
  return record ? deserializeMemory(record) : null;
}
368
+
/**
 * Update a memory.
 *
 * Only allow-listed columns are written; unknown keys in `updates` are
 * ignored. `tags` is serialized to JSON and `embedding` to a BLOB containing
 * exactly the bytes covered by the Float32Array (so views into a larger
 * buffer are stored correctly). `updated_at` is refreshed on every write.
 * If `updates` contains no allowed field, nothing is written and the current
 * memory is returned.
 *
 * @param {Database} db - SQLite database instance
 * @param {string} id - Memory ID
 * @param {Object} updates - Fields to update
 * @returns {Object|null} Updated memory or null if not found
 */
export function updateMemory(db, id, updates) {
  // Column names are interpolated into the SQL below, so they must come
  // from this fixed list and never from caller input.
  const allowedFields = [
    'content', 'entity', 'category', 'confidence',
    'embedding', 'source', 'namespace', 'tags', 'decay_rate'
  ];

  const fields = [];
  const values = [];

  for (const [key, value] of Object.entries(updates)) {
    if (!allowedFields.includes(key)) {
      continue;
    }

    fields.push(`${key} = ?`);

    if (key === 'tags') {
      values.push(JSON.stringify(value));
    } else if (key === 'embedding' && value) {
      // Respect byteOffset/byteLength: `.buffer` alone may be larger than the view.
      values.push(Buffer.from(value.buffer, value.byteOffset, value.byteLength));
    } else {
      values.push(value);
    }
  }

  if (fields.length === 0) {
    return getMemory(db, id);
  }

  // Add updated_at
  fields.push('updated_at = ?');
  values.push(Date.now());

  // Add id for WHERE clause
  values.push(id);

  const stmt = db.prepare(`
    UPDATE memories
    SET ${fields.join(', ')}
    WHERE id = ?
  `);

  const result = stmt.run(...values);

  if (result.changes === 0) {
    return null;
  }

  logger.debug('Memory updated', { id });

  return getMemory(db, id);
}
427
+
/**
 * Remove the memory with the given ID.
 * @param {Database} db - SQLite database instance
 * @param {string} id - Memory ID
 * @returns {boolean} True when a row was removed, false when no row had that ID
 */
export function deleteMemory(db, id) {
  const { changes } = db.prepare('DELETE FROM memories WHERE id = ?').run(id);
  const removed = changes > 0;

  if (removed) {
    logger.debug('Memory deleted', { id });
  }

  return removed;
}
445
+
/**
 * List memories with optional filters.
 *
 * `options.sort` ends up in the ORDER BY clause, which cannot be bound as a
 * parameter, so it is validated first: it must be a comma-separated list of
 * `column` or `column ASC|DESC` terms where `column` is a column of the
 * memories table. Anything else is rejected instead of being interpolated
 * into the SQL.
 *
 * @param {Database} db - SQLite database instance
 * @param {Object} [options] - Query options
 * @param {string} [options.namespace] - Filter by namespace
 * @param {string} [options.category] - Filter by category
 * @param {number} [options.limit=50] - Maximum results
 * @param {number} [options.offset=0] - Offset for pagination
 * @param {string} [options.sort='created_at DESC'] - Sort order
 * @returns {Object[]} Array of memories
 * @throws {Error} If `options.sort` is not a valid sort expression
 */
export function listMemories(db, options = {}) {
  const {
    namespace,
    category,
    limit = 50,
    offset = 0,
    sort = 'created_at DESC'
  } = options;

  const orderBy = normalizeSortExpression(sort);

  let query = 'SELECT * FROM memories WHERE 1=1';
  const params = [];

  if (namespace) {
    query += ' AND namespace = ?';
    params.push(namespace);
  }

  if (category) {
    query += ' AND category = ?';
    params.push(category);
  }

  query += ` ORDER BY ${orderBy} LIMIT ? OFFSET ?`;
  params.push(limit, offset);

  const stmt = db.prepare(query);
  const rows = stmt.all(...params);

  return rows.map(deserializeMemory);
}

/**
 * Validate a sort expression and rebuild it from allow-listed parts only.
 * @param {string} sort - e.g. 'created_at DESC' or 'confidence desc, updated_at'
 * @returns {string} Normalized ORDER BY expression
 * @throws {Error} If any term is not `column [ASC|DESC]` on a known column
 */
function normalizeSortExpression(sort) {
  const sortableColumns = [
    'id', 'content', 'entity', 'category', 'confidence', 'source',
    'namespace', 'tags', 'created_at', 'updated_at', 'last_accessed',
    'access_count', 'decay_rate', 'feedback_score'
  ];

  return String(sort)
    .split(',')
    .map((term) => {
      const match = /^([A-Za-z_]+)(?:\s+(ASC|DESC))?$/i.exec(term.trim());
      const column = match ? match[1].toLowerCase() : null;

      if (column === null || !sortableColumns.includes(column)) {
        throw new Error(`Invalid sort expression: ${sort}`);
      }

      return match[2] ? `${column} ${match[2].toUpperCase()}` : column;
    })
    .join(', ');
}
487
+
/**
 * Search memories using FTS.
 *
 * The query is passed verbatim to the FTS5 MATCH operator, so FTS5 query
 * syntax applies. A missing, non-string or blank query cannot match anything
 * and is not a valid MATCH expression, so it yields an empty result without
 * touching the database.
 *
 * @param {Database} db - SQLite database instance
 * @param {string} query - Search query
 * @param {number} [limit=20] - Maximum results
 * @returns {Object[]} Array of matching memories
 */
export function searchMemories(db, query, limit = 20) {
  if (typeof query !== 'string' || query.trim() === '') {
    return [];
  }

  const stmt = db.prepare(`
    SELECT m.*
    FROM memories m
    JOIN memories_fts fts ON m.rowid = fts.rowid
    WHERE memories_fts MATCH ?
    LIMIT ?
  `);

  const rows = stmt.all(query, limit);
  return rows.map(deserializeMemory);
}
507
+
/**
 * Load every memory that has a stored embedding, for similarity search.
 * @param {Database} db - SQLite database instance
 * @param {string} [namespace] - When given, restrict the result to this namespace
 * @returns {Object[]} Array of memories with embeddings
 */
export function getMemoriesWithEmbeddings(db, namespace) {
  const conditions = ['embedding IS NOT NULL'];
  const bindings = [];

  if (namespace) {
    conditions.push('namespace = ?');
    bindings.push(namespace);
  }

  const sql = `SELECT * FROM memories WHERE ${conditions.join(' AND ')}`;

  return db
    .prepare(sql)
    .all(...bindings)
    .map(deserializeMemory);
}
528
+
/**
 * Record an access for each given memory: stamp last_accessed with the
 * current time and bump access_count by one. Does nothing for an empty list.
 * @param {Database} db - SQLite database instance
 * @param {string[]} ids - Memory IDs to update
 */
export function updateAccessStats(db, ids) {
  if (ids.length === 0) {
    return;
  }

  const timestamp = Date.now();

  // One bound placeholder per ID for the IN (...) list.
  const idSlots = ids.map(() => '?').join(',');

  db.prepare(`
    UPDATE memories
    SET last_accessed = ?,
    access_count = access_count + 1
    WHERE id IN (${idSlots})
  `).run(timestamp, ...ids);

  logger.debug('Access stats updated', { count: ids.length });
}
551
+
/**
 * Collect summary counts about the stored memories.
 * @param {Database} db - SQLite database instance
 * @returns {Object} `{ total, byCategory, byNamespace, withEmbeddings }` where
 *   the two `by*` entries map each distinct value to its row count
 */
export function getStats(db) {
  // Runs a single-row COUNT(*) query and returns the number.
  const countRows = (sql) => db.prepare(sql).get().count;

  const total = countRows('SELECT COUNT(*) as count FROM memories');

  const categoryRows = db.prepare(`
    SELECT category, COUNT(*) as count
    FROM memories
    GROUP BY category
  `).all();
  const byCategory = Object.fromEntries(
    categoryRows.map(({ category, count }) => [category, count])
  );

  const namespaceRows = db.prepare(`
    SELECT namespace, COUNT(*) as count
    FROM memories
    GROUP BY namespace
  `).all();
  const byNamespace = Object.fromEntries(
    namespaceRows.map(({ namespace, count }) => [namespace, count])
  );

  const withEmbeddings = countRows(
    'SELECT COUNT(*) as count FROM memories WHERE embedding IS NOT NULL'
  );

  return { total, byCategory, byNamespace, withEmbeddings };
}
591
+
/**
 * Deserialize a database row into a memory object.
 *
 * `tags` is parsed from its JSON text; a NULL/empty column or a JSON value
 * that is not an array yields an empty list so callers always get an array.
 * A NULL `feedback_score` becomes 0.
 *
 * The embedding BLOB is copied into a fresh, 4-byte-aligned buffer before it
 * is exposed as a Float32Array: a typed-array view cannot start at a byte
 * offset that is not a multiple of 4, which a Buffer's `byteOffset` is not
 * guaranteed to be. Trailing bytes that do not form a whole float are
 * dropped. The `embedding` key is only set when the row has one.
 *
 * @param {Object} row - Database row
 * @returns {Object} Memory object
 * @throws {SyntaxError} If `row.tags` holds text that is not valid JSON
 */
function deserializeMemory(row) {
  const parsedTags = row.tags ? JSON.parse(row.tags) : [];

  const memory = {
    id: row.id,
    content: row.content,
    entity: row.entity,
    category: row.category,
    confidence: row.confidence,
    source: row.source,
    namespace: row.namespace,
    tags: Array.isArray(parsedTags) ? parsedTags : [],
    created_at: row.created_at,
    updated_at: row.updated_at,
    last_accessed: row.last_accessed,
    access_count: row.access_count,
    decay_rate: row.decay_rate,
    feedback_score: row.feedback_score || 0
  };

  // Deserialize embedding if present
  if (row.embedding) {
    const blob = row.embedding;
    const wholeFloatBytes = blob.byteLength - (blob.byteLength % 4);

    // slice() copies into a new ArrayBuffer starting at offset 0 (always aligned).
    memory.embedding = new Float32Array(
      blob.buffer.slice(blob.byteOffset, blob.byteOffset + wholeFloatBytes)
    );
  }

  return memory;
}