@alex900530/claude-persistent-memory 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1310 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Memory Database Module - Core Memory System v4.5
4
+ *
5
+ * Features:
6
+ * - SQLite + sqlite-vec vector storage
7
+ * - FTS5 full-text search (BM25)
8
+ * - Incremental clustering
9
+ * - Confidence management
10
+ * - Automatic Skill generation
11
+ * - [v4.5] LLM structured memory
12
+ */
13
+
14
+ const path = require('path');
15
+ const fs = require('fs');
16
+ const { execSync } = require('child_process');
17
+
18
+ // ============== Configuration ==============
19
+
20
+ const config = require('../config');
21
+ const { ensureDir } = require('./utils');
22
+
23
+ // Optional jieba for Chinese word segmentation (installed via optionalDependencies)
24
let jieba = null;
try { jieba = require('nodejieba'); } catch {}

/**
 * Tokenize text for FTS5 indexing and search.
 * With jieba present, Chinese text is segmented into space-separated words;
 * without it the text passes through untouched (FTS5's default tokenizer
 * handles English on its own). Null/empty input yields ''.
 */
function tokenize(text) {
  if (!text) return '';
  return jieba ? jieba.cut(text).join(' ') : text;
}
37
+
38
// Resolved storage locations (both roots come from ../config).
const DATA_DIR = config.dataDir;
const LOG_DIR = config.logDir;
const DB_PATH = path.join(DATA_DIR, 'memory.db');            // SQLite database file
const LOG_FILE = path.join(LOG_DIR, 'memory-db-calls.log');  // append-only diagnostic log

// Ensure directories exist (module-load side effect, so later opens cannot
// fail on a missing parent directory).
ensureDir(DATA_DIR);
ensureDir(LOG_DIR);
47
// Coerce any value to a log-friendly string: null/undefined -> '', strings
// pass through unchanged, everything else is JSON-serialized.
function _str(v) {
  if (v == null) return '';
  if (typeof v === 'string') return v;
  return JSON.stringify(v);
}
48
+
49
/**
 * Append a timestamped line to the call log.
 * Logging failures are swallowed so diagnostics can never break the caller.
 */
function _log(msg) {
  const entry = `[${new Date().toISOString()}] ${msg}\n`;
  try {
    fs.appendFileSync(LOG_FILE, entry);
  } catch (e) {}
}
53
+
54
// Stopwords filtered out of keyword extraction and quick search
// (common Chinese function words + common English function words).
const STOPWORDS = new Set([
  '的', '是', '在', '有', '和', '了', '我', '你', '这', '那', '吗', '呢', '啊',
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'what', 'how', 'can', 'do',
  'does', 'did', 'will', 'would', 'could', 'should', 'to', 'for', 'of', 'in',
  'on', 'at', 'by', 'with', 'from', 'as', 'it', 'this', 'that', 'be', 'have'
]);

// Clustering configuration (concrete values live in ../config)
const CLUSTER_SIMILARITY_THRESHOLD = config.cluster.similarityThreshold; // min cosine similarity to join a cluster
const CLUSTER_MATURITY_COUNT = config.cluster.maturityCount;             // member count needed before a cluster matures
const CLUSTER_MATURITY_CONFIDENCE = config.cluster.maturityConfidence;   // average confidence needed before a cluster matures

// [v4.5] LLM structuring configuration
const STRUCTURIZE_CONFIG = {
  enabled: true, // set to false to skip LLM structuring entirely
};

// ============== Database Management ==============

// Lazily created singletons (see getDb() / getEmbeddingModel()).
let db = null;
let embeddingModel = null;
76
+
77
/**
 * Lazily open (and memoize) the SQLite connection.
 * Tries to load the sqlite-vec extension — vector search degrades to
 * BM25-only when it is missing — and ensures the schema exists.
 * @returns {object} better-sqlite3 Database instance
 * @throws when better-sqlite3 cannot open the database file
 */
function getDb() {
  if (db) return db;

  try {
    const BetterSqlite3 = require('better-sqlite3');
    db = new BetterSqlite3(DB_PATH);

    // The vector extension is optional: warn and carry on without it.
    try {
      require('sqlite-vec').load(db);
    } catch (e) {
      console.error('[memory-db] Warning: sqlite-vec not loaded:', e.message);
    }

    initTables();
    return db;
  } catch (e) {
    console.error('[memory-db] Failed to initialize database:', e.message);
    throw e;
  }
}
99
+
100
/** Close the memoized connection (if open) and reset the singleton. */
function closeDb() {
  if (!db) return;
  db.close();
  db = null;
}
106
+
107
/**
 * Create or migrate the full schema. Idempotent: runs on every connection
 * open. Pieces: memories (the rows of record), memories_fts (standalone FTS5
 * index, manually maintained), memories_vec (sqlite-vec embeddings, only if
 * the extension loaded), clusters (topic groups), plus supporting indexes.
 */
function initTables() {
  // Main memories table
  db.exec(`
    CREATE TABLE IF NOT EXISTS memories (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      content TEXT NOT NULL,
      structured_content TEXT,
      summary TEXT,
      type TEXT DEFAULT 'context',
      tags TEXT,
      keywords TEXT,
      domain TEXT DEFAULT 'general',
      confidence REAL DEFAULT 0.5,
      evidence_count INTEGER DEFAULT 0,
      cluster_id INTEGER,
      source TEXT,
      trigger TEXT,
      action TEXT,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      last_accessed_at DATETIME,
      access_count INTEGER DEFAULT 0,
      FOREIGN KEY (cluster_id) REFERENCES clusters(id)
    )
  `);

  // [v4.5] Add structured_content column for databases created before v4.5
  // (ALTER throws when the column already exists — that is the expected path
  // on up-to-date databases).
  try {
    db.exec(`ALTER TABLE memories ADD COLUMN structured_content TEXT`);
  } catch (e) {
    // Column already exists, ignore
  }

  // FTS5 full-text search table (standalone, not external content).
  // Managed manually via ftsInsert/ftsDelete so rows can be jieba-tokenized
  // before indexing.
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
        content, structured_content, summary, tags, keywords
      )
    `);
  } catch (e) {
    // Table exists, keep it
  }

  // Migrate from external-content FTS to standalone FTS if needed.
  // Probe: a DELETE on an external-content FTS table throws, which is how an
  // old-style table is detected; on a standalone table the no-op DELETE
  // succeeds silently.
  try {
    db.exec("DELETE FROM memories_fts WHERE rowid = -1");
  } catch (e) {
    // External content FTS table detected, rebuild as standalone
    db.exec('DROP TABLE IF EXISTS memories_fts');
    db.exec(`
      CREATE VIRTUAL TABLE memories_fts USING fts5(
        content, structured_content, summary, tags, keywords
      )
    `);
    // Repopulate FTS with tokenized content (per-row failures are skipped so
    // one bad row cannot abort the whole migration).
    const rows = db.prepare('SELECT id, content, structured_content, summary, tags, keywords FROM memories').all();
    const insertFts = db.prepare('INSERT INTO memories_fts(rowid, content, structured_content, summary, tags, keywords) VALUES (?, ?, ?, ?, ?, ?)');
    for (const r of rows) {
      try {
        insertFts.run(r.id, tokenize(r.content || ''), tokenize(r.structured_content || ''), tokenize(r.summary || ''), tokenize(r.tags || ''), tokenize(r.keywords || ''));
      } catch (e2) {}
    }
  }

  // Vector table (requires sqlite-vec; cosine distance metric).
  // Dimension comes from config so it matches the embedding model.
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec USING vec0(
        embedding float[${config.embedding.dimensions}] distance_metric=cosine
      )
    `);
  } catch (e) {
    // sqlite-vec not available, skip
  }

  // Clusters table (incremental topic clusters over memories)
  db.exec(`
    CREATE TABLE IF NOT EXISTS clusters (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      theme TEXT NOT NULL,
      centroid_id INTEGER,
      centroid_vector TEXT,
      member_count INTEGER DEFAULT 0,
      avg_confidence REAL DEFAULT 0.5,
      domain TEXT DEFAULT 'general',
      status TEXT DEFAULT 'growing',
      evolved_at DATETIME,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
  `);

  // Create indexes for the common filter columns
  db.exec(`
    CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
    CREATE INDEX IF NOT EXISTS idx_memories_domain ON memories(domain);
    CREATE INDEX IF NOT EXISTS idx_memories_confidence ON memories(confidence);
    CREATE INDEX IF NOT EXISTS idx_memories_cluster_id ON memories(cluster_id);
    CREATE INDEX IF NOT EXISTS idx_clusters_status ON clusters(status);
  `);

  // Drop old FTS sync triggers (no longer needed with standalone FTS +
  // manual management via ftsInsert/ftsDelete)
  db.exec(`DROP TRIGGER IF EXISTS memories_ai`);
  db.exec(`DROP TRIGGER IF EXISTS memories_ad`);
  db.exec(`DROP TRIGGER IF EXISTS memories_au`);
}
215
+
216
// ============== FTS Management ==============

/**
 * Index one memory in FTS (rowid mirrors memories.id), tokenizing every
 * field first. Failures are swallowed — the FTS table is an acceleration
 * structure, not the source of truth.
 */
function ftsInsert(id, content, structuredContent, summary, tags, keywords) {
  const tokenized = [content, structuredContent, summary, tags, keywords]
    .map(v => tokenize(v || ''));
  try {
    getDb().prepare(
      'INSERT INTO memories_fts(rowid, content, structured_content, summary, tags, keywords) VALUES (?, ?, ?, ?, ?, ?)'
    ).run(id, ...tokenized);
  } catch (e) {}
}
228
+
229
/** Remove a memory's row from the FTS index; missing rows are ignored. */
function ftsDelete(id) {
  try {
    const stmt = getDb().prepare('DELETE FROM memories_fts WHERE rowid = ?');
    stmt.run(id);
  } catch (e) {}
}
237
+
238
// ============== Embedding Model ==============
// [v6.2] Uses @huggingface/transformers + bge-m3 (ONNX)
// Replaces bge-base-zh-v1.5, 8192 token context, 1024 dimensions, multilingual support

let _pipeline = null; // transformers.js pipeline instance (lazy singleton)

/**
 * Load (once) and return the feature-extraction pipeline.
 * @returns {Promise<object|null>} the pipeline, or null when the model cannot
 *   be loaded — callers treat null as "embeddings unavailable".
 */
async function getEmbeddingModel() {
  if (_pipeline) return _pipeline;

  try {
    const transformers = await import('@huggingface/transformers');
    console.error('[memory-db] Loading bge-m3 via transformers.js...');
    _pipeline = await transformers.pipeline('feature-extraction', config.embedding.model, {
      device: 'cpu'
    });
    console.error('[memory-db] Embedding model ready (bge-m3)');
    return _pipeline;
  } catch (e) {
    console.error('[memory-db] Failed to load embedding model:', e.message);
    return null;
  }
}
260
+
261
/**
 * Embed one text into a dense vector.
 * Logs request/response/error lines (with timing) to the call log.
 * @param {string} text
 * @returns {Promise<number[]|null>} the embedding, or null when the model is
 *   unavailable or inference fails.
 */
async function getEmbedding(text) {
  const t0 = Date.now();
  _log(`[EMBEDDING-REQ] text=${_str(text)}`);

  const model = await getEmbeddingModel();
  if (!model) {
    _log(`[EMBEDDING-ERR] model not available`);
    return null;
  }

  try {
    // CLS pooling + L2 normalization, matching bge-m3's recommended usage.
    const output = await model(text, { pooling: 'cls', normalize: true });
    const vec = Array.from(output.data);
    const norm = Math.sqrt(vec.reduce((s, v) => s + v * v, 0)).toFixed(4);
    _log(`[EMBEDDING-RES] duration=${Date.now() - t0}ms dim=${vec.length} norm=${norm}`);
    return vec;
  } catch (e) {
    _log(`[EMBEDDING-ERR] duration=${Date.now() - t0}ms error=${e.message}`);
    console.error('[memory-db] Failed to get embedding:', e.message);
    return null;
  }
}
283
+
284
/** Pre-load the embedding model so the first real request is not slow. */
async function warmupEmbedding() {
  await getEmbeddingModel();
}
287
+
288
/**
 * Compose the text fed to the embedding model: a "[domain]" prefix (omitted
 * for the catch-all 'general' domain) followed by the content. The prefix
 * packs a little extra semantic signal into the vector.
 */
function buildEmbeddingText(content, domain) {
  const prefix = domain && domain !== 'general' ? `[${domain}] ` : '';
  return prefix + content;
}
300
+
301
// ============== [v4.5] LLM Structuring ==============

/**
 * Ask the local llm-server (via llm-client) to turn raw memory content into
 * structured form. Goes through llm-client rather than `claude --print` so
 * hooks are not re-triggered recursively.
 * @param {string} rawContent - Raw content
 * @param {string} type - Memory type
 * @returns {Promise<object|string|null>} the structured result,
 *   `{ __rejected: true, reason }` when the LLM deems the content not worth
 *   saving, or null on any failure / when structuring is disabled.
 */
async function structurizeWithLLM(rawContent, type) {
  if (!STRUCTURIZE_CONFIG.enabled) return null;

  const t0 = Date.now();
  _log(`[STRUCTURIZE-REQ] type=${type} content=${_str(rawContent)}`);

  try {
    const llmClient = require('./llm-client');
    if (!(await llmClient.isAvailable())) {
      _log(`[STRUCTURIZE-ERR] llm-server not available`);
      return null;
    }

    const structured = await llmClient.structurize(rawContent, type);
    _log(`[STRUCTURIZE-RES] duration=${Date.now() - t0}ms result=${_str(structured)}`);

    // LLM determined the content is not worth saving.
    if (structured && structured.reject) {
      _log(`[STRUCTURIZE-REJECT] reason=${structured.reason || 'low value'}`);
      return { __rejected: true, reason: structured.reason };
    }
    return structured;
  } catch (e) {
    _log(`[STRUCTURIZE-ERR] duration=${Date.now() - t0}ms error=${e.message}`);
    console.error('[memory-db] LLM structurize failed:', e.message);
    return null;
  }
}
338
+
339
/**
 * [v6.1] Render a structured object as a compact <memory> XML snippet.
 *
 * Fields:
 *   <what> core content (required)      <when> trigger scenario / timing
 *   <do>   concrete operations/commands <warn> prohibitions and caveats
 *
 * Each memory type keeps only its relevant subset:
 *   fact/skill: <what>; pattern: all four; decision/preference: <what>+<warn>;
 *   bug: <what>+<do>; context: <what>+<when>.
 *
 * @returns {string|null} the XML, or null when there is nothing to render.
 */
function formatStructuredContent(structured, type = 'context', domain = 'general') {
  if (!structured) return null;

  const what = structured.summary || '';
  const scenarios = structured.scenarios || [];
  const when = scenarios.length > 0 ? scenarios.join(' | ') : '';

  // <do> merges must + prefer rules into one actionable list.
  const rules = structured.rules || {};
  const doText = [...(rules.must || []), ...(rules.prefer || [])].join(';');

  // <warn> carries only the must_not rules.
  const warnText = rules.must_not && rules.must_not.length > 0
    ? rules.must_not.join(';')
    : '';

  const fields = [];
  if (what) fields.push(` <what>${escapeXml(what)}</what>`);
  if (['pattern', 'context'].includes(type) && when) fields.push(` <when>${escapeXml(when)}</when>`);
  if (['pattern', 'bug'].includes(type) && doText) fields.push(` <do>${escapeXml(doText)}</do>`);
  if (['pattern', 'decision', 'preference'].includes(type) && warnText) fields.push(` <warn>${escapeXml(warnText)}</warn>`);

  if (fields.length === 0) return null;

  return [
    `<memory type="${type}" domain="${domain}">`,
    ...fields,
    '</memory>'
  ].join('\n');
}

/** Escape &, <, > so arbitrary text is safe inside XML text nodes. */
function escapeXml(s) {
  return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
404
+
405
// ============== Core Functions ==============

/**
 * Save a memory (with automatic incremental clustering).
 * [v4.5] Added LLM structuring support.
 *
 * Pipeline: dedup check -> optional LLM structuring -> INSERT -> FTS index
 * -> embedding + vector insert -> incremental clustering.
 *
 * @param {string} content - raw memory text
 * @param {object} options - type/domain/tags/confidence/source/trigger/action
 *   plus skipClustering, skipStructurize and a pre-built structuredContent.
 * @returns {Promise<object>} one of:
 *   { id, action: 'updated', similarity }  - near-duplicate found, existing row bumped
 *   { id: null, action: 'rejected', reason } - LLM judged the content not worth saving
 *   { id, action: 'created', cluster }     - new row inserted (cluster may be null)
 */
async function save(content, options = {}) {
  const database = getDb();

  const {
    type = 'context',
    domain = 'general',
    tags = '',
    confidence = 0.5,
    source = 'user',
    trigger = null,
    action = null,
    skipClustering = false,
    skipStructurize = false, // [v4.5] Whether to skip structuring
    structuredContent: preStructuredContent = null // [v6.1] Pre-structured XML (skip LLM)
  } = options;

  // Generate summary (first 100 chars, elided)
  const summary = content.length > 100 ? content.slice(0, 100) + '...' : content;

  // Extract keywords for the keywords column
  const keywords = extractKeywords(content).join(',');

  // Deduplication check: use FTS keyword search to find candidates across the
  // full database.
  // NOTE(review): extractKeywords runs twice on the same content (here and
  // above) — the first result could be reused.
  const dedupKeywords = extractKeywords(content);
  let dedupCandidates = [];

  if (dedupKeywords.length > 0) {
    try {
      // Each keyword becomes a quoted FTS5 phrase; embedded quotes are doubled
      // so they cannot break the MATCH expression.
      const ftsQuery = dedupKeywords.slice(0, 5).map(k => `"${k.replace(/"/g, '""')}"`).join(' OR ');
      dedupCandidates = database.prepare(`
        SELECT m.id, m.content, m.confidence
        FROM memories_fts fts
        JOIN memories m ON fts.rowid = m.id
        WHERE memories_fts MATCH ?
        ORDER BY bm25(memories_fts)
        LIMIT 20
      `).all(ftsQuery);
    } catch (e) {
      // FTS failed, fall through to recent-records fallback
    }
  }

  // Fallback: check the 10 most recent records if FTS found no candidates
  if (dedupCandidates.length === 0) {
    dedupCandidates = database.prepare(`
      SELECT id, content, confidence FROM memories
      ORDER BY created_at DESC
      LIMIT 10
    `).all();
  }

  // Near-duplicate (>= 0.95 text similarity): bump the existing row instead of
  // inserting a second copy, and report 'updated'.
  for (const e of dedupCandidates) {
    const similarity = textSimilarity(content, e.content);
    if (similarity >= 0.95) {
      database.prepare(`
        UPDATE memories
        SET last_accessed_at = CURRENT_TIMESTAMP,
            access_count = access_count + 1,
            confidence = MIN(0.9, confidence + 0.05)
        WHERE id = ?
      `).run(e.id);
      return { id: e.id, action: 'updated', similarity };
    }
  }

  // [v6.1] LLM structuring -> XML (skipped when the caller supplied
  // pre-structured XML, asked to skip, or structuring is disabled)
  let structuredContent = preStructuredContent || null;
  if (!structuredContent && !skipStructurize && STRUCTURIZE_CONFIG.enabled) {
    console.log('[memory-db] Structurizing with LLM...');
    const structured = await structurizeWithLLM(content, type);
    if (structured && structured.__rejected) {
      console.log(`[memory-db] Rejected by LLM: ${structured.reason}`);
      return { id: null, action: 'rejected', reason: structured.reason };
    }
    if (structured) {
      if (typeof structured === 'string' && structured.startsWith('<memory')) {
        // LLM returned XML directly
        structuredContent = structured;
      } else if (typeof structured === 'object') {
        // Legacy format object -> format as XML
        structuredContent = formatStructuredContent(structured, type, domain);
      }
      console.log('[memory-db] Structured content:', structuredContent);
    }
  }

  // Insert new memory
  const result = database.prepare(`
    INSERT INTO memories (content, structured_content, summary, type, tags, keywords, domain, confidence, source, trigger, action)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(content, structuredContent, summary, type, tags, keywords, domain, confidence, source, trigger, action);

  const memoryId = Number(result.lastInsertRowid);

  // Index in FTS with tokenized content
  ftsInsert(memoryId, content, structuredContent, summary, tags, keywords);

  // Generate the embedding vector (structured_content + domain gives better
  // semantic quality than the raw text)
  const embeddingText = buildEmbeddingText(structuredContent || content, domain);
  const embedding = await getEmbedding(embeddingText);
  if (embedding) {
    try {
      // sqlite-vec requires BigInt as rowid
      database.prepare(`
        INSERT INTO memories_vec (rowid, embedding)
        VALUES (?, ?)
      `).run(BigInt(memoryId), JSON.stringify(embedding));
    } catch (e) {
      console.error('[memory-db] Vector insert failed:', e.message);
    }
  }

  // Incremental clustering (requires a vector; skipped on request)
  let clusterResult = null;
  if (!skipClustering && embedding) {
    clusterResult = await tryJoinCluster(memoryId, embedding, domain, confidence);
  }

  return {
    id: memoryId,
    action: 'created',
    cluster: clusterResult
  };
}
536
+
537
/**
 * Try to attach a freshly saved memory to the most similar existing cluster
 * in its domain (centroid cosine similarity must reach the configured
 * threshold). Updates the cluster's running stats and may promote it to
 * 'mature'.
 * @returns {Promise<object|null>} { action: 'joined', clusterId, theme,
 *   similarity } or null when no cluster is close enough.
 */
async function tryJoinCluster(memoryId, embedding, domain, confidence) {
  const database = getDb();

  // Active clusters (growing or mature) within the same domain
  const candidates = database.prepare(`
    SELECT id, theme, centroid_vector, member_count, avg_confidence
    FROM clusters
    WHERE domain = ? AND status IN ('growing', 'mature')
  `).all(domain);

  // Pick the centroid with the highest similarity above the threshold.
  let best = null;
  let bestSim = 0;
  for (const candidate of candidates) {
    if (!candidate.centroid_vector) continue;

    let sim;
    try {
      sim = cosineSimilarity(embedding, JSON.parse(candidate.centroid_vector));
    } catch (e) {
      continue; // unparsable centroid — skip it
    }

    if (sim >= CLUSTER_SIMILARITY_THRESHOLD && sim > bestSim) {
      best = candidate;
      bestSim = sim;
    }
  }

  if (!best) return null;

  // Attach the memory to the winning cluster.
  database.prepare(`
    UPDATE memories SET cluster_id = ? WHERE id = ?
  `).run(best.id, memoryId);

  // Refresh the cluster's running statistics (incremental average).
  const newCount = best.member_count + 1;
  const newAvgConf = (best.avg_confidence * best.member_count + confidence) / newCount;

  database.prepare(`
    UPDATE clusters
    SET member_count = ?,
        avg_confidence = ?,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = ?
  `).run(newCount, newAvgConf, best.id);

  // Promote to 'mature' once the cluster is big and confident enough.
  if (newCount >= CLUSTER_MATURITY_COUNT && newAvgConf >= CLUSTER_MATURITY_CONFIDENCE) {
    database.prepare(`
      UPDATE clusters SET status = 'mature' WHERE id = ? AND status = 'growing'
    `).run(best.id);
  }

  return {
    action: 'joined',
    clusterId: best.id,
    theme: best.theme,
    similarity: bestSim
  };
}
604
+
605
/**
 * Hybrid search (vector + BM25).
 * Runs a BM25 pass and a vector pass, merges the hits by id, then ranks by
 * a combined score (0.7 * vector similarity + 0.3 * normalized BM25).
 *
 * @param {string} query
 * @param {number} limit - max results (default 3)
 * @param {object} options - minConfidence / type / domain filters
 * @returns {Promise<object[]>} ranked result objects (content field prefers
 *   structured_content when present)
 */
async function search(query, limit = 3, options = {}) {
  const database = getDb();
  const { minConfidence = 0, type = null, domain = null } = options;

  // Use a Map keyed by memory id to merge BM25 and vector search results
  const resultsMap = new Map();

  // BM25 search (over-fetch 2x so post-filtering still fills `limit`)
  const ftsResults = quickSearch(query, limit * 2);
  for (const r of ftsResults) {
    resultsMap.set(r.id, {
      ...r,
      bm25Score: r.bm25Score || 0,
      vectorSimilarity: 0
    });
  }

  // Vector search (skipped entirely when the embedding model is unavailable)
  const embedding = await getEmbedding(query);
  if (embedding) {
    try {
      const vecResults = database.prepare(`
        SELECT rowid, distance
        FROM memories_vec
        WHERE embedding MATCH ?
        ORDER BY distance
        LIMIT ?
      `).all(JSON.stringify(embedding), limit * 2);

      for (const vr of vecResults) {
        // Cosine distance d is in [0, 2]; similarity = 1 - d is therefore in
        // [-1, 1] (negative only for near-opposite vectors, which nearest-
        // neighbor results rarely contain).
        const similarity = 1 - vr.distance;

        if (resultsMap.has(vr.rowid)) {
          // Merge scores: update vector similarity of the existing BM25 hit
          const existing = resultsMap.get(vr.rowid);
          existing.vectorSimilarity = similarity;
          existing.vectorDistance = vr.distance;
        } else {
          // Vector-only hit: fetch the full row from the database
          const memory = database.prepare('SELECT * FROM memories WHERE id = ?').get(vr.rowid);
          if (memory) {
            resultsMap.set(memory.id, {
              id: memory.id,
              content: memory.structured_content || memory.content, // [v4.6] Prefer returning structured content
              summary: memory.summary,
              type: memory.type,
              domain: memory.domain,
              confidence: memory.confidence,
              tags: memory.tags,
              createdAt: memory.created_at,
              date: memory.created_at ? memory.created_at.slice(0, 10) : 'unknown',
              bm25Score: 0,
              vectorSimilarity: similarity,
              vectorDistance: vr.distance
            });
          }
        }
      }
    } catch (e) {
      // Vector search failed (e.g. sqlite-vec missing), fall back to BM25 results
    }
  }

  // Apply filters, compute the combined score, rank, and truncate
  const results = Array.from(resultsMap.values())
    .filter(r => (r.confidence || 0) >= minConfidence)
    .filter(r => !type || r.type === type)
    .filter(r => !domain || r.domain === domain)
    .map(r => {
      // Combined score: vector-dominant (0.7) + BM25-auxiliary (0.3);
      // raw BM25 is normalized into [0, 1] by dividing by 10 and clamping.
      const bm25Normalized = Math.min((r.bm25Score || 0) / 10, 1.0);
      const vecSim = r.vectorSimilarity || 0;
      r.combinedScore = 0.7 * vecSim + 0.3 * bm25Normalized;
      return r;
    })
    .sort((a, b) => b.combinedScore - a.combinedScore)
    .slice(0, limit);

  return results;
}
689
+
690
/**
 * Fast BM25 search (no embedding model needed).
 * Uses jieba tokenization when available for Chinese support, then falls back
 * to a plain LIKE scan when FTS yields nothing.
 *
 * Fixes vs. previous version:
 *  - FTS phrase terms now double embedded quotes (consistent with save()'s
 *    dedup query), so a token containing `"` cannot break the MATCH query.
 *  - The LIKE fallback escapes %, _ and \ so literal wildcard characters in
 *    the query match themselves instead of acting as patterns.
 *
 * @param {string} query
 * @param {number} limit - max results (default 5)
 * @param {object} options - optional domain filter
 * @returns {object[]} ranked results (content prefers structured_content)
 */
function quickSearch(query, limit = 5, options = {}) {
  const database = getDb();
  const results = new Map();

  // Tokenize and drop stopwords / single characters
  const tokenized = tokenize(query);
  const words = tokenized
    .split(/\s+/)
    .filter(w => w.length > 1 && !STOPWORDS.has(w.toLowerCase()));

  // 1. FTS search with tokenized keywords
  if (words.length > 0) {
    try {
      // Quote each term as an FTS5 phrase, doubling embedded quotes.
      const ftsQuery = words.map(w => `"${w.replace(/"/g, '""')}"`).join(' OR ');
      const ftsResults = database.prepare(`
        SELECT m.*, bm25(memories_fts) as bm25_score
        FROM memories_fts fts
        JOIN memories m ON fts.rowid = m.id
        WHERE memories_fts MATCH ?
        ORDER BY bm25(memories_fts)
        LIMIT ?
      `).all(ftsQuery, limit * 2);

      for (const r of ftsResults) {
        // bm25() returns negative scores (more negative = better match);
        // flip the sign so larger bm25Score means better.
        results.set(r.id, { ...r, bm25Score: Math.abs(r.bm25_score) });
      }
    } catch (e) {
      // FTS query failed, continue to fallback
    }
  }

  // 2. Fallback: LIKE search if no FTS results. Escape LIKE wildcards so the
  // raw query is matched literally.
  if (results.size === 0 && query.length > 0) {
    try {
      const escaped = query.replace(/[\\%_]/g, ch => '\\' + ch);
      const pattern = `%${escaped}%`;
      const fallbackResults = database.prepare(`
        SELECT *, 0 as bm25_score
        FROM memories
        WHERE content LIKE ? ESCAPE '\\' OR structured_content LIKE ? ESCAPE '\\'
        LIMIT ?
      `).all(pattern, pattern, limit);

      for (const r of fallbackResults) {
        // Flat low score so fallback hits rank below any real BM25 hit
        results.set(r.id, { ...r, bm25Score: 0.3 });
      }
    } catch (e) {
      // Ignore — fallback is best-effort
    }
  }

  // Sort by score, apply the optional domain filter, map to the public shape
  const { domain = null } = options;
  return Array.from(results.values())
    .filter(r => !domain || r.domain === domain)
    .sort((a, b) => b.bm25Score - a.bm25Score)
    .slice(0, limit)
    .map(r => ({
      id: r.id,
      content: r.structured_content || r.content,
      rawContent: r.content,
      structuredContent: r.structured_content,
      summary: r.summary,
      type: r.type,
      domain: r.domain,
      confidence: r.confidence,
      tags: r.tags,
      createdAt: r.created_at,
      bm25Score: r.bm25Score,
      date: r.created_at ? r.created_at.slice(0, 10) : 'unknown'
    }));
}
764
+
765
/**
 * Bump a memory's confidence (capped at 0.9) and record the access
 * (timestamp + counter).
 * @param {number} memoryId
 * @param {number} boost - increment to apply, default 0.1
 */
function autoBoostConfidence(memoryId, boost = 0.1) {
  getDb().prepare(`
    UPDATE memories
    SET confidence = MIN(0.9, confidence + ?),
        last_accessed_at = CURRENT_TIMESTAMP,
        access_count = access_count + 1
    WHERE id = ?
  `).run(boost, memoryId);
}
778
+
779
/**
 * Record that a batch of memories was surfaced: stamp last_accessed_at and
 * bump access_count for every id given. No-op for an empty or missing list.
 * @param {number[]} memoryIds
 */
function markMemoriesUsed(memoryIds) {
  if (!memoryIds || memoryIds.length === 0) return;

  // One '?' placeholder per id
  const slots = memoryIds.map(() => '?').join(',');

  getDb().prepare(`
    UPDATE memories
    SET last_accessed_at = CURRENT_TIMESTAMP,
        access_count = access_count + 1
    WHERE id IN (${slots})
  `).run(...memoryIds);
}
795
+
796
/**
 * Remove a memory together with its derived index entries (vector + FTS).
 * The vector delete may fail harmlessly when no embedding was ever stored.
 * @param {number} memoryId
 */
function deleteMemory(memoryId) {
  const database = getDb();

  try {
    // sqlite-vec rowids are BigInt
    const vecStmt = database.prepare('DELETE FROM memories_vec WHERE rowid = ?');
    vecStmt.run(BigInt(memoryId));
  } catch (e) { /* vector may not exist */ }

  ftsDelete(memoryId);
  database.prepare('DELETE FROM memories WHERE id = ?').run(memoryId);
}
807
+
808
/**
 * Adjust a memory's confidence from validation feedback.
 * Confirmation adds 0.1, refutation subtracts 0.05; the result is clamped to
 * [0.3, 0.9]. Either way the evidence counter grows.
 * @param {number} memoryId
 * @param {boolean} isValid
 */
function validateMemory(memoryId, isValid) {
  const delta = isValid ? 0.1 : -0.05;

  getDb().prepare(`
    UPDATE memories
    SET confidence = MAX(0.3, MIN(0.9, confidence + ?)),
        evidence_count = evidence_count + 1,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = ?
  `).run(delta, memoryId);
}
823
+
824
// ============== Clustering and Evolution ==============

/**
 * Automatically create new clusters for uncategorized memories.
 * Greedy single-pass clustering: within each domain, each unused memory seeds
 * a cluster and absorbs every later memory whose vector similarity reaches
 * the threshold.
 *
 * @param {object} options - Configuration options
 *   - domain: Specific domain, null for all
 *   - minConfidence: Minimum confidence (default 0.5)
 *   - minClusterSize: Minimum cluster size (default 2)
 *   - similarityThreshold: Similarity threshold (default from config)
 *   - hoursBack: Only process memories from the last N hours (default null for unlimited)
 * @returns {Promise<object[]>} Information about newly created clusters
 */
async function autoCluster(options = {}) {
  const {
    domain = null,
    minConfidence = 0.5,
    minClusterSize = 2,
    similarityThreshold = CLUSTER_SIMILARITY_THRESHOLD,
    hoursBack = null
  } = options;

  const database = getDb();

  // Build query conditions (only unclustered, confident-enough memories)
  let whereClause = 'cluster_id IS NULL AND confidence >= ?';
  const params = [minConfidence];

  if (domain) {
    whereClause += ' AND domain = ?';
    params.push(domain);
  }

  if (hoursBack) {
    // Interpolated rather than bound, but sanitized via parseInt so it cannot
    // inject SQL. NOTE(review): parseInt(NaN) would still yield 'NaN hours';
    // callers are expected to pass a number.
    whereClause += ` AND created_at > datetime('now', '-${parseInt(hoursBack)} hours')`;
  }

  // Get unclustered memories, most confident first (cap at 100 per run)
  const unclustered = database.prepare(`
    SELECT id, content, summary, confidence, domain
    FROM memories
    WHERE ${whereClause}
    ORDER BY confidence DESC
    LIMIT 100
  `).all(...params);

  if (unclustered.length < minClusterSize) {
    return [];
  }

  // Get vectors (vec_to_json converts sqlite-vec's binary format back to JSON)
  const vectors = [];
  for (const m of unclustered) {
    try {
      const vec = database.prepare(
        'SELECT vec_to_json(embedding) as json_vec FROM memories_vec WHERE rowid = ?'
      ).get(m.id);
      if (vec && vec.json_vec) {
        vectors.push({ id: m.id, memory: m, vector: JSON.parse(vec.json_vec) });
      }
    } catch (e) {
      // Skip memories without vectors
    }
  }

  if (vectors.length < minClusterSize) {
    return [];
  }

  // Group by domain so clusters never span domains
  const byDomain = {};
  for (const v of vectors) {
    const d = v.memory.domain;
    if (!byDomain[d]) byDomain[d] = [];
    byDomain[d].push(v);
  }

  const createdClusters = [];

  // Greedy clustering within each domain: a seed absorbs all later
  // sufficiently-similar members; each memory joins at most one cluster.
  for (const [domainName, domainVectors] of Object.entries(byDomain)) {
    if (domainVectors.length < minClusterSize) continue;

    const used = new Set();
    const newClusters = [];

    for (let i = 0; i < domainVectors.length; i++) {
      if (used.has(i)) continue;

      const cluster = [domainVectors[i]];
      used.add(i);

      for (let j = i + 1; j < domainVectors.length; j++) {
        if (used.has(j)) continue;

        const similarity = cosineSimilarity(domainVectors[i].vector, domainVectors[j].vector);

        if (similarity >= similarityThreshold) {
          cluster.push(domainVectors[j]);
          used.add(j);
        }
      }

      // Singleton groups are discarded — they stay unclustered for next time
      if (cluster.length >= minClusterSize) {
        newClusters.push(cluster);
      }
    }

    // Persist each new cluster
    for (const cluster of newClusters) {
      const theme = inferClusterTheme(cluster.map(c => c.memory));

      // Centroid = element-wise average of member vectors
      const avgVector = cluster[0].vector.map((_, i) =>
        cluster.reduce((sum, c) => sum + c.vector[i], 0) / cluster.length
      );

      const avgConfidence = cluster.reduce((sum, c) => sum + c.memory.confidence, 0) / cluster.length;

      // A cluster born big and confident enough starts out 'mature'
      const status = (cluster.length >= CLUSTER_MATURITY_COUNT && avgConfidence >= CLUSTER_MATURITY_CONFIDENCE)
        ? 'mature'
        : 'growing';

      // Insert cluster (centroid_id records the seed memory)
      const result = database.prepare(`
        INSERT INTO clusters (theme, centroid_id, centroid_vector, member_count, avg_confidence, domain, status)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `).run(
        theme,
        cluster[0].id,
        JSON.stringify(avgVector),
        cluster.length,
        avgConfidence,
        domainName,
        status
      );

      const clusterId = Number(result.lastInsertRowid);

      // Back-link every member memory to its new cluster
      for (const c of cluster) {
        database.prepare('UPDATE memories SET cluster_id = ? WHERE id = ?').run(clusterId, c.id);
      }

      createdClusters.push({
        id: clusterId,
        theme,
        domain: domainName,
        memberCount: cluster.length,
        avgConfidence,
        status,
        memberIds: cluster.map(c => c.id)
      });
    }
  }

  return createdClusters;
}
982
+
983
/**
 * Infer a short theme label for a cluster from its members' contents.
 *
 * Counts word frequency across all member memories — punctuation is
 * stripped (word characters and CJK kept), tokens of length <= 2 and
 * stopwords are dropped — then joins the three most frequent words
 * with '-'. Falls back to 'general-pattern' when nothing qualifies.
 *
 * @param {object[]} memories - Memory rows; only `content` is read.
 * @returns {string} Theme label, e.g. "docker-compose-volumes".
 */
function inferClusterTheme(memories) {
  const frequency = {};

  for (const memory of memories) {
    const tokens = (memory.content || '')
      .replace(/[^\w\s\u4e00-\u9fff]/g, ' ')
      .split(/\s+/);

    for (const token of tokens) {
      // Skip short tokens and stopwords up front instead of pre-filtering.
      if (token.length <= 2) continue;
      if (STOPWORDS.has(token.toLowerCase())) continue;
      frequency[token] = (frequency[token] || 0) + 1;
    }
  }

  const ranked = Object.entries(frequency).sort((left, right) => right[1] - left[1]);
  const topWords = ranked.slice(0, 3).map(([token]) => token);

  return topWords.join('-') || 'general-pattern';
}
1005
+
1006
/**
 * Get mature clusters.
 *
 * Returns every row from the clusters table whose status is 'mature';
 * these are the candidates passed to mergeClusterMemories().
 *
 * @returns {object[]} Cluster rows (all columns).
 */
function getMatureClusters() {
  const database = getDb();
  return database.prepare(`
    SELECT * FROM clusters WHERE status = 'mature'
  `).all();
}
1015
+
1016
/**
 * [v5.5] Merge memories in a mature cluster into a single high-confidence memory.
 *
 * Flow: load the cluster and its members (highest confidence first), ask the
 * LLM client to merge their texts, then in one transaction insert the merged
 * memory (confidence 0.9), index it (FTS + vector), delete the originals, and
 * mark the cluster 'merged'. If the LLM is unavailable the member texts are
 * concatenated instead (confidence 0.85).
 *
 * @param {number} clusterId - Cluster ID
 * @returns {object|null} { memoryId, summary, memberCount } on success;
 *   null if the cluster is missing, not mature, or has fewer than 2 members.
 */
async function mergeClusterMemories(clusterId) {
  const database = getDb();

  const cluster = database.prepare('SELECT * FROM clusters WHERE id = ?').get(clusterId);
  if (!cluster || cluster.status !== 'mature') return null;

  const members = database.prepare(`
    SELECT * FROM memories WHERE cluster_id = ? ORDER BY confidence DESC
  `).all(clusterId);

  if (members.length < 2) return null; // Need at least 2 to make merging meaningful

  // Collect all memory contents (prefer structured_content, fall back to content)
  const memoryTexts = members.map(m => m.structured_content || m.content);

  // Determine primary type (most frequent member type) and domain
  const typeCounts = {};
  members.forEach(m => { typeCounts[m.type] = (typeCounts[m.type] || 0) + 1; });
  const mainType = Object.entries(typeCounts).sort((a, b) => b[1] - a[1])[0][0];
  const domain = cluster.domain || 'general';

  // Call LLM for merging; any failure leaves mergedContent null and
  // routes us to the concatenation fallback below.
  let mergedContent = null;
  let structuredContent = null;
  try {
    const llmClient = require('./llm-client');
    if (await llmClient.isAvailable()) {
      mergedContent = await llmClient.merge(memoryTexts, domain);
    }
  } catch (e) {
    // LLM not available, fall back
  }

  if (mergedContent) {
    // [v6.1] LLM merge: supports direct XML return or legacy format object
    let content;
    if (typeof mergedContent === 'string' && mergedContent.startsWith('<memory')) {
      // LLM returned XML directly; use the cluster theme as plain content
      structuredContent = mergedContent;
      content = cluster.theme;
    } else {
      content = mergedContent.content || mergedContent.summary || cluster.theme;
      structuredContent = formatStructuredContent(mergedContent, mainType, domain);
    }

    const summary = (typeof mergedContent === 'string') ? cluster.theme : (mergedContent.summary || cluster.theme);
    const mergedKeywords = extractKeywords(content).join(',');

    // Generate embedding before transaction (async, cannot run inside better-sqlite3 transaction)
    const embeddingText = buildEmbeddingText(structuredContent || content, domain);
    const embedding = await getEmbedding(embeddingText);

    // All DB writes in a single transaction
    const mergeResult = database.transaction(() => {
      // NOTE(review): the `keywords` column is filled from mergedContent.triggers,
      // while extractKeywords() output (mergedKeywords) only feeds the FTS index —
      // confirm this asymmetry is intentional.
      const result = database.prepare(`
        INSERT INTO memories (content, structured_content, summary, type, domain, confidence, source, trigger, keywords)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `).run(
        content,
        structuredContent,
        summary,
        mainType,
        domain,
        0.9,
        'cluster-merge',
        `merged from cluster #${clusterId} (${members.length} memories)`,
        (typeof mergedContent === 'object' && mergedContent.triggers) ? mergedContent.triggers.join(',') : ''
      );

      const newMemoryId = Number(result.lastInsertRowid);
      ftsInsert(newMemoryId, content, structuredContent, summary, '', mergedKeywords);

      if (embedding) {
        try {
          database.prepare(`
            INSERT INTO memories_vec (rowid, embedding)
            VALUES (?, ?)
          `).run(BigInt(newMemoryId), JSON.stringify(embedding));
        } catch (e) {
          // Vector insert failed, does not affect main flow
        }
      }

      // Delete original memories and their vectors (vec row, FTS row, then base row)
      for (const m of members) {
        try {
          database.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(BigInt(m.id));
        } catch (e) { /* ignore */ }
        ftsDelete(m.id);
        database.prepare('DELETE FROM memories WHERE id = ?').run(m.id);
      }

      // Update cluster status so it is not merged twice
      database.prepare(`
        UPDATE clusters SET status = 'merged', evolved_at = CURRENT_TIMESTAMP WHERE id = ?
      `).run(clusterId);

      return { memoryId: newMemoryId, summary, memberCount: members.length };
    })();

    return mergeResult;
  }

  // LLM not available: simple concatenation fallback
  const fallbackContent = memoryTexts.join('\n---\n');
  const fallbackKeywords = extractKeywords(fallbackContent).join(',');

  const fallbackResult = database.transaction(() => {
    // Fallback rows carry slightly lower confidence (0.85 vs 0.9) and no
    // structured_content / keywords column.
    const result = database.prepare(`
      INSERT INTO memories (content, summary, type, domain, confidence, source, trigger)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(fallbackContent, cluster.theme, mainType, domain, 0.85, 'cluster-merge', `fallback merge from cluster #${clusterId}`);

    const newMemoryId = Number(result.lastInsertRowid);
    ftsInsert(newMemoryId, fallbackContent, null, cluster.theme, '', fallbackKeywords);

    // Same cleanup as the LLM path: vec row, FTS row, then base row
    for (const m of members) {
      try {
        database.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(BigInt(m.id));
      } catch (e) { /* ignore */ }
      ftsDelete(m.id);
      database.prepare('DELETE FROM memories WHERE id = ?').run(m.id);
    }

    database.prepare(`
      UPDATE clusters SET status = 'merged', evolved_at = CURRENT_TIMESTAMP WHERE id = ?
    `).run(clusterId);

    return { memoryId: newMemoryId, summary: cluster.theme, memberCount: members.length };
  })();

  return fallbackResult;
}
1154
+
1155
+ // ============== Utility Functions ==============
1156
+
1157
/**
 * Extract up to 10 keywords from text, ranked by frequency.
 *
 * Punctuation is stripped (word characters and CJK kept); tokens of
 * length 1 and stopwords are ignored.
 *
 * @param {string} text - Source text.
 * @returns {string[]} Up to 10 keywords, most frequent first.
 */
function extractKeywords(text) {
  const counts = {};

  const tokens = text
    .replace(/[^\w\s\u4e00-\u9fff]/g, ' ')
    .split(/\s+/);

  // Count frequency, filtering inline rather than via a filtered array.
  for (const token of tokens) {
    if (token.length <= 1) continue;
    if (STOPWORDS.has(token.toLowerCase())) continue;
    counts[token] = (counts[token] || 0) + 1;
  }

  const ranked = Object.entries(counts).sort((left, right) => right[1] - left[1]);
  return ranked.slice(0, 10).map(([token]) => token);
}
1174
+
1175
/**
 * Jaccard similarity between two texts over their lowercased,
 * whitespace-separated word sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Similarity in [0, 1].
 */
function textSimilarity(a, b) {
  const setA = new Set(a.toLowerCase().split(/\s+/));
  const setB = new Set(b.toLowerCase().split(/\s+/));

  // Single pass over setB builds both the overlap count and the union.
  let shared = 0;
  const union = new Set(setA);
  for (const word of setB) {
    if (setA.has(word)) shared++;
    union.add(word);
  }

  // split() always yields at least one token, so union.size >= 1.
  return shared / union.size;
}
1182
+
1183
/**
 * Cosine similarity between two numeric vectors.
 *
 * Returns 0 for null/undefined inputs, mismatched lengths, or when
 * either vector has zero magnitude.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Similarity in [-1, 1] (0 on invalid input).
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) return 0;

  let dotProduct = 0;
  let magA = 0;
  let magB = 0;

  a.forEach((value, idx) => {
    dotProduct += value * b[idx];
    magA += value * value;
    magB += b[idx] * b[idx];
  });

  const magnitude = Math.sqrt(magA) * Math.sqrt(magB);
  return magnitude === 0 ? 0 : dotProduct / magnitude;
}
1196
+
1197
/**
 * Get aggregate statistics about the memory database.
 *
 * @returns {object} { totalMemories, byType, byDomain, totalClusters,
 *   matureClusters, version } — byType/byDomain map names to row counts.
 */
function getStats() {
  const database = getDb();
  const countOf = (sql) => database.prepare(sql).get().count;

  // Query order matches the historical implementation.
  const totalMemories = countOf('SELECT COUNT(*) as count FROM memories');
  const typeRows = database.prepare('SELECT type, COUNT(*) as count FROM memories GROUP BY type').all();
  const domainRows = database.prepare('SELECT domain, COUNT(*) as count FROM memories GROUP BY domain').all();
  const totalClusters = countOf('SELECT COUNT(*) as count FROM clusters');
  const matureClusters = countOf("SELECT COUNT(*) as count FROM clusters WHERE status = 'mature'");

  const byType = {};
  for (const row of typeRows) byType[row.type] = row.count;

  const byDomain = {};
  for (const row of domainRows) byDomain[row.domain] = row.count;

  return {
    totalMemories,
    byType,
    byDomain,
    totalClusters,
    matureClusters,
    version: '6.1'
  };
}
1218
+
1219
/**
 * Rebuild embeddings for all memories (using structured_content + domain).
 *
 * Drops and recreates the memories_vec virtual table with cosine distance,
 * then re-embeds every memory one at a time.
 *
 * @returns {Promise<object>} { success: true, rebuilt, failed } on
 *   completion, or { success: false, error } if the vector table could
 *   not be recreated.
 */
async function rebuildAllEmbeddings() {
  const database = getDb();
  const rows = database.prepare('SELECT id, content, structured_content, domain FROM memories').all();
  console.log(`[memory-db] Rebuilding embeddings for ${rows.length} memories...`);

  // Rebuild vector table (drop old table, recreate with cosine distance)
  try {
    database.exec('DROP TABLE IF EXISTS memories_vec');
    database.exec(`
      CREATE VIRTUAL TABLE memories_vec USING vec0(
        embedding float[${config.embedding.dimensions}] distance_metric=cosine
      )
    `);
  } catch (e) {
    console.error('[memory-db] Failed to recreate memories_vec:', e.message);
    return { success: false, error: e.message };
  }

  let rebuilt = 0;
  let failed = 0;

  // Embeddings are awaited sequentially so progress and DB writes stay ordered.
  for (const row of rows) {
    const embedText = buildEmbeddingText(row.structured_content || row.content, row.domain);
    const vector = await getEmbedding(embedText);

    if (!vector) {
      failed++;
    } else {
      try {
        database.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)').run(BigInt(row.id), JSON.stringify(vector));
        rebuilt++;
      } catch (e) {
        failed++;
      }
    }

    const processed = rebuilt + failed;
    if (processed % 10 === 0) {
      console.log(`[memory-db] Progress: ${processed}/${rows.length}`);
    }
  }

  console.log(`[memory-db] Rebuild complete: ${rebuilt} success, ${failed} failed`);
  return { success: true, rebuilt, failed };
}
1263
+
1264
// ============== Exports ==============

// Public API of this module. Note: textSimilarity is intentionally
// module-private (not exported).
module.exports = {
  // Database lifecycle
  getDb,
  closeDb,

  // Core functions: persist and retrieve memories
  save,
  search,
  quickSearch,

  // Confidence management
  autoBoostConfidence,
  markMemoriesUsed,
  deleteMemory,
  validateMemory,

  // Clustering and merging
  autoCluster,
  inferClusterTheme,
  tryJoinCluster,
  getMatureClusters,
  mergeClusterMemories,

  // Embeddings
  getEmbedding,
  warmupEmbedding,
  buildEmbeddingText,
  rebuildAllEmbeddings,

  // [v4.5] LLM structuring
  structurizeWithLLM,
  formatStructuredContent,

  // Utilities
  tokenize,
  extractKeywords,
  cosineSimilarity,
  getStats,

  // Configuration constants (clustering thresholds, structurize settings)
  CLUSTER_SIMILARITY_THRESHOLD,
  CLUSTER_MATURITY_COUNT,
  STRUCTURIZE_CONFIG,
  CLUSTER_MATURITY_CONFIDENCE
};