claude-mem-lite 2.87.0 → 2.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.mjs CHANGED
@@ -5,13 +5,18 @@
5
5
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6
6
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
7
7
  import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
8
- import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause } from './utils.mjs';
8
+ import { truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, scrubSecrets, cjkBigrams, fmtDate, debugLog, debugCatch, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause } from './utils.mjs';
9
9
  import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
10
10
  import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
11
11
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
12
12
  import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
13
13
  import { searchObservationsHybrid, findFtsAnchor } from './search-engine.mjs';
14
- import { scrubRecord } from './lib/scrub-record.mjs';
14
+ import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
15
+ import {
16
+ cleanupBroken, decayAndMarkIdle, boostAccessed, demotePinned, mergeDuplicates,
17
+ purgeStale, purgeStalePreview, findDuplicates, maintenanceStats, rebuildVectors, vacuum,
18
+ OP_CAP, STALE_AGE_MS,
19
+ } from './lib/maintain-core.mjs';
15
20
  import { effectiveQuiet, RUNTIME_DIR } from './hook-shared.mjs';
16
21
  import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
17
22
  import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, memDeferSchema, memDeferListSchema, memDeferDropSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
@@ -31,11 +36,12 @@ import { ensureRegistryDb, upsertResource } from './registry.mjs';
31
36
  import { searchResources } from './registry-retriever.mjs';
32
37
  import { probeOtherSources as probeIdSources, parseIdToken, bucketIdTokens } from './lib/id-routing.mjs';
33
38
  import { saveObservation } from './lib/save-observation.mjs';
39
+ import { AUTO_MERGE_THRESHOLD } from './lib/dedup-constants.mjs';
34
40
  import {
35
41
  insertDeferred, listOpenWithOrdinal, dropDeferred,
36
42
  resolveDeferredIds, closeDeferredItems,
37
43
  } from './lib/deferred-work.mjs';
38
- import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector } from './tfidf.mjs';
44
+ import { getVocabulary, _resetVocabCache, computeVector } from './tfidf.mjs';
39
45
  import { createRequire } from 'module';
40
46
 
41
47
  const require = createRequire(import.meta.url);
@@ -1168,35 +1174,13 @@ server.registerTool(
1168
1174
  const preview = args.preview !== false;
1169
1175
  const ageDays = args.age_days ?? 30;
1170
1176
  const cutoff = Date.now() - ageDays * 86400000;
1171
- const projectFilter = args.project ? 'AND project = ?' : '';
1172
- const baseParams = args.project ? [args.project] : [];
1173
-
1174
- // Find low-value candidates: importance=1, never accessed, old, not already compressed
1175
- const candidates = db.prepare(`
1176
- SELECT id, project, type, title, created_at, created_at_epoch
1177
- FROM observations
1178
- WHERE COALESCE(importance, 1) = 1
1179
- AND COALESCE(access_count, 0) = 0
1180
- AND created_at_epoch < ?
1181
- AND compressed_into IS NULL
1182
- ${projectFilter}
1183
- ORDER BY project, created_at_epoch
1184
- `).all(cutoff, ...baseParams);
1177
+ const candidates = selectCompressionCandidates(db, { cutoff, project: args.project || null });
1185
1178
 
1186
1179
  if (candidates.length === 0) {
1187
1180
  return { content: [{ type: 'text', text: 'No candidates for compression.' }] };
1188
1181
  }
1189
1182
 
1190
- // Group by project + ISO week
1191
- const groups = new Map();
1192
- for (const c of candidates) {
1193
- const key = `${c.project}::${isoWeekKey(c.created_at_epoch)}`;
1194
- if (!groups.has(key)) groups.set(key, []);
1195
- groups.get(key).push(c);
1196
- }
1197
-
1198
- // Filter groups with < 3 observations (not worth compressing)
1199
- const compressableGroups = [...groups.entries()].filter(([, obs]) => obs.length >= 3);
1183
+ const compressableGroups = groupByProjectWeek(candidates);
1200
1184
 
1201
1185
  if (preview) {
1202
1186
  const totalCandidates = compressableGroups.reduce((s, [, obs]) => s + obs.length, 0);
@@ -1220,49 +1204,12 @@ server.registerTool(
1220
1204
  return { content: [{ type: 'text', text: lines.join('\n') }] };
1221
1205
  }
1222
1206
 
1223
- // Execute compression
1207
+ // Execute compression — one transaction over all groups (the hook transacts per group).
1224
1208
  let totalCompressed = 0;
1225
- const insertSummary = db.prepare(`
1226
- INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
1227
- VALUES (?, ?, ?, ?, ?, '', ?, '', '', '[]', '[]', 2, ?, ?)
1228
- `);
1229
1209
  const compress = db.transaction(() => {
1230
1210
  for (const [key, obs] of compressableGroups) {
1231
1211
  const [proj] = key.split('::');
1232
- const types = {};
1233
- for (const o of obs) types[o.type] = (types[o.type] || 0) + 1;
1234
- const dominantType = Object.entries(types).sort((a, b) => b[1] - a[1])[0][0];
1235
- const title = `Weekly summary: ${obs.length} ${dominantType} observations`;
1236
- const narrative = obs.map(o => `- ${o.title || '(untitled)'}`).join('\n');
1237
- const sessionId = obs[0].project ? `compress-${obs[0].project}` : 'compress-manual';
1238
-
1239
- // Use median timestamp of compressed observations instead of now,
1240
- // so the summary appears at the correct position in timeline/recency scoring.
1241
- const sortedEpochs = obs.map(o => o.created_at_epoch).sort((a, b) => a - b);
1242
- const medianEpoch = sortedEpochs[Math.floor(sortedEpochs.length / 2)];
1243
- const medianDate = new Date(medianEpoch);
1244
-
1245
- // Ensure session exists (INSERT OR IGNORE avoids race condition)
1246
- const now = new Date();
1247
- db.prepare(`
1248
- INSERT OR IGNORE INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
1249
- VALUES (?, ?, ?, ?, ?, 'active')
1250
- `).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
1251
-
1252
- // Defense-in-depth: source rows already scrubbed at original ingest,
1253
- // but the new compressed narrative is constructed here and re-persisted.
1254
- const safe = scrubRecord('observations', { text: narrative, title, narrative });
1255
- const summaryResult = insertSummary.run(
1256
- sessionId, proj, safe.text, dominantType, safe.title, safe.narrative,
1257
- medianDate.toISOString(), medianEpoch
1258
- );
1259
- const summaryId = Number(summaryResult.lastInsertRowid);
1260
-
1261
- // Batch UPDATE instead of per-row loop
1262
- const obsIds = obs.map(o => o.id);
1263
- const obsPh = obsIds.map(() => '?').join(',');
1264
- db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsPh})`).run(summaryId, ...obsIds);
1265
- totalCompressed += obs.length;
1212
+ totalCompressed += compressGroup(db, proj, obs).compressed;
1266
1213
  }
1267
1214
  });
1268
1215
  compress();
@@ -1281,10 +1228,6 @@ server.registerTool(
1281
1228
  },
1282
1229
  safeHandler(async (args) => {
1283
1230
  if (args.project) args = { ...args, project: resolveProject(args.project) };
1284
- const STALE_AGE_MS = 30 * 86400000;
1285
- const SIMILARITY_THRESHOLD = 0.7;
1286
- const SCAN_LIMIT = 500;
1287
- const DUPLICATE_LIMIT = 50;
1288
1231
  const DUPLICATE_DISPLAY = 15;
1289
1232
 
1290
1233
  const action = args.action;
@@ -1293,56 +1236,10 @@ server.registerTool(
1293
1236
  const baseParams = project ? [project] : [];
1294
1237
 
1295
1238
  if (action === 'scan') {
1296
- // 1. Find near-duplicate titles (MinHash pre-filter → exact Jaccard on candidates)
1297
- const recent = db.prepare(`
1298
- SELECT id, title, project, importance, access_count, created_at_epoch
1299
- FROM observations
1300
- WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
1301
- ORDER BY created_at_epoch DESC
1302
- LIMIT ${SCAN_LIMIT}
1303
- `).all(...baseParams);
1304
-
1305
- const titles = recent.map(r => (r.title || '').trim());
1306
- const minhashes = titles.map(t => t ? computeMinHash(t) : null);
1307
- const MINHASH_PRE_THRESHOLD = 0.5; // loose pre-filter to catch candidates
1308
- const duplicates = [];
1309
- for (let i = 0; i < recent.length && duplicates.length < DUPLICATE_LIMIT; i++) {
1310
- if (!titles[i] || !minhashes[i]) continue;
1311
- for (let j = i + 1; j < recent.length; j++) {
1312
- if (!titles[j] || !minhashes[j]) continue;
1313
- // Fast MinHash estimate to skip obvious non-matches
1314
- if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PRE_THRESHOLD) continue;
1315
- const sim = jaccardSimilarity(titles[i], titles[j]);
1316
- if (sim > SIMILARITY_THRESHOLD) {
1317
- duplicates.push({
1318
- a: { id: recent[i].id, title: recent[i].title, importance: recent[i].importance },
1319
- b: { id: recent[j].id, title: recent[j].title, importance: recent[j].importance },
1320
- similarity: sim.toFixed(2),
1321
- });
1322
- }
1323
- if (duplicates.length >= DUPLICATE_LIMIT) break;
1324
- }
1325
- }
1326
-
1327
- // 2. Consolidated stats query (single table scan instead of 4 separate COUNTs)
1328
1239
  const staleAge = Date.now() - STALE_AGE_MS;
1329
- const stats = db.prepare(`
1330
- SELECT
1331
- COUNT(*) as total,
1332
- COALESCE(SUM(CASE WHEN COALESCE(importance, 1) = 1 AND COALESCE(access_count, 0) = 0
1333
- AND created_at_epoch < ? THEN 1 ELSE 0 END), 0) as stale,
1334
- COALESCE(SUM(CASE WHEN (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
1335
- THEN 1 ELSE 0 END), 0) as broken,
1336
- COALESCE(SUM(CASE WHEN COALESCE(access_count, 0) > 3 AND COALESCE(importance, 1) < 3
1337
- THEN 1 ELSE 0 END), 0) as boostable
1338
- FROM observations
1339
- WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
1340
- `).get(staleAge, ...baseParams);
1341
-
1342
- // Count pending-purge items (marked by idle cleanup)
1343
- const pendingPurge = db.prepare(`
1344
- SELECT COUNT(*) as count FROM observations WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} ${projectFilter}
1345
- `).get(...baseParams);
1240
+ const mctx = { projectFilter, baseParams, staleAge };
1241
+ const duplicates = findDuplicates(db, mctx);
1242
+ const stats = maintenanceStats(db, mctx);
1346
1243
 
1347
1244
  const lines = [
1348
1245
  `Memory maintenance scan:`,
@@ -1351,10 +1248,9 @@ server.registerTool(
1351
1248
  ` Stale (>30d, imp=1, no access): ${stats.stale}`,
1352
1249
  ` Broken (no title/narrative): ${stats.broken}`,
1353
1250
  ` Boostable (accessed>3, imp<3): ${stats.boostable}`,
1354
- ` Pending purge (idle-marked): ${pendingPurge.count}`,
1251
+ ` Pending purge (idle-marked): ${stats.pendingPurge}`,
1355
1252
  ];
1356
1253
  if (duplicates.length > 0) {
1357
- const AUTO_MERGE_THRESHOLD = 0.85;
1358
1254
  const autoMergeable = duplicates.filter(d => parseFloat(d.similarity) >= AUTO_MERGE_THRESHOLD);
1359
1255
  const manualReview = duplicates.filter(d => parseFloat(d.similarity) < AUTO_MERGE_THRESHOLD);
1360
1256
 
@@ -1396,7 +1292,7 @@ server.registerTool(
1396
1292
  }
1397
1293
  const results = [];
1398
1294
  const staleAge = Date.now() - STALE_AGE_MS;
1399
- const OP_ROW_CAP = 1000; // safety cap per operation
1295
+ const mctx = { projectFilter, baseParams, staleAge, opCap: OP_CAP };
1400
1296
 
1401
1297
  // T2-P0-A: purge_stale is the only DELETE in this handler. Require confirm=true;
1402
1298
  // a first call without confirm returns a dry-run preview so callers know the blast radius.
@@ -1404,11 +1300,7 @@ server.registerTool(
1404
1300
  if (purgeRequested && args.confirm !== true) {
1405
1301
  const retainDays = args.retain_days ?? 30;
1406
1302
  const retainCutoff = Date.now() - retainDays * 86400000;
1407
- const previewRow = db.prepare(`
1408
- SELECT COUNT(*) AS candidates, MIN(created_at_epoch) AS oldest, MAX(created_at_epoch) AS newest
1409
- FROM observations
1410
- WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ? ${projectFilter}
1411
- `).get(retainCutoff, ...baseParams);
1303
+ const previewRow = purgeStalePreview(db, mctx, retainCutoff);
1412
1304
  const lines = [
1413
1305
  'purge_stale preview (confirm=false):',
1414
1306
  ` Candidates (pending-purge, older than ${retainDays}d): ${previewRow.candidates}`,
@@ -1425,99 +1317,29 @@ server.registerTool(
1425
1317
 
1426
1318
  db.transaction(() => {
1427
1319
  if (ops.includes('cleanup')) {
1428
- const deleted = db.prepare(`
1429
- DELETE FROM observations
1430
- WHERE id IN (
1431
- SELECT id FROM observations
1432
- WHERE COALESCE(compressed_into, 0) = 0
1433
- AND (title IS NULL OR title = '')
1434
- AND (narrative IS NULL OR narrative = '')
1435
- ${projectFilter}
1436
- LIMIT ${OP_ROW_CAP}
1437
- )
1438
- `).run(...baseParams);
1439
- results.push(`Cleaned up ${deleted.changes} broken observations` + (deleted.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
1320
+ const deleted = cleanupBroken(db, mctx);
1321
+ results.push(`Cleaned up ${deleted} broken observations` + (deleted >= OP_CAP ? ' (cap reached, re-run for more)' : ''));
1440
1322
  }
1441
1323
 
1442
1324
  if (ops.includes('decay')) {
1443
- const decayed = db.prepare(`
1444
- UPDATE observations SET importance = MAX(1, COALESCE(importance, 1) - 1)
1445
- WHERE id IN (
1446
- SELECT id FROM observations
1447
- WHERE COALESCE(compressed_into, 0) = 0
1448
- AND COALESCE(importance, 1) > 1
1449
- AND COALESCE(access_count, 0) = 0
1450
- AND created_at_epoch < ?
1451
- ${projectFilter}
1452
- LIMIT ${OP_ROW_CAP}
1453
- )
1454
- `).run(staleAge, ...baseParams);
1455
-
1456
- // Mark importance=1, never-accessed, old observations as pending-purge
1457
- const idleMarked = db.prepare(`
1458
- UPDATE observations SET compressed_into = ${COMPRESSED_PENDING_PURGE}
1459
- WHERE id IN (
1460
- SELECT id FROM observations
1461
- WHERE COALESCE(compressed_into, 0) = 0
1462
- AND COALESCE(importance, 1) = 1
1463
- AND COALESCE(access_count, 0) = 0
1464
- AND created_at_epoch < ?
1465
- ${projectFilter}
1466
- LIMIT ${OP_ROW_CAP}
1467
- )
1468
- `).run(staleAge, ...baseParams);
1469
- results.push(`Decayed ${decayed.changes} stale observations, marked ${idleMarked.changes} idle as pending-purge` + ((decayed.changes >= OP_ROW_CAP || idleMarked.changes >= OP_ROW_CAP) ? ' (cap reached, re-run for more)' : ''));
1325
+ // injection_count>0 protected (maintain-core; unifies with CLI + hook —
1326
+ // the MCP copy previously lacked this clause and decayed/purged injected memories).
1327
+ const { decayed, idleMarked } = decayAndMarkIdle(db, mctx);
1328
+ results.push(`Decayed ${decayed} stale observations, marked ${idleMarked} idle as pending-purge` + ((decayed >= OP_CAP || idleMarked >= OP_CAP) ? ' (cap reached, re-run for more)' : ''));
1470
1329
  }
1471
1330
 
1472
1331
  if (ops.includes('boost')) {
1473
- const boosted = db.prepare(`
1474
- UPDATE observations SET importance = MIN(3, COALESCE(importance, 1) + 1)
1475
- WHERE id IN (
1476
- SELECT id FROM observations
1477
- WHERE COALESCE(compressed_into, 0) = 0
1478
- AND COALESCE(access_count, 0) > 3
1479
- AND COALESCE(importance, 1) < 3
1480
- ${projectFilter}
1481
- LIMIT ${OP_ROW_CAP}
1482
- )
1483
- `).run(...baseParams);
1484
- results.push(`Boosted ${boosted.changes} frequently-accessed observations` + (boosted.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
1332
+ const boosted = boostAccessed(db, mctx);
1333
+ results.push(`Boosted ${boosted} frequently-accessed observations` + (boosted >= OP_CAP ? ' (cap reached, re-run for more)' : ''));
1485
1334
  }
1486
1335
 
1487
1336
  if (ops.includes('demote_pinned')) {
1488
- // CLI-parity (cmdMaintain): repair the citation-decay blind spot. The
1489
- // `decay` op protects injection_count > 0, so a memory injected many
1490
- // times but never cited stays pinned at max importance and keeps
1491
- // dominating injection. Target heavy-injection + zero-citation and
1492
- // drop importance to 1 in one pass — injection priority is binary
1493
- // (importance>=2), so a 3→2 step would not de-rank it. Floor 1 (not
1494
- // purge). PINNED_INJ_THRESHOLD=8.
1495
- const demoted = db.prepare(`
1496
- UPDATE observations SET importance = 1
1497
- WHERE id IN (
1498
- SELECT id FROM observations
1499
- WHERE COALESCE(compressed_into, 0) = 0
1500
- AND COALESCE(injection_count, 0) >= 8
1501
- AND COALESCE(cited_count, 0) = 0
1502
- AND COALESCE(importance, 1) > 1
1503
- ${projectFilter}
1504
- LIMIT ${OP_ROW_CAP}
1505
- )
1506
- `).run(...baseParams);
1507
- results.push(`Demoted ${demoted.changes} pinned-but-uncited observations to importance 1 (inj>=8, cited=0)` + (demoted.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
1337
+ const demoted = demotePinned(db, mctx);
1338
+ results.push(`Demoted ${demoted} pinned-but-uncited observations to importance 1 (inj>=8, cited=0)` + (demoted >= OP_CAP ? ' (cap reached, re-run for more)' : ''));
1508
1339
  }
1509
1340
 
1510
1341
  if (ops.includes('dedup') && args.merge_ids) {
1511
- let totalMerged = 0;
1512
- const mergeStmt = db.prepare('UPDATE observations SET compressed_into = ? WHERE id = ? AND COALESCE(compressed_into, 0) = 0');
1513
- for (const group of args.merge_ids) {
1514
- if (group.length < 2) continue;
1515
- const [keepId, ...removeIds] = group;
1516
- for (const removeId of removeIds) {
1517
- const result = mergeStmt.run(keepId, removeId);
1518
- totalMerged += result.changes;
1519
- }
1520
- }
1342
+ const totalMerged = mergeDuplicates(db, args.merge_ids);
1521
1343
  results.push(`Merged ${totalMerged} duplicate observations`);
1522
1344
  }
1523
1345
 
@@ -1526,19 +1348,10 @@ server.registerTool(
1526
1348
  }
1527
1349
 
1528
1350
  if (ops.includes('purge_stale')) {
1529
- // Delete observations previously marked as pending-purge by idle cleanup.
1530
- // Requires user confirmation via /mem:update or /mem:mem.
1531
1351
  const retainDays = args.retain_days ?? 30;
1532
1352
  const retainCutoff = Date.now() - retainDays * 86400000;
1533
- const purged = db.prepare(`
1534
- DELETE FROM observations
1535
- WHERE id IN (
1536
- SELECT id FROM observations
1537
- WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ? ${projectFilter}
1538
- LIMIT ${OP_ROW_CAP}
1539
- )
1540
- `).run(retainCutoff, ...baseParams);
1541
- results.push(`Purged ${purged.changes} stale observations (retained last ${retainDays} days)` + (purged.changes >= OP_ROW_CAP ? ' (cap reached, re-run for more)' : ''));
1353
+ const purged = purgeStale(db, mctx, retainCutoff);
1354
+ results.push(`Purged ${purged} stale observations (retained last ${retainDays} days)` + (purged >= OP_CAP ? ' (cap reached, re-run for more)' : ''));
1542
1355
  }
1543
1356
  })();
1544
1357
 
@@ -1546,50 +1359,25 @@ server.registerTool(
1546
1359
  db.exec("INSERT INTO observations_fts(observations_fts) VALUES('optimize')");
1547
1360
  results.push('FTS5 index optimized');
1548
1361
 
1549
- // rebuild_vectors: outside main transaction (creates its own internal transaction)
1362
+ // rebuild_vectors: outside main transaction (maintain-core, shared with CLI).
1550
1363
  if (ops.includes('rebuild_vectors')) {
1551
1364
  try {
1552
- _resetVocabCache();
1553
- const vocab = rebuildVocabulary(db);
1554
- if (!vocab) {
1555
- results.push('Vectors: no observations to build vocabulary from');
1556
- } else {
1557
- const allObs = db.prepare(`
1558
- SELECT id, title, narrative, concepts FROM observations
1559
- WHERE COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL
1560
- `).all();
1561
- let updated = 0;
1562
- const insertStmt = db.prepare('INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)');
1563
- const now = Date.now();
1564
- db.transaction(() => {
1565
- db.prepare('DELETE FROM observation_vectors').run();
1566
- for (const obs of allObs) {
1567
- const text = [obs.title || '', obs.narrative || '', obs.concepts || ''].filter(Boolean).join(' ');
1568
- const vec = computeVector(text, vocab);
1569
- if (vec) {
1570
- insertStmt.run(obs.id, Buffer.from(vec.buffer), vocab.version, now);
1571
- updated++;
1572
- }
1573
- }
1574
- })();
1575
- results.push(`Vectors: rebuilt vocabulary (${vocab.terms.size} terms), updated ${updated}/${allObs.length} vectors`);
1576
- }
1365
+ const r = rebuildVectors(db);
1366
+ results.push(r.ok
1367
+ ? `Vectors: rebuilt vocabulary (${r.terms} terms), updated ${r.updated}/${r.total} vectors`
1368
+ : `Vectors: ${r.reason}`);
1577
1369
  } catch (e) {
1578
1370
  debugCatch(e, 'rebuild_vectors');
1579
1371
  results.push(`Vectors: rebuild failed — ${e.message}`);
1580
1372
  }
1581
1373
  }
1582
1374
 
1583
- // vacuum: reclaim freelist dead space left by DELETEs. CLI-parity
1584
- // (cmdMaintain). Must run OUTSIDE any transaction; whole-DB.
1375
+ // vacuum: reclaim freelist dead space left by DELETEs. Whole-DB, outside any
1376
+ // transaction. maintain-core, shared with CLI.
1585
1377
  if (ops.includes('vacuum')) {
1586
1378
  try {
1587
- const pageSize = db.pragma('page_size', { simple: true });
1588
- const freeBefore = db.pragma('freelist_count', { simple: true });
1589
- db.exec('VACUUM');
1590
- const freeAfter = db.pragma('freelist_count', { simple: true });
1591
- const reclaimedMB = ((Math.max(0, freeBefore - freeAfter) * pageSize) / 1048576).toFixed(1);
1592
- results.push(`VACUUM: reclaimed ~${reclaimedMB}MB (freelist ${freeBefore} → ${freeAfter} pages)`);
1379
+ const v = vacuum(db);
1380
+ results.push(`VACUUM: reclaimed ~${v.reclaimedMB}MB (freelist ${v.freeBefore} → ${v.freeAfter} pages)`);
1593
1381
  } catch (e) {
1594
1382
  debugCatch(e, 'vacuum');
1595
1383
  results.push(`VACUUM failed — ${e.message}`);
package/source-files.mjs CHANGED
@@ -77,6 +77,20 @@ export const SOURCE_FILES = [
77
77
  // mem-cli.mjs::cmdSave and server.mjs::mem_save. Statically imported from both
78
78
  // entry points; missing it from the manifest broke MCP saves on auto-update.
79
79
  'lib/save-observation.mjs',
80
+ // Shared "compress old low-value observations into weekly summaries" core.
81
+ // Statically imported by mem-cli.mjs (cmdCompress), server.mjs (mem_compress),
82
+ // and hook.mjs (handleAutoCompress) — same single-source-of-truth pattern as
83
+ // save-observation.mjs; missing it from the manifest would break compress on auto-update.
84
+ 'lib/compress-core.mjs',
85
+ // Shared maintenance ops (decay/cleanup/boost/demote/dedup/purge/vacuum/rebuild).
86
+ // Statically imported by mem-cli.mjs (cmdMaintain), server.mjs (mem_maintain),
87
+ // and hook.mjs (handleAutoMaintain) — missing it would break maintain on auto-update.
88
+ 'lib/maintain-core.mjs',
89
+ // P10 dedup/merge threshold constants — single source of truth for the Jaccard
90
+ // dedup/merge cutoffs. Statically imported by hook.mjs, hook-llm.mjs,
91
+ // hook-optimize.mjs, mem-cli.mjs, server.mjs, and the save/maintain cores;
92
+ // missing it from the manifest would break those paths on auto-update.
93
+ 'lib/dedup-constants.mjs',
80
94
  // v2.70 deferred-work: carry-forward TODO primitives. Statically imported by
81
95
  // server.mjs (mem_defer family) and mem-cli.mjs (defer subcommand).
82
96
  'lib/deferred-work.mjs',
package/tfidf.mjs CHANGED
@@ -10,6 +10,10 @@ import { createHash } from 'crypto';
10
10
  export const VOCAB_DIM = 512;
11
11
  export const MIN_COSINE_SIMILARITY = 0.05;
12
12
  export const VECTOR_SCAN_LIMIT = 500;
13
+ // Reciprocal Rank Fusion constant. Higher k flattens the rank-position weighting
14
+ // (adjacent ranks in each list score closer together); lower k lets the top few ranks
15
+ // dominate. 60 is the de-facto RRF default and balances the two retrievers here.
16
+ export const RRF_K = 60;
13
17
 
14
18
  const VOCAB_STOP_WORDS = new Set([
15
19
  ...BASE_STOP_WORDS,
@@ -192,7 +196,7 @@ export function _resetVocabCache() { _vocabCache = null; }
192
196
  * @param {object} db - better-sqlite3 database
193
197
  * @returns {{ terms: Map<string, {index: number, idf: number}>, version: string, dim: number } | null}
194
198
  */
195
- export function buildVocabulary(db) {
199
+ export function buildVocabulary(db, { dim = VOCAB_DIM } = {}) {
196
200
  const rows = db.prepare(`
197
201
  SELECT title, narrative, concepts FROM observations
198
202
  WHERE COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL
@@ -217,7 +221,7 @@ export function buildVocabulary(db) {
217
221
  .filter(([term, freq]) => !isNoiseTerm(term) && freq >= 2)
218
222
  .map(([term, freq]) => ({ term, df: freq, idf: idf(freq), ig: freq * idf(freq) }))
219
223
  .sort((a, b) => b.ig - a.ig)
220
- .slice(0, VOCAB_DIM);
224
+ .slice(0, dim);
221
225
 
222
226
  // Build terms map with index and IDF
223
227
  const terms = new Map();
@@ -229,7 +233,7 @@ export function buildVocabulary(db) {
229
233
  const termList = sortedTerms.map(e => e.term).join(',');
230
234
  const version = createHash('md5').update(termList).digest('hex').slice(0, 12);
231
235
 
232
- const vocab = { terms, version, dim: VOCAB_DIM };
236
+ const vocab = { terms, version, dim };
233
237
  _vocabCache = vocab;
234
238
  return vocab;
235
239
  }
@@ -239,8 +243,8 @@ export function buildVocabulary(db) {
239
243
  * @param {object} db - better-sqlite3 database
240
244
  * @returns {object|null} The new vocabulary
241
245
  */
242
- export function rebuildVocabulary(db) {
243
- const vocab = buildVocabulary(db);
246
+ export function rebuildVocabulary(db, opts) {
247
+ const vocab = buildVocabulary(db, opts);
244
248
  if (!vocab) return null;
245
249
 
246
250
  const insertStmt = db.prepare(
@@ -358,7 +362,7 @@ export function cosineSimilarity(a, b) {
358
362
  const VECTOR_TIME_WINDOW_MS = 90 * 24 * 60 * 60 * 1000; // 90 days
359
363
  const VECTOR_MIN_RESULTS = 50; // fallback to full scan if time-window yields fewer
360
364
 
361
- export function vectorSearch(db, queryVec, { project, type, vocabVersion, limit = VECTOR_SCAN_LIMIT }) {
365
+ export function vectorSearch(db, queryVec, { project, type, vocabVersion, limit = VECTOR_SCAN_LIMIT, minCosine = MIN_COSINE_SIMILARITY }) {
362
366
  if (!queryVec) return [];
363
367
 
364
368
  const now = Date.now();
@@ -403,7 +407,7 @@ export function vectorSearch(db, queryVec, { project, type, vocabVersion, limit
403
407
  for (const row of rows) {
404
408
  const vec = new Float32Array(row.vector.buffer.slice(row.vector.byteOffset, row.vector.byteOffset + row.vector.byteLength));
405
409
  const sim = cosineSimilarity(queryVec, vec);
406
- if (sim > MIN_COSINE_SIMILARITY) results.push({ id: row.observation_id, similarity: sim });
410
+ if (sim > minCosine) results.push({ id: row.observation_id, similarity: sim });
407
411
  }
408
412
  results.sort((a, b) => b.similarity - a.similarity);
409
413
  return results.slice(0, 20);
@@ -418,7 +422,7 @@ export function vectorSearch(db, queryVec, { project, type, vocabVersion, limit
418
422
  * @param {number} k - RRF constant (default 60)
419
423
  * @returns {{ id: number, rrfScore: number }[]}
420
424
  */
421
- export function rrfMerge(bm25Results, vectorResults, k = 60) {
425
+ export function rrfMerge(bm25Results, vectorResults, k = RRF_K) {
422
426
  const scores = new Map();
423
427
  bm25Results.forEach((r, i) => {
424
428
  scores.set(r.id, (scores.get(r.id) ?? 0) + 1 / (k + i + 1));