claude-mem-lite 3.8.0 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "3.8.0",
13
+ "version": "3.9.1",
14
14
  "source": "./",
15
15
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.8.0",
3
+ "version": "3.9.1",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/hook-llm.mjs CHANGED
@@ -20,6 +20,7 @@ import {
20
20
  } from './hook-shared.mjs';
21
21
  import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
22
22
  import { isNoiseObservation, capNoiseImportance, isLowYieldChangeObs } from './lib/low-signal-patterns.mjs';
23
+ import { episodeHasSignificantContent } from './hook-episode.mjs';
23
24
 
24
25
  // T9: memdir-incompatible types live in the `events` table, not `observations`.
25
26
  // Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
@@ -467,6 +468,24 @@ export function buildDegradedTitle(episode) {
467
468
  .trim();
468
469
  }
469
470
 
471
+ // Best-effort SYNCHRONOUS persist of an episode's rule-based observation. Shared by
472
+ // the normal flush and the SIGTERM/SIGINT shutdown handler. The ep-flush-* file the
473
+ // shutdown handler writes has NO consumer (only spawnBackground-passed files are
474
+ // processed), so without this the in-flight episode is silently lost on abnormal
475
+ // termination — and spawning a detached child from a dying process is unreliable, so
476
+ // the save must be synchronous (audit #6). Never throws; returns the obs id or null.
477
+ export function saveEpisodeImmediate(episode, externalDb) {
478
+ try {
479
+ if (!episode || !Array.isArray(episode.entries) || episode.entries.length === 0) return null;
480
+ if (!episodeHasSignificantContent(episode)) return null;
481
+ const obs = buildImmediateObservation(episode);
482
+ return saveObservation(obs, episode.project, episode.sessionId, externalDb) || null;
483
+ } catch (e) {
484
+ debugCatch(e, 'saveEpisodeImmediate');
485
+ return null;
486
+ }
487
+ }
488
+
470
489
  /**
471
490
  * Build a rule-based observation from episode metadata for immediate DB persistence.
472
491
  * Used as pre-save (before LLM) and as fallback when LLM is unavailable.
package/hook-update.mjs CHANGED
@@ -440,20 +440,27 @@ async function fetchAssetBuffer(url) {
440
440
  }
441
441
 
442
442
  // I/O gate called from downloadAndInstall after validateExtractedTarball.
443
- // Opportunistic: returns ok=false ONLY on a genuine tampering signal. Missing
444
- // embedded key, missing signature assets, asset-fetch failure, or the
445
- // CLAUDE_MEM_SKIP_SIG_VERIFY escape hatch all return ok=true so a verification
446
- // gap can never permanently brick auto-update.
447
- export async function verifyReleaseAuthenticity(extractedDir, assets) {
443
+ // Two regimes, switched by whether a public key is embedded:
444
+ // No embedded key (the shipped default, RELEASE_PUBLIC_KEY=''): INERT — returns
445
+ // skipped-no-pubkey so an un-provisioned key can never brick auto-update.
446
+ // Key embedded (signing active): FAIL CLOSED — a missing signature asset, a
447
+ // signature-asset fetch failure, or an invalid signature all return ok=false.
448
+ // Once we publish signed releases, an attacker who can publish a release or MITM
449
+ // the asset CDN must not bypass verification by stripping the signature assets
450
+ // (the tags-fallback path also sends assets:[]). A transient fetch failure only
451
+ // defers the install to the next ~6h poll, not a permanent brick. (audit P1 #5)
452
+ // The CLAUDE_MEM_SKIP_SIG_VERIFY escape hatch still forces a skip. publicKey is a
453
+ // param (defaulting to the embedded constant) only so tests can exercise both regimes.
454
+ export async function verifyReleaseAuthenticity(extractedDir, assets, publicKey = RELEASE_PUBLIC_KEY) {
448
455
  if (process.env.CLAUDE_MEM_SKIP_SIG_VERIFY) return { ok: true, action: 'skipped-env' };
449
- if (!RELEASE_PUBLIC_KEY) return { ok: true, action: 'skipped-no-pubkey' };
456
+ if (!publicKey) return { ok: true, action: 'skipped-no-pubkey' };
450
457
 
451
458
  const list = Array.isArray(assets) ? assets : [];
452
459
  const manifestAsset = list.find(a => a && a.name === MANIFEST_ASSET_NAME);
453
460
  const sigAsset = list.find(a => a && a.name === SIGNATURE_ASSET_NAME);
454
461
  if (!manifestAsset || !sigAsset) {
455
- debugLog('WARN', 'hook-update', 'Release carries no signature assets — proceeding unverified (unsigned release)');
456
- return { ok: true, action: 'skipped-no-signature' };
462
+ debugLog('WARN', 'hook-update', 'Signed-release mode: release carries no signature assets — refusing to install (possible downgrade/strip)');
463
+ return { ok: false, action: 'missing-signature' };
457
464
  }
458
465
 
459
466
  let manifestBytes, signatureB64;
@@ -461,12 +468,12 @@ export async function verifyReleaseAuthenticity(extractedDir, assets) {
461
468
  manifestBytes = await fetchAssetBuffer(manifestAsset.browser_download_url);
462
469
  signatureB64 = (await fetchAssetBuffer(sigAsset.browser_download_url)).toString('utf8').trim();
463
470
  } catch (e) {
464
- // A flaky asset CDN is not a tampering signal — don't brick the update over it.
465
- debugLog('WARN', 'hook-update', `Signature asset fetch failed (${e.message}) — proceeding unverified`);
466
- return { ok: true, action: 'skipped-fetch-failed' };
471
+ // Can't fetch the signature → can't verify → don't install this cycle (retries next poll).
472
+ debugLog('WARN', 'hook-update', `Signed-release mode: signature asset fetch failed (${e.message}) — refusing to install this cycle`);
473
+ return { ok: false, action: 'signature-fetch-failed' };
467
474
  }
468
475
 
469
- const r = verifyDownloadedRelease(extractedDir, manifestBytes, signatureB64);
476
+ const r = verifyDownloadedRelease(extractedDir, manifestBytes, signatureB64, publicKey);
470
477
  if (!r.ok) return { ok: false, action: r.reason };
471
478
  debugLog('DEBUG', 'hook-update', 'Release signature verified');
472
479
  return { ok: true, action: 'verified' };
package/hook.mjs CHANGED
@@ -27,7 +27,6 @@ import {
27
27
  extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
28
28
  makeEntryDesc, scrubSecrets, stripPrivate, EDIT_TOOLS, debugCatch, debugLog,
29
29
  COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, OBS_BM25,
30
- computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
31
30
  } from './utils.mjs';
32
31
  import {
33
32
  readEpisodeRaw, episodeFile,
@@ -43,11 +42,11 @@ import {
43
42
  sessionFile, getSessionId, createSessionId, openDb,
44
43
  spawnBackground, sweepOrphanEpisodeFiles,
45
44
  } from './hook-shared.mjs';
46
- import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
45
+ import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation, saveEpisodeImmediate } from './hook-llm.mjs';
47
46
  import { scrubRecord } from './lib/scrub-record.mjs';
48
47
  import { formatHookError } from './lib/native-binding-hint.mjs';
49
48
  import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
50
- import { cleanupBroken, decayAndMarkIdle, boostAccessed } from './lib/maintain-core.mjs';
49
+ import { cleanupBroken, decayAndMarkIdle, boostAccessed, selectFuzzyDedupeIds } from './lib/maintain-core.mjs';
51
50
  import {
52
51
  extractCitationsFromTranscript,
53
52
  extractAllInjected,
@@ -66,7 +65,6 @@ import { handleLLMOptimize } from './hook-optimize.mjs';
66
65
  import { silentAutoAdopt, hasAutoAdoptMarker } from './adopt-cli.mjs';
67
66
  import { emitV270UpgradeBanner } from './lib/upgrade-banner.mjs';
68
67
  import { loadCiteBackForEpisode, extractCiteBackSignals, buildUnsavedBugfixHint, countUnsavedBugfixShape, buildCiteRecallNudge as libBuildCiteRecallNudge, nextCiteLowStreak } from './lib/cite-back-hint.mjs';
69
- import { MINHASH_PREFILTER, FUZZY_DEDUP_THRESHOLD } from './lib/dedup-constants.mjs';
70
68
  // plugin-cache-guard.mjs loaded dynamically — pre-2.31.2 installs that auto-upgraded
71
69
  // from an older hook-update.mjs SOURCE_FILES (which did not list this module) would
72
70
  // crash on static import. Degrade gracefully to no-op when the module is absent.
@@ -115,6 +113,10 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
115
113
  try {
116
114
  const ep = readEpisodeRaw();
117
115
  if (ep && ep.entries && ep.entries.length > 0) {
116
+ // Persist a rule-based observation synchronously BEFORE writing the flush
117
+ // file — that file has no consumer, so this is the only thing that prevents
118
+ // the in-flight episode being lost on abnormal termination (audit #6).
119
+ saveEpisodeImmediate(ep);
118
120
  const flushFile = join(RUNTIME_DIR, `ep-flush-${Date.now()}-${randomUUID().slice(0, 8)}.json`);
119
121
  writeFileSync(flushFile, JSON.stringify(ep));
120
122
  try { unlinkSync(join(RUNTIME_DIR, `ep-${inferProject()}.json`)); } catch {}
@@ -551,7 +553,11 @@ async function handleStop() {
551
553
  // Union closed by extractAllInjected — one integration point so the
552
554
  // contract test in tests/citation-tracker-userprompt.test.mjs covers it.
553
555
  try {
554
- const injected = extractAllInjected(transcriptPath);
556
+ // mainOnly: the injected denominator must use the same thread
557
+ // filter as citedMain (the numerator, below) — an obs injected only
558
+ // inside a subagent (sidechain) would otherwise enter the denominator
559
+ // but never the numerator and streak-demote despite being used there.
560
+ const injected = extractAllInjected(transcriptPath, { mainOnly: true });
555
561
  // P5 ①: cite-back signals — observations whose warned file the agent
556
562
  // edited this session. Union into injected so they're resolved (they
557
563
  // were injected via pre-tool-recall) and, below, into cited so the
@@ -788,7 +794,7 @@ function runSessionStartAutoMaintain(db) {
788
794
  const SCAN_LIMIT = 500;
789
795
  const FUZZY_MAX_MERGES = 20;
790
796
  const recent = db.prepare(`
791
- SELECT id, title, importance, created_at_epoch
797
+ SELECT id, title, importance, created_at_epoch, narrative, text
792
798
  FROM observations
793
799
  WHERE COALESCE(compressed_into, 0) = 0
794
800
  AND superseded_at IS NULL
@@ -797,24 +803,14 @@ function runSessionStartAutoMaintain(db) {
797
803
  ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
798
804
  `).all(STALE_AGE);
799
805
  if (recent.length >= 2) {
800
- const titles = recent.map(r => r.title.trim());
801
- const minhashes = titles.map(t => t ? computeMinHash(t) : null);
802
- const fuzzyRemoveIds = [];
803
- const removed = new Set();
804
- outer: for (let i = 0; i < recent.length; i++) {
805
- if (!minhashes[i] || removed.has(recent[i].id)) continue;
806
- for (let j = i + 1; j < recent.length; j++) {
807
- if (!minhashes[j] || removed.has(recent[j].id)) continue;
808
- if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
809
- if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_DEDUP_THRESHOLD) continue;
810
- // Keep the higher-importance row; tiebreak by older (lower id wins access history)
811
- const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
812
- const remove = keep === recent[i] ? recent[j] : recent[i];
813
- fuzzyRemoveIds.push(remove.id);
814
- removed.add(remove.id);
815
- if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
816
- }
817
- }
806
+ // audit #8: supersede only when title AND body match — title-only (a word-SET
807
+ // metric) collapsed distinct observations sharing a title token-set. The
808
+ // selection is the shared pure core in lib/maintain-core (unit-tested there).
809
+ const rows = recent.map(r => ({
810
+ id: r.id, title: r.title, importance: r.importance,
811
+ body: (r.narrative && r.narrative.trim()) || (r.text && r.text.trim()) || '',
812
+ }));
813
+ const fuzzyRemoveIds = selectFuzzyDedupeIds(rows, { maxMerges: FUZZY_MAX_MERGES });
818
814
  if (fuzzyRemoveIds.length > 0) {
819
815
  const ph = fuzzyRemoveIds.map(() => '?').join(',');
820
816
  db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
@@ -182,8 +182,15 @@ function normalizeHookCommand(command) {
182
182
  *
183
183
  * @param {string|null|undefined} transcriptPath
184
184
  * @param {(ctx: {command: string, text: string}) => void} fn
185
+ * @param {object} [opts]
186
+ * @param {boolean} [opts.mainOnly=false] If true, skip attachments on sidechain
187
+ * (subagent) transcript records. Mirrors extractCitationsFromTranscript's
188
+ * mainOnly so the citation-decay injected DENOMINATOR uses the same thread
189
+ * filter as the cited NUMERATOR — an obs injected only inside a subagent must
190
+ * not enter the denominator, else it streak-demotes despite being used there.
185
191
  */
186
- function eachHookAttachment(transcriptPath, fn) {
192
+ function eachHookAttachment(transcriptPath, fn, opts = {}) {
193
+ const { mainOnly = false } = opts;
187
194
  if (!transcriptPath || !existsSync(transcriptPath)) return;
188
195
  let raw;
189
196
  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return; }
@@ -192,6 +199,7 @@ function eachHookAttachment(transcriptPath, fn) {
192
199
  let entry;
193
200
  try { entry = JSON.parse(line); } catch { continue; }
194
201
  if (entry.type !== 'attachment') continue;
202
+ if (mainOnly && entry.isSidechain === true) continue;
195
203
  const att = entry.attachment;
196
204
  if (!att || att.type !== 'hook_success') continue;
197
205
  const stdout = att.stdout || '';
@@ -217,14 +225,14 @@ function eachHookAttachment(transcriptPath, fn) {
217
225
  * @param {string|null|undefined} transcriptPath
218
226
  * @returns {Set<number>} unique injected IDs (empty set on missing path/file)
219
227
  */
220
- export function extractInjectedFromPreToolUse(transcriptPath) {
228
+ export function extractInjectedFromPreToolUse(transcriptPath, opts = {}) {
221
229
  const ids = new Set();
222
230
  eachHookAttachment(transcriptPath, ({ command, text }) => {
223
231
  if (!command.includes('pre-tool-recall')) return;
224
232
  INJECTED_RE.lastIndex = 0;
225
233
  let m;
226
234
  while ((m = INJECTED_RE.exec(text))) addObsId(ids, m[1]);
227
- });
235
+ }, opts);
228
236
  return ids;
229
237
  }
230
238
 
@@ -251,7 +259,7 @@ const UPS_COMMAND_SUFFIX = 'hook.mjs user-prompt';
251
259
  * @param {string|null|undefined} transcriptPath
252
260
  * @returns {Set<number>}
253
261
  */
254
- export function extractInjectedFromUserPromptSubmit(transcriptPath) {
262
+ export function extractInjectedFromUserPromptSubmit(transcriptPath, opts = {}) {
255
263
  const ids = new Set();
256
264
  eachHookAttachment(transcriptPath, ({ command, text }) => {
257
265
  if (!command.includes(UPS_COMMAND_SUFFIX)) return;
@@ -265,7 +273,7 @@ export function extractInjectedFromUserPromptSubmit(transcriptPath) {
265
273
  if (matches.length === 0) continue;
266
274
  addObsId(ids, matches[matches.length - 1][1]);
267
275
  }
268
- });
276
+ }, opts);
269
277
  return ids;
270
278
  }
271
279
 
@@ -280,7 +288,7 @@ export function extractInjectedFromUserPromptSubmit(transcriptPath) {
280
288
  * @param {string|null|undefined} transcriptPath
281
289
  * @returns {Set<number>}
282
290
  */
283
- export function extractInjectedFromErrorRecall(transcriptPath) {
291
+ export function extractInjectedFromErrorRecall(transcriptPath, opts = {}) {
284
292
  const ids = new Set();
285
293
  eachHookAttachment(transcriptPath, ({ command, text }) => {
286
294
  if (!command.includes('post-tool-use')) return;
@@ -290,7 +298,7 @@ export function extractInjectedFromErrorRecall(transcriptPath) {
290
298
  INJECTED_RE.lastIndex = 0;
291
299
  let m;
292
300
  while ((m = INJECTED_RE.exec(text))) addObsId(ids, m[1]);
293
- });
301
+ }, opts);
294
302
  return ids;
295
303
  }
296
304
 
@@ -311,7 +319,7 @@ const FYI_LINE_ID_RE = /^#(\d{1,7})\s/;
311
319
  * @param {string|null|undefined} transcriptPath
312
320
  * @returns {Set<number>}
313
321
  */
314
- export function extractInjectedFromFyi(transcriptPath) {
322
+ export function extractInjectedFromFyi(transcriptPath, opts = {}) {
315
323
  const ids = new Set();
316
324
  eachHookAttachment(transcriptPath, ({ command, text }) => {
317
325
  if (!command.includes('user-prompt-search')) return;
@@ -320,7 +328,7 @@ export function extractInjectedFromFyi(transcriptPath) {
320
328
  const m = FYI_LINE_ID_RE.exec(fyiLine);
321
329
  if (m) addObsId(ids, m[1]);
322
330
  }
323
- });
331
+ }, opts);
324
332
  return ids;
325
333
  }
326
334
 
@@ -330,14 +338,18 @@ export function extractInjectedFromFyi(transcriptPath) {
330
338
  * user-prompt-search FYI block. Single integration point the Stop handler calls.
331
339
  *
332
340
  * @param {string|null|undefined} transcriptPath
341
+ * @param {object} [opts]
342
+ * @param {boolean} [opts.mainOnly=false] Skip sidechain-injected IDs. The
343
+ * citation-decay caller passes true so the injected denominator matches the
344
+ * mainOnly cited numerator; the P4 access-bump caller omits it (broader).
333
345
  * @returns {Set<number>}
334
346
  */
335
- export function extractAllInjected(transcriptPath) {
347
+ export function extractAllInjected(transcriptPath, opts = {}) {
336
348
  return new Set([
337
- ...extractInjectedFromPreToolUse(transcriptPath),
338
- ...extractInjectedFromUserPromptSubmit(transcriptPath),
339
- ...extractInjectedFromErrorRecall(transcriptPath),
340
- ...extractInjectedFromFyi(transcriptPath),
349
+ ...extractInjectedFromPreToolUse(transcriptPath, opts),
350
+ ...extractInjectedFromUserPromptSubmit(transcriptPath, opts),
351
+ ...extractInjectedFromErrorRecall(transcriptPath, opts),
352
+ ...extractInjectedFromFyi(transcriptPath, opts),
341
353
  ]);
342
354
  }
343
355
 
@@ -33,3 +33,10 @@ export const MINHASH_PREFILTER = 0.7;
33
33
  // 0.95: strict title-Jaccard cutoff for the hook post-inject fuzzy-dedup pass — only
34
34
  // collapse near-identical titles inline; anything softer waits for the maintain sweep.
35
35
  export const FUZZY_DEDUP_THRESHOLD = 0.95;
36
+
37
+ // 0.5: companion BODY-Jaccard floor for the hook fuzzy-dedup pass (audit #8). Titles
38
+ // alone are a word-SET metric, so two distinct observations sharing a title token-set
39
+ // ("Fix auth bug in login.mjs" vs "Fix login.mjs auth bug") would collapse and hide
40
+ // one body. Requiring the narratives to also overlap means only a genuine re-save of
41
+ // the same event (near-identical body) supersedes; distinct bodies are kept.
42
+ export const FUZZY_BODY_THRESHOLD = 0.5;
@@ -15,7 +15,7 @@
15
15
  // Gated entirely by CLAUDE_MEM_CATCH_SAMPLE env (0..1). Default off. All
16
16
  // failures inside the sampler are swallowed — never crash the caller.
17
17
 
18
- import { appendFileSync, mkdirSync, existsSync } from 'fs';
18
+ import { appendFileSync, mkdirSync, existsSync, readdirSync, statSync, unlinkSync } from 'fs';
19
19
  import { join } from 'path';
20
20
  import { scrubSecrets } from '../secret-scrub.mjs';
21
21
 
@@ -32,6 +32,22 @@ function parseSampleRate(raw) {
32
32
  return Number.isFinite(n) && n >= 0 && n <= 1 ? n : 0;
33
33
  }
34
34
 
35
+ // Delete daily shards older than the retention window. Mirrors
36
+ // lib/hook-telemetry.pruneOldShards (the sibling JSONL sink). Without this the
37
+ // retention constant was dead and errors/ grew one shard/day forever once
38
+ // CLAUDE_MEM_CATCH_SAMPLE was set — a slow unbounded leak in the user data dir.
39
+ function pruneOldShards(dir) {
40
+ let entries;
41
+ try { entries = readdirSync(dir); } catch { return; }
42
+ const cutoff = Date.now() - SAMPLE_LOG_RETENTION_MS;
43
+ for (const f of entries) {
44
+ if (!/^\d{4}-\d{2}-\d{2}\.jsonl$/.test(f)) continue;
45
+ try {
46
+ if (statSync(join(dir, f)).mtimeMs < cutoff) unlinkSync(join(dir, f));
47
+ } catch { /* gone or unreadable — skip */ }
48
+ }
49
+ }
50
+
35
51
  /**
36
52
  * Sample one caught error into the daily JSONL log.
37
53
  * @param {Error|unknown} e Caught error
@@ -59,6 +75,7 @@ export function maybeSampleError(e, ctx, dbDir) {
59
75
  }) + '\n';
60
76
 
61
77
  appendFileSync(join(errDir, `${today()}.jsonl`), line, { mode: 0o600 });
78
+ pruneOldShards(errDir);
62
79
  } catch { /* sampler must never throw */ }
63
80
  }
64
81
 
@@ -14,7 +14,7 @@
14
14
 
15
15
  import { COMPRESSED_PENDING_PURGE, computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity } from '../utils.mjs';
16
16
  import { rebuildVocabulary, computeVector, _resetVocabCache } from '../tfidf.mjs';
17
- import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC } from './dedup-constants.mjs';
17
+ import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC, FUZZY_DEDUP_THRESHOLD, FUZZY_BODY_THRESHOLD, MINHASH_PREFILTER } from './dedup-constants.mjs';
18
18
 
19
19
  export const STALE_AGE_MS = 30 * 86400000;
20
20
  export const OP_CAP = 1000;
@@ -28,6 +28,57 @@ export const MINHASH_PRE_THRESHOLD = MINHASH_PRE_THRESHOLD_SRC;
28
28
  // the regular decay op can't touch (decay protects injection_count>0).
29
29
  export const PINNED_INJ_THRESHOLD = 8;
30
30
 
31
+ // Two trimmed bodies count as "the same body" when both are empty (a genuine
32
+ // no-body re-save) or their word-set Jaccard clears the floor. One-empty-one-not
33
+ // is treated as DISTINCT so a body-bearing observation is never hidden by a
34
+ // body-less peer that merely shares its title.
35
+ function bodiesSimilar(a, b, threshold) {
36
+ const ba = (a || '').trim();
37
+ const bb = (b || '').trim();
38
+ if (!ba && !bb) return true;
39
+ if (!ba || !bb) return false;
40
+ return jaccardSimilarity(ba, bb) >= threshold;
41
+ }
42
+
43
+ /**
44
+ * Pick which near-duplicate observation ids to supersede in the hook fuzzy-dedup
45
+ * pass. Pure (no DB) so it is unit-testable. A pair must clear BOTH the title
46
+ * thresholds (MinHash prefilter → exact title Jaccard) AND the body Jaccard floor
47
+ * before the lower-importance row is marked for superseding (audit #8 — title-only
48
+ * matching collapsed observations with the same title token-set but different bodies).
49
+ * @param {Array<{id:number,title:string,body:string,importance:number}>} rows
50
+ * Candidate rows in scan order (caller decides ordering / recency window).
51
+ * @returns {number[]} ids to supersede (lower-importance member of each matched pair).
52
+ */
53
+ export function selectFuzzyDedupeIds(rows, {
54
+ titleThreshold = FUZZY_DEDUP_THRESHOLD,
55
+ bodyThreshold = FUZZY_BODY_THRESHOLD,
56
+ minhashPrefilter = MINHASH_PREFILTER,
57
+ maxMerges = 20,
58
+ } = {}) {
59
+ const removeIds = [];
60
+ if (!Array.isArray(rows) || rows.length < 2) return removeIds;
61
+ const removed = new Set();
62
+ const titles = rows.map(r => (r.title || '').trim());
63
+ const minhashes = titles.map(t => t ? computeMinHash(t) : null);
64
+ outer: for (let i = 0; i < rows.length; i++) {
65
+ if (!minhashes[i] || removed.has(rows[i].id)) continue;
66
+ for (let j = i + 1; j < rows.length; j++) {
67
+ if (!minhashes[j] || removed.has(rows[j].id)) continue;
68
+ if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < minhashPrefilter) continue;
69
+ if (jaccardSimilarity(titles[i], titles[j]) < titleThreshold) continue;
70
+ if (!bodiesSimilar(rows[i].body, rows[j].body, bodyThreshold)) continue;
71
+ // Keep the higher-importance row; tiebreak by earlier scan position (kept as i).
72
+ const keep = (rows[i].importance ?? 1) >= (rows[j].importance ?? 1) ? rows[i] : rows[j];
73
+ const remove = keep === rows[i] ? rows[j] : rows[i];
74
+ removeIds.push(remove.id);
75
+ removed.add(remove.id);
76
+ if (removeIds.length >= maxMerges) break outer;
77
+ }
78
+ }
79
+ return removeIds;
80
+ }
81
+
31
82
  /** Delete broken observations (no title AND no narrative). Returns rows deleted. */
32
83
  // Before hard-deleting observations, un-hide any rows merged INTO them. A child has
33
84
  // compressed_into = <keeperId>; deleting that keeper (compressed_into has no FK) would
@@ -428,7 +428,10 @@ export async function coreRunSearchPipeline(ctx, opts) {
428
428
  const doReRank = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : !deepReranked;
429
429
  if (doReRank) reRankWithContext(db, obsResults, rerankProject);
430
430
  markSuperseded(obsResults);
431
- const doReSort = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : isCrossSource;
431
+ // CLI single-source path must also re-sort when a context re-rank actually ran,
432
+ // else reRankWithContext's score boost mutates scores but never reorders output
433
+ // (audit #9). MCP branch unchanged. doReRank already implies a rerank happened.
434
+ const doReSort = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : (isCrossSource || doReRank);
432
435
  if (doReSort) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
433
436
  }
434
437
 
package/mem-cli.mjs CHANGED
@@ -57,6 +57,11 @@ async function cmdSearch(db, args, { llm } = {}) {
57
57
  return;
58
58
  }
59
59
 
60
+ // Bare string flags parse to boolean `true`; without this guard `--branch` reaches
61
+ // the SQLite bind and crashes, while `--to`/`--project` silently change results
62
+ // (epoch-1 upper bound → zero rows; unscoped search). (audit P1 #3)
63
+ if (rejectBareStringFlags(flags, ['source', 'project', 'from', 'to', 'branch'])) return;
64
+
60
65
  const limit = parseIntFlag(flags.limit, { name: '--limit', defaultValue: 20, max: 1000 });
61
66
  const type = flags.type || null;
62
67
  const validObsTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
@@ -1400,8 +1405,11 @@ function cmdUpdate(db, args) {
1400
1405
  `Prompts and sessions are append-only.`);
1401
1406
  return;
1402
1407
  }
1408
+ // Strict parseIdToken gate (aligned with cmdDelete): a bare parseInt fallback
1409
+ // truncated "3.9" → 3 and silently UPDATE'd the WRONG row #3 (no preview, no
1410
+ // --confirm). Require an exact obs-id token; non-matching input → usage error.
1403
1411
  const parsed = raw ? parseIdToken(raw) : null;
1404
- const id = parsed && parsed.source === null ? parsed.id : parseInt(raw, 10);
1412
+ const id = parsed && parsed.source === null ? parsed.id : NaN;
1405
1413
  if (!id || isNaN(id)) {
1406
1414
  fail('[mem] Usage: claude-mem-lite update <id> [--title T] [--type T] [--importance N] [--lesson T] [--narrative T] [--concepts T]');
1407
1415
  return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "3.8.0",
3
+ "version": "3.9.1",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
package/registry.mjs CHANGED
@@ -110,7 +110,7 @@ const TRIGGERS_SCHEMA = `
110
110
  const INVOCATIONS_SCHEMA = `
111
111
  CREATE TABLE IF NOT EXISTS invocations (
112
112
  id INTEGER PRIMARY KEY AUTOINCREMENT,
113
- resource_id INTEGER NOT NULL REFERENCES resources(id),
113
+ resource_id INTEGER NOT NULL REFERENCES resources(id) ON DELETE CASCADE,
114
114
  session_id TEXT,
115
115
  trigger TEXT CHECK(trigger IN ('session_start','pre_tool_use','user_explicit','user_prompt')),
116
116
  tier INTEGER CHECK(tier IN (1,2,3)),
@@ -195,11 +195,19 @@ export function ensureRegistryDb(dbPath) {
195
195
  } catch (e) { debugCatch(e, 'resources-column-migration'); }
196
196
 
197
197
  // Migrate: add 'github' to source CHECK constraint (required for smart import)
198
- // Must disable FK checks during table recreation (RENAME triggers FK validation)
198
+ // Must disable FK checks during table recreation (RENAME triggers FK validation).
199
+ // legacy_alter_table=ON is REQUIRED: under modern SQLite (the better-sqlite3
200
+ // default) `ALTER TABLE resources RENAME TO resources_old` rewrites child-table FK
201
+ // references, so invocations.resource_id would become `REFERENCES resources_old`
202
+ // and the trailing DROP would leave it dangling — silently killing every future
203
+ // `INSERT INTO invocations` (audit P0 #1). Legacy mode keeps child FKs pointing at
204
+ // the original name, which the freshly-created `resources` table then satisfies.
205
+ let resourcesRebuilt = false;
199
206
  try {
200
207
  const resSchema = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='resources'`).get();
201
208
  if (resSchema?.sql && !resSchema.sql.includes("'github'")) {
202
209
  db.pragma('foreign_keys = OFF');
210
+ db.pragma('legacy_alter_table = ON');
203
211
  try {
204
212
  db.transaction(() => {
205
213
  const hasOld = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='resources_old'`).get();
@@ -216,10 +224,18 @@ export function ensureRegistryDb(dbPath) {
216
224
  const common = cols.filter(c => newCols.has(c)).join(', ');
217
225
  db.exec(`INSERT INTO resources (${common}) SELECT ${common} FROM resources_old`);
218
226
  db.exec(`DROP TABLE resources_old`);
227
+ // Recreate the table's indexes: the CREATE INDEX IF NOT EXISTS inside
228
+ // RESOURCES_SCHEMA above was SKIPPED while resources_old still held the
229
+ // index names, so the rebuilt table had NONE — including the UNIQUE
230
+ // idx_res_type_name that upsertResource's ON CONFLICT(type,name) requires
231
+ // (review HIGH-1; pre-existing, closed here). Names are free post-DROP.
232
+ db.exec(RESOURCES_SCHEMA);
219
233
  })();
220
234
  } finally {
235
+ db.pragma('legacy_alter_table = OFF');
221
236
  db.pragma('foreign_keys = ON');
222
237
  }
238
+ resourcesRebuilt = true;
223
239
  }
224
240
  } catch (e) { debugCatch(e, 'resources-source-check-migration'); }
225
241
 
@@ -231,6 +247,16 @@ export function ensureRegistryDb(dbPath) {
231
247
  // Triggers: always ensure (IF NOT EXISTS) — fixes DBs where FTS5 was created without triggers
232
248
  db.exec(TRIGGERS_SCHEMA);
233
249
 
250
+ // The source-CHECK migration replaced the `resources` content table out from under
251
+ // the external-content FTS index (content=resources), leaving resources_fts stale.
252
+ // Rebuild it so a later DELETE's res_fts_delete trigger doesn't throw "database disk
253
+ // image is malformed" against the mismatched index. Gated on the migration actually
254
+ // having run so we don't rebuild on every open.
255
+ if (resourcesRebuilt) {
256
+ try { db.exec("INSERT INTO resources_fts(resources_fts) VALUES('rebuild')"); }
257
+ catch (e) { debugCatch(e, 'resources-fts-rebuild-after-source-check'); }
258
+ }
259
+
234
260
  db.exec(INVOCATIONS_SCHEMA);
235
261
 
236
262
  // Migrate invocations CHECK constraint: add 'user_prompt' trigger value
@@ -281,10 +307,44 @@ export function ensureRegistryDb(dbPath) {
281
307
  }
282
308
  } catch (e) { debugCatch(e, 'rejection_reason-migration'); }
283
309
 
284
- // Migrate: ensure composite index on invocations(resource_id, created_at) for correlated subqueries
310
+ // Migrate: add ON DELETE CASCADE to invocations.resource_id (audit P0 #4). Old DBs
311
+ // declared the FK with no ON DELETE action, so deleting a resource that had
312
+ // invocation history threw SQLITE_CONSTRAINT_FOREIGNKEY (registry remove /
313
+ // mem_registry delete) or silently no-op'd (dead-repo purge). SQLite can't ALTER an
314
+ // FK, so rebuild the table. Renaming the CHILD table is safe (nothing references
315
+ // invocations), so legacy_alter_table is not a concern here. Runs after the
316
+ // rejection_reason ADD COLUMN so the column exists in both old and new tables.
285
317
  try {
286
- db.exec(`CREATE INDEX IF NOT EXISTS idx_invocations_resource_created ON invocations(resource_id, created_at)`);
287
- } catch (e) { debugCatch(e, 'invocations-resource-created-index-migration'); }
318
+ const schema = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='invocations'`).get();
319
+ if (schema?.sql && !/ON DELETE CASCADE/i.test(schema.sql)) {
320
+ db.transaction(() => {
321
+ const hasOld = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='invocations_old'`).get();
322
+ if (hasOld) db.exec(`DROP TABLE invocations_old`);
323
+ db.exec(`ALTER TABLE invocations RENAME TO invocations_old`);
324
+ db.exec(INVOCATIONS_SCHEMA);
325
+ // Omit rejection_reason from the copy (matching the CHECK migrations above):
326
+ // it was historically a bare TEXT with NO CHECK, so an old row could hold a
327
+ // value outside INVOCATIONS_SCHEMA's current rejection_reason CHECK whitelist.
328
+ // Copying it would throw SQLITE_CONSTRAINT_CHECK → rollback → the FK is left
329
+ // un-cascaded forever and every retry re-fails (review HIGH-2). The column is
330
+ // never written at runtime, so copied rows get NULL — no data loss.
331
+ db.exec(`INSERT INTO invocations
332
+ (id, resource_id, session_id, trigger, tier, recommended, adopted, outcome, score, created_at)
333
+ SELECT id, resource_id, session_id, trigger, tier, recommended, adopted, outcome, score, created_at
334
+ FROM invocations_old`);
335
+ db.exec(`DROP TABLE invocations_old`);
336
+ // Recreate the table's indexes — the INVOCATIONS_SCHEMA CREATE INDEX above was
337
+ // skipped while invocations_old held the names (review HIGH-1). Free post-DROP.
338
+ db.exec(INVOCATIONS_SCHEMA);
339
+ })();
340
+ }
341
+ } catch (e) { debugCatch(e, 'invocations-ondelete-cascade-migration'); }
342
+
343
+ // (Removed the separate idx_invocations_resource_created migration — it was a column-
344
+ // identical duplicate of idx_inv_resource (resource_id, created_at) in INVOCATIONS_SCHEMA.
345
+ // It only ever survived because the rebuild migrations dropped idx_inv_resource; now that
346
+ // the rebuilds recreate their indexes (review HIGH-1), the duplicate is pure dead weight.
347
+ // Pre-existing DBs keep their old idx_invocations_resource_created; it's harmless.)
288
348
 
289
349
  db.exec(PREINSTALLED_SCHEMA);
290
350
 
package/secret-scrub.mjs CHANGED
@@ -1,6 +1,8 @@
1
1
  // claude-mem-lite: Secret pattern detection and scrubbing
2
2
  // Extracted from utils.mjs for focused responsibility
3
3
 
4
+ import { stripPrivate } from './lib/private-strip.mjs';
5
+
4
6
  // ─── Secret Patterns ──────────────────────────────────────────────────────
5
7
 
6
8
  export const SECRET_PATTERNS = [
@@ -28,7 +30,28 @@ export const SECRET_PATTERNS = [
28
30
  // access_token / refresh_token are the canonical OAuth2 field names — they were
29
31
  // missing from this KV list (drift vs the JSON list below). `(?:\b|_)` for the same
30
32
  // underscore-prefix reason.
31
- [/((?:\b|_)(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
33
+ // `pgpassword|pgpass|mysql_pwd` are well-known credential ENV-VAR names whose
34
+ // keyword tail is unreachable via the noun list above (`PGPASSWORD`=PG+password has
35
+ // no \b/_ before "password"; `MYSQL_PWD` has no "password"/"token" substring). They
36
+ // live in THIS pattern (no prose lookbehind) so `export PGPASSWORD=x` / `env MYSQL_PWD=x`
37
+ // scrub — a compound credential env-var name is unambiguous config even after a word.
38
+ // Enumerating known names (not a blanket letter-prefix) preserves the deliberate
39
+ // low-FP decision that `topsecret=` / `access_token_count:` are non-credentials
40
+ // (#8283 + utils.test.mjs:1089-1100); bare `pwd` is omitted so `PWD=` (a path) survives.
41
+ [/((?:\b|_)(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token|pgpassword|pgpass|mysql_pwd)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
42
+ // Bare-key QUOTED values — `api_key="..."`, `password: '...'`. The unquoted KV
43
+ // patterns above stop at `'`/`"` (excluded from their value class), so a quoted
44
+ // value matched 0 chars and slipped through. Consumes the opening quote, the value,
45
+ // and the matching close quote (backref \2), replacing only the value. Unlike the
46
+ // JSON pattern below it does NOT require the KEY to be quoted, covering `key="value"`
47
+ // object-literal / YAML / quoted-.env shapes. Split into the SAME two patterns as the
48
+ // unquoted KV pairs above so prose survives — a quoted value does not turn prose into
49
+ // config (`the token: "x"` is still prose, must NOT scrub; #8283 / utils.test.mjs:1090).
50
+ // (a) bare credential nouns keep the prose lookbehind:
51
+ [/((?<![A-Za-z][ \t])(?:\b|_)(?:password|passwd|token|bearer|secret)\s*[=:]\s*)(['"])[^'"]{6,}\2/gi, '$1$2***$2'],
52
+ // (b) structured keys + named env vars are unambiguous config even after a word
53
+ // (`see api_key: "x"` DOES scrub, mirroring the unquoted structured-key path):
54
+ [/((?:\b|_)(?:pgpassword|pgpass|mysql_pwd|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token)\s*[=:]\s*)(['"])[^'"]{6,}\2/gi, '$1$2***$2'],
32
55
  // AWS access keys (AKIA...)
33
56
  [/\bAKIA[A-Z0-9]{16}\b/g, '***'],
34
57
  // OpenAI / Anthropic keys (sk-...) — specific prefixes have lower length threshold
@@ -94,12 +117,15 @@ export const SECRET_PATTERNS = [
94
117
 
95
118
  /**
96
119
  * Scrub known secret patterns (API keys, tokens, credentials) from text.
120
+ * Also strips user-marked `<private>...</private>` blocks first, so every
121
+ * persistence/log path that scrubs secrets inherits the `<private>` opt-out —
122
+ * previously stripPrivate ran only on the user-prompt hook, not on writes.
97
123
  * @param {string} text Input text potentially containing secrets
98
124
  * @returns {string} Text with `<private>...</private>` blocks stripped and secrets replaced by '***'
99
125
  */
100
126
  export function scrubSecrets(text) {
101
127
  if (!text || typeof text !== 'string') return text || '';
102
- let result = text;
128
+ let result = stripPrivate(text);
103
129
  for (const [pattern, replacement] of SECRET_PATTERNS) {
104
130
  result = result.replace(pattern, replacement);
105
131
  }