claude-mem-lite 2.92.0 → 2.94.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.92.0",
13
+ "version": "2.94.0",
14
14
  "source": "./",
15
15
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.92.0",
3
+ "version": "2.94.0",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/adopt-cli.mjs CHANGED
@@ -15,7 +15,7 @@ import { join } from 'path';
15
15
  import {
16
16
  memdirPath, writePluginSection, removePluginSection,
17
17
  writePluginDoc, removePluginDoc,
18
- isAdopted, readMemoryIndex,
18
+ isAdopted, hasPluginState, readMemoryIndex,
19
19
  UserEditedError, BudgetExceededError,
20
20
  } from './memdir.mjs';
21
21
  import {
@@ -325,6 +325,7 @@ export function cmdUnadopt(args = []) {
325
325
 
326
326
  const all = hasFlag(args, '--all');
327
327
  const dryRun = hasFlag(args, '--dry-run');
328
+ const force = hasFlag(args, '--force');
328
329
  const targets = all
329
330
  ? listAllMemdirs().map((m) => m.memdir)
330
331
  : [memdirPath(detectCwd())];
@@ -334,23 +335,32 @@ export function cmdUnadopt(args = []) {
334
335
  return;
335
336
  }
336
337
 
337
- let removed = 0, absent = 0;
338
+ let removed = 0, absent = 0, skipped = 0;
338
339
  for (const memdir of targets) {
339
340
  if (dryRun) {
340
- const adopted = isAdopted(memdir, PLUGIN_SLUG);
341
- const action = adopted ? 'would-remove' : 'absent';
341
+ // Mirror the live foreign-content guard: a sentinel with no state sidecar would be
342
+ // skipped (not removed) unless --force, so dry-run must report it the same way.
343
+ const action = !isAdopted(memdir, PLUGIN_SLUG) ? 'absent'
344
+ : (hasPluginState(memdir, PLUGIN_SLUG) || force) ? 'would-remove'
345
+ : 'would-skip-foreign';
342
346
  log(`[unadopt --dry-run] ${memdir} → ${action}`);
343
- if (adopted) removed++; else absent++;
347
+ if (action === 'would-remove') removed++;
348
+ else if (action === 'would-skip-foreign') skipped++;
349
+ else absent++;
344
350
  continue;
345
351
  }
346
- const r = removePluginSection(memdir, PLUGIN_SLUG);
347
- removePluginDoc(memdir, PLUGIN_SLUG);
348
- if (r.action === 'removed') removed++;
352
+ const r = removePluginSection(memdir, PLUGIN_SLUG, { force });
353
+ if (r.action === 'removed') { removePluginDoc(memdir, PLUGIN_SLUG); removed++; }
354
+ else if (r.action === 'skipped-foreign') skipped++;
349
355
  else absent++;
350
356
  log(`[unadopt] ${memdir} → ${r.action}`);
351
357
  }
352
358
 
359
+ if (skipped > 0) {
360
+ log('[unadopt] skipped-foreign = a sentinel block with no plugin state file (not proven plugin-written).');
361
+ log('[unadopt] pass --force to remove it anyway.');
362
+ }
353
363
  log('');
354
364
  const verb = dryRun ? 'would remove' : 'removed';
355
- log(`[unadopt${dryRun ? ' --dry-run' : ''}] ${targets.length} target(s): ${removed} ${verb}, ${absent} absent`);
365
+ log(`[unadopt${dryRun ? ' --dry-run' : ''}] ${targets.length} target(s): ${removed} ${verb}, ${skipped} skipped-foreign, ${absent} absent`);
356
366
  }
package/bash-utils.mjs CHANGED
@@ -3,6 +3,38 @@
3
3
 
4
4
  import { basename } from 'path';
5
5
 
6
// Read/search commands whose output legitimately contains "error"-like keywords without
// being a failure. Matched against the PRIMARY command (see isReadOnlyCommand).
const SEARCH_VERBS = new Set([
  'grep', 'rg', 'ag', 'ack', 'cat', 'head', 'tail', 'less', 'more', 'find', 'locate', 'wc', 'file', 'which', 'type',
]);
// Command prefixes that wrap the real command (env-assignments handled separately).
const CMD_WRAPPERS = new Set(['sudo', 'doas', 'env', 'time', 'command', 'nice', 'nohup', 'stdbuf', 'xargs']);
// git read subcommands whose output contains commit/log/match text, not failures.
const GIT_READ_SUBCMDS = new Set(['grep', 'log', 'show', 'diff', 'blame', 'ls-files', 'cat-file', 'whatchanged', 'shortlog', 'reflog', 'status']);

/**
 * Decide whether a shell command's PRIMARY operation is a read/search.
 *
 * The primary operation is the text left of the first pipe, after skipping leading
 * `NAME=value` env-assignments and wrapper commands (`sudo`, `env`, `time`, ...).
 * Anchoring on the primary command (not "a search verb appears anywhere") keeps
 * `npm run build 2>&1 | tail` classified as a non-read command while `sudo grep`,
 * `git grep` and `cat f | head` are treated as reads.
 *
 * For `git`, global options between `git` and the subcommand are skipped
 * (`-C <path>`, `-c <k=v>`, `--no-pager`, `--git-dir=...`), so `git -C /repo log`
 * is recognised as a read just like `git log`.
 *
 * @param {string} cmd Shell command line (matching is case-sensitive; sets are lowercase).
 * @returns {boolean} true when the primary command is a read/search operation.
 */
function isReadOnlyCommand(cmd) {
  const primary = cmd.split('|')[0];
  const toks = primary.trim().split(/\s+/).filter(Boolean);

  // Skip env-assignments and wrapper commands to reach the real executable.
  let i = 0;
  while (i < toks.length && (/^\w+=/.test(toks[i]) || CMD_WRAPPERS.has(toks[i]))) i++;
  const first = toks[i];
  if (!first) return false;
  if (SEARCH_VERBS.has(first)) return true;
  if (first !== 'git') return false;

  // Step over git global options. `-C`/`-c` take their value as the NEXT token;
  // every other `-x` / `--long[=value]` option is a single token.
  let j = i + 1;
  while (j < toks.length && /^--?[\w-]+(?:=\S+)?$/.test(toks[j])) {
    j += /^-c$/i.test(toks[j]) ? 2 : 1;
  }
  return GIT_READ_SUBCMDS.has(toks[j]);
}
31
+
32
+ // Paths treated as noise (ephemeral / virtual filesystems) when they appear as arguments
33
+ // inside a Bash command. Direct file_path/path/filePath fields are NOT filtered by this.
34
+ function isExcludedPath(p) {
35
+ return p.startsWith('/dev/') || p.startsWith('/proc/') || p.startsWith('/tmp/');
36
+ }
37
+
6
38
  /**
7
39
  * Detect significance signals in a Bash command and its response.
8
40
  * Checks for errors, test runs, builds, git operations, and deployments.
@@ -12,9 +44,12 @@ import { basename } from 'path';
12
44
  */
13
45
  export function detectBashSignificance(input, response) {
14
46
  const cmd = (input.command || '').toLowerCase();
15
- // Skip error keyword matching when the command is a read/search operation
16
- // (grep output naturally contains matched keywords like "error")
17
- const isSearchCmd = /\b(grep|rg|ag|ack|cat|head|tail|less|more|find|locate|wc|file|which|type)\b/i.test(cmd);
47
+ // Skip error keyword matching only when the PRIMARY command is a read/search op (its
48
+ // output naturally contains "error"-like keywords that aren't failures). Anchored on the
49
+ // primary command — NOT "search verb appears anywhere" — so `npm run build 2>&1 | tail`
50
+ // stays a real failure while `sudo grep`, `git grep`, `git log --grep`, `cat f | head`
51
+ // remain exempt and `run-cat-tests` doesn't trip a substring match.
52
+ const isSearchCmd = isReadOnlyCommand(cmd);
18
53
  const looksLikeError = !isSearchCmd
19
54
  && /\berror\b|\bERR!|fail(ed|ure)?|exception|panic|traceback|errno|enoent|command not found/i.test(response)
20
55
  && response.length > 15;
@@ -38,7 +73,9 @@ export function detectBashSignificance(input, response) {
38
73
  const isTest = /\b(npm\s+test|npm\s+run\s+test|yarn\s+test|pnpm\s+test|pnpm\s+run\s+test|bun\s+test|go\s+test|cargo\s+test)\b/i.test(cmd)
39
74
  || /\b(jest|pytest|vitest|mocha|cypress|playwright)\b/i.test(cmd);
40
75
  const isBuild = /\b(build|compile|tsc|webpack|vite|rollup|esbuild|make|cargo)\b/i.test(cmd);
41
- const isGit = /\bgit\s+(commit|merge|rebase|cherry-pick|push)\b/i.test(cmd);
76
+ // Allow intervening global git options (`-C <path>`, `-c k=v`, `--no-pager`, …) between
77
+ // `git` and the subcommand — `git -C /repo push` is the standard multi-repo/scripted form.
78
+ const isGit = /\bgit\s+(?:(?:-[cC]\s+\S+|--?[\w-]+(?:=\S+)?)\s+)*(commit|merge|rebase|cherry-pick|push)\b/i.test(cmd);
42
79
  const isDeploy = /\b(deploy|docker|kubectl|terraform)\b/i.test(cmd);
43
80
  return {
44
81
  isError, isTest, isBuild, isGit, isDeploy,
@@ -92,6 +129,9 @@ export function extractErrorKeywords(cmd, response) {
92
129
  */
93
130
  export function extractFilePaths(input) {
94
131
  const paths = [];
132
+ // Direct fields (Edit/Write file_path) are kept unconditionally — an explicit edit to a
133
+ // /tmp path is real work the user chose to make, unlike a /tmp path that merely appears as
134
+ // a transient argument inside a Bash command (excluded as noise in the command branch below).
95
135
  if (input.file_path) paths.push(input.file_path);
96
136
  if (input.path) paths.push(input.path);
97
137
  if (input.filePath) paths.push(input.filePath);
@@ -101,7 +141,7 @@ export function extractFilePaths(input) {
101
141
  if (match) {
102
142
  for (const m of match) {
103
143
  const p = m.trim();
104
- if (!p.startsWith('/dev/') && !p.startsWith('/proc/') && !p.startsWith('/tmp/')
144
+ if (!isExcludedPath(p)
105
145
  // Skip single-component paths like /exit, /clear — likely slash commands, not files
106
146
  && (p.indexOf('/', 1) !== -1 || /\.\w+$/.test(p))) {
107
147
  paths.push(p);
package/cli/activity.mjs CHANGED
@@ -10,8 +10,8 @@
10
10
 
11
11
  import { inferProject } from '../utils.mjs';
12
12
  import { resolveProject } from '../project-utils.mjs';
13
- import { parseArgs, out, fail } from './common.mjs';
14
- import { parseIntFlag } from '../lib/cli-flags.mjs';
13
+ import { parseArgs, out, fail, rejectBareStringFlags } from './common.mjs';
14
+ import { parseIntFlag, isNumericToken } from '../lib/cli-flags.mjs';
15
15
 
16
16
  function formatActivityResults(rows) {
17
17
  if (!rows || rows.length === 0) return '(no events)';
@@ -31,6 +31,9 @@ export async function cmdActivity(db, args) {
31
31
  const project = flags.project ? resolveProject(db, flags.project) : inferProject();
32
32
 
33
33
  if (sub === 'save') {
34
+ // Reject value-less string flags before they reach saveEvent as a boolean `true`
35
+ // (#8470): bare --body / --title crashed with a raw "SQLite3 can only bind ..." error.
36
+ if (rejectBareStringFlags(flags, ['type', 'title', 'body', 'files', 'file', 'project'])) return;
34
37
  const type = flags.type || 'observation';
35
38
  if (!VALID_EVENT_TYPES.has(type)) {
36
39
  fail(`[mem] activity save: invalid --type "${type}". Valid: ${[...VALID_EVENT_TYPES].join(', ')}`);
@@ -51,7 +54,9 @@ export async function cmdActivity(db, args) {
51
54
  const file_paths_merged = [...filesFromSingular, ...filesFromPlural];
52
55
  const file_paths = file_paths_merged.length > 0 ? file_paths_merged : null;
53
56
  const rawImp = flags.importance !== undefined ? parseInt(flags.importance, 10) : 2;
54
- if (flags.importance !== undefined && (isNaN(rawImp) || rawImp < 1 || rawImp > 3)) {
57
+ // isNumericToken first (mirrors cmdSave): bare parseInt coerces "3xyz"→3 and would
58
+ // persist a wrong importance that silently skews ranking. Float literals truncate (#8277).
59
+ if (flags.importance !== undefined && (!isNumericToken(flags.importance) || isNaN(rawImp) || rawImp < 1 || rawImp > 3)) {
55
60
  fail(`[mem] Invalid importance "${flags.importance}". Must be 1, 2, or 3.`);
56
61
  return;
57
62
  }
@@ -112,7 +117,10 @@ export async function cmdActivity(db, args) {
112
117
  if (row) {
113
118
  out(JSON.stringify(row, null, 2));
114
119
  } else {
115
- out(`[mem] activity show: event #${id} Not found`);
120
+ // fail() (stderr + exit 1), matching the not-found contract of sibling commands
121
+ // (`get`, `activity delete`, `update`); previously stdout + exit 0, so scripts
122
+ // couldn't detect a missing event from the exit code.
123
+ fail(`[mem] activity show: event #${id} not found`);
116
124
  }
117
125
  return;
118
126
  }
package/cli/common.mjs CHANGED
@@ -54,6 +54,29 @@ export function fail(text) {
54
54
  process.exitCode = 1;
55
55
  }
56
56
 
57
+ /**
58
+ * Reject value-less `--flag` for string-valued flags. A bare trailing flag (or one
59
+ * immediately followed by another `--flag`) parses to boolean `true` (parseArgs above);
60
+ * that `true` then slips into code expecting a string and surfaces a raw
61
+ * `flags.x.split is not a function` / `SQLite3 can only bind ...` stacktrace (#8470).
62
+ * Returns true (and emits a clean `fail()`) when any listed key is a bare flag — the
63
+ * caller should `return` on true. Single source of the guard the update/registry paths
64
+ * previously inlined, so new string-flag commands stay consistent.
65
+ *
66
+ * @param {object} flags Parsed flags from parseArgs.
67
+ * @param {string[]} keys String-valued flag names to guard (without leading dashes).
68
+ * @returns {boolean} true if a bare flag was found and rejected.
69
+ */
70
+ export function rejectBareStringFlags(flags, keys) {
71
+ for (const key of keys) {
72
+ if (flags[key] === true) {
73
+ fail(`[mem] --${key} requires a value (received a bare flag with no value).`);
74
+ return true;
75
+ }
76
+ }
77
+ return false;
78
+ }
79
+
57
80
  // ─── Time Formatting ─────────────────────────────────────────────────────────
58
81
 
59
82
  /** "just now" / "5m ago" / "3h ago" / "2d ago" relative to now. */
package/format-utils.mjs CHANGED
@@ -9,8 +9,19 @@
9
9
  */
10
10
  export function truncate(str, max = 80) {
11
11
  if (!str) return '';
12
+ // Defense-in-depth: a non-string (e.g. an LLM that returned title as an array/number)
13
+ // would throw `str.replace is not a function` and abort the caller. Coerce to '' rather
14
+ // than crash; the real type-guarding happens at the call site.
15
+ if (typeof str !== 'string') return '';
12
16
  str = str.replace(/\n/g, ' ').trim();
13
- return str.length > max ? str.slice(0, max - 1) + '\u2026' : str;
17
+ if (str.length <= max) return str;
18
+ // Never split a UTF-16 surrogate pair: slicing between the high and low half emits a
19
+ // lone surrogate (invalid UTF-16) that then gets persisted to the DB. If the last kept
20
+ // code unit is a high surrogate, drop it so we cut on a code-point boundary.
21
+ let end = max - 1;
22
+ const last = str.charCodeAt(end - 1);
23
+ if (last >= 0xD800 && last <= 0xDBFF) end--;
24
+ return str.slice(0, end) + '\u2026';
14
25
  }
15
26
 
16
27
  /**
package/hook-handoff.mjs CHANGED
@@ -446,13 +446,31 @@ function renderHandoffFromRow(handoff, db, project) {
446
446
 
447
447
  lines.push('</session-handoff>');
448
448
 
449
- // Append session summary if available (long-gap enrichment)
449
+ // Append session summary if available (long-gap enrichment).
450
+ // session_summaries is keyed by the mem-internal memory_session_id, but in production
451
+ // session_handoffs.session_id holds the Claude Code UUID (the scope tag) — the two id
452
+ // namespaces never match, so the exact lookup returned nothing and this block was always
453
+ // dropped on a real resume. There is no bridge column (the CC-UUID lives on user_prompts,
454
+ // not on sdk_sessions/session_summaries), so: try the exact id match first (correct when
455
+ // ids align — legacy rows + tests), then fall back to the most-recent summary for the
456
+ // project, which at resume time is the summary from the session that wrote this handoff.
450
457
  try {
451
- const summary = db.prepare(`
458
+ let summary = db.prepare(`
452
459
  SELECT completed, next_steps, remaining_items FROM session_summaries
453
460
  WHERE memory_session_id = ? AND project = ?
454
461
  ORDER BY created_at_epoch DESC LIMIT 1
455
462
  `).get(handoff.session_id, project);
463
+ if (!summary) {
464
+ // Pick the project summary CLOSEST IN TIME to this handoff, not merely the newest:
465
+ // a handoff and its own session's summary are written within ms of each other at
466
+ // session end, so nearest-timestamp recovers the right session even when a different
467
+ // session later wrote a newer summary for the same project (concurrent/interleaved use).
468
+ summary = db.prepare(`
469
+ SELECT completed, next_steps, remaining_items FROM session_summaries
470
+ WHERE project = ?
471
+ ORDER BY ABS(created_at_epoch - ?) ASC LIMIT 1
472
+ `).get(project, handoff.created_at_epoch ?? 0);
473
+ }
456
474
  if (summary && (summary.completed || summary.next_steps || summary.remaining_items)) {
457
475
  lines.push('');
458
476
  lines.push('<session-summary source="haiku">');
package/hook-llm.mjs CHANGED
@@ -657,7 +657,12 @@ ${actionList}`;
657
657
  releaseLLMSlot();
658
658
  }
659
659
 
660
- if (parsed && parsed.title) {
660
+ // Require a STRING title: a truthy non-string (LLM returned title as an array/number/
661
+ // object) would pass a bare `parsed.title` check, then crash truncate() downstream,
662
+ // aborting the worker before tmpFile cleanup (leak) and leaving the obs degraded.
663
+ if (parsed && typeof parsed.title === 'string' && parsed.title) {
664
+ // Normalize narrative to a string too — same non-string crash risk in truncate().
665
+ if (typeof parsed.narrative !== 'string') parsed.narrative = '';
661
666
  // Discard if LLM judges observation has no learning value
662
667
  if (parsed.importance === 0 || parsed.importance === '0') {
663
668
  debugLog('DEBUG', 'llm-episode', `Discarded low-value observation: ${parsed.title}`);
package/hook-optimize.mjs CHANGED
@@ -262,7 +262,7 @@ Rules:
262
262
  }
263
263
  }
264
264
 
265
- export function applyNormalization(db, groups) {
265
+ export function applyNormalization(db, groups, { project = null } = {}) {
266
266
  if (!groups || groups.length === 0) return { updated: 0 };
267
267
 
268
268
  const aliasMap = new Map();
@@ -272,11 +272,17 @@ export function applyNormalization(db, groups) {
272
272
  }
273
273
  }
274
274
 
275
+ // Scope the mutation to `project` when normalize was scoped (v2.72.0 --project).
276
+ // Without this, synonym groups derived from ONE project's concepts rewrote the
277
+ // concepts/search_aliases of EVERY project's observations — the exact cross-project
278
+ // contamination the --project flag was added to prevent. NULL → all projects (legacy
279
+ // unscoped run), matching the search-engine `(? IS NULL OR project = ?)` idiom.
275
280
  const rows = db.prepare(`
276
281
  SELECT id, concepts, search_aliases FROM observations
277
282
  WHERE COALESCE(compressed_into, 0) = 0
278
283
  AND concepts IS NOT NULL AND concepts != ''
279
- `).all();
284
+ AND (? IS NULL OR project = ?)
285
+ `).all(project, project);
280
286
 
281
287
  let updated = 0;
282
288
  const updateStmt = db.prepare(`
@@ -322,7 +328,7 @@ export async function executeNormalize(db, force = false, { project } = {}) {
322
328
  const groups = await identifySynonymGroups(concepts);
323
329
  if (groups.length === 0) return { processed: 0, groups: 0 };
324
330
 
325
- const result = applyNormalization(db, groups);
331
+ const result = applyNormalization(db, groups, { project });
326
332
 
327
333
  try { writeFileSync(NORMALIZE_GATE_FILE, JSON.stringify({ epoch: Date.now() })); } catch {}
328
334
 
@@ -340,7 +346,7 @@ export function findMergeCandidates(db, maxClusters = 5, { project } = {}) {
340
346
  const cutoff = Date.now() - MERGE_TIME_WINDOW_MS;
341
347
  const projectClause = project ? 'AND project = ?' : '';
342
348
  const stmt = db.prepare(`
343
- SELECT id, title, narrative, project, type, access_count, created_at_epoch, minhash_sig
349
+ SELECT id, title, narrative, project, type, access_count, importance, created_at_epoch, minhash_sig
344
350
  FROM observations
345
351
  WHERE COALESCE(compressed_into, 0) = 0
346
352
  AND optimized_at IS NULL
@@ -410,10 +416,19 @@ Return ONLY valid JSON:
410
416
  const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
411
417
  if (!parsed || !parsed.should_merge) return { merged: false };
412
418
 
413
- const keeper = cluster.reduce((best, o) =>
414
- (o.access_count || 0) > (best.access_count || 0) ? o : best
415
- , cluster[0]);
419
+ // Keeper = highest importance, then highest access_count. Previously access_count
420
+ // alone, so a critical (importance=3) but never-accessed observation lost the keeper
421
+ // role to a trivial (importance=1) accessed one and was compressed away.
422
+ const keeper = cluster.reduce((best, o) => {
423
+ const oi = o.importance || 1, bi = best.importance || 1;
424
+ if (oi !== bi) return oi > bi ? o : best;
425
+ return (o.access_count || 0) > (best.access_count || 0) ? o : best;
426
+ }, cluster[0]);
416
427
  const others = cluster.filter(o => o.id !== keeper.id);
428
+ // Floor the merged importance at the cluster max — merging must never silently
429
+ // downgrade the ranking of the most-important member (the LLM default is 2). The keeper
430
+ // is selected by importance-first, so keeper.importance IS the cluster max by construction.
431
+ const maxClusterImportance = keeper.importance || 1;
417
432
 
418
433
  const concepts = Array.isArray(parsed.merged_concepts) ? parsed.merged_concepts.slice(0, 10) : [];
419
434
  const facts = Array.isArray(parsed.merged_facts) ? parsed.merged_facts.slice(0, 10) : [];
@@ -428,7 +443,7 @@ Return ONLY valid JSON:
428
443
  const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
429
444
  const textField = [conceptsText, factsText, bigramText].filter(Boolean).join(' ');
430
445
  const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
431
- const importance = clampImportance(parsed.importance || 2);
446
+ const importance = Math.max(clampImportance(parsed.importance || 2), maxClusterImportance);
432
447
 
433
448
  // Scrub LLM-output cluster-merge text fields at the UPDATE boundary.
434
449
  // importance is numeric; minhash_sig is hash bytes.
package/hook-update.mjs CHANGED
@@ -27,7 +27,10 @@ const STATE_DIR = DB_DIR;
27
27
  const STATE_FILE = join(STATE_DIR, 'runtime', 'update-state.json');
28
28
  const CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000; // 24 hours
29
29
  const FETCH_TIMEOUT_MS = 3000; // 3s network timeout
30
- const RATE_LIMIT_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6h if rate-limited
30
+ // When rate-limited we got NO release data, so re-check sooner than the normal 24h
31
+ // cadence (GitHub's unauthenticated rate-limit window resets within the hour). A 6h interval
32
+ // is 4 polls/day at ≤2 requests each (≤8 requests/day), far under the 60/hr limit, so this is a faster retry, not a hammer.
33
+ const RATE_LIMIT_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6h retry when rate-limited
31
34
  const NPM_INSTALL_CMD = 'npm install --omit=dev --no-audit --no-fund';
32
35
 
33
36
  // ── Main Entry ─────────────────────────────────────────────
@@ -57,7 +60,12 @@ export async function checkForUpdate(options = {}) {
57
60
 
58
61
  const latest = await fetchLatestRelease();
59
62
  if (!latest) {
60
- saveState({ ...state, lastCheck: new Date().toISOString() });
63
+ // Re-read from disk: a 403 inside fetchWithTimeout just persisted rateLimited:true.
64
+ // Spreading the stale in-memory `state` (captured above with rateLimited:false) would
65
+ // clobber that flag back to false, so shouldCheck never honors the backoff and the
66
+ // rate-limit mechanism is dead. Re-reading preserves the freshly-written flag.
67
+ const fresh = readState();
68
+ saveState({ ...fresh, lastCheck: new Date().toISOString() });
61
69
  return null;
62
70
  }
63
71
 
@@ -174,7 +182,10 @@ async function fetchLatestRelease() {
174
182
  headers,
175
183
  );
176
184
  if (result === 'rate-limited') return null;
177
- if (result) {
185
+ // Guard tag_name: a 200-OK with a malformed body ({} / {tag_name:null}) would throw
186
+ // `Cannot read properties of undefined (reading 'replace')`. Caught upstream, but it
187
+ // poisons lastError and blocks the tags fallback below — fall through instead.
188
+ if (result && typeof result.tag_name === 'string') {
178
189
  return {
179
190
  version: result.tag_name.replace(/^v/, ''),
180
191
  tarballUrl: result.tarball_url,
@@ -188,7 +199,7 @@ async function fetchLatestRelease() {
188
199
  headers,
189
200
  );
190
201
  if (tags === 'rate-limited') return null;
191
- if (Array.isArray(tags) && tags.length > 0) {
202
+ if (Array.isArray(tags) && tags.length > 0 && typeof tags[0]?.name === 'string') {
192
203
  const tag = tags[0];
193
204
  return {
194
205
  version: tag.name.replace(/^v/, ''),
@@ -208,7 +219,7 @@ async function fetchWithTimeout(url, headers) {
208
219
  if (res.status === 403) {
209
220
  const state = readState();
210
221
  saveState({ ...state, rateLimited: true });
211
- debugLog('DEBUG', 'hook-update', 'GitHub API rate limited, extending interval');
222
+ debugLog('DEBUG', 'hook-update', 'GitHub API rate limited; will retry on the 6h rate-limit cadence');
212
223
  return 'rate-limited';
213
224
  }
214
225
  if (!res.ok) return null;
package/hook.mjs CHANGED
@@ -202,13 +202,20 @@ function flushEpisode(episode, hookEventName = 'PostToolUse') {
202
202
  // bugfix-shape nudge above and may co-fire.
203
203
  const citeBack = loadCiteBackForEpisode(episode, RUNTIME_DIR);
204
204
  if (citeBack) lines.push(citeBack);
205
+ // Trailing newline is REQUIRED: when this receipt flushes at SessionStart
206
+ // (leftover episode after /clear or /compact), the startup dashboard writes a
207
+ // second hookSpecificOutput object right after. Without the '\n' the two land
208
+ // back-to-back as `}{` on one line and Claude Code's line-based JSON parser
209
+ // drops both — losing the episode-flush / cite-back context exactly at the
210
+ // session boundary. Every other hookSpecificOutput write appends '\n'; this
211
+ // was the lone exception.
205
212
  process.stdout.write(JSON.stringify({
206
213
  suppressOutput: true,
207
214
  hookSpecificOutput: {
208
215
  hookEventName,
209
216
  additionalContext: lines.join('\n'),
210
217
  },
211
- }));
218
+ }) + '\n');
212
219
  } catch { /* never block on receipt */ }
213
220
  }
214
221
  } else {
@@ -33,11 +33,18 @@ export const PINNED_INJ_THRESHOLD = 8;
33
33
  // compressed_into = <keeperId>; deleting that keeper (compressed_into has no FK) would
34
34
  // leave the child dangling behind a now-missing parent — hidden from every
35
35
  // COALESCE(compressed_into,0)=0 view and unrecoverable. Recovery = resurface the child
36
- // as live (NULL) rather than lose it silently. Shared by both hard-delete paths.
37
- function recoverChildrenOf(db, ids) {
36
+ // as live (NULL) rather than lose it silently. Shared by every hard-delete path:
37
+ // maintain (cleanupBroken/purgeStale) AND the interactive `delete` / MCP mem_delete.
38
+ export function recoverChildrenOf(db, ids) {
38
39
  if (!ids.length) return 0;
39
40
  const ph = ids.map(() => '?').join(',');
40
- return db.prepare(`UPDATE observations SET compressed_into = NULL WHERE compressed_into IN (${ph})`).run(...ids).changes;
41
+ // `AND id NOT IN (...)`: never "recover" a row that is itself being deleted in the same
42
+ // call (e.g. `delete 1,2` where #2 was merged into #1). Without it, #2 is un-hidden and
43
+ // then immediately deleted, inflating the reported recovery count with a row that did not
44
+ // survive. Recovery should count only children that actually stay live.
45
+ return db.prepare(
46
+ `UPDATE observations SET compressed_into = NULL WHERE compressed_into IN (${ph}) AND id NOT IN (${ph})`
47
+ ).run(...ids, ...ids).changes;
41
48
  }
42
49
 
43
50
  export function cleanupBroken(db, { projectFilter, baseParams, opCap = OP_CAP }) {
package/mem-cli.mjs CHANGED
@@ -18,6 +18,7 @@ import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from '
18
18
  import {
19
19
  cleanupBroken, decayAndMarkIdle, boostAccessed, demotePinned, mergeDuplicates,
20
20
  purgeStale, purgeStalePreview, findDuplicates, maintenanceStats, rebuildVectors, vacuum,
21
+ recoverChildrenOf,
21
22
  OP_CAP, STALE_AGE_MS, PINNED_INJ_THRESHOLD,
22
23
  } from './lib/maintain-core.mjs';
23
24
  import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
@@ -32,7 +33,7 @@ import { readFileSync, existsSync, readdirSync } from 'fs';
32
33
  // v2.41: shared CLI helpers extracted to cli/common.mjs. Keep this file as the
33
34
  // router + remaining-command bodies during the incremental split. Future work:
34
35
  // move each cmdXxx into its own cli/<cmd>.mjs; mem-cli.mjs becomes pure dispatch.
35
- import { parseArgs, out, fail, relativeTime, fmtDateShort, parseIdToken, formatProbeHints } from './cli/common.mjs';
36
+ import { parseArgs, out, fail, relativeTime, fmtDateShort, parseIdToken, formatProbeHints, rejectBareStringFlags } from './cli/common.mjs';
36
37
  import { saveObservation } from './lib/save-observation.mjs';
37
38
  import { AUTO_MERGE_THRESHOLD } from './lib/dedup-constants.mjs';
38
39
  import { countRecentHookErrors } from './lib/hook-telemetry.mjs';
@@ -667,6 +668,7 @@ function cmdGet(db, args) {
667
668
  }
668
669
 
669
670
  // Validate --fields against obs schema (only meaningful for obs rows).
671
+ if (rejectBareStringFlags(flags, ['fields', 'source'])) return;
670
672
  let requestedFields = null;
671
673
  if (flags.fields) {
672
674
  const allRequested = flags.fields.split(',').map(s => s.trim());
@@ -713,6 +715,10 @@ function cmdGet(db, args) {
713
715
 
714
716
  function cmdTimeline(db, args) {
715
717
  const { positional, flags } = parseArgs(args);
718
+ // Bare `--query` parses to boolean true and crashed downstream in sanitizeFtsQuery
719
+ // (nlp.mjs string ops on a boolean). No sensible default for a search anchor — reject
720
+ // cleanly (#8470). (`--project` bare is absorbed by resolveProject's non-string guard.)
721
+ if (rejectBareStringFlags(flags, ['query'])) return;
716
722
  // parseInt('-5') === -5 is truthy, so `|| 5` doesn't rescue negative input.
717
723
  // Match cmdSearch's warn-then-default pattern for consistency across CLI flags.
718
724
  const parseWindow = (label, raw) => {
@@ -944,6 +950,10 @@ function cmdSave(db, args) {
944
950
  return;
945
951
  }
946
952
 
953
+ // Reject value-less string flags before they reach .split()/saveObservation as a
954
+ // boolean `true` (#8470): bare --files/--title/--lesson crashed with a raw stacktrace.
955
+ if (rejectBareStringFlags(flags, ['title', 'files', 'lesson', 'lesson-learned', 'project', 'type'])) return;
956
+
947
957
  const type = flags.type || 'discovery';
948
958
  const validTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
949
959
  if (!validTypes.has(type)) {
@@ -1070,6 +1080,8 @@ function cmdDeferAdd(db, args) {
1070
1080
  fail(`[mem] defer add: title too long (${title.length} chars, max 200). Move detail to --detail "<text>".`);
1071
1081
  return;
1072
1082
  }
1083
+ // Reject bare --files/--detail/--project before .split()/bind sees a boolean true (#8470).
1084
+ if (rejectBareStringFlags(flags, ['files', 'detail', 'project'])) return;
1073
1085
  const priority = flags.priority !== undefined ? parseInt(flags.priority, 10) : 2;
1074
1086
  // isNumericToken first: bare parseInt would coerce "3xyz"→3 and silently escalate a
1075
1087
  // deferred item's urgency. Float literals still truncate (#8277).
@@ -1614,11 +1626,19 @@ function cmdDelete(db, args) {
1614
1626
  db.prepare('UPDATE observations SET related_ids = ? WHERE id = ?').run(JSON.stringify(filtered), r.id);
1615
1627
  }
1616
1628
  }
1617
- return db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...ids);
1629
+ // Resurface any rows merged/compressed INTO the doomed keepers before deleting,
1630
+ // else they dangle behind a missing parent (compressed_into has no FK) — invisible
1631
+ // to every COALESCE(compressed_into,0)=0 view and unrecoverable. Same guard the
1632
+ // maintain hard-delete paths use (recoverChildrenOf); the interactive delete path
1633
+ // was missing it. Returned in the result so the user sees the recovery count.
1634
+ const recovered = recoverChildrenOf(db, ids);
1635
+ const deleted = db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...ids);
1636
+ return { changes: deleted.changes, recovered };
1618
1637
  });
1619
1638
  const result = deleteTx();
1620
1639
  const missing = ids.filter(id => !rows.some(r => r.id === id));
1621
- out(`[mem] Deleted ${result.changes} observation(s).${missing.length > 0 ? ` Note: ID(s) ${missing.join(', ')} not found.` : ''}`);
1640
+ const recoveredNote = result.recovered > 0 ? ` Recovered ${result.recovered} merged/compressed child observation(s) to live.` : '';
1641
+ out(`[mem] Deleted ${result.changes} observation(s).${recoveredNote}${missing.length > 0 ? ` Note: ID(s) ${missing.join(', ')} not found.` : ''}`);
1622
1642
  }
1623
1643
 
1624
1644
  // ─── Update ──────────────────────────────────────────────────────────────────
@@ -1644,18 +1664,10 @@ function cmdUpdate(db, args) {
1644
1664
  return;
1645
1665
  }
1646
1666
 
1647
- // A value-less `--flag` (last arg, or immediately followed by another --flag)
1648
- // parses to boolean `true` (cli/common.mjs parseArgs). For string-valued fields
1649
- // that boolean would slip past the string-only empty guards below and reach the
1650
- // SQLite bind, surfacing a raw "TypeError: SQLite3 can only bind ..." stacktrace
1651
- // — the same accidental shell-strip class the empty-title guard (#8470) catches.
1652
- // Reject it cleanly for every string-valued update flag.
1653
- for (const key of ['title', 'narrative', 'lesson', 'lesson-learned', 'concepts']) {
1654
- if (flags[key] === true) {
1655
- fail(`[mem] --${key} requires a value (received a bare flag with no value).`);
1656
- return;
1657
- }
1658
- }
1667
+ // A value-less `--flag` parses to boolean `true` (cli/common.mjs parseArgs); for string
1668
+ // fields that would reach the SQLite bind as a raw "TypeError: SQLite3 can only bind ..."
1669
+ // (#8470). Reject cleanly via the shared guard — single source with the other commands.
1670
+ if (rejectBareStringFlags(flags, ['title', 'narrative', 'lesson', 'lesson-learned', 'concepts'])) return;
1659
1671
 
1660
1672
  const updates = [];
1661
1673
  const params = [];
@@ -2172,6 +2184,9 @@ function cmdRegistry(_memDb, args) {
2172
2184
 
2173
2185
  try {
2174
2186
  if (action === 'search') {
2187
+ // Bare `--query` parses to boolean true; `true || ...` would search for the literal
2188
+ // string "true". Reject it cleanly (#8470) before it becomes a confusing no-match.
2189
+ if (rejectBareStringFlags(flags, ['query', 'category', 'quality'])) return;
2175
2190
  const query = flags.query || positional.slice(1).join(' ');
2176
2191
  if (!query) { fail('[mem] Usage: claude-mem-lite registry search <query> [--type skill|agent] [--category C] [--quality Q]'); return; }
2177
2192
  let results = searchResources(rdb, query, {
@@ -2260,12 +2275,9 @@ function cmdRegistry(_memDb, args) {
2260
2275
  }
2261
2276
 
2262
2277
  if (action === 'import') {
2263
- // A bare value-less flag parses to boolean `true` (parseArgs); for these string
2264
- // fields that boolean reaches the SQLite bind in upsertResource and throws a raw
2265
- // TypeError — same class as the `update` guard above (#8470). Reject up front.
2266
- for (const key of ['name', 'resource-type', 'invocation-name', 'source', 'repo-url', 'local-path', 'intent-tags', 'domain-tags', 'trigger-patterns', 'capability-summary', 'keywords', 'tech-stack', 'use-cases']) {
2267
- if (flags[key] === true) { fail(`[mem] --${key} requires a value (received a bare flag with no value).`); return; }
2268
- }
2278
+ // Bare value-less flags → boolean true → SQLite-bind crash in upsertResource (#8470).
2279
+ // Shared guard — single source with update/remove/the other commands.
2280
+ if (rejectBareStringFlags(flags, ['name', 'resource-type', 'invocation-name', 'source', 'repo-url', 'local-path', 'intent-tags', 'domain-tags', 'trigger-patterns', 'capability-summary', 'keywords', 'tech-stack', 'use-cases'])) return;
2269
2281
  const name = flags.name;
2270
2282
  const resourceType = flags['resource-type'];
2271
2283
  if (!name || !resourceType) { fail('[mem] Usage: claude-mem-lite registry import --name N --resource-type skill|agent [--invocation-name I] [--capability-summary S]'); return; }
@@ -2287,11 +2299,9 @@ function cmdRegistry(_memDb, args) {
2287
2299
  }
2288
2300
 
2289
2301
  if (action === 'remove') {
2290
- // Bare value-less --name / --resource-type → boolean true → SQLite-bind crash
2291
- // on the DELETE below; reject like the import branch and the `update` guard.
2292
- for (const key of ['name', 'resource-type']) {
2293
- if (flags[key] === true) { fail(`[mem] --${key} requires a value (received a bare flag with no value).`); return; }
2294
- }
2302
+ // Bare value-less --name / --resource-type → boolean true → SQLite-bind crash on
2303
+ // the DELETE below; shared guard, single source with import/update.
2304
+ if (rejectBareStringFlags(flags, ['name', 'resource-type'])) return;
2295
2305
  const name = flags.name;
2296
2306
  const resourceType = flags['resource-type'];
2297
2307
  if (!name || !resourceType) { fail('[mem] Usage: claude-mem-lite registry remove --name N --resource-type skill|agent'); return; }
package/memdir.mjs CHANGED
@@ -219,33 +219,58 @@ export function writePluginSection(memdir, { slug, version, contentLine, force =
219
219
  /**
220
220
  * Remove the plugin's sentinel block plus its state sidecar. External content
221
221
  * in MEMORY.md is preserved.
222
- * @returns {{action: 'removed'|'absent'}}
222
+ *
223
+ * Foreign-content guard (symmetric with writePluginSection): a sentinel block with
224
+ * NO state sidecar is content we cannot prove the plugin authored — the user may have
225
+ * pasted plugin docs or quoted a sentinel example. Without `force`, such a block is
226
+ * LEFT IN PLACE (action 'skipped-foreign') instead of being silently deleted. The
227
+ * adopt side already throws UserEditedError on the same condition; unadopt lacked the
228
+ * mirror, so it could delete user-authored text that merely resembled the sentinel.
229
+ *
230
+ * @param {string} memdir
231
+ * @param {string} slug
232
+ * @param {{force?: boolean}} [opts] force=true removes even a no-state (foreign) block.
233
+ * @returns {{action: 'removed'|'absent'|'skipped-foreign'}}
223
234
  */
224
- export function removePluginSection(memdir, slug) {
225
- clearState(memdir, slug);
235
+ export function removePluginSection(memdir, slug, { force = false } = {}) {
226
236
  const path = memoryFile(memdir);
227
- if (!existsSync(path)) return { action: 'absent' };
237
+ if (!existsSync(path)) { clearState(memdir, slug); return { action: 'absent' }; }
228
238
  const raw = readFileSync(path, 'utf8');
229
239
  const match = raw.match(sentinelRegex(slug));
230
- if (!match) return { action: 'absent' };
240
+ if (!match) { clearState(memdir, slug); return { action: 'absent' }; }
241
+
242
+ // Only remove a block we have a state sidecar for (proof we wrote it), unless forced.
243
+ if (!readState(memdir, slug) && !force) {
244
+ return { action: 'skipped-foreign' };
245
+ }
246
+ clearState(memdir, slug);
231
247
 
232
248
  // Delete the match plus a trailing newline + a preceding blank line so we
233
249
  // don't leave a stranded paragraph gap.
250
+ const blockAtStart = match.index === 0;
234
251
  let start = match.index;
235
252
  let end = match.index + match[0].length;
236
253
  if (raw[end] === '\n') end++;
237
254
  if (start > 0 && raw.slice(0, start).endsWith('\n\n')) start--;
238
255
  let next = raw.slice(0, start) + raw.slice(end);
239
- // Edge case (code review v2.32.3): when the sentinel was the first content
240
- // (e.g. two invited-memory plugins coexist and we remove the earlier one),
241
- // the tail can still start with a stranded blank line / doubled newlines.
242
- // Normalize leading whitespace and collapse any ≥3 consecutive newlines
243
- // so the remaining content looks hand-authored.
244
- next = next.replace(/^\s+/, '').replace(/\n{3,}/g, '\n\n');
256
+ // Collapse any ≥3 consecutive newlines left at the removal seam so the remaining
257
+ // content looks hand-authored. Only strip leading whitespace when OUR block was the
258
+ // file's first content — otherwise an unconditional `/^\s+/` deleted user-authored
259
+ // leading blank lines / structure that sat far above our (end-of-file) block.
260
+ next = next.replace(/\n{3,}/g, '\n\n');
261
+ if (blockAtStart) next = next.replace(/^\s+/, '');
245
262
  atomicWrite(path, next);
246
263
  return { action: 'removed' };
247
264
  }
248
265
 
266
+ /**
267
+ * Whether a plugin state sidecar exists for this memdir — i.e. the plugin can prove it
268
+ * wrote the sentinel. Used by unadopt's dry-run to predict the foreign-content skip.
269
+ */
270
+ export function hasPluginState(memdir, slug) {
271
+ return readState(memdir, slug) !== null;
272
+ }
273
+
249
274
  /**
250
275
  * Whether this memdir has our sentinel. Body edits don't demote the adoption —
251
276
  * users who hand-tweak the contract line still count as adopted.
package/nlp.mjs CHANGED
@@ -11,6 +11,23 @@ export { SYNONYM_MAP, CJK_COMPOUNDS };
11
11
 
12
12
  const FTS5_KEYWORDS = new Set(['AND', 'OR', 'NOT', 'NEAR']);
13
13
 
14
+ /**
15
+ * True if a CJK bigram is pure grammatical noise that should not enter an FTS query
16
+ * or the precision gate's `required` set. CJK_STOP_WORDS holds single-char particles
17
+ * (的/了/是…) plus a few whole multi-char fillers (什么/怎么…); callers used to test a
18
+ * 2-char bigram with a bare `CJK_STOP_WORDS.has(bg)`, which only caught the whole-filler
19
+ * case — so a particle-pair bigram like `的了` / `了是` slipped through and (a) forced an
20
+ * unsatisfiable AND term and (b) made an all-particle query's `required` set non-empty,
21
+ * wrongly rejecting every candidate. We reject a bigram when it IS a known filler OR when
22
+ * BOTH characters are single-char stop words. A bigram with only ONE stop char (有效, 目的)
23
+ * is deliberately kept — those are real compounds, and distinguishing a boundary-straddle
24
+ * (的全) from a genuine compound needs a dictionary/recall benchmark (deferred).
25
+ */
26
+ function isCjkNoiseBigram(bg) {
27
+ if (CJK_STOP_WORDS.has(bg)) return true;
28
+ return bg.length === 2 && CJK_STOP_WORDS.has(bg[0]) && CJK_STOP_WORDS.has(bg[1]);
29
+ }
30
+
14
31
  // Sort by length descending for greedy matching
15
32
  const CJK_SORTED = [...CJK_COMPOUNDS].sort((a, b) => b.length - a.length);
16
33
 
@@ -177,7 +194,7 @@ export function cjkPrecisionOk(query, text, threshold) {
177
194
  const keywords = extractCjkKeywords(query);
178
195
  const required = keywords.length > 0
179
196
  ? keywords
180
- : cjkBigrams(query).split(' ').filter(b => b && !CJK_STOP_WORDS.has(b));
197
+ : cjkBigrams(query).split(' ').filter(b => b && !isCjkNoiseBigram(b));
181
198
  if (required.length === 0) return true;
182
199
  const hit = required.filter(w => text.includes(w)).length;
183
200
  return (hit / required.length) >= threshold;
@@ -254,7 +271,7 @@ export function sanitizeFtsQuery(query) {
254
271
  const gapBigrams = cjkBigrams(remainder);
255
272
  if (gapBigrams) {
256
273
  for (const bg of gapBigrams.split(' ')) {
257
- if (bg && !CJK_STOP_WORDS.has(bg) && !matched.has(bg)) expandedTokens.push(bg);
274
+ if (bg && !isCjkNoiseBigram(bg) && !matched.has(bg)) expandedTokens.push(bg);
258
275
  }
259
276
  }
260
277
  continue;
@@ -278,7 +295,7 @@ export function sanitizeFtsQuery(query) {
278
295
  );
279
296
  if (pureCjkTokens.length > 0) bigrams = cjkBigrams(pureCjkTokens.join(' '));
280
297
  }
281
- const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(b => b && !CJK_STOP_WORDS.has(b)) : []);
298
+ const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(b => b && !isCjkNoiseBigram(b)) : []);
282
299
  const hasBigrams = bigramSet.size > 0;
283
300
  const finalTokens = [];
284
301
  const seen = new Set();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.92.0",
3
+ "version": "2.94.0",
4
4
  "description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
package/project-utils.mjs CHANGED
@@ -14,6 +14,12 @@ const _cache = new Map();
14
14
  */
15
15
  export function resolveProject(db, name) {
16
16
  if (!name) return name;
17
+ // Defense-in-depth: a bare `--project` CLI flag parses to boolean `true` (and a
18
+ // malformed MCP/hook caller could pass any non-string). `true.includes('--')` below
19
+ // throws a raw TypeError that crashed search/recent/timeline/stats/export/defer-list.
20
+ // Treat any non-string as "no project filter" (null) — the degradation every caller
21
+ // already handles for an absent --project — instead of crashing at the root helper.
22
+ if (typeof name !== 'string') return null;
17
23
  if (_cache.has(name)) return _cache.get(name);
18
24
  // Already a canonical name (contains "--")? Use as-is.
19
25
  if (name.includes('--')) { _cache.set(name, name); return name; }
@@ -336,10 +336,15 @@ export async function importFromGitHub(db, url, opts = {}) {
336
336
  indexed_at: new Date().toISOString(),
337
337
  });
338
338
 
339
- // 5g. Update repo_forks and repo_updated_at (not in upsert SQL)
339
+ // 5g. Update repo_forks and repo_updated_at (not in upsert SQL).
340
+ // Do NOT touch quality_tier here: UPSERT_SQL never writes it, so a first insert
341
+ // gets the column DEFAULT 'community' and a re-import preserves whatever tier the
342
+ // row reached. Re-stamping 'community' downgraded enrichment-promoted tiers
343
+ // (verified/installed → community) on every content re-import, silently lowering
344
+ // the resource's BM25 composite rank (tier is a 1.0/2.0/3.0 multiplier).
340
345
  db.prepare(
341
- 'UPDATE resources SET repo_forks = ?, repo_updated_at = ?, quality_tier = ? WHERE id = ?'
342
- ).run(repoForks, repoUpdatedAt, 'community', resourceId);
346
+ 'UPDATE resources SET repo_forks = ?, repo_updated_at = ? WHERE id = ?'
347
+ ).run(repoForks, repoUpdatedAt, resourceId);
343
348
 
344
349
  results.push({ name, type: item.type, id: resourceId });
345
350
  debugLog('INFO', 'importer', `Imported ${item.type}:${name} (id=${resourceId})`);
@@ -284,6 +284,10 @@ export function filterByProjectDomain(results, projectDomains) {
284
284
  //
285
285
  // Composite ranking formula:
286
286
  // 40% BM25 text relevance
287
+ // Quality-tier bonus: bounded additive (installed -0.15, verified -0.075). Was a
288
+ // MULTIPLIER on the BM25 term, which scaled the magnitude of a variable, unbounded,
289
+ // NEGATIVE signal — letting a weakly-matching installed resource (×3) outrank a
290
+ // strongly-matching community one. Additive keeps tier a promotion, not an override.
287
291
  // 15% Star popularity (saturation normalization — diminishing returns after ~500 stars)
288
292
  // 15% Success rate (Laplace smoothing — Beta prior α=1, β=1 for small-sample robustness)
289
293
  // 10% Adoption rate (Laplace smoothing)
@@ -301,10 +305,10 @@ export function filterByProjectDomain(results, projectDomains) {
301
305
  // Sign convention: more negative = better. BM25 is negative, behavioral signals are subtracted.
302
306
  const COMPOSITE_EXPR = `(
303
307
  bm25(resources_fts, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0) * 0.4
304
- * CASE COALESCE(r.quality_tier, 'community')
305
- WHEN 'installed' THEN 3.0
306
- WHEN 'verified' THEN 2.0
307
- ELSE 1.0
308
+ - CASE COALESCE(r.quality_tier, 'community')
309
+ WHEN 'installed' THEN 0.15
310
+ WHEN 'verified' THEN 0.075
311
+ ELSE 0
308
312
  END
309
313
  - COALESCE(r.repo_stars * 1.0 / (r.repo_stars + 100.0), 0) * 0.15
310
314
  - (
@@ -347,7 +351,7 @@ const SEARCH_SQL = `
347
351
  WHERE resources_fts MATCH ?
348
352
  AND r.status = 'active'
349
353
  ) sub
350
- ORDER BY composite_score ASC
354
+ ORDER BY composite_score ASC, id ASC
351
355
  LIMIT ?
352
356
  `;
353
357
 
@@ -362,7 +366,7 @@ const SEARCH_BY_TYPE_SQL = `
362
366
  AND r.status = 'active'
363
367
  AND r.type = ?
364
368
  ) sub
365
- ORDER BY composite_score ASC
369
+ ORDER BY composite_score ASC, id ASC
366
370
  LIMIT ?
367
371
  `;
368
372
 
package/schema.mjs CHANGED
@@ -382,6 +382,7 @@ export function initSchema(db) {
382
382
 
383
383
  // FTS5 migration: recreate observations_fts when columns are missing (one-time)
384
384
  // Detect old FTS5 table missing lesson_learned or search_aliases and recreate with full column set
385
+ let obsFtsRecreated = false;
385
386
  try {
386
387
  const ftsDdl = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='observations_fts'`).get();
387
388
  if (ftsDdl && (!ftsDdl.sql.includes('lesson_learned') || !ftsDdl.sql.includes('search_aliases'))) {
@@ -389,6 +390,7 @@ export function initSchema(db) {
389
390
  db.exec(`DROP TRIGGER IF EXISTS observations_ad`);
390
391
  db.exec(`DROP TRIGGER IF EXISTS observations_au`);
391
392
  db.exec(`DROP TABLE IF EXISTS observations_fts`);
393
+ obsFtsRecreated = true;
392
394
  }
393
395
  } catch { /* non-critical — ensureFTS will create if missing */ }
394
396
 
@@ -416,14 +418,19 @@ export function initSchema(db) {
416
418
  ensureFTS(db, 'session_summaries_fts', 'session_summaries', ['request', 'investigated', 'learned', 'completed', 'next_steps', 'notes', 'remaining_items']);
417
419
  ensureFTS(db, 'user_prompts_fts', 'user_prompts', ['prompt_text']);
418
420
 
419
- // Rebuild FTS5 if we just recreated it (migration populates from content table)
420
- try {
421
- const needsRebuild = db.prepare(`SELECT COUNT(*) as cnt FROM observations`).get();
422
- const ftsCount = db.prepare(`SELECT COUNT(*) as cnt FROM observations_fts`).get();
423
- if (needsRebuild.cnt > 0 && ftsCount.cnt === 0) {
424
- db.exec(`INSERT INTO observations_fts(observations_fts) VALUES('rebuild')`);
425
- }
426
- } catch { /* non-critical */ }
421
+ // Rebuild FTS5 if we just recreated it above (the new index is empty and must be
422
+ // populated from the content table). The old emptiness probe — `SELECT COUNT(*) FROM
423
+ // observations_fts` was DEAD: for an external-content FTS5 table, COUNT reads the
424
+ // CONTENT table (observations), not the index, so `ftsCount === 0` was only ever true
425
+ // on an empty DB (where needsRebuild>0 is false). The rebuild therefore never fired and
426
+ // full-text search silently returned 0 rows after the column-mismatch migration. Gate
427
+ // on the recreation flag instead, which is the only path that leaves the index empty.
428
+ if (obsFtsRecreated) {
429
+ try {
430
+ const cnt = db.prepare(`SELECT COUNT(*) as cnt FROM observations`).get();
431
+ if (cnt.cnt > 0) db.exec(`INSERT INTO observations_fts(observations_fts) VALUES('rebuild')`);
432
+ } catch { /* non-critical */ }
433
+ }
427
434
 
428
435
  // v36 migration: narrow events_fts_au like the v27 fix above. The events FTS
429
436
  // triggers were hand-written inline (below) rather than via ensureFTS, so
package/search-engine.mjs CHANGED
@@ -179,7 +179,11 @@ export function countSearchTotal(db, {
179
179
  export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
180
180
  return {
181
181
  source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
182
- project: r.project, date: r.created_at, created_at_epoch: r.created_at_epoch,
182
+ // `date` is the legacy key the MCP paired-search path reads; `created_at` aligns the
183
+ // obs row shape with the session/prompt rows the CLI interleaves in the same results
184
+ // array (cmdSearch reads r.created_at uniformly) and with recent/recall output. Both
185
+ // hold the same ISO string — keep both so neither consumer breaks.
186
+ project: r.project, date: r.created_at, created_at: r.created_at, created_at_epoch: r.created_at_epoch,
183
187
  score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
184
188
  files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned,
185
189
  snippet: snippet ? (r.match_snippet || '') : '',
package/secret-scrub.mjs CHANGED
@@ -18,8 +18,17 @@ export const SECRET_PATTERNS = [
18
18
  // 2. Structured keys (api_key, auth_token, …) keep the original behavior —
19
19
  // a separator/compound key is unambiguous config syntax even when
20
20
  // preceded by prose ("see auth_token: shhhhhh").
21
- [/((?<![A-Za-z][ \t])\b(?:password|passwd|token|bearer)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
22
- [/(\b(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
21
+ // `(?:\b|_)` before the keyword: a plain word-boundary misses the single most
22
+ // common credential shape — underscore-cased env vars (DB_PASSWORD, GH_TOKEN,
23
+ // MY_AUTH_TOKEN) — because `_` is a \w char, so there is NO \b between it and the
24
+ // keyword. Allowing a leading `_` catches those while the prose lookbehind still
25
+ // excludes "Marker token: …". `secret` added so a bare SECRET=… with a mixed-alnum
26
+ // value is covered (the hex-only assignment pattern below misses non-hex values).
27
+ [/((?<![A-Za-z][ \t])(?:\b|_)(?:password|passwd|token|bearer|secret)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
28
+ // access_token / refresh_token are the canonical OAuth2 field names — they were
29
+ // missing from this KV list (drift vs the JSON list below). `(?:\b|_)` for the same
30
+ // underscore-prefix reason.
31
+ [/((?:\b|_)(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
23
32
  // AWS access keys (AKIA...)
24
33
  [/\bAKIA[A-Z0-9]{16}\b/g, '***'],
25
34
  // OpenAI / Anthropic keys (sk-...) — specific prefixes have lower length threshold
@@ -52,14 +61,35 @@ export const SECRET_PATTERNS = [
52
61
  [/\bnpm_[a-zA-Z0-9]{36,}\b/g, '***'],
53
62
  // Stripe keys (sk_live_, rk_live_, pk_live_, sk_test_, pk_test_)
54
63
  [/\b[srp]k_(?:live|test)_[a-zA-Z0-9]{20,}\b/g, '***'],
64
+ // SendGrid API keys: SG.<22>.<43> — two dots at fixed offsets make this
65
+ // structurally unmistakable; near-zero false-positive risk.
66
+ [/\bSG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}\b/g, '***'],
67
+ // Twilio identifiers: Account SID (AC…) + API Key SID (SK…), each = prefix
68
+ // + exactly 32 hex. The 2-letter prefix + 32-hex shape is specific: an MD5
69
+ // is 32 hex (no AC/SK prefix → no match) and a 40-hex git SHA has no internal
70
+ // \b so the trailing \b can't land mid-string. We deliberately do NOT scrub
71
+ // the bare-hex Twilio *auth token* — see comment block at end re: SHA collision.
72
+ [/\b(?:AC|SK)[0-9a-f]{32}\b/g, '***'],
73
+ // Mailgun private API keys: key-<32 hex>. Prefix-anchored for the same reason;
74
+ // bare 32-hex (no `key-`) is intentionally left alone to avoid hashing FPs.
75
+ [/\bkey-[0-9a-f]{32}\b/g, '***'],
55
76
  // JSON-quoted secrets — error payloads / API responses commonly carry creds
56
77
  // as `{"api_key": "..."}`. The base key=value pattern stops at quotes, so
57
78
  // these slip through. Match the value-quoted form explicitly. Length floor
58
79
  // (6) avoids tripping on intentional placeholder shorts ("...", "secret").
59
- [/("(?:password|passwd|token|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|bearer|refresh[_-]?token|session[_-]?id|sessionid)"\s*:\s*")[^"]{6,}(")/gi, '$1***$2'],
80
+ [/("(?:password|passwd|token|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|access[_-]?token|private[_-]?key|client[_-]?secret|auth[_-]?token|bearer|refresh[_-]?token|session[_-]?id|sessionid)"\s*:\s*")[^"]{6,}(")/gi, '$1***$2'],
60
81
  // Session cookies in headers / urlencoded bodies (sessionid=, session_id=, JSESSIONID=, PHPSESSID=).
61
82
  // 16+ chars filters out short test fixtures like sessionid=abc.
62
83
  [/\b((?:session[_-]?id|sessionid|jsessionid|phpsessid)\s*[=:]\s*)[^\s,;'"}\]]{16,}/gi, '$1***'],
84
+ // ── DELIBERATELY NOT COVERED: bare high-entropy / "raw N-char" tokens ──────
85
+ // A generic `[A-Fa-f0-9]{40}` / high-entropy regex would scrub this repo's own
86
+ // legitimate data: 40-hex git SHAs, 32-hex MD5s, 64-hex SHA256s, and stored
87
+ // `minhash_sig` values. In a hash-heavy codebase the false-positive cost
88
+ // (silent `***` over real content, lost recall) exceeds the marginal catch —
89
+ // and an entropy gate doesn't help because git SHAs are themselves high-entropy.
90
+ // The contextual forms (token=…, Authorization: Bearer …, "api_key":"…") above
91
+ // already cover the dangerous *labelled* shapes. If you are tempted to add a
92
+ // bare-token pattern here: don't — anchor it to a provider prefix instead.
63
93
  ];
64
94
 
65
95
  /**
package/server.mjs CHANGED
@@ -15,6 +15,7 @@ import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from '
15
15
  import {
16
16
  cleanupBroken, decayAndMarkIdle, boostAccessed, demotePinned, mergeDuplicates,
17
17
  purgeStale, purgeStalePreview, findDuplicates, maintenanceStats, rebuildVectors, vacuum,
18
+ recoverChildrenOf,
18
19
  OP_CAP, STALE_AGE_MS,
19
20
  } from './lib/maintain-core.mjs';
20
21
  import { effectiveQuiet, RUNTIME_DIR } from './hook-shared.mjs';
@@ -926,13 +927,20 @@ server.registerTool(
926
927
  db.prepare('UPDATE observations SET related_ids = ? WHERE id = ?').run(JSON.stringify(filtered), r.id);
927
928
  }
928
929
  }
930
+ // Resurface rows merged/compressed INTO the doomed keepers before deleting, else
931
+ // they dangle behind a now-missing parent (compressed_into has no FK) — invisible
932
+ // to every COALESCE(compressed_into,0)=0 view and unrecoverable. Mirrors the CLI
933
+ // delete path + the maintain hard-delete guard (recoverChildrenOf).
934
+ const recovered = recoverChildrenOf(db, args.ids);
929
935
  // Execute deletion (FTS5 cleanup handled by observations_ad trigger)
930
- return db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...args.ids);
936
+ const deleted = db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...args.ids);
937
+ return { changes: deleted.changes, recovered };
931
938
  });
932
939
  const result = deleteTx();
933
940
 
934
941
  const missing = args.ids.filter(id => !rows.some(r => r.id === id));
935
942
  const msg = [`Deleted ${result.changes} observation(s).`];
943
+ if (result.recovered > 0) msg.push(`Recovered ${result.recovered} merged/compressed child observation(s) to live.`);
936
944
  if (missing.length > 0) msg.push(`Note: ID(s) ${missing.join(', ')} not found.`);
937
945
  return { content: [{ type: 'text', text: msg.join(' ') }] };
938
946
  })
package/synonyms.mjs CHANGED
@@ -265,6 +265,13 @@ export const CJK_COMPOUNDS = new Set([
265
265
  // architecture
266
266
  '架构', '设计', '方案', '规划', '文档', '注释', '版本', '分支', '依赖',
267
267
  '性能', '安全', '漏洞', '补丁', '系统', '算法',
268
+ // common task/dev vocab — mined from the zero-dict-keyword prompt slice
269
+ // (benchmark/cjk-straddle-prevalence.mjs). These ubiquitous words were absent
270
+ // from the dictionary, so ~15% of real CJK queries fell through to all-bigram
271
+ // noise. Adding real words is monotonically safe: greedy longest-match only
272
+ // improves, and real compounds cannot create boundary-straddle bigrams.
273
+ '工作', '用户', '完成', '计划', '命令', '工具', '插件', '实施', '处理',
274
+ '清理', '显示', '本地', '改动', '确认', '直接', '开始',
268
275
  ]);
269
276
 
270
277
  // ─── Dispatch Synonyms (unidirectional, broader groupings) ──────────────────
package/tier.mjs CHANGED
@@ -44,9 +44,12 @@ export function computeTier(obs, ctx) {
44
44
  return 'working';
45
45
  }
46
46
 
47
- // Rule 5: Active if within type-specific window
47
+ // Rule 5: Active if within type-specific window. Use `<=` so the exact-millisecond
48
+ // window edge matches TIER_CASE_SQL (`created_at_epoch >= now - window`, i.e. inclusive).
49
+ // The strict `<` here disagreed with the SQL classifier by one tier at the boundary,
50
+ // despite both being documented as the same classifier.
48
51
  const activeWindow = ACTIVE_WINDOWS[obs.type] ?? DEFAULT_ACTIVE_WINDOW_MS;
49
- if (now - obs.created_at_epoch < activeWindow) return 'active';
52
+ if (now - obs.created_at_epoch <= activeWindow) return 'active';
50
53
 
51
54
  // Rule 6: Archive (fallback)
52
55
  return 'archive';
package/utils.mjs CHANGED
@@ -77,8 +77,11 @@ export function estimateTokens(text) {
77
77
  * @returns {number} Clamped integer importance (1, 2, or 3)
78
78
  */
79
79
  export function clampImportance(val) {
80
- if (typeof val !== 'number' || isNaN(val)) return 1;
81
- return Math.max(1, Math.min(3, Math.round(val)));
80
+ // Coerce numeric strings: an LLM emitting "importance":"2" (quoted) would otherwise
81
+ // collapse to 1, silently dropping its signal. Non-numeric strings → NaN → 1.
82
+ const n = typeof val === 'number' ? val : (typeof val === 'string' ? Number(val) : NaN);
83
+ if (!Number.isFinite(n)) return 1;
84
+ return Math.max(1, Math.min(3, Math.round(n)));
82
85
  }
83
86
 
84
87
  /**
@@ -267,9 +270,39 @@ export function debugCatch(e, context) {
267
270
 
268
271
  // ─── JSON Parsing ────────────────────────────────────────────────────────────
269
272
 
273
+ /**
274
+ * Extract the first brace-balanced JSON object substring from text, honoring strings
275
+ * and escapes so braces inside string values don't throw off the depth count. Returns
276
+ * null when there's no `{` or no balanced close. Used to recover a valid leading object
277
+ * when the LLM wrapped it in prose that ALSO contains braces — the greedy `{[\s\S]*}`
278
+ * fallback spans first-`{` to last-`}` and is defeated by an unrelated trailing `{…}`.
279
+ */
280
+ function firstBalancedJsonObject(text) {
281
+ // Anchor on whichever structural opener comes first — `{` (object) or `[` (array) —
282
+ // so a prose-wrapped top-level array isn't truncated to its first inner object.
283
+ const braceAt = text.indexOf('{');
284
+ const brackAt = text.indexOf('[');
285
+ let start, open, close;
286
+ if (braceAt === -1 && brackAt === -1) return null;
287
+ if (brackAt !== -1 && (braceAt === -1 || brackAt < braceAt)) { start = brackAt; open = '['; close = ']'; }
288
+ else { start = braceAt; open = '{'; close = '}'; }
289
+ let depth = 0, inStr = false, esc = false;
290
+ for (let i = start; i < text.length; i++) {
291
+ const c = text[i];
292
+ if (inStr) {
293
+ if (esc) esc = false;
294
+ else if (c === '\\') esc = true;
295
+ else if (c === '"') inStr = false;
296
+ } else if (c === '"') inStr = true;
297
+ else if (c === open) depth++;
298
+ else if (c === close && --depth === 0) return text.slice(start, i + 1);
299
+ }
300
+ return null;
301
+ }
302
+
270
303
  /**
271
304
  * Parse JSON from LLM output, handling markdown fences and embedded objects.
272
- * Tries: direct parse → fenced code block → regex object extraction.
305
+ * Tries: direct parse → fenced code block → first balanced object → greedy regex.
273
306
  * @param {string} text Raw LLM output text
274
307
  * @returns {object|null} Parsed JSON object or null on failure
275
308
  */
@@ -278,6 +311,10 @@ export function parseJsonFromLLM(text) {
278
311
  try { return JSON.parse(text); } catch {}
279
312
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
280
313
  if (fenced) try { return JSON.parse(fenced[1]); } catch {}
314
+ // First balanced object — survives unfenced output wrapped in brace-containing prose.
315
+ const balanced = firstBalancedJsonObject(text);
316
+ if (balanced) try { return JSON.parse(balanced); } catch {}
317
+ // Last-resort greedy span (handles a payload that isn't the FIRST balanced object).
281
318
  const obj = text.match(/\{[\s\S]*\}/);
282
319
  if (obj) try { return JSON.parse(obj[0]); } catch {}
283
320
  return null;