claude-mem-lite 2.42.0 → 2.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.42.0",
13
+ "version": "2.44.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.42.0",
3
+ "version": "2.44.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/install.mjs CHANGED
@@ -1205,7 +1205,7 @@ async function doctor() {
1205
1205
  if (r.drift) {
1206
1206
  const names = r.details.join(', ');
1207
1207
  const suffix = r.plainCount > r.details.length ? ` +${r.plainCount - r.details.length} more` : '';
1208
- warn(`Dev drift: ${r.plainCount} non-symlink file(s) in dev install: ${names}${suffix} (re-run: node install.mjs install --dev)`);
1208
+ warn(`Dev drift: ${r.plainCount} non-symlink file(s) in dev install: ${names}${suffix} (re-run: node ${join(PROJECT_DIR, 'install.mjs')} install --dev)`);
1209
1209
  issues++;
1210
1210
  } else if (r.devMode) {
1211
1211
  ok(`Dev drift: clean (${r.symlinkCount} symlinks, 0 plain)`);
@@ -25,6 +25,35 @@ export function parseIdToken(raw) {
25
25
  return { source, id };
26
26
  }
27
27
 
28
/**
 * Group mixed ID tokens by source. Accepts bare ints, `#N`, `P#N`, `S#N`,
 * and raw strings — the same shapes parseIdToken handles. Used by CLI
 * cmdGet and MCP mem_get so both paths route paste-from-search tokens
 * consistently (closes the #8127 parity gap).
 *
 * An explicit source override (from `--source` or `args.source`) wins over
 * per-token prefixes. Un-prefixed tokens fall back to `defaultSource`.
 *
 * Positive integer numbers are bucketed directly. Every other value —
 * strings, zero/negative numbers, NaN/Infinity, and fractional numbers —
 * is handed to parseIdToken; whatever it rejects (falsy return) is
 * reported in `invalid` as its string form instead of being queried.
 *
 * @param {Array<string|number>} tokens Mixed input — order preserved within each bucket.
 * @param {{explicit?: 'obs'|'session'|'prompt'|null, defaultSource?: 'obs'|'session'|'prompt'}} opts
 * @returns {{bySrc: {obs:number[], session:number[], prompt:number[]}, invalid: string[]}}
 */
export function bucketIdTokens(tokens, { explicit = null, defaultSource = 'obs' } = {}) {
  const bySrc = { obs: [], session: [], prompt: [] };
  const invalid = [];
  for (const raw of tokens) {
    // Fast path for numbers that are already usable row IDs. Row IDs are
    // whole numbers, so require an integer: a fractional value such as 1.5
    // must not be bucketed as-is (it can never match a row and would only
    // surface later as a confusing "not found").
    if (typeof raw === 'number' && Number.isInteger(raw) && raw > 0) {
      bySrc[explicit || defaultSource].push(raw);
      continue;
    }
    const p = parseIdToken(raw);
    if (!p) { invalid.push(String(raw)); continue; }
    // Precedence: explicit override > token prefix > default source.
    const src = explicit || p.source || defaultSource;
    bySrc[src].push(p.id);
  }
  return { bySrc, invalid };
}
56
+
28
57
  /**
29
58
  * Probe the observations / session_summaries / user_prompts tables for any
30
59
  * of the given numeric IDs, excluding the sources the caller already queried.
package/mem-cli.mjs CHANGED
@@ -15,7 +15,7 @@ import { searchResources } from './registry-retriever.mjs';
15
15
  import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
16
16
  import { buildSessionContextLines } from './hook-context.mjs';
17
17
  import { cmdAdopt, cmdUnadopt } from './adopt-cli.mjs';
18
- import { probeOtherSources as probeIdSources } from './lib/id-routing.mjs';
18
+ import { probeOtherSources as probeIdSources, bucketIdTokens } from './lib/id-routing.mjs';
19
19
  import { basename } from 'path';
20
20
  import { readFileSync } from 'fs';
21
21
 
@@ -484,23 +484,26 @@ function cmdRecall(db, args) {
484
484
  const { positional, flags } = parseArgs(args);
485
485
  const file = positional.join(' ');
486
486
  if (!file) {
487
- fail('[mem] Usage: mem recall <file>');
487
+ fail('[mem] Usage: mem recall <file> [--limit N] [--include-noise]');
488
488
  return;
489
489
  }
490
490
 
491
491
  const filename = basename(file);
492
492
  const rawLimit = flags.limit !== undefined ? parseInt(flags.limit, 10) : NaN;
493
493
  const limit = Number.isInteger(rawLimit) ? Math.max(1, rawLimit) : 10;
494
+ const includeNoise = flags['include-noise'] === true || flags['include-noise'] === 'true';
494
495
 
495
496
  // Search via observation_files junction table for indexed filename lookups
496
497
  const escaped = filename.replace(/%/g, '\\%').replace(/_/g, '\\_');
497
498
  const likePattern = `%${escaped}`;
499
+ const noiseClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
498
500
  const rows = db.prepare(`
499
501
  SELECT DISTINCT o.id, o.type, o.title, o.lesson_learned, o.created_at, o.project
500
502
  FROM observations o
501
503
  JOIN observation_files of2 ON of2.obs_id = o.id
502
504
  WHERE COALESCE(o.compressed_into, 0) = 0
503
505
  AND (of2.filename = ? OR of2.filename LIKE ? ESCAPE '\\')
506
+ ${noiseClause}
504
507
  ORDER BY o.created_at_epoch DESC
505
508
  LIMIT ?
506
509
  `).all(filename, likePattern, limit);
@@ -597,20 +600,6 @@ function cmdGet(db, args) {
597
600
  }
598
601
 
599
602
  const tokens = idStr.split(',').map(s => s.trim()).filter(Boolean);
600
- const unparseable = [];
601
- const parsed = [];
602
- for (const t of tokens) {
603
- const p = parseIdToken(t);
604
- if (p) parsed.push(p);
605
- else unparseable.push(t);
606
- }
607
- if (unparseable.length > 0) {
608
- process.stderr.write(`[mem] Ignoring unparseable ID token(s): ${unparseable.join(', ')}\n`);
609
- }
610
- if (parsed.length === 0) {
611
- fail('[mem] No valid IDs provided');
612
- return;
613
- }
614
603
 
615
604
  // Explicit --source overrides any prefix; otherwise each token's prefix routes individually.
616
605
  const explicit = flags.source;
@@ -620,10 +609,14 @@ function cmdGet(db, args) {
620
609
  return;
621
610
  }
622
611
 
623
- const bySrc = { obs: [], session: [], prompt: [] };
624
- for (const p of parsed) {
625
- const src = explicit || p.source || 'obs';
626
- bySrc[src].push(p.id);
612
+ // Shared bucketing with MCP mem_get — single source of truth for P#/S#/# routing (#8050).
613
+ const { bySrc, invalid: unparseable } = bucketIdTokens(tokens, { explicit, defaultSource: 'obs' });
614
+ if (unparseable.length > 0) {
615
+ process.stderr.write(`[mem] Ignoring unparseable ID token(s): ${unparseable.join(', ')}\n`);
616
+ }
617
+ if (bySrc.obs.length + bySrc.session.length + bySrc.prompt.length === 0) {
618
+ fail('[mem] No valid IDs provided');
619
+ return;
627
620
  }
628
621
 
629
622
  // Validate --fields against obs schema (only meaningful for obs rows).
@@ -659,7 +652,7 @@ function cmdGet(db, args) {
659
652
  if (totalFound === 0) {
660
653
  // Probe the OTHER sources so the caller can retry with the right prefix.
661
654
  const queried = new Set(Object.entries(bySrc).filter(([, v]) => v.length > 0).map(([k]) => k));
662
- const allIds = parsed.map(p => p.id);
655
+ const allIds = [...bySrc.obs, ...bySrc.session, ...bySrc.prompt];
663
656
  const probe = probeIdSources(db, allIds, queried);
664
657
  const hits = formatProbeHints(probe);
665
658
  const hint = hits.length > 0 ? ` Try: ${hits.join('; ')}.` : '';
@@ -716,9 +709,21 @@ function cmdTimeline(db, args) {
716
709
  // Bare integer (no prefix): try observation first. Fall back to user_prompts
717
710
  // then session_summaries so pasted P#/S# IDs still work when the prefix is
718
711
  // omitted — matches the prefix-aware routing used by search/probe.
719
- const obsExists = db.prepare('SELECT 1 FROM observations WHERE id = ?').get(parsed.id);
720
- if (obsExists) {
721
- anchorId = parsed.id;
712
+ const obsRow = db.prepare('SELECT compressed_into FROM observations WHERE id = ?').get(parsed.id);
713
+ if (obsRow) {
714
+ const ci = obsRow.compressed_into;
715
+ if (ci && ci > 0) {
716
+ // Compressed into a live parent: re-anchor so the window doesn't silently
717
+ // straddle a dead record. Negative sentinels (-1 dropped, -2 pending purge)
718
+ // have no canonical parent — surface an explicit error instead.
719
+ anchorId = ci;
720
+ anchorNote = `(anchored to #${ci}, #${parsed.id} was compressed into it)`;
721
+ } else if (ci && ci < 0) {
722
+ fail(`[mem] Observation #${parsed.id} was compressed and pruned; no canonical anchor available`);
723
+ return;
724
+ } else {
725
+ anchorId = parsed.id;
726
+ }
722
727
  } else {
723
728
  const promptRow = db.prepare('SELECT created_at_epoch FROM user_prompts WHERE id = ?').get(parsed.id);
724
729
  const sessionRow = promptRow ? null : db.prepare('SELECT created_at_epoch FROM session_summaries WHERE id = ?').get(parsed.id);
@@ -1970,6 +1975,7 @@ Commands:
1970
1975
 
1971
1976
  recall <file> Show observations related to a file
1972
1977
  --limit N Max results (default 10)
1978
+ --include-noise Include hook-llm fallback titles ("Modified X", raw error logs)
1973
1979
 
1974
1980
  get <id1,id2,...> Get full details by ID
1975
1981
  IDs accept search-output prefixes: #123 (obs), P#123 (prompt), S#123 (session).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.42.0",
3
+ "version": "2.44.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -9,6 +9,19 @@ const CONFIRM_RE = /^(y(es)?|no?|ok|done|go|sure|lgtm|thanks?|ty|继续|确认|
9
9
  const SLASH_CMD_RE = /^\//;
10
10
  const PURE_OP_RE = /^(git\s+(commit|push|merge)|npm\s+(publish|deploy))\b/i;
11
11
 
12
+ // v2.43.x: pure continuation directives — "keep going on what you were doing"
13
+ // with no new topic. Long enough to evade CONFIRM_RE / length gate but
14
+ // semantically empty for memory-recall purposes; injecting [mem] context
15
+ // here reads like a turn boundary and can prematurely end the model's
16
+ // in-flight tool chain. Conservative match: must be SOLELY the directive,
17
+ // not directive + new instruction (those keep getting injection).
18
+ const CONTINUATION_RE = /^(继续|接着|继续做|接着做|继续干|继续做下一步|接着做下一步|别停|不要停|next|continue|go\s*on|keep\s+going|carry\s+on|proceed|more(?:\s+please)?)\s*[??!!。.,,]*\s*$/i;
19
+
20
+ // v2.43.x: meta-pause questions — user is asking the model to reflect on
21
+ // its own pause/stop, then continue. No new topic = no useful memory hit;
22
+ // injection just adds reminder noise on top of an already-reflective turn.
23
+ const META_PAUSE_RE = /(怎么停|为什么停|为何停|你怎么停|工作停下来|刚才停|why\s+(?:did\s+you\s+)?(?:stop|pause|halt))/i;
24
+
12
25
  /**
13
26
  * CJK-weighted effective length. CJK characters (CJK Unified Ideographs
14
27
  * main + extension A) carry ~3x the semantic token density of Latin
@@ -30,6 +43,8 @@ export function shouldSkip(text) {
30
43
  if (CONFIRM_RE.test(trimmed)) return true;
31
44
  if (SLASH_CMD_RE.test(trimmed)) return true;
32
45
  if (PURE_OP_RE.test(trimmed)) return true;
46
+ if (CONTINUATION_RE.test(trimmed)) return true;
47
+ if (META_PAUSE_RE.test(trimmed)) return true;
33
48
  return false;
34
49
  }
35
50
 
@@ -66,6 +66,34 @@ const FOLLOWUP_BM25_MIN_SCORE = Number(process.env.CLAUDE_MEM_UPS_BM25_MIN_FOLLO
66
66
  // gate exists to drop.
67
67
  const TOP_REL_FLOOR = Number(process.env.CLAUDE_MEM_UPS_TOP_MIN || 50);
68
68
 
69
+ // v2.43.x: OR-fallback raw BM25 magnitude floor. The composite TOP_REL_FLOOR
70
+ // above gates on `bm25 × importance × type_quality × decay × noise_penalty`.
71
+ // For importance=3 bugfix obs, those multipliers compound to ~6×, so a modest
72
+ // BM25 of -17..-22 can clear a composite floor of 50 via inflation alone.
73
+ // When the FTS query relaxes to OR (AND returned 0), a single strongly-
74
+ // matching stem on a big multi-topic prompt leaks through — observed
75
+ // failure mode: broad Chinese prompts surfacing unrelated importance=3
76
+ // bugfix obs whose concepts share exactly one stem with the prompt.
77
+ //
78
+ // Empirical OR-mode distribution (11-prompt probe, 2026-04-23):
79
+ // real signal top-|bm25_raw| ≥ 41
80
+ // broad/meta noise top-|bm25_raw| ≤ 22
81
+ // below threshold top-|bm25_raw| < 12
82
+ // Default 30 sits in the clean 22→41 gap. AND mode bypasses this gate —
83
+ // AND's all-stems-must-match constraint is already a precision signal,
84
+ // and there are legitimate AND hits (GOOD-narrow probe: bm25_raw=19.3,
85
+ // rel=81) that we must not drop.
86
+ //
87
+ // CLAUDE_MEM_UPS_TOP_MIN=0 disables this too: on small test corpora (1–2
88
+ // seeded obs) absolute BM25 magnitudes collapse to near-zero (observed
89
+ // |bm25|≈4e-6) because FTS5 IDF normalization needs a real document
90
+ // distribution. The existing TOP_REL_FLOOR knob already encodes the
91
+ // "seed-mode: kill absolute floors" semantic for integration tests, so
92
+ // we piggy-back on it rather than introducing a second override env.
93
+ const OR_TOP_BM25_FLOOR = TOP_REL_FLOOR === 0
94
+ ? 0
95
+ : Number(process.env.CLAUDE_MEM_UPS_OR_BM25_MIN || 30);
96
+
69
97
  function isFollowUpSession() {
70
98
  try {
71
99
  const raw = readFileSync(INJECTED_IDS_FILE, 'utf8');
@@ -77,9 +105,15 @@ function isFollowUpSession() {
77
105
 
78
106
  // ─── DB Query Functions ─────────────────────────────────────────────────────
79
107
 
108
+ // Returns { rows, mode } where mode is 'AND' (initial pass), 'OR' (fallback
109
+ // after AND returned 0), or null (no FTS query / sanitize rejected). Callers
110
+ // use `mode` to apply OR-specific gates — see OR_TOP_BM25_FLOOR rationale.
111
+ // Each row includes `bm25_raw` (pre-multiplier bm25 magnitude) alongside the
112
+ // composite `relevance`, so callers can distinguish raw-match strength from
113
+ // importance/type/decay inflation.
80
114
  function searchByFts(db, queryText, project, limit, typeFilter) {
81
115
  const ftsQuery = sanitizeFtsQuery(queryText);
82
- if (!ftsQuery) return [];
116
+ if (!ftsQuery) return { rows: [], mode: null };
83
117
 
84
118
  const cutoff = Date.now() - LOOKBACK_MS;
85
119
 
@@ -92,6 +126,7 @@ function searchByFts(db, queryText, project, limit, typeFilter) {
92
126
  // docs/p0-injection-noise-baseline.txt.
93
127
  const sql = `
94
128
  SELECT o.id, o.type, o.title, o.lesson_learned,
129
+ ${OBS_BM25} as bm25_raw,
95
130
  ${OBS_BM25}
96
131
  * (1.0 + EXP(-0.693 * (? - o.created_at_epoch) / ${TYPE_DECAY_CASE}))
97
132
  * ${TYPE_QUALITY_CASE}
@@ -115,6 +150,7 @@ function searchByFts(db, queryText, project, limit, typeFilter) {
115
150
  params.push(limit);
116
151
 
117
152
  let rows = db.prepare(sql).all(...params);
153
+ let mode = 'AND';
118
154
 
119
155
  // OR fallback if AND query returned nothing
120
156
  if (rows.length === 0) {
@@ -122,10 +158,11 @@ function searchByFts(db, queryText, project, limit, typeFilter) {
122
158
  if (orQuery) {
123
159
  params[1] = orQuery;
124
160
  rows = db.prepare(sql).all(...params);
161
+ mode = 'OR';
125
162
  }
126
163
  }
127
164
 
128
- return rows;
165
+ return { rows, mode };
129
166
  }
130
167
 
131
168
  function searchByFile(db, files, project, limit) {
@@ -256,7 +293,7 @@ const QUIET_HOOKS = process.env.MEM_QUIET_HOOKS === '1';
256
293
  function formatResults(rows) {
257
294
  if (!rows || rows.length === 0) return null;
258
295
 
259
- const lines = ['[mem] Related memories:'];
296
+ const lines = ['[mem] FYI — Related memories (continue your task):'];
260
297
  for (const r of rows) {
261
298
  const icon = typeIcon(r.type);
262
299
  const title = truncate(r.title || '', 70);
@@ -272,7 +309,7 @@ function formatResults(rows) {
272
309
  // chars (slightly longer than obs titles because prompts carry more context).
273
310
  function formatPromptResults(rows) {
274
311
  if (!rows || rows.length === 0) return null;
275
- const lines = ['[mem] Past similar questions:'];
312
+ const lines = ['[mem] FYI — Past similar questions (continue your task):'];
276
313
  for (const r of rows) {
277
314
  const text = truncate((r.prompt_text || '').replace(/\s+/g, ' '), 80);
278
315
  lines.push(`P#${r.id} 💬 ${text}`);
@@ -375,7 +412,7 @@ async function main() {
375
412
  // take priority slots in the merged output.
376
413
  const errSig = extractErrorSignature(promptText);
377
414
  const sigRows = errSig
378
- ? searchByFts(db, errSig.signature, project, 2, 'bugfix').filter(r =>
415
+ ? searchByFts(db, errSig.signature, project, 2, 'bugfix').rows.filter(r =>
379
416
  typeof r.relevance === 'number' && Math.abs(r.relevance) >= bm25Floor
380
417
  )
381
418
  : [];
@@ -386,11 +423,13 @@ async function main() {
386
423
  } else {
387
424
  // FTS search: use the prompt as query, optionally type-filtered
388
425
  const files = extractFiles(promptText);
389
- let ftsRows = searchByFts(db, promptText, project, intent?.limit || MAX_RESULTS, intent?.type || null);
426
+ let ftsResult = searchByFts(db, promptText, project, intent?.limit || MAX_RESULTS, intent?.type || null);
390
427
  // Fallback: if typed search returned nothing, retry without type filter
391
- if (ftsRows.length === 0 && intent?.type) {
392
- ftsRows = searchByFts(db, promptText, project, intent.limit || MAX_RESULTS, null);
428
+ if (ftsResult.rows.length === 0 && intent?.type) {
429
+ ftsResult = searchByFts(db, promptText, project, intent.limit || MAX_RESULTS, null);
393
430
  }
431
+ let ftsRows = ftsResult.rows;
432
+ const ftsMode = ftsResult.mode;
394
433
  const fileRows = files.length > 0 ? searchByFile(db, files, project, 2) : [];
395
434
 
396
435
  // T3 (v2.31): BM25 magnitude threshold — drop FTS hits whose relevance
@@ -403,6 +442,19 @@ async function main() {
403
442
  typeof r.relevance === 'number' && Math.abs(r.relevance) >= bm25Floor
404
443
  );
405
444
 
445
+ // v2.43.x: OR-mode raw-BM25 floor. In OR-fallback mode the composite
446
+ // TOP_REL_FLOOR below is inflated by importance × type_quality × decay
447
+ // multipliers — a weak single-stem hit on an importance=3 bugfix obs
448
+ // can reach composite rel=66 while raw |bm25|=19. Gate on raw bm25
449
+ // magnitude for OR mode only; AND mode's all-stems-match constraint
450
+ // is a precision signal and routinely produces legitimate AND hits
451
+ // below raw |bm25|=20 that we do not want to drop (see GOOD-narrow
452
+ // probe). Skip gate when OR_TOP_BM25_FLOOR is set to 0 (test hook).
453
+ if (ftsMode === 'OR' && OR_TOP_BM25_FLOOR > 0 && ftsRows.length > 0) {
454
+ const topBm25 = Math.abs(ftsRows[0].bm25_raw || 0);
455
+ if (topBm25 < OR_TOP_BM25_FLOOR) ftsRows = [];
456
+ }
457
+
406
458
  // v2.34.3: top-|rel| sanity gate. Per-row filtering above leaves noise
407
459
  // prompts intact when many rows share a weak stem (all in 25..48 range).
408
460
  // If the best remaining FTS match is below the top floor, drop the
package/server.mjs CHANGED
@@ -27,7 +27,7 @@ import { basename, join } from 'path';
27
27
  import { homedir } from 'os';
28
28
  import { ensureRegistryDb, upsertResource } from './registry.mjs';
29
29
  import { searchResources } from './registry-retriever.mjs';
30
- import { probeOtherSources as probeIdSources, parseIdToken } from './lib/id-routing.mjs';
30
+ import { probeOtherSources as probeIdSources, parseIdToken, bucketIdTokens } from './lib/id-routing.mjs';
31
31
  import { getVocabulary, rebuildVocabulary, _resetVocabCache, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
32
32
  import { createRequire } from 'module';
33
33
 
@@ -768,7 +768,10 @@ server.registerTool(
768
768
  // from mem_search results expect the same routing as CLI `timeline --anchor`.
769
769
  // Prompt/session anchors resolve to the nearest-in-time observation so
770
770
  // before/after semantics still apply to the observations timeline.
771
- if (typeof anchorId === 'string') {
771
+ // Also covers bare numeric anchors so compressed-obs routing applies uniformly —
772
+ // without this, `anchor: 7826` (int) would bypass the compressed check and
773
+ // silently straddle a dead record.
774
+ if (typeof anchorId === 'string' || typeof anchorId === 'number') {
772
775
  const parsed = parseIdToken(anchorId);
773
776
  if (!parsed) {
774
777
  return { content: [{ type: 'text', text: `Invalid anchor "${args.anchor}". Expected N, #N, P#N, or S#N.` }] };
@@ -789,9 +792,20 @@ server.registerTool(
789
792
  anchorNote = `(anchored to #${nearest.id}, closest obs to ${srcPrefix}${parsed.id})`;
790
793
  } else {
791
794
  // Bare "#N" or "N" — resolve to obs, falling back to prompt/session like CLI bare-int path.
792
- const obsExists = db.prepare('SELECT 1 FROM observations WHERE id = ?').get(parsed.id);
793
- if (obsExists) {
794
- anchorId = parsed.id;
795
+ // Route compressed obs to its parent so the before/after window (which filters compressed)
796
+ // isn't shown around a dead anchor. Negative sentinels (-1 dropped, -2 pending purge) surface
797
+ // an explicit error — they have no canonical parent.
798
+ const obsRow = db.prepare('SELECT compressed_into FROM observations WHERE id = ?').get(parsed.id);
799
+ if (obsRow) {
800
+ const ci = obsRow.compressed_into;
801
+ if (ci && ci > 0) {
802
+ anchorId = ci;
803
+ anchorNote = `(anchored to #${ci}, #${parsed.id} was compressed into it)`;
804
+ } else if (ci && ci < 0) {
805
+ return { content: [{ type: 'text', text: `Observation #${parsed.id} was compressed and pruned; no canonical anchor available.` }] };
806
+ } else {
807
+ anchorId = parsed.id;
808
+ }
795
809
  } else {
796
810
  const promptRow = db.prepare('SELECT created_at_epoch FROM user_prompts WHERE id = ?').get(parsed.id);
797
811
  const sessionRow = promptRow ? null : db.prepare('SELECT created_at_epoch FROM session_summaries WHERE id = ?').get(parsed.id);
@@ -915,84 +929,123 @@ server.registerTool(
915
929
  inputSchema: memGetSchema,
916
930
  },
917
931
  safeHandler(async (args) => {
918
- const source = args.source || 'obs';
919
- const placeholders = args.ids.map(() => '?').join(',');
932
+ // Bucket by per-token prefix (or force all to `args.source` when explicit).
933
+ // coerceMixedIdTokens has already stringified + regex-validated each token.
934
+ const { bySrc, invalid } = bucketIdTokens(args.ids, { explicit: args.source || null, defaultSource: 'obs' });
935
+ if (invalid.length > 0) {
936
+ // Should not happen — schema regex already rejected bad tokens — but guard defensively.
937
+ return { content: [{ type: 'text', text: `Invalid ID token(s): ${invalid.join(', ')}. Expected N, #N, P#N, or S#N.` }] };
938
+ }
939
+ const totalRequested = bySrc.obs.length + bySrc.session.length + bySrc.prompt.length;
940
+ if (totalRequested === 0) {
941
+ return { content: [{ type: 'text', text: 'No valid IDs provided.' }] };
942
+ }
920
943
 
921
- let rows, allFields, prefix, sourceLabel;
922
- if (source === 'session') {
923
- rows = db.prepare(`SELECT * FROM session_summaries WHERE id IN (${placeholders}) ORDER BY created_at_epoch ASC`).all(...args.ids);
924
- allFields = ['id', 'request', 'investigated', 'learned', 'completed', 'next_steps', 'files_read', 'files_edited', 'notes', 'project', 'created_at', 'memory_session_id', 'prompt_number'];
925
- prefix = 'S#';
926
- sourceLabel = 'sessions';
927
- } else if (source === 'prompt') {
928
- rows = db.prepare(`SELECT * FROM user_prompts WHERE id IN (${placeholders}) ORDER BY created_at_epoch ASC`).all(...args.ids);
929
- allFields = ['id', 'prompt_text', 'content_session_id', 'prompt_number', 'created_at'];
930
- prefix = 'P#';
931
- sourceLabel = 'prompts';
932
- } else {
933
- // Increment access_count for retrieved observations (batch UPDATE)
944
+ const OBS_FIELDS = ['id', 'type', 'title', 'subtitle', 'narrative', 'text', 'facts', 'concepts', 'lesson_learned', 'search_aliases', 'files_read', 'files_modified', 'project', 'created_at', 'memory_session_id', 'prompt_number', 'importance', 'related_ids', 'access_count', 'branch', 'superseded_at', 'superseded_by', 'last_accessed_at'];
945
+
946
+ // `fields` filter only makes sense for obs rows; session/prompt ignore it.
947
+ // Validate when obs is queried — throw on all-invalid, note on partial-invalid.
948
+ let fieldsNote = '';
949
+ let obsFieldFilter = null;
950
+ if (args.fields?.length && bySrc.obs.length > 0) {
951
+ const invalidFields = args.fields.filter(f => !OBS_FIELDS.includes(f));
952
+ const validFields = args.fields.filter(f => OBS_FIELDS.includes(f));
953
+ if (validFields.length === 0) {
954
+ throw new Error(`No valid fields. Unknown field(s): ${invalidFields.join(', ')}. Valid: ${OBS_FIELDS.join(', ')}`);
955
+ }
956
+ if (invalidFields.length > 0) {
957
+ fieldsNote = `Note: unknown field(s) dropped: ${invalidFields.join(', ')}. Valid: ${OBS_FIELDS.join(', ')}`;
958
+ }
959
+ obsFieldFilter = validFields;
960
+ }
961
+
962
+ // Per-source fetch + render — each branch appends rendered sections and records found IDs in foundBySource.
963
+ const sections = [];
964
+ const foundBySource = { obs: new Set(), session: new Set(), prompt: new Set() };
965
+
966
+ if (bySrc.obs.length > 0) {
967
+ const ph = bySrc.obs.map(() => '?').join(',');
934
968
  try {
935
- db.prepare(
936
- `UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id IN (${placeholders})`
937
- ).run(Date.now(), ...args.ids);
938
- autoBoostIfNeeded(db, args.ids);
969
+ db.prepare(`UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id IN (${ph})`).run(Date.now(), ...bySrc.obs);
970
+ autoBoostIfNeeded(db, bySrc.obs);
939
971
  } catch { /* non-critical: FTS5 trigger may fail on corrupted index */ }
940
- rows = db.prepare(`SELECT * FROM observations WHERE id IN (${placeholders}) ORDER BY created_at_epoch ASC`).all(...args.ids);
941
- allFields = ['id', 'type', 'title', 'subtitle', 'narrative', 'text', 'facts', 'concepts', 'lesson_learned', 'search_aliases', 'files_read', 'files_modified', 'project', 'created_at', 'memory_session_id', 'prompt_number', 'importance', 'related_ids', 'access_count', 'branch', 'superseded_at', 'superseded_by', 'last_accessed_at'];
942
- prefix = '#';
943
- sourceLabel = 'observations';
972
+ const rows = db.prepare(`SELECT * FROM observations WHERE id IN (${ph}) ORDER BY created_at_epoch ASC`).all(...bySrc.obs);
973
+ const renderFields = obsFieldFilter || OBS_FIELDS;
974
+ for (const row of rows) {
975
+ foundBySource.obs.add(row.id);
976
+ const lines = [`── #${row.id} ──`];
977
+ for (const f of renderFields) {
978
+ const val = row[f];
979
+ if (val === null || val === undefined || val === '') continue;
980
+ if (f === 'text' && row.narrative && typeof val === 'string' && val.startsWith(row.narrative)) continue;
981
+ const maxLen = f === 'narrative' ? 1000 : f === 'lesson_learned' ? 500 : f === 'text' ? 500 : 200;
982
+ lines.push(`${f}: ${typeof val === 'string' && val.length > maxLen ? val.slice(0, maxLen) + '…' : val}`);
983
+ }
984
+ sections.push(lines.join('\n'));
985
+ }
944
986
  }
945
987
 
946
- // P1-3: validate requested fields — throw on all-invalid so callers don't silently get an
947
- // empty record (header only). Partial-invalid is tolerated but surfaced as a note.
948
- let fieldsNote = '';
949
- if (args.fields?.length) {
950
- const invalid = args.fields.filter(f => !allFields.includes(f));
951
- const valid = args.fields.filter(f => allFields.includes(f));
952
- if (valid.length === 0) {
953
- throw new Error(`No valid fields. Unknown field(s): ${invalid.join(', ')}. Valid: ${allFields.join(', ')}`);
988
+ if (bySrc.session.length > 0) {
989
+ const ph = bySrc.session.map(() => '?').join(',');
990
+ const rows = db.prepare(`SELECT * FROM session_summaries WHERE id IN (${ph}) ORDER BY created_at_epoch ASC`).all(...bySrc.session);
991
+ const sessFields = ['id', 'request', 'investigated', 'learned', 'completed', 'next_steps', 'files_read', 'files_edited', 'notes', 'project', 'created_at', 'memory_session_id', 'prompt_number'];
992
+ for (const row of rows) {
993
+ foundBySource.session.add(row.id);
994
+ const lines = [`── S#${row.id} ──`];
995
+ for (const f of sessFields) {
996
+ const val = row[f];
997
+ if (val === null || val === undefined || val === '') continue;
998
+ const maxLen = 500;
999
+ lines.push(`${f}: ${typeof val === 'string' && val.length > maxLen ? val.slice(0, maxLen) + '…' : val}`);
1000
+ }
1001
+ sections.push(lines.join('\n'));
954
1002
  }
955
- if (invalid.length > 0) {
956
- fieldsNote = `Note: unknown field(s) dropped: ${invalid.join(', ')}. Valid: ${allFields.join(', ')}`;
1003
+ }
1004
+
1005
+ if (bySrc.prompt.length > 0) {
1006
+ const ph = bySrc.prompt.map(() => '?').join(',');
1007
+ const rows = db.prepare(`SELECT * FROM user_prompts WHERE id IN (${ph}) ORDER BY created_at_epoch ASC`).all(...bySrc.prompt);
1008
+ for (const row of rows) {
1009
+ foundBySource.prompt.add(row.id);
1010
+ const lines = [`── P#${row.id} ──`];
1011
+ if (row.prompt_text) lines.push(`prompt_text: ${row.prompt_text.length > 500 ? row.prompt_text.slice(0, 500) + '…' : row.prompt_text}`);
1012
+ if (row.content_session_id) lines.push(`content_session_id: ${row.content_session_id}`);
1013
+ if (row.prompt_number !== null && row.prompt_number !== undefined) lines.push(`prompt_number: ${row.prompt_number}`);
1014
+ if (row.created_at) lines.push(`created_at: ${row.created_at}`);
1015
+ sections.push(lines.join('\n'));
957
1016
  }
958
1017
  }
959
1018
 
960
- if (rows.length === 0) {
961
- // Symmetric probe via shared lib/id-routing.mjs so CLI cmdGet and MCP mem_get
962
- // stay aligned if a table's ID semantics change.
963
- const probe = probeIdSources(db, args.ids, new Set([source]));
1019
+ const totalFound = foundBySource.obs.size + foundBySource.session.size + foundBySource.prompt.size;
1020
+
1021
+ if (totalFound === 0) {
1022
+ // Probe other sources so callers can retry with the right prefix/source override.
1023
+ const queried = new Set(Object.entries(bySrc).filter(([, v]) => v.length > 0).map(([k]) => k));
1024
+ const allNumericIds = [...bySrc.obs, ...bySrc.session, ...bySrc.prompt];
1025
+ const probe = probeIdSources(db, allNumericIds, queried);
964
1026
  const hints = [];
965
- if (probe.obs.length > 0) hints.push(`#${probe.obs.join(', #')} (obs — use source='obs')`);
966
- if (probe.session.length > 0) hints.push(`S#${probe.session.join(', S#')} (session — use source='session')`);
967
- if (probe.prompt.length > 0) hints.push(`P#${probe.prompt.join(', P#')} (prompt — use source='prompt')`);
1027
+ if (probe.obs.length > 0) hints.push(`#${probe.obs.join(', #')} (obs — use source='obs' or bare #N)`);
1028
+ if (probe.session.length > 0) hints.push(`S#${probe.session.join(', S#')} (session — use source='session' or S#N)`);
1029
+ if (probe.prompt.length > 0) hints.push(`P#${probe.prompt.join(', P#')} (prompt — use source='prompt' or P#N)`);
968
1030
  const hint = hints.length > 0 ? ` Try: ${hints.join('; ')}.` : '';
969
- const msg = `No ${sourceLabel} found for given IDs.${hint}`;
1031
+ const queriedList = [...queried].join(', ');
1032
+ const msg = `No records found in source(s) [${queriedList}] for the given ID(s).${hint}`;
970
1033
  return { content: [{ type: 'text', text: fieldsNote ? `${msg}\n\n${fieldsNote}` : msg }] };
971
1034
  }
972
1035
 
973
- const fields = args.fields?.length ? args.fields.filter(f => allFields.includes(f)) : allFields;
1036
+ // Missing-ID note per bucket (mirrors mem_delete). Show missing IDs with their bucket prefix
1037
+ // so callers can tell which source returned nothing.
1038
+ const missingHints = [];
1039
+ const miss = (arr, found, prefix) => arr.filter(id => !found.has(id)).map(id => `${prefix}${id}`);
1040
+ missingHints.push(...miss(bySrc.obs, foundBySource.obs, '#'));
1041
+ missingHints.push(...miss(bySrc.session, foundBySource.session, 'S#'));
1042
+ missingHints.push(...miss(bySrc.prompt, foundBySource.prompt, 'P#'));
974
1043
 
975
1044
  const parts = [];
976
1045
  if (fieldsNote) parts.push(fieldsNote);
977
- for (const row of rows) {
978
- const lines = [`── ${prefix}${row.id} ──`];
979
- for (const f of fields) {
980
- const val = row[f];
981
- if (val === null || val === undefined || val === '') continue;
982
- // Skip 'text' field when it duplicates narrative (text = narrative + optional CJK bigrams)
983
- if (f === 'text' && row.narrative && typeof val === 'string' && val.startsWith(row.narrative)) continue;
984
- // Field-aware truncation: narrative and lesson need more space than metadata
985
- const maxLen = f === 'narrative' ? 1000 : f === 'lesson_learned' ? 500 : f === 'text' ? 500 : 200;
986
- lines.push(`${f}: ${typeof val === 'string' && val.length > maxLen ? val.slice(0, maxLen) + '…' : val}`);
987
- }
988
- parts.push(lines.join('\n'));
989
- }
990
-
991
- // P1-4: surface IDs that weren't found (mirrors mem_delete's missing-ID note).
992
- const foundIds = new Set(rows.map(r => r.id));
993
- const missing = args.ids.filter(id => !foundIds.has(id));
994
- if (missing.length > 0) {
995
- parts.push(`Note: ID(s) ${missing.join(', ')} not found.`);
1046
+ parts.push(...sections);
1047
+ if (missingHints.length > 0) {
1048
+ parts.push(`Note: ID(s) ${missingHints.join(', ')} not found.`);
996
1049
  }
997
1050
 
998
1051
  return { content: [{ type: 'text', text: parts.join('\n\n') }] };
@@ -2123,15 +2176,18 @@ server.registerTool(
2123
2176
  safeHandler(async (args) => {
2124
2177
  const filename = basename(args.file);
2125
2178
  const limit = args.limit ?? 10;
2179
+ const includeNoise = args.include_noise === true;
2126
2180
 
2127
2181
  const escaped = filename.replace(/%/g, '\\%').replace(/_/g, '\\_');
2128
2182
  const likePattern = `%${escaped}`;
2183
+ const noiseClause = includeNoise ? '' : `AND ${notLowSignalTitleClause('o')}`;
2129
2184
  const rows = db.prepare(`
2130
2185
  SELECT DISTINCT o.id, o.type, o.title, o.lesson_learned, o.created_at, o.project
2131
2186
  FROM observations o
2132
2187
  JOIN observation_files of2 ON of2.obs_id = o.id
2133
2188
  WHERE COALESCE(o.compressed_into, 0) = 0
2134
2189
  AND (of2.filename = ? OR of2.filename LIKE ? ESCAPE '\\')
2190
+ ${noiseClause}
2135
2191
  ORDER BY o.created_at_epoch DESC
2136
2192
  LIMIT ?
2137
2193
  `).all(filename, likePattern, limit);
package/tool-schemas.mjs CHANGED
@@ -28,6 +28,55 @@ const coerceIntArray = z.preprocess(
28
28
  z.array(z.number().int())
29
29
  );
30
30
 
31
+ // Coerce string arrays: accept array, comma-separated string, JSON-array string, or bare string.
32
+ // MCP bridges sometimes JSON-stringify complex args — bare `z.array(z.string())` rejects those
33
+ // with "expected array, received string" and the caller loses the field silently. Parity with
34
+ // coerceIntArray: tolerate the same shapes so files/fields survive client serialization quirks.
35
+ const coerceStringArray = z.preprocess(
36
+ (v) => {
37
+ if (Array.isArray(v)) return v.map(x => typeof x === 'string' ? x : String(x));
38
+ if (typeof v === 'string') {
39
+ const s = v.trim();
40
+ if (s.startsWith('[') && s.endsWith(']')) {
41
+ try {
42
+ const parsed = JSON.parse(s);
43
+ if (Array.isArray(parsed)) return parsed.map(x => typeof x === 'string' ? x : String(x));
44
+ } catch { /* fall through to comma-split */ }
45
+ }
46
+ return s.split(',').map(x => x.trim()).filter(x => x.length > 0);
47
+ }
48
+ return v;
49
+ },
50
+ z.array(z.string())
51
+ );
52
+
53
+ // Coerce mixed ID tokens (#N / P#N / S#N / bare N) for mem_get. Accepts:
54
+ // - native arrays: [1, "P#2", "#3"]
55
+ // - single number: 1
56
+ // - single/comma string: "1,P#2,S#3"
57
+ // - JSON-array string: '[1,"P#2"]' (MCP bridges that stringify complex args)
58
+ // Piped to a regex-validated string[] so each token stays parseable by lib/id-routing.parseIdToken
59
+ // at the handler. Closes the CLI↔MCP gap noted in #8127.
60
+ const coerceMixedIdTokens = z.preprocess(
61
+ (v) => {
62
+ const norm = (x) => typeof x === 'string' ? x.trim() : String(x);
63
+ if (Array.isArray(v)) return v.map(norm).filter(s => s.length > 0);
64
+ if (typeof v === 'number') return [String(v)];
65
+ if (typeof v === 'string') {
66
+ const s = v.trim();
67
+ if (s.startsWith('[') && s.endsWith(']')) {
68
+ try {
69
+ const parsed = JSON.parse(s);
70
+ if (Array.isArray(parsed)) return parsed.map(norm).filter(x => x.length > 0);
71
+ } catch { /* fall through to comma-split */ }
72
+ }
73
+ return s.split(',').map(x => x.trim()).filter(Boolean);
74
+ }
75
+ return v;
76
+ },
77
+ z.array(z.string().regex(/^[PpSs]?#?\d+$/, 'Expected N, #N, P#N, or S#N')).min(1).max(20)
78
+ );
79
+
31
80
  export const memSearchSchema = {
32
81
  query: z.string().optional().describe('Search query (FTS5 syntax supported)'),
33
82
  type: z.enum(['observations', 'sessions', 'prompts']).optional().describe('Limit to one table'),
@@ -78,12 +127,12 @@ export const memTimelineSchema = {
78
127
  };
79
128
 
80
129
  export const memGetSchema = {
81
- // TODO(#8126): accept P#/S#/# prefix strings for paste-from-search parity with
82
- // CLI cmdGet bucketed routing (~40 LOC handler refactor). mem_timeline already
83
- // accepts prefixes via coerceAnchor; this is the matched-pair gap.
84
- ids: coerceIntArray.pipe(z.array(z.number().int()).min(1).max(20)).describe('Observation IDs to retrieve'),
85
- source: z.enum(['obs', 'session', 'prompt']).optional().describe('Record type: obs (default), session (S# from search), prompt (P# from search)'),
86
- fields: z.array(z.string()).optional().describe('Specific fields to return (default: all)'),
130
+ // Accepts mixed tokens so pasted search results work verbatim: [1], [1, "P#2"], "1,P#2,S#3",
131
+ // or the JSON-stringified form ["1","P#2"]. Each token's prefix routes to its source bucket
132
+ // in server.mjs via lib/id-routing.bucketIdTokens. An explicit `source` override still wins.
133
+ ids: coerceMixedIdTokens.describe('Mixed observation/prompt/session IDs accepts N, #N, P#N, S#N; comma-strings and JSON arrays also coerced'),
134
+ source: z.enum(['obs', 'session', 'prompt']).optional().describe('Force all IDs to this source (overrides per-token prefixes). Omit to let P#/S#/# prefixes route individually.'),
135
+ fields: coerceStringArray.optional().describe('Specific fields to return (default: all; validated against obs schema — session/prompt sources ignore this filter)'),
87
136
  };
88
137
 
89
138
  export const memDeleteSchema = {
@@ -97,7 +146,7 @@ export const memSaveSchema = {
97
146
  type: OBS_TYPE_ENUM.optional().describe('Observation type (default: discovery)'),
98
147
  project: z.string().optional().describe('Project name (default: inferred from CWD)'),
99
148
  importance: coerceInt.pipe(z.number().int().min(1).max(3)).optional().describe('Importance level: 1=routine, 2=notable, 3=critical (default: 2 for explicit saves)'),
100
- files: z.array(z.string()).optional().describe('File paths associated with this observation'),
149
+ files: coerceStringArray.optional().describe('File paths associated with this observation'),
101
150
  lesson_learned: z.string().max(500).optional().describe('Key lesson or takeaway (for bugfix: root cause & fix; for decision: rationale)'),
102
151
  };
103
152
 
@@ -162,6 +211,7 @@ export const memExportSchema = {
162
211
  export const memRecallSchema = {
163
212
  file: z.string().min(1).describe('File path or filename to recall observations for'),
164
213
  limit: coerceInt.pipe(z.number().int().min(1).max(50)).optional().describe('Max results (default 10)'),
214
+ include_noise: z.boolean().optional().describe('Include hook-llm fallback titles ("Modified X", "Worked on X", raw error logs) — hidden by default for parity with mem_search'),
165
215
  };
166
216
 
167
217
  export const memFtsCheckSchema = {