@lh8ppl/claude-memory-kit 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lh8ppl/claude-memory-kit",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "description": "cmk — the CLI for claude-memory-kit. Per-project, in-repo memory system for Claude Code.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -33,7 +33,7 @@
33
33
  "dependencies": {
34
34
  "@lh8ppl/cmk-canonicalize": "0.1.0",
35
35
  "@modelcontextprotocol/sdk": "^1.29.0",
36
- "better-sqlite3": "^12.10.0",
36
+ "better-sqlite3": "^12.11.1",
37
37
  "chokidar": "^5.0.0",
38
38
  "commander": "^15.0.0",
39
39
  "js-yaml": "^4.2.0",
package/src/audit-log.mjs CHANGED
@@ -33,6 +33,7 @@ export const REASON_CODES = Object.freeze({
33
33
  FACT_CREATED: 'fact-created', // writeFact: a new fact file was written (Task 123.A — the default create audit; callers emitting a richer code opt out via audit:false)
34
34
  DUPLICATE: 'duplicate', // writeFact: same path + same id
35
35
  DUPLICATE_ELSEWHERE: 'duplicate-elsewhere', // writeFact: different path + same id
36
+ RECURRENCE: 'recurrence', // writeFact: a duplicate write = the same canonical fact re-surfaced → recurrence_count bumped (Task 151.1, ADR-0016 — the capped-recurrence promotion signal)
36
37
  INDEX_REBUILD_FAILED: 'index-rebuild-failed', // writeFact: the fact landed on disk but the best-effort INDEX.md rebuild threw (e.g. a detached auto-extract child killed mid-rebuild). Surfaces what was previously a SILENTLY swallowed catch (D-152) so a lagging committed INDEX is diagnosable; the next reindex/cmk reindex self-heals.
37
38
  USER_REQUESTED: 'user-requested', // forget: user-initiated tombstone
38
39
  CURATED_MERGE: 'curated-merge', // mergeFacts: explicit merge of A + B → C
@@ -50,6 +50,7 @@ import { memoryWrite } from './memory-write.mjs';
50
50
  import { detectConflicts } from './conflict-queue.mjs';
51
51
  import { appendAuditEntry, REASON_CODES } from './audit-log.mjs';
52
52
  import { DEFAULT_COOLDOWN_MS, isCooldownActive, touchCooldownMarker } from './cooldown.mjs';
53
+ import { PROMOTE_THRESHOLD } from './heat.mjs';
53
54
 
54
55
  // User-tier scratchpads auto-persona is allowed to promote into. A
55
56
  // classifier-named target outside this set is dropped defensively (the
@@ -80,8 +81,19 @@ export const PERSONA_CANDIDATE_RE =
80
81
  // Generous (facts are high-signal) but bounded; whole facts only (see below).
81
82
  export const PERSONA_CORPUS_BYTES = 60_000;
82
83
 
83
- function assembleProjectCorpus({ projectRoot, userDir }) {
84
+ // Assemble the tier-P fact corpus AND the cite-and-sum recurrence index (151.3).
85
+ // Returns { corpus, factIndex }:
86
+ // - corpus: the classifier input. Each fact is headed `### [P-XXXXXXXX] title`
87
+ // so the classifier has a stable HANDLE to cite in `source_fact_ids=[…]`.
88
+ // - factIndex: Map<id, recurrence_count> for the facts ACTUALLY in the corpus
89
+ // (an id dropped by the byte cap isn't citable — the LLM never saw it — so it
90
+ // isn't in the index either). resolveRecurrenceSum uses this to validate cited
91
+ // ids + sum their real recurrence_count (the gate — code counts, LLM doesn't).
92
+ // Scratchpad bullets have no per-bullet id/recurrence_count, so they appear in the
93
+ // corpus (still useful synthesis context) but contribute no citable index entries.
94
+ export function assembleProjectCorpus({ projectRoot, userDir }) {
84
95
  const sources = listObservationSources({ projectRoot, userDir });
96
+ // {part, id, recurrenceCount}; id/recurrenceCount null for scratchpad parts.
85
97
  const parts = [];
86
98
  for (const s of sources) {
87
99
  if (s.tier !== 'P') continue;
@@ -93,10 +105,17 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
93
105
  }
94
106
  if (s.kind === 'fact') {
95
107
  const { frontmatter, body } = parse(content);
96
- const title = frontmatter?.title ?? frontmatter?.id ?? '';
97
- parts.push(`### ${title}\n${(body ?? '').trim()}`);
108
+ const id = frontmatter?.id ?? null;
109
+ const title = frontmatter?.title ?? id ?? '';
110
+ // 151.1: recurrence_count is the gate signal; a fact predating the field
111
+ // (or with a bad value) counts as 1 — a single occurrence.
112
+ const rc = frontmatter?.recurrence_count;
113
+ const recurrenceCount = Number.isFinite(rc) && rc > 0 ? rc : 1;
114
+ // Lead the heading with the citable id so the classifier can echo it.
115
+ const head = id ? `### [${id}] ${title}` : `### ${title}`;
116
+ parts.push({ part: `${head}\n${(body ?? '').trim()}`, id, recurrenceCount });
98
117
  } else {
99
- parts.push((content ?? '').trim());
118
+ parts.push({ part: (content ?? '').trim(), id: null, recurrenceCount: null });
100
119
  }
101
120
  }
102
121
  // Task 111 (F-2): BOUND the corpus. Previously this joined EVERY tier-P fact
@@ -110,9 +129,10 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
110
129
  // timed-out zero. A value-ordered (trust/recency-first) accumulation is the
111
130
  // follow-up if a large corpus drops doctrine.
112
131
  const out = [];
132
+ const factIndex = new Map();
113
133
  let used = 0;
114
134
  let truncated = false;
115
- for (const part of parts.filter(Boolean)) {
135
+ for (const { part, id, recurrenceCount } of parts.filter((p) => p.part)) {
116
136
  const cost = Buffer.byteLength(part, 'utf8') + 2; // +2 for the '\n\n' join
117
137
  if (used + cost > PERSONA_CORPUS_BYTES) {
118
138
  truncated = true;
@@ -120,9 +140,11 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
120
140
  }
121
141
  out.push(part);
122
142
  used += cost;
143
+ // Index only facts that actually made it into the corpus (citable).
144
+ if (id) factIndex.set(id, recurrenceCount);
123
145
  }
124
146
  if (truncated) out.push('### …\n(corpus truncated — additional project facts omitted for this pass)');
125
- return out.join('\n\n');
147
+ return { corpus: out.join('\n\n'), factIndex };
126
148
  }
127
149
 
128
150
  // Default size of the recent-transcript window handed to the SessionEnd persona
@@ -231,13 +253,32 @@ export function buildClassifierInstructions(source = 'facts') {
231
253
  const beginMarker = isTranscript
232
254
  ? '=== BEGIN RECENT CONVERSATION ==='
233
255
  : '=== BEGIN CAPTURED PROJECT FACTS ===';
256
+ // 151.3 (cite-and-sum, D-230): on the FACTS path each input fact is headed
257
+ // `### [P-XXXXXXXX] title`, so the classifier can CITE the facts it synthesized
258
+ // a trait from. It cites — it does NOT count. Code resolves the cited ids and
259
+ // sums their real recurrence_count (the gate). The transcript path has no
260
+ // citable ids, so it keeps the simpler line + the confidence fast-path only.
261
+ const outputFormat = isTranscript
262
+ ? 'PERSONA CANDIDATE | target=<FILE> | section=<SECTION> | confidence=<high|medium|low> | <one-line restatement>'
263
+ : 'PERSONA CANDIDATE | target=<FILE> | section=<SECTION> | confidence=<high|medium|low> | <one-line restatement> | source_fact_ids=[<the [P-...] ids of the facts you synthesized THIS trait from>]';
264
+ const citeBlock = isTranscript
265
+ ? []
266
+ : [
267
+ '',
268
+ 'CITING SOURCE FACTS (required on every line):',
269
+ ' - Each input fact is headed `### [P-XXXXXXXX] <title>`. In source_fact_ids, list the [P-...] ids of the facts THIS trait was synthesized from — copy them EXACTLY as shown.',
270
+ ' - CITE the facts; do NOT count anything and do NOT invent a number. The kit sums the cited facts\' real recurrence on its own.',
271
+ ' - Cite ONLY ids that appear in the input. Never invent an id.',
272
+ ' - Example: `… | source_fact_ids=[P-AAAAAAAA, P-BBBBBBBB]`',
273
+ ];
234
274
  return [
235
275
  opener,
236
276
  '',
237
277
  jobLine,
238
278
  '',
239
279
  'For EACH cross-project fact, emit exactly one line, nothing else, in this EXACT format:',
240
- 'PERSONA CANDIDATE | target=<FILE> | section=<SECTION> | confidence=<high|medium|low> | <one-line restatement>',
280
+ outputFormat,
281
+ ...citeBlock,
241
282
  '',
242
283
  'Routing:',
243
284
  ' - target=HABITS.md → working-style habits. sections: Iteration Cadence | Destructive Operations | Communication Style',
@@ -264,12 +305,77 @@ export function parsePersonaCandidates(outputText) {
264
305
  target: target.trim(),
265
306
  section: section.trim(),
266
307
  confidence: confidence.trim().toLowerCase(),
267
- text: text.trim(),
308
+ ...splitSourceFactIds(text.trim()),
268
309
  });
269
310
  }
270
311
  return candidates;
271
312
  }
272
313
 
314
// The cite-and-sum suffix the classifier appends to a candidate line (151.3,
// ADR-0016 / D-230): `… | source_fact_ids=[P-AAAAAAAA, P-BBBBBBBB]`. It cites the
// PROJECT facts the trait was synthesized from — NOT a recurrence COUNT (5/5
// bridge-study systems reject the LLM counting; the LLM groups, code counts).
// Optional + trailing so a line WITHOUT it still parses (back-compat: the
// transcript path has no fact ids to cite, and an older classifier prompt omits
// it) — such a candidate gets `sourceFactIds: []`.
const SOURCE_FACT_IDS_RE = /\s*\|\s*source_fact_ids=\[([^\]]*)\]\s*$/;

// Fallback, tried ONLY when the canonical pattern above does not match: the same
// suffix, but with each id individually bracketed, e.g.
// `… | source_fact_ids=[[P-AAAAAAAA], [P-BBBBBBBB]]`. The canonical pattern cannot
// match that form (its list body may not contain `]`), which would silently drop
// the citation and leave the raw suffix inside the restatement text. Exactly one
// level of nesting is accepted; the two alternatives start on different
// characters, so matching stays linear.
const SOURCE_FACT_IDS_NESTED_RE = /\s*\|\s*source_fact_ids=\[((?:\[[^\[\]]*\]|[^\[\]])*)\]\s*$/;

// Decoration a classifier may wrap around a single id: brackets, quotes,
// backticks, stray whitespace. Only ever stripped from the two ENDS of an id.
const SOURCE_FACT_ID_DECORATION_RE = /^[\s\["'`]+|[\s\]"'`]+$/g;

/**
 * Split a candidate's free-text tail into `{ text, sourceFactIds }`.
 *
 * The id list is peeled off the END of the tail (the classifier appends it
 * last), leaving the human-readable restatement as `text`. Each id is:
 *   - stripped of surrounding brackets / quotes / backticks / whitespace,
 *   - UPPER-CASED, so a lowercase echo still hits an uppercase Map key,
 *   - dropped if nothing is left.
 * Duplicates are NOT removed and ids are NOT validated here; both happen where
 * the ids are resolved against the corpus index.
 *
 * @param {string} tail the text after the `confidence=` field of a candidate line
 * @returns {{text: string, sourceFactIds: string[]}} `sourceFactIds` is `[]`
 *   when the tail carries no `source_fact_ids=[…]` suffix (text returned as-is).
 */
function splitSourceFactIds(tail) {
  // Canonical form first, so every tail that parsed before parses identically.
  const m = SOURCE_FACT_IDS_RE.exec(tail) ?? SOURCE_FACT_IDS_NESTED_RE.exec(tail);
  if (!m) return { text: tail, sourceFactIds: [] };
  const text = tail.slice(0, m.index).trim();
  const sourceFactIds = m[1]
    .split(',')
    .map((s) => s.replace(SOURCE_FACT_ID_DECORATION_RE, '').toUpperCase())
    .filter(Boolean);
  return { text, sourceFactIds };
}
340
+
341
/**
 * Cite-and-sum gate arithmetic (151.3, ADR-0016 / D-230).
 *
 * Takes the ids a classifier CITED plus the corpus index of real
 * `id → recurrence_count` values and:
 *   1. normalises each cited id with `String(id).trim()`, ignoring blanks and
 *      repeats (a fact cited twice counts once; first occurrence fixes order);
 *   2. splits the unique ids into `resolved` (present in the index) and
 *      `rejected` (absent — contributes 0);
 *   3. sums the recurrence_count of the resolved ids, counting any non-finite
 *      or non-positive stored value as 1.
 *
 * Pure: performs no I/O and mutates none of its arguments.
 *
 * @param {object} [o]
 * @param {string[]} [o.sourceFactIds] ids the classifier cited (null/undefined → none)
 * @param {Map<string,number>} [o.factIndex] corpus `id → recurrence_count`;
 *   anything that is not a Map is treated as an empty index
 * @returns {{sum:number, resolved:string[], rejected:string[]}}
 */
export function resolveRecurrenceSum({ sourceFactIds = [], factIndex } = {}) {
  const lookup = factIndex instanceof Map ? factIndex : new Map();

  // A Set de-duplicates while remembering first-seen order.
  const uniqueIds = new Set();
  for (const cited of sourceFactIds ?? []) {
    const key = String(cited).trim();
    if (key) uniqueIds.add(key);
  }

  const resolved = [];
  const rejected = [];
  let sum = 0;
  for (const key of uniqueIds) {
    if (!lookup.has(key)) {
      rejected.push(key);
      continue;
    }
    resolved.push(key);
    const stored = lookup.get(key);
    // Re-floor here as well: the helper is exported, so the Map may come from a
    // caller that never normalised its values.
    const usable = Number.isFinite(stored) && stored > 0;
    sum += usable ? stored : 1;
  }
  return { sum, resolved, rejected };
}
378
+
273
379
  /**
274
380
  * Run auto-persona synthesis: classify project-tier captured facts,
275
381
  * auto-promote cross-project doctrine into the user tier (trust:medium).
@@ -315,9 +421,17 @@ export async function autoPersona(opts = {}) {
315
421
  // Task 86c (D-44): the SessionEnd path classifies the RAW TRANSCRIPT (where a
316
422
  // user's standing rule survives verbatim); the default 'facts' path classifies
317
423
  // the distilled project corpus (whole-project sweep — weekly/manual).
318
- const corpus = source === 'transcript'
319
- ? assembleTranscriptWindow({ projectRoot })
320
- : assembleProjectCorpus({ projectRoot, userDir });
424
+ // 151.3: the facts path ALSO returns a factIndex (id → recurrence_count) for the
425
+ // cite-and-sum gate. The transcript path has no citable fact ids — its candidates
426
+ // promote only via the explicit-imperative (confidence=high) fast-path, which is
427
+ // exactly the verbatim "from now on …" signal a transcript carries (D-44).
428
+ let corpus;
429
+ let factIndex = new Map();
430
+ if (source === 'transcript') {
431
+ corpus = assembleTranscriptWindow({ projectRoot });
432
+ } else {
433
+ ({ corpus, factIndex } = assembleProjectCorpus({ projectRoot, userDir }));
434
+ }
321
435
  if (!corpus) {
322
436
  const reason = source === 'transcript' ? 'no-transcript' : 'no-facts';
323
437
  return { action: 'skipped', reason, promoted: [], queued: [], duration_ms: Date.now() - t0 };
@@ -353,7 +467,17 @@ export async function autoPersona(opts = {}) {
353
467
  });
354
468
  }
355
469
 
356
- const candidates = parsePersonaCandidates(result?.outputText);
470
+ // 151.3 (cite-and-sum, D-230): resolve each candidate's cited source_fact_ids
471
+ // against the corpus factIndex (rejecting hallucinated ids) and attach the
472
+ // arithmetic recurrence SUM. THAT sum — computed in code, never by the LLM —
473
+ // is the promotion gate inside promoteCandidatesToUserTier (a medium/inferred
474
+ // trait promotes iff its cited facts recur ≥ PROMOTE_THRESHOLD). The transcript
475
+ // path's factIndex is empty, so its candidates carry sum 0 and promote only via
476
+ // the confidence=high fast-path (the verbatim stated rule a transcript holds).
477
+ const candidates = parsePersonaCandidates(result?.outputText).map((c) => ({
478
+ ...c,
479
+ recurrenceSum: resolveRecurrenceSum({ sourceFactIds: c.sourceFactIds, factIndex }).sum,
480
+ }));
357
481
  const { promoted, queued, superseded, conflicts, reviewQueuePath } = promoteCandidatesToUserTier({
358
482
  candidates,
359
483
  userDir,
@@ -531,12 +655,29 @@ export function promoteCandidatesToUserTier({ candidates, userDir, now, settings
531
655
  const conflicts = [];
532
656
  for (const c of candidates) {
533
657
  if (!VALID_TARGETS.has(c.target)) continue; // defensive: drop bad routing
534
- if (c.confidence !== 'high') {
535
- // Confidence gate (not a manual gate): low/medium route to the review
536
- // queue. They are returned in `queued` AND written to the durable
658
+ // 151.3 THE RECURRENCE GATE (ADR-0016, D-230), replacing the pure form
659
+ // gate. A candidate promotes if EITHER:
660
+ // (a) confidence=high — an EXPLICITLY-STATED standing rule (the fast-path:
661
+ // a user-attested rule promotes immediately, recurrence irrelevant); OR
662
+ // (b) its cited facts' recurrence SUM ≥ PROMOTE_THRESHOLD — a DEMONSTRATED-
663
+ // but-not-declared trait that has recurred enough to be durable.
664
+ // (b) is the Hole-A fix: pre-151.3 a demonstrated philosophy stranded here
665
+ // because it lacked "always/never" phrasing (D-177). The sum is arithmetic on
666
+ // real recurrence_count (the LLM cites; code counts). recurrenceSum is attached
667
+ // by autoPersona; callers that don't attach it (inline/explicit/drain) leave it
668
+ // undefined → those paths rely on the confidence=high clause, unchanged.
669
+ const recurrenceSum = c.recurrenceSum ?? 0;
670
+ const promotesByRecurrence = recurrenceSum >= PROMOTE_THRESHOLD;
671
+ // Door 4: name WHY this trait promoted, so a debugger can tell a recurrence-
672
+ // gated promotion (the new 151.3 path) from the explicit-imperative fast-path.
673
+ const promotedVia = c.confidence === 'high' ? 'confidence-high' : `recurrence-${recurrenceSum}`;
674
+ if (c.confidence !== 'high' && !promotesByRecurrence) {
675
+ // Not promotable: low/medium confidence AND under the recurrence threshold.
676
+ // Route to the review queue — returned in `queued` AND written to the durable
537
677
  // queue FILE below (appendPersonaReviewQueue) so they survive past the
538
678
  // response — the daily/weekly auto-drain (or a manual review) acts on them.
539
- queued.push({ target: c.target, section: c.section, text: c.text, confidence: c.confidence, reason: `confidence-${c.confidence}` });
679
+ const reason = recurrenceSum > 0 ? `recurrence-${recurrenceSum}-below-${PROMOTE_THRESHOLD}` : `confidence-${c.confidence}`;
680
+ queued.push({ target: c.target, section: c.section, text: c.text, confidence: c.confidence, reason });
540
681
  continue;
541
682
  }
542
683
 
@@ -652,8 +793,10 @@ export function promoteCandidatesToUserTier({ candidates, userDir, now, settings
652
793
  id: res.id,
653
794
  reasonCode: REASON_CODES.PERSONA_PROMOTED,
654
795
  // Carry `source` so the audit trail distinguishes an explicit
655
- // `cmk lessons promote` (user-explicit) from an auto-synthesis promote.
656
- reasonText: `${c.target} § ${c.section} (${source})`,
796
+ // `cmk lessons promote` (user-explicit) from an auto-synthesis promote, and
797
+ // `promotedVia` so a recurrence-gated promotion (151.3) is distinguishable
798
+ // from the explicit-imperative fast-path.
799
+ reasonText: `${c.target} § ${c.section} (${source}; via ${promotedVia})`,
657
800
  paths: { after: res.path },
658
801
  });
659
802
 
@@ -96,15 +96,31 @@ function coerce(raw) {
96
96
  return raw;
97
97
  }
98
98
 
99
// Exported for a direct unit test: this guard holds a security invariant
// (prototype-pollution resistance) and is analyzed by CodeQL in isolation, so
// it's tested at its own boundary, not only through configSet.
/**
 * Assign `value` at a dotted path inside `obj`, creating intermediate objects.
 *
 * `'a.b.c'` walks `obj.a.b` and sets `.c`. Any intermediate slot that is
 * null/undefined, a non-object, or an array is REPLACED by a fresh `{}`.
 *
 * The call is all-or-nothing with respect to the prototype-pollution guard:
 * every segment is checked before the first write, so a rejected path never
 * leaves `obj` half-modified (previously `setDeep({a: 5}, 'a.__proto__.x', 1)`
 * clobbered `a` with `{}` and only then threw).
 *
 * @param {object} obj        the object to mutate in place
 * @param {string} dottedKey  path segments joined by '.'
 * @param {*} value           the value stored at the final segment
 * @returns {void}
 * @throws {Error} if any segment is `__proto__`, `constructor` or `prototype`
 */
export function setDeep(obj, dottedKey, value) {
  const parts = dottedKey.split('.');

  // Pass 1 — validate the WHOLE path before touching `obj`.
  for (const p of parts) {
    if (p === '__proto__' || p === 'constructor' || p === 'prototype') {
      throw new Error(`setDeep: forbidden key segment (${p}) — prototype-pollution guard`);
    }
  }

  // Pass 2 — walk and assign.
  let cur = obj;
  for (let i = 0; i < parts.length; i++) {
    const p = parts[i];
    // Defense-in-depth: the same guard repeated at the assignment site, inside
    // the walk loop, spelled as DIRECT `===` comparisons. Per the original
    // author's note, CodeQL's js/prototype-pollution-utility only recognizes
    // that shape as a sanitizer (not a Set/helper lookup), so it stays here
    // even though pass 1 makes it unreachable at runtime.
    if (p === '__proto__' || p === 'constructor' || p === 'prototype') {
      throw new Error(`setDeep: forbidden key segment (${p}) — prototype-pollution guard`);
    }
    if (i === parts.length - 1) {
      cur[p] = value;
    } else {
      if (cur[p] == null || typeof cur[p] !== 'object' || Array.isArray(cur[p])) cur[p] = {};
      cur = cur[p];
    }
  }
}
109
125
 
110
126
  /**
@@ -54,6 +54,8 @@ import { hashContent } from './content-hash.mjs';
54
54
  import { nowIso, appendAuditEntry, REASON_CODES } from './audit-log.mjs';
55
55
  import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
56
56
  import { generateId } from '@lh8ppl/cmk-canonicalize';
57
+ import { applyTrustSignal } from './trust-signal.mjs';
58
+ import { openIndexDb } from './index-db.mjs';
57
59
 
58
60
  // Trust ordering. Higher number = higher trust.
59
61
  const TRUST_LEVELS = Object.freeze({
@@ -825,6 +827,22 @@ export function mergeScratchpadBullets({
825
827
  },
826
828
  });
827
829
 
830
+ // Task 151.12 — merge-both SUPERSEDES both originals → DAMPEN their trust_score
831
+ // (the supersession passive signal; closes the merge-path gap 151.8 deferred).
832
+ // Best-effort overlay — never breaks the merge. One shared index-db handle for
833
+ // both dampens (avoid open/close per id).
834
+ try {
835
+ const sigDb = openIndexDb({ projectRoot });
836
+ try {
837
+ applyTrustSignal({ id: idA, event: 'dampen', db: sigDb });
838
+ applyTrustSignal({ id: idB, event: 'dampen', db: sigDb });
839
+ } finally {
840
+ sigDb.close();
841
+ }
842
+ } catch {
843
+ // best-effort: the trust dampen must never break the merge.
844
+ }
845
+
828
846
  return {
829
847
  action: 'merged',
830
848
  id: newId,
@@ -177,3 +177,42 @@ export function graduateForCapRelief({
177
177
  const out = lines.filter((_, i) => !removeIdx.has(i)).join('\n');
178
178
  return { text: out, graduated };
179
179
  }
180
+
181
/**
 * Mechanical cap relief for the USER-TIER PERSONA (Task 151.4, ADR-0016 §20.3).
 *
 * Reclaims bytes from a scratchpad WITHOUT dropping any non-blank line:
 *   - trailing spaces/tabs are removed from every line;
 *   - a run of two or more blank lines is reduced to one blank line.
 * Input is split on LF or CRLF and re-joined with LF, so a CRLF-authored file
 * condenses too (a plain '\n' split would leave a '\r' on each line and make
 * both steps no-ops). The result is a fixed point: condensing it again returns
 * the same string.
 *
 * Pure — no I/O. This is a best-effort reclaim only; it may not bring the file
 * under any particular budget.
 *
 * @param {string} text the scratchpad content (null/undefined is treated as '')
 * @returns {string} the condensed content
 */
export function condenseScratchpadForCapRelief(text) {
  const stripped = String(text ?? '')
    .split(/\r?\n/)
    .map((line) => line.replace(/[ \t]+$/, ''));

  // Keep every non-blank line; keep a blank line only when it opens a run
  // (i.e. it is the first line, or the line before it was not blank).
  const kept = stripped.filter(
    (line, i) => line !== '' || i === 0 || stripped[i - 1] !== '',
  );
  return kept.join('\n');
}
package/src/heat.mjs ADDED
@@ -0,0 +1,75 @@
1
// heat.mjs — the capped-recurrence promotion score (Task 151.2, ADR-0016).
//
// A fact earns promotion to the persona by RECURRENCE, not by phrasing. The
// score blends how often a fact has re-surfaced (the earned signal, CAPPED) with
// how recently (a lazy exponential decay). The cap is load-bearing: recurrence
// is a tie-breaker, never the driver — a noisy-but-trivial fact must never
// outrank a once-stated durable decision.
//
//   heat = min(recurrence_count, RECUR_CAP) * W_REC + exp(-Δhours / τ)
//
// Shapes verified against real code (the 7-system study, D-228):
//   • recency = exp(-Δhours/τ), τ=24h — MemoryOS `compute_recency`
//   • the cap = min(count, ceiling)   — MemOS `min(leaf_count*2, 20)`
//   • threshold 3                     — memclaw `min_cluster_size` (diversity
//                                       gate dropped: single-user, not a fleet)
//
// PURE: no I/O, no cron. Recency is computed AT READ from `now` + `lastAt`, so
// there is no background job mutating a stored heat value (D-169 — automatic,
// ritual-free). The caller passes `recurrence_count` (frontmatter, Task 151.1)
// and the fact's last-surfaced timestamp.

// The recurrence cap. Past this many recurrences, more adds nothing — recurrence
// is a tie-breaker, not a runaway driver (MemOS). Chosen so the recurrence band
// (0…RECUR_CAP·W_REC) is comparable to the recency band (0…1), keeping neither
// signal able to bury the other.
export const RECUR_CAP = 10;

// Weight on the (capped) recurrence term. RECUR_CAP·W_REC ≈ 1.0 so a maxed-out
// recurrence contributes about the same as a brand-new recency — the two signals
// are balanced, then the cap prevents recurrence from dominating.
export const W_REC = 0.1;

// Recency time constant τ, in hours. This is the e-folding time of
// exp(-Δh/τ), not a true half-life: at Δ = τ the recency term is e^-1 ≈ 0.368 of
// its fresh value (the half-life is τ·ln 2 ≈ 16.6h). MemoryOS uses τ=24.
export const TAU_HOURS = 24;

// Promotion threshold: a fact promotes to the persona at this many recurrences
// (memclaw min-cluster-size; diversity gate dropped for single-user). "I've
// reached this same shape 3× → it's durable."
export const PROMOTE_THRESHOLD = 3;

const MS_PER_HOUR = 3_600_000;

/**
 * Compute a fact's promotion heat. Pure — no I/O.
 *
 * @param {object} [o]
 * @param {number} [o.recurrenceCount=1] how many times this fact has surfaced;
 *   a non-finite or non-positive value counts as 1
 * @param {string|null} [o.lastAt] ISO timestamp the fact last surfaced;
 *   null / empty / unparseable → recency term 0
 * @param {number} [o.now] ms-epoch "now" (default Date.now()); injectable for
 *   tests. A non-finite value (null, NaN, a non-number) also falls back to
 *   Date.now(): `null` would otherwise coerce to 0 and clamp every dated fact
 *   to the MAXIMUM recency, and `NaN` would make the whole score NaN.
 * @returns {number} heat score: recurrence band (0…RECUR_CAP·W_REC) plus
 *   recency band (0…1); always a finite number
 */
export function computeHeat({ recurrenceCount = 1, lastAt = null, now = Date.now() } = {}) {
  const count = Number.isFinite(recurrenceCount) && recurrenceCount > 0 ? recurrenceCount : 1;
  const recurrenceTerm = Math.min(count, RECUR_CAP) * W_REC;

  // Default parameters only cover `undefined`; guard every other bad clock here.
  const nowMs = Number.isFinite(now) ? now : Date.now();

  let recencyTerm = 0;
  if (lastAt) {
    const t = Date.parse(lastAt);
    if (Number.isFinite(t)) {
      // Clamp at 0 so a timestamp in the future (clock skew) scores as "just
      // now" (term = 1) instead of exceeding the recency band.
      const deltaHours = Math.max(0, (nowMs - t) / MS_PER_HOUR);
      recencyTerm = Math.exp(-deltaHours / TAU_HOURS);
    }
  }
  return recurrenceTerm + recencyTerm;
}

/**
 * Does this fact clear the promotion threshold? (recurrence-only — the gate is
 * "seen ≥ N times", recency only orders WITHIN the promotable set.)
 *
 * @param {number} recurrenceCount
 * @returns {boolean} true iff `recurrenceCount` is a finite number that is
 *   ≥ PROMOTE_THRESHOLD (numeric strings and other non-numbers are false)
 */
export function isPromotable(recurrenceCount) {
  return Number.isFinite(recurrenceCount) && recurrenceCount >= PROMOTE_THRESHOLD;
}
package/src/index-db.mjs CHANGED
@@ -68,6 +68,12 @@ CREATE TABLE IF NOT EXISTS observations (
68
68
  body TEXT NOT NULL,
69
69
  write_source TEXT NOT NULL,
70
70
  trust TEXT NOT NULL,
71
+ -- Task 151.6 (ADR-0016 §20.2): the evolving PROTECTION field — a FLOAT seeded
72
+ -- from source (user-explicit > auto-extract) on (re)index, then moved by passive
73
+ -- outcomes (151.7/151.8). Lives ONLY here (the rebuildable index), never in
74
+ -- committed frontmatter (D-218). DEFAULT 0.5 so a migrated pre-151.6 row + any
75
+ -- insert that omits it gets a sane medium seed until the next full reindex.
76
+ trust_score REAL NOT NULL DEFAULT 0.5,
71
77
  created_at INTEGER NOT NULL,
72
78
  superseded_by TEXT REFERENCES observations(id),
73
79
  deleted_at INTEGER
@@ -189,5 +195,21 @@ export function openIndexDb({ projectRoot, dbPath } = {}) {
189
195
  db.pragma('synchronous = NORMAL');
190
196
  // Apply schema (idempotent CREATE IF NOT EXISTS).
191
197
  db.exec(INDEX_DB_SCHEMA);
198
+ // Task 151.6: non-destructive column migration. CREATE TABLE IF NOT EXISTS does
199
+ // NOT add a new column to a pre-existing table, so an index built before 151.6
200
+ // would lack `trust_score`. Add it in place (ALTER preserves all rows — the
201
+ // index is rebuildable, but we don't force a full rebuild just for a column).
202
+ // The next full reindex reseeds real values; until then existing rows carry the
203
+ // DEFAULT 0.5 (medium). Idempotent: skip if the column already exists.
204
+ migrateAddColumn(db, 'observations', 'trust_score', 'REAL NOT NULL DEFAULT 0.5');
192
205
  return db;
193
206
  }
207
+
208
/**
 * Idempotently add `column` to `table`.
 *
 * SQLite has no "ADD COLUMN IF NOT EXISTS" and a duplicate ALTER throws, so the
 * table's current columns are read from `PRAGMA table_info` first and the ALTER
 * is issued only when `column` is missing.
 *
 * `table`, `column` and `definition` are interpolated into the SQL text
 * verbatim — NOTE(review): callers are expected to pass trusted constants only.
 *
 * @param {object} db         handle exposing `prepare(sql).all()` and `exec(sql)`
 * @param {string} table      table to alter
 * @param {string} column     column name to ensure
 * @param {string} definition column type + constraints for the ALTER
 * @returns {void}
 */
function migrateAddColumn(db, table, column, definition) {
  const alreadyPresent = db
    .prepare(`PRAGMA table_info(${table})`)
    .all()
    .some(({ name }) => name === column);
  if (!alreadyPresent) {
    db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
  }
}