@lh8ppl/claude-memory-kit 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/audit-log.mjs +1 -0
- package/src/auto-persona.mjs +161 -18
- package/src/config-core.mjs +17 -15
- package/src/conflict-queue.mjs +18 -0
- package/src/graduation.mjs +39 -0
- package/src/heat.mjs +75 -0
- package/src/index-db.mjs +22 -0
- package/src/index-rebuild.mjs +67 -14
- package/src/inject-context.mjs +6 -0
- package/src/lessons-promote.mjs +95 -12
- package/src/mcp-server.mjs +10 -1
- package/src/memory-write.mjs +18 -0
- package/src/merge-facts.mjs +19 -0
- package/src/poison-guard.mjs +42 -0
- package/src/provenance.mjs +27 -0
- package/src/scratchpad.mjs +64 -25
- package/src/trust-score.mjs +120 -0
- package/src/trust-signal.mjs +73 -0
- package/src/write-fact.mjs +49 -4
package/package.json
CHANGED
package/src/audit-log.mjs
CHANGED
|
@@ -33,6 +33,7 @@ export const REASON_CODES = Object.freeze({
|
|
|
33
33
|
FACT_CREATED: 'fact-created', // writeFact: a new fact file was written (Task 123.A — the default create audit; callers emitting a richer code opt out via audit:false)
|
|
34
34
|
DUPLICATE: 'duplicate', // writeFact: same path + same id
|
|
35
35
|
DUPLICATE_ELSEWHERE: 'duplicate-elsewhere', // writeFact: different path + same id
|
|
36
|
+
RECURRENCE: 'recurrence', // writeFact: a duplicate write = the same canonical fact re-surfaced → recurrence_count bumped (Task 151.1, ADR-0016 — the capped-recurrence promotion signal)
|
|
36
37
|
INDEX_REBUILD_FAILED: 'index-rebuild-failed', // writeFact: the fact landed on disk but the best-effort INDEX.md rebuild threw (e.g. a detached auto-extract child killed mid-rebuild). Surfaces what was previously a SILENTLY swallowed catch (D-152) so a lagging committed INDEX is diagnosable; the next reindex/cmk reindex self-heals.
|
|
37
38
|
USER_REQUESTED: 'user-requested', // forget: user-initiated tombstone
|
|
38
39
|
CURATED_MERGE: 'curated-merge', // mergeFacts: explicit merge of A + B → C
|
package/src/auto-persona.mjs
CHANGED
|
@@ -50,6 +50,7 @@ import { memoryWrite } from './memory-write.mjs';
|
|
|
50
50
|
import { detectConflicts } from './conflict-queue.mjs';
|
|
51
51
|
import { appendAuditEntry, REASON_CODES } from './audit-log.mjs';
|
|
52
52
|
import { DEFAULT_COOLDOWN_MS, isCooldownActive, touchCooldownMarker } from './cooldown.mjs';
|
|
53
|
+
import { PROMOTE_THRESHOLD } from './heat.mjs';
|
|
53
54
|
|
|
54
55
|
// User-tier scratchpads auto-persona is allowed to promote into. A
|
|
55
56
|
// classifier-named target outside this set is dropped defensively (the
|
|
@@ -80,8 +81,19 @@ export const PERSONA_CANDIDATE_RE =
|
|
|
80
81
|
// Generous (facts are high-signal) but bounded; whole facts only (see below).
|
|
81
82
|
export const PERSONA_CORPUS_BYTES = 60_000;
|
|
82
83
|
|
|
83
|
-
|
|
84
|
+
// Assemble the tier-P fact corpus AND the cite-and-sum recurrence index (151.3).
|
|
85
|
+
// Returns { corpus, factIndex }:
|
|
86
|
+
// - corpus: the classifier input. Each fact is headed `### [P-XXXXXXXX] title`
|
|
87
|
+
// so the classifier has a stable HANDLE to cite in `source_fact_ids=[…]`.
|
|
88
|
+
// - factIndex: Map<id, recurrence_count> for the facts ACTUALLY in the corpus
|
|
89
|
+
// (an id dropped by the byte cap isn't citable — the LLM never saw it — so it
|
|
90
|
+
// isn't in the index either). resolveRecurrenceSum uses this to validate cited
|
|
91
|
+
// ids + sum their real recurrence_count (the gate — code counts, LLM doesn't).
|
|
92
|
+
// Scratchpad bullets have no per-bullet id/recurrence_count, so they appear in the
|
|
93
|
+
// corpus (still useful synthesis context) but contribute no citable index entries.
|
|
94
|
+
export function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
84
95
|
const sources = listObservationSources({ projectRoot, userDir });
|
|
96
|
+
// {part, id, recurrenceCount}; id/recurrenceCount null for scratchpad parts.
|
|
85
97
|
const parts = [];
|
|
86
98
|
for (const s of sources) {
|
|
87
99
|
if (s.tier !== 'P') continue;
|
|
@@ -93,10 +105,17 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
|
93
105
|
}
|
|
94
106
|
if (s.kind === 'fact') {
|
|
95
107
|
const { frontmatter, body } = parse(content);
|
|
96
|
-
const
|
|
97
|
-
|
|
108
|
+
const id = frontmatter?.id ?? null;
|
|
109
|
+
const title = frontmatter?.title ?? id ?? '';
|
|
110
|
+
// 151.1: recurrence_count is the gate signal; a fact predating the field
|
|
111
|
+
// (or with a bad value) counts as 1 — a single occurrence.
|
|
112
|
+
const rc = frontmatter?.recurrence_count;
|
|
113
|
+
const recurrenceCount = Number.isFinite(rc) && rc > 0 ? rc : 1;
|
|
114
|
+
// Lead the heading with the citable id so the classifier can echo it.
|
|
115
|
+
const head = id ? `### [${id}] ${title}` : `### ${title}`;
|
|
116
|
+
parts.push({ part: `${head}\n${(body ?? '').trim()}`, id, recurrenceCount });
|
|
98
117
|
} else {
|
|
99
|
-
parts.push((content ?? '').trim());
|
|
118
|
+
parts.push({ part: (content ?? '').trim(), id: null, recurrenceCount: null });
|
|
100
119
|
}
|
|
101
120
|
}
|
|
102
121
|
// Task 111 (F-2): BOUND the corpus. Previously this joined EVERY tier-P fact
|
|
@@ -110,9 +129,10 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
|
110
129
|
// timed-out zero. A value-ordered (trust/recency-first) accumulation is the
|
|
111
130
|
// follow-up if a large corpus drops doctrine.
|
|
112
131
|
const out = [];
|
|
132
|
+
const factIndex = new Map();
|
|
113
133
|
let used = 0;
|
|
114
134
|
let truncated = false;
|
|
115
|
-
for (const part of parts.filter(
|
|
135
|
+
for (const { part, id, recurrenceCount } of parts.filter((p) => p.part)) {
|
|
116
136
|
const cost = Buffer.byteLength(part, 'utf8') + 2; // +2 for the '\n\n' join
|
|
117
137
|
if (used + cost > PERSONA_CORPUS_BYTES) {
|
|
118
138
|
truncated = true;
|
|
@@ -120,9 +140,11 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
|
120
140
|
}
|
|
121
141
|
out.push(part);
|
|
122
142
|
used += cost;
|
|
143
|
+
// Index only facts that actually made it into the corpus (citable).
|
|
144
|
+
if (id) factIndex.set(id, recurrenceCount);
|
|
123
145
|
}
|
|
124
146
|
if (truncated) out.push('### …\n(corpus truncated — additional project facts omitted for this pass)');
|
|
125
|
-
return out.join('\n\n');
|
|
147
|
+
return { corpus: out.join('\n\n'), factIndex };
|
|
126
148
|
}
|
|
127
149
|
|
|
128
150
|
// Default size of the recent-transcript window handed to the SessionEnd persona
|
|
@@ -231,13 +253,32 @@ export function buildClassifierInstructions(source = 'facts') {
|
|
|
231
253
|
const beginMarker = isTranscript
|
|
232
254
|
? '=== BEGIN RECENT CONVERSATION ==='
|
|
233
255
|
: '=== BEGIN CAPTURED PROJECT FACTS ===';
|
|
256
|
+
// 151.3 (cite-and-sum, D-230): on the FACTS path each input fact is headed
|
|
257
|
+
// `### [P-XXXXXXXX] title`, so the classifier can CITE the facts it synthesized
|
|
258
|
+
// a trait from. It cites — it does NOT count. Code resolves the cited ids and
|
|
259
|
+
// sums their real recurrence_count (the gate). The transcript path has no
|
|
260
|
+
// citable ids, so it keeps the simpler line + the confidence fast-path only.
|
|
261
|
+
const outputFormat = isTranscript
|
|
262
|
+
? 'PERSONA CANDIDATE | target=<FILE> | section=<SECTION> | confidence=<high|medium|low> | <one-line restatement>'
|
|
263
|
+
: 'PERSONA CANDIDATE | target=<FILE> | section=<SECTION> | confidence=<high|medium|low> | <one-line restatement> | source_fact_ids=[<the [P-...] ids of the facts you synthesized THIS trait from>]';
|
|
264
|
+
const citeBlock = isTranscript
|
|
265
|
+
? []
|
|
266
|
+
: [
|
|
267
|
+
'',
|
|
268
|
+
'CITING SOURCE FACTS (required on every line):',
|
|
269
|
+
' - Each input fact is headed `### [P-XXXXXXXX] <title>`. In source_fact_ids, list the [P-...] ids of the facts THIS trait was synthesized from — copy them EXACTLY as shown.',
|
|
270
|
+
' - CITE the facts; do NOT count anything and do NOT invent a number. The kit sums the cited facts\' real recurrence on its own.',
|
|
271
|
+
' - Cite ONLY ids that appear in the input. Never invent an id.',
|
|
272
|
+
' - Example: `… | source_fact_ids=[P-AAAAAAAA, P-BBBBBBBB]`',
|
|
273
|
+
];
|
|
234
274
|
return [
|
|
235
275
|
opener,
|
|
236
276
|
'',
|
|
237
277
|
jobLine,
|
|
238
278
|
'',
|
|
239
279
|
'For EACH cross-project fact, emit exactly one line, nothing else, in this EXACT format:',
|
|
240
|
-
|
|
280
|
+
outputFormat,
|
|
281
|
+
...citeBlock,
|
|
241
282
|
'',
|
|
242
283
|
'Routing:',
|
|
243
284
|
' - target=HABITS.md → working-style habits. sections: Iteration Cadence | Destructive Operations | Communication Style',
|
|
@@ -264,12 +305,77 @@ export function parsePersonaCandidates(outputText) {
|
|
|
264
305
|
target: target.trim(),
|
|
265
306
|
section: section.trim(),
|
|
266
307
|
confidence: confidence.trim().toLowerCase(),
|
|
267
|
-
text
|
|
308
|
+
...splitSourceFactIds(text.trim()),
|
|
268
309
|
});
|
|
269
310
|
}
|
|
270
311
|
return candidates;
|
|
271
312
|
}
|
|
272
313
|
|
|
314
|
+
// The cite-and-sum suffix the classifier appends to a candidate line (151.3,
// ADR-0016 / D-230): `… | source_fact_ids=[P-AAAAAAAA, P-BBBBBBBB]`. It cites the
// PROJECT facts the trait was synthesized from — NOT a recurrence COUNT (5/5
// bridge-study systems reject the LLM counting; the LLM groups, code counts).
// Optional + trailing so a line WITHOUT it still parses (back-compat: the
// transcript path has no fact ids to cite, and an older classifier prompt omits
// it) — such a candidate gets `sourceFactIds: []` and can only promote via the
// explicit-imperative (confidence=high) fast-path.
//
// The capture runs to the LAST `]` on the line (greedy) rather than the first,
// so a classifier that echoes the ids in their heading form —
// `source_fact_ids=[[P-AAAAAAAA], [P-BBBBBBBB]]` — still matches. With a
// first-`]` capture that line failed to match at all, which silently dropped
// the citations AND left the raw suffix inside the restatement text.
const SOURCE_FACT_IDS_RE = /\s*\|\s*source_fact_ids=\[(.*)\]\s*$/;

// Wrapper characters a model may put around an individual id (heading brackets,
// quotes, backticks). None of them appear in the `P-…` id shape cited in the
// comments above, so stripping is lossless.
const SOURCE_FACT_ID_NOISE_RE = /[\[\]"'`]/g;

// Split a candidate's free-text tail into {text, sourceFactIds}. The ids are
// peeled off the END (the classifier appends them last), leaving the human-
// readable restatement as `text`. Ids are UPPER-CASED (canonical ids are always
// uppercase `P-…`; a lowercase echo from Haiku — despite "copy EXACTLY" — would
// otherwise miss the Map lookup) + de-noised (wrapper brackets/quotes removed,
// whitespace trimmed, empties dropped); the real corpus-resolution
// (rejecting hallucinations) happens in resolveRecurrenceSum.
//
// @param {string} tail  the free-text remainder of a candidate line
// @returns {{text: string, sourceFactIds: string[]}}
function splitSourceFactIds(tail) {
  const match = SOURCE_FACT_IDS_RE.exec(tail);
  // No citation suffix: the whole tail is the restatement.
  if (!match) return { text: tail, sourceFactIds: [] };

  const text = tail.slice(0, match.index).trim();
  const sourceFactIds = match[1]
    .split(',')
    .map((token) => token.replace(SOURCE_FACT_ID_NOISE_RE, '').trim().toUpperCase())
    .filter(Boolean);
  return { text, sourceFactIds };
}
|
|
340
|
+
|
|
341
|
+
/**
 * THE cite-and-sum gate arithmetic (151.3, ADR-0016 / D-230). Given the ids the
 * classifier CITED and the project corpus's real `{id → recurrence_count}` index,
 * resolve the cited ids against the corpus (DROP any the LLM hallucinated) and SUM
 * their real recurrence_count. That deterministic sum — never an LLM count — gates
 * promotion. Repeated cited ids are de-duplicated (a fact cited twice counts once).
 *
 * Input tolerance (this helper is exported, so it cannot assume the parser ran):
 *   - a cited id is matched exactly first, then by its UPPER-CASED form, since
 *     canonical ids are uppercase `P-…`; de-duplication uses the matched key, so
 *     `p-a` and `P-A` count once;
 *   - a single string is treated as one cited id (not iterated per character);
 *   - null/undefined entries and any other non-array input contribute nothing.
 *
 * PURE: no I/O. The factIndex is assembled by assembleProjectCorpus.
 *
 * @param {object} o
 * @param {string[]} [o.sourceFactIds]  ids the classifier cited
 * @param {Map<string,number>} [o.factIndex]  real corpus `id → recurrence_count`
 * @returns {{sum:number, resolved:string[], rejected:string[]}}
 *   `resolved` holds the index keys that matched; `rejected` holds the cited
 *   ids (trimmed, as cited) that matched nothing.
 */
export function resolveRecurrenceSum({ sourceFactIds = [], factIndex } = {}) {
  const index = factIndex instanceof Map ? factIndex : new Map();

  let cited = [];
  if (Array.isArray(sourceFactIds)) cited = sourceFactIds;
  else if (typeof sourceFactIds === 'string') cited = [sourceFactIds];

  const resolved = [];
  const rejected = [];
  const seen = new Set();
  let sum = 0;

  for (const rawId of cited) {
    if (rawId == null) continue;
    const citedId = String(rawId).trim();
    if (!citedId) continue;

    // Prefer the exact key; otherwise fall back to the canonical uppercase form.
    const key = index.has(citedId) ? citedId : citedId.toUpperCase();
    if (seen.has(key)) continue;
    seen.add(key);

    if (!index.has(key)) {
      rejected.push(citedId); // hallucinated / not in the synthesis corpus → contributes 0
      continue;
    }

    resolved.push(key);
    const n = index.get(key);
    // Floor again here (assembleProjectCorpus already floors): this helper is
    // exported + pure, so a direct caller could pass a junk Map — a real fact is
    // always worth ≥1.
    sum += Number.isFinite(n) && n > 0 ? n : 1;
  }

  return { sum, resolved, rejected };
}
|
|
378
|
+
|
|
273
379
|
/**
|
|
274
380
|
* Run auto-persona synthesis: classify project-tier captured facts,
|
|
275
381
|
* auto-promote cross-project doctrine into the user tier (trust:medium).
|
|
@@ -315,9 +421,17 @@ export async function autoPersona(opts = {}) {
|
|
|
315
421
|
// Task 86c (D-44): the SessionEnd path classifies the RAW TRANSCRIPT (where a
|
|
316
422
|
// user's standing rule survives verbatim); the default 'facts' path classifies
|
|
317
423
|
// the distilled project corpus (whole-project sweep — weekly/manual).
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
424
|
+
// 151.3: the facts path ALSO returns a factIndex (id → recurrence_count) for the
|
|
425
|
+
// cite-and-sum gate. The transcript path has no citable fact ids — its candidates
|
|
426
|
+
// promote only via the explicit-imperative (confidence=high) fast-path, which is
|
|
427
|
+
// exactly the verbatim "from now on …" signal a transcript carries (D-44).
|
|
428
|
+
let corpus;
|
|
429
|
+
let factIndex = new Map();
|
|
430
|
+
if (source === 'transcript') {
|
|
431
|
+
corpus = assembleTranscriptWindow({ projectRoot });
|
|
432
|
+
} else {
|
|
433
|
+
({ corpus, factIndex } = assembleProjectCorpus({ projectRoot, userDir }));
|
|
434
|
+
}
|
|
321
435
|
if (!corpus) {
|
|
322
436
|
const reason = source === 'transcript' ? 'no-transcript' : 'no-facts';
|
|
323
437
|
return { action: 'skipped', reason, promoted: [], queued: [], duration_ms: Date.now() - t0 };
|
|
@@ -353,7 +467,17 @@ export async function autoPersona(opts = {}) {
|
|
|
353
467
|
});
|
|
354
468
|
}
|
|
355
469
|
|
|
356
|
-
|
|
470
|
+
// 151.3 (cite-and-sum, D-230): resolve each candidate's cited source_fact_ids
|
|
471
|
+
// against the corpus factIndex (rejecting hallucinated ids) and attach the
|
|
472
|
+
// arithmetic recurrence SUM. THAT sum — computed in code, never by the LLM —
|
|
473
|
+
// is the promotion gate inside promoteCandidatesToUserTier (a medium/inferred
|
|
474
|
+
// trait promotes iff its cited facts recur ≥ PROMOTE_THRESHOLD). The transcript
|
|
475
|
+
// path's factIndex is empty, so its candidates carry sum 0 and promote only via
|
|
476
|
+
// the confidence=high fast-path (the verbatim stated rule a transcript holds).
|
|
477
|
+
const candidates = parsePersonaCandidates(result?.outputText).map((c) => ({
|
|
478
|
+
...c,
|
|
479
|
+
recurrenceSum: resolveRecurrenceSum({ sourceFactIds: c.sourceFactIds, factIndex }).sum,
|
|
480
|
+
}));
|
|
357
481
|
const { promoted, queued, superseded, conflicts, reviewQueuePath } = promoteCandidatesToUserTier({
|
|
358
482
|
candidates,
|
|
359
483
|
userDir,
|
|
@@ -531,12 +655,29 @@ export function promoteCandidatesToUserTier({ candidates, userDir, now, settings
|
|
|
531
655
|
const conflicts = [];
|
|
532
656
|
for (const c of candidates) {
|
|
533
657
|
if (!VALID_TARGETS.has(c.target)) continue; // defensive: drop bad routing
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
658
|
+
// 151.3 — THE RECURRENCE GATE (ADR-0016, D-230), replacing the pure form
|
|
659
|
+
// gate. A candidate promotes if EITHER:
|
|
660
|
+
// (a) confidence=high — an EXPLICITLY-STATED standing rule (the fast-path:
|
|
661
|
+
// a user-attested rule promotes immediately, recurrence irrelevant); OR
|
|
662
|
+
// (b) its cited facts' recurrence SUM ≥ PROMOTE_THRESHOLD — a DEMONSTRATED-
|
|
663
|
+
// but-not-declared trait that has recurred enough to be durable.
|
|
664
|
+
// (b) is the Hole-A fix: pre-151.3 a demonstrated philosophy stranded here
|
|
665
|
+
// because it lacked "always/never" phrasing (D-177). The sum is arithmetic on
|
|
666
|
+
// real recurrence_count (the LLM cites; code counts). recurrenceSum is attached
|
|
667
|
+
// by autoPersona; callers that don't attach it (inline/explicit/drain) leave it
|
|
668
|
+
// undefined → those paths rely on the confidence=high clause, unchanged.
|
|
669
|
+
const recurrenceSum = c.recurrenceSum ?? 0;
|
|
670
|
+
const promotesByRecurrence = recurrenceSum >= PROMOTE_THRESHOLD;
|
|
671
|
+
// Door 4: name WHY this trait promoted, so a debugger can tell a recurrence-
|
|
672
|
+
// gated promotion (the new 151.3 path) from the explicit-imperative fast-path.
|
|
673
|
+
const promotedVia = c.confidence === 'high' ? 'confidence-high' : `recurrence-${recurrenceSum}`;
|
|
674
|
+
if (c.confidence !== 'high' && !promotesByRecurrence) {
|
|
675
|
+
// Not promotable: low/medium confidence AND under the recurrence threshold.
|
|
676
|
+
// Route to the review queue — returned in `queued` AND written to the durable
|
|
537
677
|
// queue FILE below (appendPersonaReviewQueue) so they survive past the
|
|
538
678
|
// response — the daily/weekly auto-drain (or a manual review) acts on them.
|
|
539
|
-
|
|
679
|
+
const reason = recurrenceSum > 0 ? `recurrence-${recurrenceSum}-below-${PROMOTE_THRESHOLD}` : `confidence-${c.confidence}`;
|
|
680
|
+
queued.push({ target: c.target, section: c.section, text: c.text, confidence: c.confidence, reason });
|
|
540
681
|
continue;
|
|
541
682
|
}
|
|
542
683
|
|
|
@@ -652,8 +793,10 @@ export function promoteCandidatesToUserTier({ candidates, userDir, now, settings
|
|
|
652
793
|
id: res.id,
|
|
653
794
|
reasonCode: REASON_CODES.PERSONA_PROMOTED,
|
|
654
795
|
// Carry `source` so the audit trail distinguishes an explicit
|
|
655
|
-
// `cmk lessons promote` (user-explicit) from an auto-synthesis promote
|
|
656
|
-
|
|
796
|
+
// `cmk lessons promote` (user-explicit) from an auto-synthesis promote, and
|
|
797
|
+
// `promotedVia` so a recurrence-gated promotion (151.3) is distinguishable
|
|
798
|
+
// from the explicit-imperative fast-path.
|
|
799
|
+
reasonText: `${c.target} § ${c.section} (${source}; via ${promotedVia})`,
|
|
657
800
|
paths: { after: res.path },
|
|
658
801
|
});
|
|
659
802
|
|
package/src/config-core.mjs
CHANGED
|
@@ -100,25 +100,27 @@ function coerce(raw) {
|
|
|
100
100
|
// (prototype-pollution resistance) and is analyzed by CodeQL in isolation, so
|
|
101
101
|
// it's tested at its own boundary, not only through configSet.
|
|
102
102
|
/**
 * Assign `value` at a dotted path inside `obj`, creating intermediate plain
 * objects as needed. An intermediate that is missing, not an object, or an
 * array is replaced with a fresh `{}`.
 *
 * Prototype-pollution guard: any `__proto__` / `constructor` / `prototype`
 * segment is refused. All segments are validated BEFORE the first write, so a
 * rejected key never leaves `obj` partially mutated.
 *
 * @param {object} obj        the object to mutate in place
 * @param {string} dottedKey  path such as `a.b.c`
 * @param {*} value           value stored at the final segment
 * @returns {void}
 * @throws {Error} when any segment is a forbidden (prototype-polluting) name
 */
export function setDeep(obj, dottedKey, value) {
  const parts = dottedKey.split('.');
  // Validation pre-pass: throw before touching `obj`. The in-loop guard alone
  // would fire only after earlier segments had already been created.
  for (const segment of parts) {
    if (segment === '__proto__' || segment === 'constructor' || segment === 'prototype') {
      throw new Error(`setDeep: forbidden key segment (${segment}) — prototype-pollution guard`);
    }
  }
  let cur = obj;
  for (let i = 0; i < parts.length; i++) {
    const p = parts[i];
    // Defense-in-depth: refuse prototype-polluting segments at the assignment
    // site, inside the walk loop. CodeQL's js/prototype-pollution-utility
    // recognizes a sanitizer only as DIRECT `===` comparisons against the
    // dangerous names (per its query-help example `if (key === "__proto__" ||
    // key === "constructor") ...`), NOT a Set/helper lookup — so this is spelled
    // out explicitly. (FORBIDDEN_KEYS keeps the same names for the entry-point
    // guards; this in-loop form is what the static analyzer reads.) It is
    // unreachable after the pre-pass above and kept for the analyzer.
    if (p === '__proto__' || p === 'constructor' || p === 'prototype') {
      throw new Error(`setDeep: forbidden key segment (${p}) — prototype-pollution guard`);
    }
    if (i === parts.length - 1) {
      cur[p] = value;
    } else {
      if (cur[p] == null || typeof cur[p] !== 'object' || Array.isArray(cur[p])) cur[p] = {};
      cur = cur[p];
    }
  }
}
|
|
123
125
|
|
|
124
126
|
/**
|
package/src/conflict-queue.mjs
CHANGED
|
@@ -54,6 +54,8 @@ import { hashContent } from './content-hash.mjs';
|
|
|
54
54
|
import { nowIso, appendAuditEntry, REASON_CODES } from './audit-log.mjs';
|
|
55
55
|
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
56
56
|
import { generateId } from '@lh8ppl/cmk-canonicalize';
|
|
57
|
+
import { applyTrustSignal } from './trust-signal.mjs';
|
|
58
|
+
import { openIndexDb } from './index-db.mjs';
|
|
57
59
|
|
|
58
60
|
// Trust ordering. Higher number = higher trust.
|
|
59
61
|
const TRUST_LEVELS = Object.freeze({
|
|
@@ -825,6 +827,22 @@ export function mergeScratchpadBullets({
|
|
|
825
827
|
},
|
|
826
828
|
});
|
|
827
829
|
|
|
830
|
+
// Task 151.12 — merge-both SUPERSEDES both originals → DAMPEN their trust_score
|
|
831
|
+
// (the supersession passive signal; closes the merge-path gap 151.8 deferred).
|
|
832
|
+
// Best-effort overlay — never breaks the merge. One shared index-db handle for
|
|
833
|
+
// both dampens (avoid open/close per id).
|
|
834
|
+
try {
|
|
835
|
+
const sigDb = openIndexDb({ projectRoot });
|
|
836
|
+
try {
|
|
837
|
+
applyTrustSignal({ id: idA, event: 'dampen', db: sigDb });
|
|
838
|
+
applyTrustSignal({ id: idB, event: 'dampen', db: sigDb });
|
|
839
|
+
} finally {
|
|
840
|
+
sigDb.close();
|
|
841
|
+
}
|
|
842
|
+
} catch {
|
|
843
|
+
// best-effort: the trust dampen must never break the merge.
|
|
844
|
+
}
|
|
845
|
+
|
|
828
846
|
return {
|
|
829
847
|
action: 'merged',
|
|
830
848
|
id: newId,
|
package/src/graduation.mjs
CHANGED
|
@@ -177,3 +177,42 @@ export function graduateForCapRelief({
|
|
|
177
177
|
const out = lines.filter((_, i) => !removeIdx.has(i)).join('\n');
|
|
178
178
|
return { text: out, graduated };
|
|
179
179
|
}
|
|
180
|
+
|
|
181
|
+
/**
 * Mechanical cap relief for the USER-TIER PERSONA (Task 151.4, ADR-0016 §20.3).
 *
 * Persona files (USER/HABITS/LESSONS.md) must never shed their high-trust
 * bullets to un-injected `fragments/` — a promoted trait stranded there would
 * disappear at cold-open (Hole B). So bytes are reclaimed without removing
 * content, by two whitespace-only transforms:
 *   - strip trailing spaces/tabs from every line,
 *   - squeeze each run of two or more blank lines to exactly one.
 * Every bullet survives. The reclaim is best-effort: per the load-cap-not-
 * write-cap invariant (D-61) the file may still exceed the inject budget, and
 * the snapshot load-cap + sweep order (151.5) then keeps the high-trust traits
 * injected. A real LLM tighter-rewrite belongs to Task 95, off the synchronous
 * append hot path (an inline Haiku call here would be a composition + latency
 * hazard). PURE: no I/O. Idempotent (condensing already-tight text is a no-op).
 *
 * @param {string} text  the scratchpad content
 * @returns {string} the condensed content (same bullets, fewer bytes)
 */
export function condenseScratchpadForCapRelief(text) {
  // CRLF-tolerant (Task 139): splitting on /\r?\n/ keeps a Windows-authored file
  // from carrying a stray '\r' per line, which would defeat both transforms.
  // Output is LF-joined, as consolidate()/writeBullet() already normalize.
  const kept = [];
  let previousWasBlank = false;
  for (const rawLine of String(text ?? '').split(/\r?\n/)) {
    const line = rawLine.replace(/[ \t]+$/, ''); // trailing whitespace
    const isBlank = line === '';
    // Emit everything except a blank that directly follows another blank.
    if (!(isBlank && previousWasBlank)) kept.push(line);
    previousWasBlank = isBlank;
  }
  return kept.join('\n');
}
|
package/src/heat.mjs
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// heat.mjs — the capped-recurrence promotion score (Task 151.2, ADR-0016).
|
|
2
|
+
//
|
|
3
|
+
// A fact earns promotion to the persona by RECURRENCE, not by phrasing. The
|
|
4
|
+
// score blends how often a fact has re-surfaced (the earned signal, CAPPED) with
|
|
5
|
+
// how recently (a lazy exponential decay). The cap is load-bearing: recurrence
|
|
6
|
+
// is a tie-breaker, never the driver — a noisy-but-trivial fact must never
|
|
7
|
+
// outrank a once-stated durable decision.
|
|
8
|
+
//
|
|
9
|
+
// heat = min(recurrence_count, RECUR_CAP) * W_REC + exp(-Δhours / τ)
|
|
10
|
+
//
|
|
11
|
+
// Shapes verified against real code (the 7-system study, D-228):
|
|
12
|
+
// • recency = exp(-Δhours/τ), τ=24h — MemoryOS `compute_recency`
|
|
13
|
+
// • the cap = min(count, ceiling) — MemOS `min(leaf_count*2, 20)`
|
|
14
|
+
// • threshold 3 — memclaw `min_cluster_size` (diversity
|
|
15
|
+
// gate dropped: single-user, not a fleet)
|
|
16
|
+
//
|
|
17
|
+
// PURE: no I/O, no cron. Recency is computed AT READ from `now` + `lastAt`, so
|
|
18
|
+
// there is no background job mutating a stored heat value (D-169 — automatic,
|
|
19
|
+
// ritual-free). The caller passes `recurrence_count` (frontmatter, Task 151.1)
|
|
20
|
+
// and the fact's last-surfaced timestamp.
|
|
21
|
+
|
|
22
|
+
// The recurrence cap. Past this many recurrences, more adds nothing — recurrence
// is a tie-breaker, not a runaway driver (MemOS). Chosen so the recurrence band
// (0…RECUR_CAP·W_REC) is comparable to the recency band (0…1), keeping neither
// signal able to bury the other.
export const RECUR_CAP = 10;

// Weight on the (capped) recurrence term. RECUR_CAP·W_REC ≈ 1.0 so a maxed-out
// recurrence contributes about the same as a brand-new recency — the two signals
// are balanced, then the cap prevents recurrence from dominating.
export const W_REC = 0.1;

// Recency time constant (hours). exp(-Δh/τ): at τ hours the recency term is
// e^-1 ≈ 0.368 of its fresh value (so τ is the e-folding time, not a true
// half-life). MemoryOS uses τ=24.
export const TAU_HOURS = 24;

// Promotion threshold: a fact promotes to the persona at this many recurrences
// (memclaw min-cluster-size; diversity gate dropped for single-user). "I've
// reached this same shape 3× → it's durable."
export const PROMOTE_THRESHOLD = 3;

/**
 * Compute a fact's promotion heat. Pure — no I/O.
 *
 *   heat = min(recurrenceCount, RECUR_CAP) * W_REC + exp(-Δhours / TAU_HOURS)
 *
 * @param {object} o
 * @param {number} o.recurrenceCount  how many times this fact has surfaced (≥1);
 *   a non-finite or non-positive value counts as 1
 * @param {string|null} [o.lastAt]  ISO timestamp the fact last surfaced; null/garbage → recency 0
 * @param {number} [o.now]  ms epoch "now" (default Date.now()); injectable for tests.
 *   A non-finite value (NaN, null, a string, …) falls back to Date.now(). Used
 *   as-is, NaN would make the whole score NaN, and null would coerce to epoch 0
 *   and be clamped as "future skew" into a full recency of 1.
 * @returns {number} heat score (recurrence band + recency band); always finite
 */
export function computeHeat({ recurrenceCount = 1, lastAt = null, now = Date.now() } = {}) {
  const count = Number.isFinite(recurrenceCount) && recurrenceCount > 0 ? recurrenceCount : 1;
  const recurrenceTerm = Math.min(count, RECUR_CAP) * W_REC;

  // The parameter default only covers `undefined`; guard every other bad value.
  const nowMs = Number.isFinite(now) ? now : Date.now();

  let recencyTerm = 0;
  if (lastAt) {
    const t = Date.parse(lastAt);
    if (Number.isFinite(t)) {
      const deltaHours = Math.max(0, (nowMs - t) / 3_600_000); // clamp future skew → ≤1
      recencyTerm = Math.exp(-deltaHours / TAU_HOURS);
    }
  }
  return recurrenceTerm + recencyTerm;
}

/**
 * Does this fact clear the promotion threshold? (recurrence-only — the gate is
 * "seen ≥ N times", recency only orders WITHIN the promotable set.)
 *
 * @param {number} recurrenceCount
 * @returns {boolean} false for any non-finite or non-number input
 */
export function isPromotable(recurrenceCount) {
  return Number.isFinite(recurrenceCount) && recurrenceCount >= PROMOTE_THRESHOLD;
}
|
package/src/index-db.mjs
CHANGED
|
@@ -68,6 +68,12 @@ CREATE TABLE IF NOT EXISTS observations (
|
|
|
68
68
|
body TEXT NOT NULL,
|
|
69
69
|
write_source TEXT NOT NULL,
|
|
70
70
|
trust TEXT NOT NULL,
|
|
71
|
+
-- Task 151.6 (ADR-0016 §20.2): the evolving PROTECTION field — a FLOAT seeded
|
|
72
|
+
-- from source (user-explicit > auto-extract) on (re)index, then moved by passive
|
|
73
|
+
-- outcomes (151.7/151.8). Lives ONLY here (the rebuildable index), never in
|
|
74
|
+
-- committed frontmatter (D-218). DEFAULT 0.5 so a migrated pre-151.6 row + any
|
|
75
|
+
-- insert that omits it gets a sane medium seed until the next full reindex.
|
|
76
|
+
trust_score REAL NOT NULL DEFAULT 0.5,
|
|
71
77
|
created_at INTEGER NOT NULL,
|
|
72
78
|
superseded_by TEXT REFERENCES observations(id),
|
|
73
79
|
deleted_at INTEGER
|
|
@@ -189,5 +195,21 @@ export function openIndexDb({ projectRoot, dbPath } = {}) {
|
|
|
189
195
|
db.pragma('synchronous = NORMAL');
|
|
190
196
|
// Apply schema (idempotent CREATE IF NOT EXISTS).
|
|
191
197
|
db.exec(INDEX_DB_SCHEMA);
|
|
198
|
+
// Task 151.6: non-destructive column migration. CREATE TABLE IF NOT EXISTS does
|
|
199
|
+
// NOT add a new column to a pre-existing table, so an index built before 151.6
|
|
200
|
+
// would lack `trust_score`. Add it in place (ALTER preserves all rows — the
|
|
201
|
+
// index is rebuildable, but we don't force a full rebuild just for a column).
|
|
202
|
+
// The next full reindex reseeds real values; until then existing rows carry the
|
|
203
|
+
// DEFAULT 0.5 (medium). Idempotent: skip if the column already exists.
|
|
204
|
+
migrateAddColumn(db, 'observations', 'trust_score', 'REAL NOT NULL DEFAULT 0.5');
|
|
192
205
|
return db;
|
|
193
206
|
}
|
|
207
|
+
|
|
208
|
+
// Add `column` to `table` if it isn't already present (idempotent). SQLite has no
// "ADD COLUMN IF NOT EXISTS", so we check PRAGMA table_info first — a duplicate
// ALTER would throw.
//
// The check and the ALTER are two separate statements, so another process
// opening the same index can add the column in between. The loser of that race
// gets SQLite's "duplicate column name" error, which means the column now
// exists — exactly the goal — so that one error is treated as success. Every
// other failure is rethrown unchanged.
//
// `table`, `column` and `definition` are interpolated into SQL verbatim
// (identifiers cannot be bound as parameters): pass trusted literals only.
//
// @param {object} db          handle exposing `prepare(sql).all()` and `exec(sql)`
// @param {string} table       table name
// @param {string} column      column to ensure
// @param {string} definition  column type + constraints, e.g. `REAL NOT NULL DEFAULT 0.5`
function migrateAddColumn(db, table, column, definition) {
  const existing = db.prepare(`PRAGMA table_info(${table})`).all();
  if (existing.some((c) => c.name === column)) return;
  try {
    db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
  } catch (err) {
    // Lost the race to a concurrent opener: the column exists, nothing to do.
    if (/duplicate column name/i.test(String(err?.message ?? ''))) return;
    throw err;
  }
}
|