@lh8ppl/claude-memory-kit 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/bin/cmk-capture-prompt.mjs +17 -17
- package/bin/cmk-capture-turn.mjs +22 -21
- package/bin/cmk-compress-session.mjs +2 -2
- package/bin/cmk-inject-context.mjs +11 -11
- package/bin/cmk-observe-edit.mjs +17 -16
- package/package.json +1 -1
- package/src/audit-log.mjs +1 -0
- package/src/auto-extract.mjs +258 -6
- package/src/auto-persona.mjs +40 -8
- package/src/capture-turn.mjs +48 -1
- package/src/compress-session.mjs +89 -26
- package/src/compressor.mjs +1 -1
- package/src/conflict-queue.mjs +14 -0
- package/src/doctor.mjs +3 -3
- package/src/forget.mjs +29 -0
- package/src/graduation.mjs +1 -1
- package/src/index-rebuild.mjs +42 -0
- package/src/inject-context.mjs +5 -1
- package/src/install.mjs +29 -6
- package/src/lazy-compress.mjs +58 -9
- package/src/mcp-server.mjs +353 -124
- package/src/merge-facts.mjs +4 -0
- package/src/persona-portability.mjs +24 -1
- package/src/read-core.mjs +87 -0
- package/src/register-crons.mjs +64 -33
- package/src/remember-core.mjs +91 -0
- package/src/review-queue.mjs +13 -0
- package/src/rich-fact.mjs +46 -0
- package/src/settings-hooks.mjs +56 -2
- package/src/subcommands.mjs +419 -182
- package/src/weekly-curate.mjs +5 -0
- package/src/write-fact.mjs +25 -1
- package/template/.claude/skills/memory-write/SKILL.md +52 -35
- package/template/.gitignore.fragment +9 -3
- package/template/CLAUDE.md.template +2 -2
- package/template/docs/journey/journey-log.md.template +1 -1
package/src/auto-extract.mjs
CHANGED
|
@@ -48,8 +48,11 @@ import {
|
|
|
48
48
|
appendFileSync,
|
|
49
49
|
} from 'node:fs';
|
|
50
50
|
import { join, dirname } from 'node:path';
|
|
51
|
+
import { createHash } from 'node:crypto';
|
|
51
52
|
import { generateId } from '@lh8ppl/cmk-canonicalize';
|
|
52
53
|
import { memoryWrite } from './memory-write.mjs';
|
|
54
|
+
import { writeFact } from './write-fact.mjs';
|
|
55
|
+
import { buildRichFactBody, slugifyFact } from './rich-fact.mjs';
|
|
53
56
|
import { HaikuTimeoutError } from './compressor.mjs';
|
|
54
57
|
import { pidIsAlive } from './lock-discipline.mjs';
|
|
55
58
|
import { nowIso } from './audit-log.mjs';
|
|
@@ -284,6 +287,21 @@ export function buildExtractionInstructions() {
|
|
|
284
287
|
'',
|
|
285
288
|
'Note: assistant-origin candidates are auto-demoted one trust level before routing (HIGH → MEDIUM → LOW → discarded). This is intentional — assistant inferences need user review. Emit your honest trust assessment; the routing layer handles demotion.',
|
|
286
289
|
'',
|
|
290
|
+
'ALSO — rich fact files (durable project KNOWLEDGE). This is a SEPARATE output from the terse TRUST_ lines. When a turn reveals a durable, substantive piece of project knowledge worth a FULL record — a setup/configuration fact (trigger 3), a project convention (trigger 4), a completed multi-step workflow worth recording (trigger 5), or a tool quirk/workaround (trigger 6) — emit a BEGIN_FACT block (below) INSTEAD OF a terse TRUST_ line for it. Keep terse TRUST_ lines for the LIGHTER signals: user corrections and discovered preferences (triggers 1–2) and active threads. Emit each fact EITHER as a rich BEGIN_FACT block OR as a terse TRUST_ line — NEVER both.',
|
|
291
|
+
'Format (one block per durable fact):',
|
|
292
|
+
' BEGIN_FACT',
|
|
293
|
+
' type: project',
|
|
294
|
+
' title: <short Title-Case headline, ≤ 80 chars>',
|
|
295
|
+
' body: <what is true; if it has parts, give a short labelled markdown breakdown over multiple lines, NOT one vague sentence>',
|
|
296
|
+
' why: <why it is true / why it matters — the rationale a future session needs>',
|
|
297
|
+
' how: <how the next session should apply it>',
|
|
298
|
+
' END_FACT',
|
|
299
|
+
'Rules for BEGIN_FACT blocks:',
|
|
300
|
+
' - body may span multiple lines (markdown bullets are encouraged when the knowledge has parts — make the saved fact genuinely useful to a future session, at least as detailed as a careful hand-written note). Write it as plain markdown on the lines after `body:` — do NOT use a YAML block scalar (`|` or `>`).',
|
|
301
|
+
' - title AND body are required; why/how are strongly preferred but optional. type defaults to project.',
|
|
302
|
+
' - Do NOT invent facts; synthesize only what the turn shows. Never put a secret, token, password, or key in a block.',
|
|
303
|
+
' - These facts are saved automatically (no review step), so be selective: only genuinely durable knowledge, at most a few per turn.',
|
|
304
|
+
'',
|
|
287
305
|
'ALSO — cross-project doctrine. This is a REQUIRED, PER-FACT pass, separate from the TRUST_ lines above. Re-scan the SAME turn for EVERY fact that expresses how this user works in ALL their projects (tooling habits, how they structure their work, communication / process style — NOT specifics that belong to this ONE project, like a particular value, name, or detail that would not carry to their other projects). **For EACH such cross-project fact, emit its OWN PERSONA CANDIDATE line — one line per fact. If the turn states THREE cross-project rules, emit THREE PERSONA CANDIDATE lines. Never collapse several rules into one line, and never skip a rule because the turn is busy or already has TRUST_ lines.** Format (one line per cross-project fact):',
|
|
288
306
|
' PERSONA CANDIDATE | target=<HABITS.md|LESSONS.md|USER.md> | section=<Section> | confidence=<high|medium|low> | <one-line restatement>',
|
|
289
307
|
' - HABITS.md → sections: Iteration Cadence | Destructive Operations | Communication Style',
|
|
@@ -310,7 +328,11 @@ function buildExtractionPrompt({ userTurn, assistantTurn, dedupContext }) {
|
|
|
310
328
|
return sections.join('\n');
|
|
311
329
|
}
|
|
312
330
|
|
|
313
|
-
|
|
331
|
+
// Exported for the live-Haiku smoke (spawn-smoke-auto-extract-rich.test.js),
|
|
332
|
+
// which asserts the enriched prompt still elicits parseable terse OR rich
|
|
333
|
+
// output from real Haiku. The terse format is the extraction prompt's contract,
|
|
334
|
+
// same as parseRichFacts below.
|
|
335
|
+
export function parseCandidates(haikuOutput) {
|
|
314
336
|
if (!haikuOutput || typeof haikuOutput !== 'string') return [];
|
|
315
337
|
const lines = haikuOutput.split('\n');
|
|
316
338
|
const candidates = [];
|
|
@@ -328,6 +350,127 @@ function parseCandidates(haikuOutput) {
|
|
|
328
350
|
return candidates;
|
|
329
351
|
}
|
|
330
352
|
|
|
353
|
+
// --- Rich-fact parser (Task 103) ------------------------------------
|
|
354
|
+
|
|
355
|
+
// Durable project KNOWLEDGE (the six triggers' config / convention / workflow /
|
|
356
|
+
// quirk facts) is emitted by Haiku as a fenced block, parsed here into the
|
|
357
|
+
// fields writeFact() needs. Lives next to parseCandidates + buildExtraction-
|
|
358
|
+
// Instructions — the format and its parser stay together (same as the terse
|
|
359
|
+
// TRUST_ surface). See design §6.4.
|
|
360
|
+
//
|
|
361
|
+
// BEGIN_FACT
|
|
362
|
+
// type: project
|
|
363
|
+
// title: <short title>
|
|
364
|
+
// body: <summary; MAY continue as markdown bullets on following lines>
|
|
365
|
+
// why: <rationale>
|
|
366
|
+
// how: <how to apply>
|
|
367
|
+
// END_FACT
|
|
368
|
+
//
|
|
369
|
+
// A field's value continues across lines until the next recognized key or the
|
|
370
|
+
// block close — so `body` can hold a multi-line structured breakdown (the
|
|
371
|
+
// native-parity bar). type defaults to 'project' when absent/invalid; a block
|
|
372
|
+
// missing title OR body is skipped (writeFact requires both).
|
|
373
|
+
// Fact types a BEGIN_FACT block may declare. parseRichFactBlock lowercases the
// parsed `type` and falls back to 'project' for anything outside this set.
const RICH_FACT_VALID_TYPES = new Set(['user', 'feedback', 'project', 'reference']);
// Field names matchRichFactKey recognizes at the start of a line (compared
// after lowercasing); any other `word:` prefix is treated as continuation text.
const RICH_FACT_KEYS = new Set(['type', 'title', 'body', 'why', 'how']);
// Defensive per-field cap so a runaway block can't write an unbounded fact body.
// Applied by parseRichFactBlock to title / body / why / how; the unit is string
// length (UTF-16 code units), not bytes.
const RICH_FACT_FIELD_CAP = 4000;
|
|
377
|
+
|
|
378
|
+
// Match a `key: value` field line. String-based (not a regex) — deterministically
|
|
379
|
+
// linear, no backtracking surface. Semantics: the key must be at the START of
|
|
380
|
+
// the line (no leading whitespace, mirroring an `^key` anchor), with optional
|
|
381
|
+
// whitespace before the colon. Returns {key, value} or null (a continuation /
|
|
382
|
+
// non-key line, e.g. a `- bullet:` inside a body).
|
|
383
|
+
function matchRichFactKey(line) {
  const colonAt = line.indexOf(':');
  // No colon at all, or a colon in column 0, can never be a `key:` line.
  if (colonAt < 1) return null;

  const rawKey = line.slice(0, colonAt);
  // Keys are anchored at column 0: any leading whitespace makes this a
  // continuation line (e.g. an indented `- bullet:` inside a body).
  if (rawKey !== rawKey.trimStart()) return null;

  // Whitespace between the key and the colon is tolerated; case is ignored.
  const key = rawKey.trimEnd().toLowerCase();
  return RICH_FACT_KEYS.has(key)
    ? { key, value: line.slice(colonAt + 1).trimStart() }
    : null;
}
|
|
392
|
+
|
|
393
|
+
// A YAML block-scalar header as a field's entire first-line value (`|`, `|-`,
// `>`, `>+`, `|2`, `|-2`, `|2-`, …). Live Haiku formats a multi-line body as
// `body: |` then indents the content — we must not keep the literal `|` or the
// indent. YAML allows the indentation digit and the chomping sign in EITHER
// order, so both `|-2` and `|2-` are accepted.
const BLOCK_SCALAR_RE = /^[|>](?:[+-]?\d*|\d+[+-]?)$/;

/**
 * Normalize a parsed field value.
 *
 * Steps: split into lines (LF or CRLF), drop a leading block-scalar header
 * line if present, strip the common leading whitespace shared by all
 * non-blank lines, re-join with '\n', and trim the result.
 *
 * @param {string | null | undefined} raw - Raw field text; nullish is treated as ''.
 * @returns {string} The cleaned value ('' when there is no content).
 */
function cleanFieldValue(raw) {
  // Accept CRLF as well as LF so a Windows-style line ending never leaves a
  // stray '\r' at the end of an interior line.
  const lines = (raw ?? '').split(/\r?\n/);
  if (lines.length && BLOCK_SCALAR_RE.test(lines[0].trim())) lines.shift();

  // Smallest indent among non-blank lines. A plain loop (rather than
  // Math.min(...array)) keeps this safe for arbitrarily many lines — spreading
  // a huge array into call arguments can throw a RangeError.
  let minIndent = Infinity;
  for (const l of lines) {
    if (l.trim() === '') continue; // blank lines carry no indent information
    const indent = l.match(/^[ \t]*/)[0].length;
    if (indent < minIndent) minIndent = indent;
  }
  if (minIndent === Infinity) minIndent = 0; // no non-blank lines at all

  return lines.map((l) => l.slice(minIndent)).join('\n').trim();
}
|
|
410
|
+
|
|
411
|
+
/**
 * Turn the lines found between BEGIN_FACT and END_FACT into a fact record.
 *
 * A line recognized by matchRichFactKey starts a new field; every other line
 * is appended (newline-joined) to the field currently being read. Lines that
 * appear before the first key are ignored.
 *
 * @param {string[]} blockLines - Raw lines of one block, markers excluded.
 * @returns {{type: string, title: string, body: string, why: string, how: string} | null}
 *   The normalized fact, or null when title or body is missing/empty.
 */
function parseRichFactBlock(blockLines) {
  const fields = {};
  let currentKey = null;
  for (const line of blockLines) {
    const m = matchRichFactKey(line);
    if (m) {
      currentKey = m.key;
      fields[currentKey] = m.value; // first-line value (may be '' or a `|` scalar)
    } else if (currentKey) {
      // Continuation of the current field — multi-line body / why / how.
      fields[currentKey] = `${fields[currentKey]}\n${line}`;
    }
    // A non-key line before any key is ignored.
  }

  const title = cleanFieldValue(fields.title);
  const body = cleanFieldValue(fields.body);
  if (!title || !body) return null; // both are required

  let type = cleanFieldValue(fields.type).toLowerCase();
  if (!RICH_FACT_VALID_TYPES.has(type)) type = 'project';

  // Truncate to the field cap without cutting a UTF-16 surrogate pair in half:
  // if the last kept code unit is a high surrogate, drop it too, so the stored
  // text never ends in a lone surrogate (which would be written as U+FFFD).
  const cap = (text) => {
    if (text.length <= RICH_FACT_FIELD_CAP) return text;
    const lastKept = text.charCodeAt(RICH_FACT_FIELD_CAP - 1);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
    return text.slice(0, splitsPair ? RICH_FACT_FIELD_CAP - 1 : RICH_FACT_FIELD_CAP);
  };

  const why = cleanFieldValue(fields.why);
  const how = cleanFieldValue(fields.how);
  return {
    type,
    title: cap(title),
    body: cap(body),
    why: why ? cap(why) : '',
    how: how ? cap(how) : '',
  };
}
|
|
440
|
+
|
|
441
|
+
// Exported for direct unit-testing (cli-rich-fact.test.js) — the BEGIN_FACT
|
|
442
|
+
// format is the extraction prompt's contract, pinned independently of a live
|
|
443
|
+
// Haiku call.
|
|
444
|
+
/**
 * Extract every BEGIN_FACT … END_FACT block from raw model output.
 *
 * Markers are matched case-insensitively on the trimmed line. A block ends at
 * END_FACT, at the next BEGIN_FACT (missing close — the new marker opens the
 * following block instead of being swallowed), or at end of output. Each
 * block's lines go to parseRichFactBlock; blocks it rejects (null) are dropped.
 *
 * @param {unknown} haikuOutput - Raw output text; non-string or empty yields [].
 * @returns {Array<object>} Parsed facts, in the order the blocks appeared.
 */
export function parseRichFacts(haikuOutput) {
  if (!haikuOutput || typeof haikuOutput !== 'string') return [];

  // Split on LF *or* CRLF: with a bare '\n' split, CRLF output would leave a
  // trailing '\r' on every block line handed to the field parser.
  const lines = haikuOutput.split(/\r?\n/);
  const facts = [];
  let blockLines = null; // null → currently outside any block

  const closeBlock = () => {
    const fact = parseRichFactBlock(blockLines);
    if (fact) facts.push(fact);
    blockLines = null;
  };

  for (const line of lines) {
    const marker = line.trim().toUpperCase();
    if (marker === 'BEGIN_FACT') {
      if (blockLines) closeBlock(); // previous block never saw its END_FACT
      blockLines = [];
    } else if (blockLines === null) {
      continue; // text outside a block (including a stray END_FACT) is ignored
    } else if (marker === 'END_FACT') {
      closeBlock();
    } else {
      blockLines.push(line);
    }
  }
  if (blockLines) closeBlock(); // output ended inside an unclosed block
  return facts;
}
|
|
473
|
+
|
|
331
474
|
// Demote assistant-origin candidates one trust level. User-origin
|
|
332
475
|
// candidates pass through unchanged — they're authoritative.
|
|
333
476
|
// Order: must run BEFORE applyRetainOverride so the override beats
|
|
@@ -456,6 +599,45 @@ function routeMedium({ candidate, projectRoot, ts }) {
|
|
|
456
599
|
return { action: 'queued', id, path: reviewPath };
|
|
457
600
|
}
|
|
458
601
|
|
|
602
|
+
// Route a rich fact to the project fact store via writeFact() (Task 103).
|
|
603
|
+
//
|
|
604
|
+
// Direct-to-fact-store (NOT the review queue the terse medium-trust path uses):
|
|
605
|
+
// the point of Task 103 is AUTOMATIC native-parity capture — native writes its
|
|
606
|
+
// fact files with no approval step, so parity requires the same. The fact store
|
|
607
|
+
// is searchable-but-not-full-trust-injected, writeFact already screens every
|
|
608
|
+
// write (home-path sanitize + Poison_Guard + schema + INDEX/reindex), and a
|
|
609
|
+
// later explicit `cmk remember` (trust:high) supersedes. See design §6.4.
|
|
610
|
+
//
|
|
611
|
+
// trust:medium / write_source:auto-extract marks it as a Haiku synthesis
|
|
612
|
+
// (proposal-grade), below the explicit-high tier. The body is built by the SAME
|
|
613
|
+
// rich-fact.mjs helper the explicit path uses, so an auto-extracted fact reads
|
|
614
|
+
// identically to a `cmk remember --why/--how` one.
|
|
615
|
+
function routeRichFact({ candidate, projectRoot, ts }) {
  // Compose the stored body from the fact's text plus its why/how rationale.
  const { body: text, why, how } = candidate;
  const body = buildRichFactBody({ text, why, how });

  const slug = slugifyFact(candidate.title);

  // Content fingerprint for the provenance field — NOT a security context;
  // it only records which content this fact was derived from. // NOSONAR
  const sourceSha1 = createHash('sha1').update(body).digest('hex'); // NOSONAR

  // Project tier, medium trust, tagged as an auto-extract write. The result of
  // writeFact is handed back to the caller untouched.
  return writeFact({
    tier: 'P',
    type: candidate.type,
    slug,
    title: candidate.title,
    body,
    writeSource: 'auto-extract',
    trust: 'medium',
    sourceFile: 'auto-extract',
    sourceLine: 1,
    sourceSha1,
    createdAt: ts,
    projectRoot,
  });
}
|
|
640
|
+
|
|
459
641
|
// --- NDJSON extract.log ---------------------------------------------
|
|
460
642
|
|
|
461
643
|
function writeExtractLogEntry({ projectRoot, ts, entry }) {
|
|
@@ -614,7 +796,7 @@ export async function runAutoExtract({
|
|
|
614
796
|
// duration ≈ 25000ms = hitting the cap, not finishing) → automatic
|
|
615
797
|
// capture + persona promotion (F2) silently never ran. This call is
|
|
616
798
|
// DETACHED (fire-and-forget, never blocks the session), so a generous
|
|
617
|
-
// ceiling is free. Live-test finding (2026-06-01,
|
|
799
|
+
// ceiling is free. Live-test finding (2026-06-01, live-test-4 baseline).
|
|
618
800
|
timeoutMs: 90_000,
|
|
619
801
|
});
|
|
620
802
|
// Touch the cooldown marker IMMEDIATELY after the Haiku call
|
|
@@ -668,6 +850,22 @@ export async function runAutoExtract({
|
|
|
668
850
|
candidates = applyRetainOverride(candidates, retainSegments);
|
|
669
851
|
candidates = dedupByCanonicalId(candidates);
|
|
670
852
|
|
|
853
|
+
// Task 103 — rich fact synthesis on the native-immune Stop-hook path. The
|
|
854
|
+
// SAME Haiku output may carry BEGIN_FACT blocks (durable project KNOWLEDGE)
|
|
855
|
+
// alongside the terse TRUST_ lines; route them to the fact store via
|
|
856
|
+
// writeFact (richer + searchable). No second LLM call — same outputText.
|
|
857
|
+
const richFacts = parseRichFacts(haikuResult.outputText);
|
|
858
|
+
// XOR safety net: the prompt asks Haiku to emit a fact as EITHER a rich
|
|
859
|
+
// block OR a terse line, never both. If it does both for the same fact, the
|
|
860
|
+
// rich block wins — drop any terse candidate whose canonical id matches a
|
|
861
|
+
// rich fact's body, so it isn't ALSO written as a MEMORY.md bullet. (Keyed
|
|
862
|
+
// on the rich fact's raw `body` headline vs the terse `text` — the prompt
|
|
863
|
+
// enforces the semantic XOR; this catches the exact-restatement case.)
|
|
864
|
+
if (richFacts.length > 0) {
|
|
865
|
+
const richIds = new Set(richFacts.map((f) => generateId('P', f.body)));
|
|
866
|
+
candidates = candidates.filter((c) => !richIds.has(generateId('P', c.text)));
|
|
867
|
+
}
|
|
868
|
+
|
|
671
869
|
// Task 61 — inline cross-project promotion. The SAME Haiku output may
|
|
672
870
|
// carry PERSONA CANDIDATE lines (cross-project doctrine); promote them to
|
|
673
871
|
// the user tier THIS run (vs the weekly auto-persona janitor). No second
|
|
@@ -719,10 +917,11 @@ export async function runAutoExtract({
|
|
|
719
917
|
}
|
|
720
918
|
: {};
|
|
721
919
|
|
|
722
|
-
if (candidates.length === 0 && !personaLanded) {
|
|
920
|
+
if (candidates.length === 0 && richFacts.length === 0 && !personaLanded) {
|
|
723
921
|
const entry = {
|
|
724
922
|
...baseEntry,
|
|
725
923
|
...personaLogFields,
|
|
924
|
+
rich_facts_written: 0,
|
|
726
925
|
success: true,
|
|
727
926
|
skipped_reason: 'nothing_durable',
|
|
728
927
|
duration_ms: Date.now() - t0,
|
|
@@ -735,6 +934,7 @@ export async function runAutoExtract({
|
|
|
735
934
|
duration_ms: entry.duration_ms,
|
|
736
935
|
logPath,
|
|
737
936
|
candidates: [],
|
|
937
|
+
richFacts: [],
|
|
738
938
|
persona,
|
|
739
939
|
};
|
|
740
940
|
}
|
|
@@ -787,9 +987,57 @@ export async function runAutoExtract({
|
|
|
787
987
|
}
|
|
788
988
|
}
|
|
789
989
|
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
)
|
|
990
|
+
// 6b. Route rich facts to the fact store (Task 103). Each writeFact is
|
|
991
|
+
// isolated in try/catch — a Poison_Guard / schema / collision rejection
|
|
992
|
+
// (or an unexpected throw) must NOT take down terse routing or the
|
|
993
|
+
// persona pass, exactly like the inline persona isolation above. A
|
|
994
|
+
// 'created' counts toward observation_count; a 'skipped' (content
|
|
995
|
+
// duplicate) is a no-op success that doesn't re-count; anything else is
|
|
996
|
+
// 'rejected' with its category for analytics (Door 4).
|
|
997
|
+
const richWrites = [];
|
|
998
|
+
for (const fact of richFacts) {
|
|
999
|
+
try {
|
|
1000
|
+
const r = routeRichFact({ candidate: fact, projectRoot, ts });
|
|
1001
|
+
let written;
|
|
1002
|
+
if (r?.action === 'created') written = 'fact';
|
|
1003
|
+
else if (r?.action === 'skipped') written = 'fact-duplicate';
|
|
1004
|
+
else written = 'rejected';
|
|
1005
|
+
const rec = { ...fact, written, result: r };
|
|
1006
|
+
if (written === 'rejected') {
|
|
1007
|
+
rec.rejected_category = r?.errorCategory ?? 'unknown';
|
|
1008
|
+
// Trace the drop (§6.5 don't-lose-without-trace), mirroring the terse
|
|
1009
|
+
// low-discard trace — a rejected rich fact is otherwise invisible once
|
|
1010
|
+
// the detached process exits. TITLE ONLY, never the body: a
|
|
1011
|
+
// poison_guard rejection means the body may carry a secret (the
|
|
1012
|
+
// redacted excerpt is already in poison-guard.log). One NDJSON entry
|
|
1013
|
+
// per rejection (Door 4).
|
|
1014
|
+
writeExtractLogEntry({
|
|
1015
|
+
projectRoot,
|
|
1016
|
+
ts,
|
|
1017
|
+
entry: {
|
|
1018
|
+
event: 'rich_fact_rejected',
|
|
1019
|
+
reason: 'rich_fact_rejected',
|
|
1020
|
+
rejected_category: rec.rejected_category,
|
|
1021
|
+
title: fact.title.slice(0, LOW_DISCARD_EXCERPT_MAX),
|
|
1022
|
+
},
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
richWrites.push(rec);
|
|
1026
|
+
} catch (err) {
|
|
1027
|
+
richWrites.push({
|
|
1028
|
+
...fact,
|
|
1029
|
+
written: 'rejected',
|
|
1030
|
+
rejected_category: 'exception',
|
|
1031
|
+
error: err?.message ?? String(err),
|
|
1032
|
+
});
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
const richFactsWritten = richWrites.filter((w) => w.written === 'fact').length;
|
|
1036
|
+
|
|
1037
|
+
const observation_count =
|
|
1038
|
+
writes.filter(
|
|
1039
|
+
(w) => w.written === 'memory' || w.written === 'review' || w.written === 'conflict',
|
|
1040
|
+
).length + richFactsWritten;
|
|
793
1041
|
|
|
794
1042
|
// Persona-only turn: no project candidate landed, but cross-project
|
|
795
1043
|
// doctrine promoted to the user tier this run. That IS a durable
|
|
@@ -799,6 +1047,7 @@ export async function runAutoExtract({
|
|
|
799
1047
|
const entry = {
|
|
800
1048
|
...baseEntry,
|
|
801
1049
|
...personaLogFields,
|
|
1050
|
+
rich_facts_written: richFactsWritten,
|
|
802
1051
|
success: true,
|
|
803
1052
|
skipped_reason: 'nothing_durable',
|
|
804
1053
|
duration_ms: Date.now() - t0,
|
|
@@ -811,6 +1060,7 @@ export async function runAutoExtract({
|
|
|
811
1060
|
duration_ms: entry.duration_ms,
|
|
812
1061
|
logPath,
|
|
813
1062
|
candidates: writes,
|
|
1063
|
+
richFacts: richWrites,
|
|
814
1064
|
persona,
|
|
815
1065
|
};
|
|
816
1066
|
}
|
|
@@ -818,6 +1068,7 @@ export async function runAutoExtract({
|
|
|
818
1068
|
const entry = {
|
|
819
1069
|
...baseEntry,
|
|
820
1070
|
...personaLogFields,
|
|
1071
|
+
rich_facts_written: richFactsWritten,
|
|
821
1072
|
success: true,
|
|
822
1073
|
observation_count,
|
|
823
1074
|
duration_ms: Date.now() - t0,
|
|
@@ -829,6 +1080,7 @@ export async function runAutoExtract({
|
|
|
829
1080
|
duration_ms: entry.duration_ms,
|
|
830
1081
|
logPath,
|
|
831
1082
|
candidates: writes,
|
|
1083
|
+
richFacts: richWrites,
|
|
832
1084
|
persona,
|
|
833
1085
|
};
|
|
834
1086
|
} finally {
|
package/src/auto-persona.mjs
CHANGED
|
@@ -4,11 +4,11 @@
|
|
|
4
4
|
// reproduced design §16.16's predicted failure: cross-project doctrine
|
|
5
5
|
// ("how I work everywhere" — venv-3.13, layered-backend) was captured
|
|
6
6
|
// but filed PROJECT-tier; the USER tier stayed empty, collapsing the
|
|
7
|
-
// 3-tier value prop to project+local.
|
|
7
|
+
// 3-tier value prop to project+local. The user won't hand-curate the user
|
|
8
8
|
// tier ("too much of a hassle"), so the user tier must fill itself.
|
|
9
9
|
//
|
|
10
10
|
// Posture (tasks.md 45.6 — supersedes 45.2/45.3's manual gate):
|
|
11
|
-
// OPTIMISTIC AUTO-PROMOTE.
|
|
11
|
+
// OPTIMISTIC AUTO-PROMOTE. The user (2026-05-30): "i dont want to do
|
|
12
12
|
// anything, i want it to be automatic." A synthesized doctrine that
|
|
13
13
|
// applies beyond the current project is auto-promoted to the user tier
|
|
14
14
|
// at trust:medium — no manual `cmk persona accept` step. A confidence
|
|
@@ -75,6 +75,11 @@ export const PERSONA_CANDIDATE_RE =
|
|
|
75
75
|
// userDir is passed through to listObservationSources purely to keep the
|
|
76
76
|
// U-tier resolution sandbox-scoped (never walk the real home dir —
|
|
77
77
|
// design §16.36); we then filter to tier P, the synthesis SOURCE.
|
|
78
|
+
// Byte budget for the `facts` persona corpus (Task 111 / F-2). Bounds the Haiku
|
|
79
|
+
// classifier input so a large project's whole-memory sweep can't blow the timeout.
|
|
80
|
+
// Generous (facts are high-signal) but bounded; whole facts only (see below).
|
|
81
|
+
export const PERSONA_CORPUS_BYTES = 60_000;
|
|
82
|
+
|
|
78
83
|
function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
79
84
|
const sources = listObservationSources({ projectRoot, userDir });
|
|
80
85
|
const parts = [];
|
|
@@ -94,7 +99,30 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
|
94
99
|
parts.push((content ?? '').trim());
|
|
95
100
|
}
|
|
96
101
|
}
|
|
97
|
-
|
|
102
|
+
// Task 111 (F-2): BOUND the corpus. Previously this joined EVERY tier-P fact
|
|
103
|
+
// + scratchpad with no cap, so on a real project with substantial memory the
|
|
104
|
+
// classifier prompt grew unbounded and the Haiku `claude --print` call blew the
|
|
105
|
+
// timeout (the reported "did not return within 50000ms"). Accumulate WHOLE
|
|
106
|
+
// facts up to a byte budget (never split a fact mid-body) and mark truncation.
|
|
107
|
+
// KNOWN LIMITATION (mirrors TRANSCRIPT_WINDOW_BYTES): facts past the budget are
|
|
108
|
+
// dropped in file-iteration order — a doctrine fact in the tail can be missed
|
|
109
|
+
// on one pass, but the weekly janitor re-runs, and some doctrine beats a
|
|
110
|
+
// timed-out zero. A value-ordered (trust/recency-first) accumulation is the
|
|
111
|
+
// follow-up if a large corpus drops doctrine.
|
|
112
|
+
const out = [];
|
|
113
|
+
let used = 0;
|
|
114
|
+
let truncated = false;
|
|
115
|
+
for (const part of parts.filter(Boolean)) {
|
|
116
|
+
const cost = Buffer.byteLength(part, 'utf8') + 2; // +2 for the '\n\n' join
|
|
117
|
+
if (used + cost > PERSONA_CORPUS_BYTES) {
|
|
118
|
+
truncated = true;
|
|
119
|
+
break;
|
|
120
|
+
}
|
|
121
|
+
out.push(part);
|
|
122
|
+
used += cost;
|
|
123
|
+
}
|
|
124
|
+
if (truncated) out.push('### …\n(corpus truncated — additional project facts omitted for this pass)');
|
|
125
|
+
return out.join('\n\n');
|
|
98
126
|
}
|
|
99
127
|
|
|
100
128
|
// Default size of the recent-transcript window handed to the SessionEnd persona
|
|
@@ -111,7 +139,7 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
|
|
|
111
139
|
// 40k chars ≈ a long session's worth of turns ≈ ~10k tokens — trivial cost for a
|
|
112
140
|
// once-per-session call, and the classifier prompt's "IGNORE anything specific to
|
|
113
141
|
// this ONE project" instruction guards precision at the larger size (live test:
|
|
114
|
-
// clean 2/2, no false promotes). The exact bound is a
|
|
142
|
+
// clean 2/2, no false promotes). The exact bound is a live-test-9 tuning item.
|
|
115
143
|
// KNOWN LIMITATION (documented, not yet fixed): only the most-recent date-named
|
|
116
144
|
// file is read, so a session spanning midnight loses the pre-midnight turns. Rare;
|
|
117
145
|
// a multi-file read is the follow-up if it bites.
|
|
@@ -250,7 +278,7 @@ export function parsePersonaCandidates(outputText) {
|
|
|
250
278
|
*/
|
|
251
279
|
export async function autoPersona(opts = {}) {
|
|
252
280
|
const t0 = Date.now();
|
|
253
|
-
const { projectRoot, userDir, backend, now, settings, cooldownMs = DEFAULT_COOLDOWN_MS, source = 'facts' } = opts;
|
|
281
|
+
const { projectRoot, userDir, backend, now, settings, cooldownMs = DEFAULT_COOLDOWN_MS, source = 'facts', timeoutMs = 50_000 } = opts;
|
|
254
282
|
|
|
255
283
|
if (!projectRoot) {
|
|
256
284
|
return errorResult({
|
|
@@ -302,7 +330,11 @@ export async function autoPersona(opts = {}) {
|
|
|
302
330
|
instructions: buildClassifierInstructions(source),
|
|
303
331
|
preserveCitationIds: false,
|
|
304
332
|
maxOutputBytes: 4096,
|
|
305
|
-
|
|
333
|
+
// Task 111 (F-2): the timeout is caller-supplied. The SessionEnd hook path
|
|
334
|
+
// keeps the 50_000 default (it composes with the 60s SessionEnd ceiling per
|
|
335
|
+
// design §8.5 / D-42). The CLI `cmk persona generate` has NO outer hook
|
|
336
|
+
// ceiling, so it passes a generous value — the explicit command can wait.
|
|
337
|
+
timeoutMs,
|
|
306
338
|
});
|
|
307
339
|
// Spent a Haiku call — refresh the shared cooldown marker so the next
|
|
308
340
|
// gated caller backs off. (touch even on cooldownMs:0 cycles: the call
|
|
@@ -349,7 +381,7 @@ export async function autoPersona(opts = {}) {
|
|
|
349
381
|
* inferred noise. This still holds for every medium/inferred write.
|
|
350
382
|
* - trust:'high' (explicit path — Task 76 `cmk lessons promote` + Task 78
|
|
351
383
|
* inline grading of an EXPLICITLY-STATED rule). **45.4 REFINEMENT
|
|
352
|
-
* (2026-06-02, D-32 —
|
|
384
|
+
* (2026-06-02, D-32 — the user chose "latest explicit wins"):** an explicit,
|
|
353
385
|
* user-attested rule at trust:high MAY supersede an equal-trust same-topic
|
|
354
386
|
* entry (high >= high → supersede). The newest explicit statement wins,
|
|
355
387
|
* even over a hand-curated high. The original protection is unchanged for
|
|
@@ -359,7 +391,7 @@ export async function autoPersona(opts = {}) {
|
|
|
359
391
|
*/
|
|
360
392
|
// Persist low/medium-confidence (and otherwise-not-promoted) candidates to a
|
|
361
393
|
// durable review-queue FILE at <userDir>/queues/persona-review.md, so they are
|
|
362
|
-
// not lost when only returned in the response (
|
|
394
|
+
// not lost when only returned in the response (the user, 2026-05-31: "response
|
|
363
395
|
// object can get lost — i dont like it"). Dedup by canonical id against what's
|
|
364
396
|
// already in the file so repeated synthesis passes don't pile up duplicates.
|
|
365
397
|
// Returns the queue path (or null when there's nothing to write).
|
package/src/capture-turn.mjs
CHANGED
|
@@ -48,6 +48,9 @@ import {
|
|
|
48
48
|
appendFileSync,
|
|
49
49
|
readFileSync,
|
|
50
50
|
writeFileSync,
|
|
51
|
+
readdirSync,
|
|
52
|
+
statSync,
|
|
53
|
+
unlinkSync,
|
|
51
54
|
} from 'node:fs';
|
|
52
55
|
import { join } from 'node:path';
|
|
53
56
|
import { spawn } from 'node:child_process';
|
|
@@ -57,6 +60,41 @@ function dateFromIso(iso) {
|
|
|
57
60
|
return String(iso).slice(0, 10);
|
|
58
61
|
}
|
|
59
62
|
|
|
63
|
+
// A `.extract-<ts>.tmp` turn-file lives only for the duration of one
// auto-extract run (bounded by the Stop-hook ceiling, design §8.5). The owning
// child unlinks it in its `finally`; capture-turn itself never unlinks a file
// it handed off. So a failed spawn, a child KILLED before its finally (hook
// ceiling), or a Windows unlink refused by a scanner leaks the temp (cut-gate7
// found 2 lingering — D-103 finding E). This janitor sweeps any
// `.extract-*.tmp` older than the threshold — far longer than any live run, so
// it can't race an in-flight child. Best-effort: a sweep hiccup must never
// block the capture.
const STALE_TURN_FILE_MS = 10 * 60 * 1000; // 10 min — well beyond the hook ceiling

/**
 * Delete stale `.extract-*.tmp` turn-files from a transcripts directory.
 *
 * @param {string} transcriptsDir - Directory to scan (non-recursive).
 * @param {number} [maxAgeMs=STALE_TURN_FILE_MS] - A file is stale when
 *   `now - mtimeMs` is strictly greater than this.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {number} How many files were unlinked. Never throws: an unreadable
 *   or missing directory yields 0, and a per-file stat/unlink failure just
 *   skips that file.
 */
export function sweepStaleTurnFiles(transcriptsDir, maxAgeMs = STALE_TURN_FILE_MS, now = Date.now()) {
  // No existsSync pre-check: readdirSync already fails for a missing or
  // unreadable directory, and a check-then-read would only add a race window.
  let names;
  try {
    names = readdirSync(transcriptsDir);
  } catch {
    return 0;
  }

  let swept = 0;
  for (const name of names) {
    if (!(name.startsWith('.extract-') && name.endsWith('.tmp'))) continue;
    const filePath = join(transcriptsDir, name);
    try {
      if (now - statSync(filePath).mtimeMs > maxAgeMs) {
        unlinkSync(filePath);
        swept += 1;
      }
    } catch {
      // best-effort: a stat/unlink failure (already gone, or briefly locked)
      // must not abort the sweep or the capture.
    }
  }
  return swept;
}
|
|
97
|
+
|
|
60
98
|
// Write a `phase: 'spawn'` NDJSON entry to `<projectRoot>/context/sessions/{date}.extract.log`
|
|
61
99
|
// when the auto-extract spawn fails. This closes PR-A's class-1 audit
|
|
62
100
|
// deferral (capture-turn Door 5 observability gap). Auto-extract's own
|
|
@@ -143,7 +181,7 @@ function readLastUserTurnFromTranscript(transcriptPath) {
|
|
|
143
181
|
// (context/sessions/now.md). Before this, now.md was fed ONLY by observe-edit's
|
|
144
182
|
// file-write lines ("[ts] Write file=X lines=N"), so the SessionEnd compressor
|
|
145
183
|
// summarized a list of filenames and hallucinated content the dialogue never
|
|
146
|
-
// contained (
|
|
184
|
+
// contained (live-test-6: "Flask app: app.py" — inferred a framework from a
|
|
147
185
|
// filename). Buffering the actual user+assistant turns here means the summary
|
|
148
186
|
// reflects what was DISCUSSED. Same `## <ts> — speaker` shape as the transcript
|
|
149
187
|
// so the compressor reads it as dialogue; now.md is truncated after each compress
|
|
@@ -281,6 +319,10 @@ export function captureTurn({
|
|
|
281
319
|
// summarizes the DIALOGUE, not observe-edit's filename log. Best-effort.
|
|
282
320
|
appendConversationToNowMd({ projectRoot, ts, userTurn, assistantTurn: sanitized });
|
|
283
321
|
|
|
322
|
+
// Janitor: clear any orphaned turn-files from a prior killed/crashed child
|
|
323
|
+
// before writing this turn's (D-103 finding E). Best-effort.
|
|
324
|
+
sweepStaleTurnFiles(transcriptsDir);
|
|
325
|
+
|
|
284
326
|
const turnFile = join(transcriptsDir, `.extract-${Date.now()}.tmp`);
|
|
285
327
|
try {
|
|
286
328
|
writeFileSync(
|
|
@@ -316,6 +358,11 @@ export function captureTurn({
|
|
|
316
358
|
reason: spawnResult.reason,
|
|
317
359
|
error: spawnResult.error,
|
|
318
360
|
});
|
|
361
|
+
// NB: we do NOT unlink the turn-file here. Ownership is clean — auto-extract
|
|
362
|
+
// owns deletion (its `finally`); when the spawn fails (or a child is killed
|
|
363
|
+
// before its finally), the file becomes an orphan that the entry-sweep above
|
|
364
|
+
// reaps once it's stale (D-103 finding E). capture-turn never deletes a file
|
|
365
|
+
// it handed off, so tests can still inspect the IPC shape on the no-spawn path.
|
|
319
366
|
}
|
|
320
367
|
|
|
321
368
|
return {
|