akm-cli 0.9.0-beta.52 → 0.9.0-beta.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/assets/hints/cli-hints-full.md +6 -5
  2. package/dist/cli.js +0 -7
  3. package/dist/commands/env/env-cli.js +3 -2
  4. package/dist/commands/env/env.js +14 -67
  5. package/dist/commands/health/checks.js +28 -15
  6. package/dist/commands/health.js +68 -1
  7. package/dist/commands/improve/collapse-detector.js +419 -0
  8. package/dist/commands/improve/consolidate.js +72 -54
  9. package/dist/commands/improve/distill.js +79 -13
  10. package/dist/commands/improve/extract.js +13 -6
  11. package/dist/commands/improve/homeostatic.js +109 -79
  12. package/dist/commands/improve/improve-cli.js +67 -1
  13. package/dist/commands/improve/improve.js +10 -0
  14. package/dist/commands/improve/loop-stages.js +39 -1
  15. package/dist/commands/improve/outcome-loop.js +15 -3
  16. package/dist/commands/improve/preparation.js +17 -8
  17. package/dist/commands/improve/salience.js +49 -32
  18. package/dist/commands/read/curate.js +5 -9
  19. package/dist/commands/read/knowledge.js +4 -0
  20. package/dist/commands/read/search.js +5 -2
  21. package/dist/commands/read/show.js +3 -3
  22. package/dist/core/asset/asset-spec.js +3 -2
  23. package/dist/core/config/config-schema.js +39 -17
  24. package/dist/core/eval/rank-metrics.js +113 -0
  25. package/dist/core/state/migrations.js +56 -0
  26. package/dist/core/state-db.js +146 -19
  27. package/dist/indexer/ensure-index.js +33 -90
  28. package/dist/indexer/index-writer-lock.js +0 -11
  29. package/dist/indexer/index-written-assets.js +105 -0
  30. package/dist/indexer/passes/metadata.js +20 -0
  31. package/dist/indexer/search/db-search.js +29 -1
  32. package/dist/indexer/search/ranking-contributors.js +33 -1
  33. package/dist/indexer/search/ranking.js +66 -0
  34. package/dist/indexer/search/search-fields.js +6 -0
  35. package/dist/llm/feature-gate.js +6 -2
  36. package/dist/output/renderers.js +8 -13
  37. package/dist/output/shapes/helpers.js +0 -3
  38. package/dist/output/shapes/passthrough.js +1 -0
  39. package/dist/scripts/migrate-storage.js +152 -33
  40. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
  41. package/dist/storage/repositories/index-db.js +10 -1
  42. package/package.json +2 -4
@@ -0,0 +1,419 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * R5 — Longitudinal collapse/churn detector
6
+ * (docs/design/improve-collapse-churn-detector-design.md).
7
+ *
8
+ * Detects the two measured failure modes of LLM-consolidated memory stores:
9
+ *
10
+ * COLLAPSE — repeated merges destroy information: canary retrieval recall
11
+ * downtrends, distinct-content entropy downtrends, or the store shrinks
12
+ * while generation counts rise.
13
+ * CHURN — real accepted-change volume with zero retrieval-visible or
14
+ * shape-visible movement (LLM budget burned rewriting to no effect).
15
+ *
16
+ * Hard invariants: deterministic only (FTS BM25 + hashing — never an LLM,
17
+ * never an embedding model); bounded storage (< 2 KB per qualifying cycle,
18
+ * 365-day retention); fail-open (an error warns and skips, never breaks an
19
+ * improve run); runs only on cycles where consolidate/recombine did work.
20
+ *
21
+ * Observe-only in v1: alerts land in `improve_cycle_metrics.alerts_json`, the
22
+ * events log (`collapse_detector_alert`), and the `akm health` advisory —
23
+ * nothing is ever blocked.
24
+ *
25
+ * @module collapse-detector
26
+ */
27
+ import { randomBytes } from "node:crypto";
28
+ import { makeAssetRef } from "../../core/asset/asset-ref.js";
29
+ import { appendEvent } from "../../core/events.js";
30
+ import { deactivateCanarySet, getActiveCanaries, getCanariesBySetId, insertCanaries, insertCycleMetrics, listActiveCanarySetIds, queryRecentCycleMetrics, withStateDb, } from "../../core/state-db.js";
31
+ import { warn } from "../../core/warn.js";
32
+ import { closeDatabase, getAllEntries, openExistingDatabase, searchFts, } from "../../indexer/db/db.js";
33
+ import { computeBigramDiversity, DEFAULT_MAX_GENERATION } from "./homeostatic.js";
34
+ import { getAllRankScores } from "./salience.js";
35
// ── Tunable defaults (kept in step with ImproveCollapseDetectorSchema in config-schema.ts) ──
/** How many canary probes a freshly minted set holds (owner-approved 30–50 range). */
export const DEFAULT_CANARY_COUNT = 40;
/** Search cutoff `k`: a canary only counts as a hit inside the top-k results. */
export const DEFAULT_CANARY_K = 10;
/** Number of prior qualifying cycles that make up the trend window. */
export const DEFAULT_WINDOW_CYCLES = 5;
/** Recall drop (vs. the window median) at which the collapse-recall alert fires. */
export const DEFAULT_RECALL_DROP_THRESHOLD = 0.15;
/** Total distinct-content-ratio decline over the window at which the collapse-entropy alert fires. */
export const DEFAULT_ENTROPY_DROP_THRESHOLD = 0.05;
/** Minimum accepted actions summed over the window before the churn alert is considered. */
export const DEFAULT_CHURN_MIN_ACCEPTED = 25;
/** Retention horizon in days (not consumed in this module — presumably used by the storage layer; confirm there). */
export const DEFAULT_RETENTION_DAYS = 365;
/** Upper bound on the deterministic bigram-diversity sample (cost bound at 10k assets). */
const DIVERSITY_SAMPLE_CAP = 2000;
/**
 * Fewest merge-floor violations in a single cycle that raise the advisory
 * alert. The specificity floor is intentionally strict while Phase-1 tuning
 * is pending, so one or two borderline merges must not turn `akm health` to
 * warn — the resulting alert fatigue would bury the real collapse signals.
 */
const MERGE_FLOOR_ALERT_MIN = 3;
/** Asset types that make up the learning store the detector measures. */
const LEARNING_TYPES = new Set(["memory", "lesson", "knowledge"]);
54
+ // ── Canary set ────────────────────────────────────────────────────────────────
55
/**
 * Derive the deterministic probe query for one anchor entry.
 *
 * Tokens are collected in a fixed order — name pieces (split on `-`, `_`,
 * `/`, `.`; single-character pieces dropped), then the first three tags, then
 * the first six whitespace-separated words of the description — and
 * de-duplicated keeping the first occurrence.
 *
 * @param entry index row whose `entry` carries `name`, optional `tags`, optional `description`
 * @returns the space-joined query string (empty when no token survives)
 */
function buildCanaryQuery(entry) {
    const { name, tags, description } = entry.entry;
    // A Set keeps first-insertion order, which is exactly the dedupe order needed.
    const tokens = new Set();
    const keep = (token) => {
        if (token.length > 0)
            tokens.add(token);
    };
    for (const piece of name.split(/[-_/.]+/)) {
        if (piece.length > 1)
            keep(piece);
    }
    for (const tag of (tags ?? []).slice(0, 3)) {
        keep(tag);
    }
    for (const word of (description ?? "").split(/\s+/).slice(0, 6)) {
        keep(word);
    }
    return [...tokens].join(" ");
}
63
/**
 * Build the canary mint list (deterministic given the index + salience tables).
 *
 * Learning-type entries are ranked by their salience rank score (missing
 * score → 0), highest first. Ties are broken by the bare `type:name` ref and
 * then by `entryKey`, so the ordering is a total order: `entryKey` is
 * stash-prefixed ("<stashDir>:type:name") while salience and canary scoring
 * key on the bare ref, which means two stashes can yield the same ref. The
 * previous comparator returned 1 for equal refs in both directions, leaving
 * the surviving duplicate dependent on input order.
 *
 * Selection is type-stratified: up to ⌈canaryCount / #types⌉ per learning
 * type, then backfilled from the global ranking until `canaryCount` is
 * reached. Entries whose generated query is empty are dropped.
 *
 * @param stateDb state database handle passed to `getAllRankScores`
 * @param entries index rows (`{ entryKey, entry: { type, name, ... } }`)
 * @param cfg     detector config; `canaryCount` overrides the default set size
 * @returns array of `{ anchorRef, query }` rows ready for insertion
 */
function buildMintList(stateDb, entries, cfg) {
    const canaryCount = cfg.canaryCount ?? DEFAULT_CANARY_COUNT;
    const rankScores = getAllRankScores(stateDb);
    // Proper three-way comparison so equal keys report 0 (comparator contract).
    const compareText = (x, y) => (x < y ? -1 : x > y ? 1 : 0);
    const candidates = entries
        .filter((e) => LEARNING_TYPES.has(e.entry.type))
        .map((e) => {
        const ref = makeAssetRef(e.entry.type, e.entry.name);
        return { e, ref, score: rankScores.get(ref) ?? 0 };
    })
        .sort((a, b) => b.score - a.score ||
        compareText(a.ref, b.ref) ||
        compareText(String(a.e.entryKey ?? ""), String(b.e.entryKey ?? "")));
    // Type-stratified top slice: an equal share per learning type, backfilled
    // from the global order when a type is short.
    const perType = Math.ceil(canaryCount / LEARNING_TYPES.size);
    const picked = new Map();
    for (const type of LEARNING_TYPES) {
        let taken = 0;
        for (const c of candidates) {
            if (taken >= perType || picked.size >= canaryCount)
                break;
            if (c.e.entry.type === type && !picked.has(c.ref)) {
                picked.set(c.ref, c);
                taken++;
            }
        }
    }
    for (const c of candidates) {
        if (picked.size >= canaryCount)
            break;
        if (!picked.has(c.ref))
            picked.set(c.ref, c);
    }
    return [...picked.values()]
        .map((c) => ({ anchorRef: c.ref, query: buildCanaryQuery(c.e) }))
        .filter((c) => c.query.length > 0);
}
100
/**
 * Mint a fresh canary-set identifier: `canary-<base36 epoch ms>-<4 hex chars>`.
 * The random suffix separates mints that land in the same millisecond
 * (tests and concurrent runs).
 */
function newCanarySetId() {
    const stamp = Date.now().toString(36);
    const salt = randomBytes(2).toString("hex");
    return ["canary", stamp, salt].join("-");
}
104
/**
 * Return the active canary set, minting one first if none is active.
 *
 * When active canaries exist they are returned untouched, labelled with the
 * set id of the first row. Otherwise a mint list is built from
 * `preloadedEntries` (or a fresh `getAllEntries` scan when not supplied),
 * inserted under a new set id, and read back BY THAT id — never "newest
 * active" — so a concurrent mint in another process cannot relabel this
 * run's metrics.
 *
 * Returns `null` when nothing is mintable; such a cycle must not be recorded
 * (a new unused set id per cycle would keep the trend window from ever
 * filling and make recall read as a fake 0).
 *
 * This function never refreshes an existing set: the baseline stays frozen
 * until an explicit `akm improve canary --refresh`, because silent
 * re-baselining is how a slow collapse hides.
 *
 * @param stateDb          state database handle
 * @param indexDb          index database handle (only scanned when no entries are preloaded)
 * @param cfg              detector config forwarded to the mint-list builder
 * @param preloadedEntries optional already-scanned index entries
 * @returns `{ canarySetId, canaries }` or `null`
 */
export function ensureCanarySet(stateDb, indexDb, cfg, preloadedEntries) {
    const active = getActiveCanaries(stateDb);
    if (active.length === 0) {
        const entries = preloadedEntries ?? getAllEntries(indexDb);
        const mintList = buildMintList(stateDb, entries, cfg);
        if (mintList.length === 0) {
            return null;
        }
        const freshId = newCanarySetId();
        insertCanaries(stateDb, freshId, mintList);
        return { canarySetId: freshId, canaries: getCanariesBySetId(stateDb, freshId) };
    }
    return { canarySetId: active[0].canary_set_id, canaries: active };
}
131
/**
 * Explicitly re-mint the canary set (the only refresh path —
 * `akm improve canary --refresh`).
 *
 * Order is mint first, deactivate afterwards: if the mint list comes back
 * empty the function returns `null` before touching anything, so the
 * existing baseline survives. After a successful insert EVERY other active
 * set id is deactivated (not merely the newest), so leftovers from an
 * interrupted refresh cannot resurface.
 *
 * @param stateDb state database handle
 * @param indexDb index database handle, scanned with `getAllEntries`
 * @param cfg     detector config forwarded to the mint-list builder
 * @returns `{ canarySetId, canaries }` for the new set, or `null` when nothing is mintable
 */
export function refreshCanarySet(stateDb, indexDb, cfg) {
    const mintList = buildMintList(stateDb, getAllEntries(indexDb), cfg);
    if (mintList.length === 0) {
        // Nothing mintable — leave the old baseline in place.
        return null;
    }
    const freshId = newCanarySetId();
    insertCanaries(stateDb, freshId, mintList);
    for (const activeId of listActiveCanarySetIds(stateDb)) {
        if (activeId === freshId)
            continue;
        deactivateCanarySet(stateDb, activeId);
    }
    const canaries = getCanariesBySetId(stateDb, freshId);
    return { canarySetId: freshId, canaries };
}
150
+ // ── Cycle metrics ─────────────────────────────────────────────────────────────
151
/**
 * Assemble the name-free text that the entropy metrics hash.
 *
 * The indexed search_text embeds the (unique) entry name, which would hold
 * the distinct-content ratio at 1.0 permanently, so the name is left out.
 * What is used instead is the description, the tags and the TOC heading
 * texts — the searchable surface that generic merged assets converge on.
 * (The raw body is not in the index, so v1 entropy is measured over this
 * surface only.)
 *
 * @param entry index entry with optional `description`, `tags`, `toc[].text`
 * @returns the non-empty fields joined by single spaces ("" when all are empty)
 */
function contentFingerprint(entry) {
    const description = entry.description ?? "";
    const tagText = (entry.tags ?? []).join(" ");
    const headingText = (entry.toc ?? []).map((heading) => heading.text).join(" ");
    const kept = [];
    for (const field of [description, tagText, headingText]) {
        if (field.length > 0)
            kept.push(field);
    }
    return kept.join(" ");
}
164
/**
 * 64-bit FNV-1a hash of normalized text, used for distinct-content counting.
 *
 * Normalization lowercases the input, collapses every whitespace run to a
 * single space and trims the ends. The hash is folded over UTF-16 code units
 * (`charCodeAt`) and kept to 64 bits after each multiply.
 *
 * @param text input string
 * @returns lowercase hexadecimal digest without leading zero padding
 */
export function normHash(text) {
    const OFFSET_BASIS = 0xcbf29ce484222325n;
    const FNV_PRIME = 0x100000001b3n;
    const MASK_64 = 0xffffffffffffffffn;
    const canonical = text.toLowerCase().replace(/\s+/g, " ").trim();
    let digest = OFFSET_BASIS;
    let position = 0;
    while (position < canonical.length) {
        const unit = BigInt(canonical.charCodeAt(position));
        digest = ((digest ^ unit) * FNV_PRIME) & MASK_64;
        position += 1;
    }
    return digest.toString(16);
}
175
/**
 * Find where one canary's anchor shows up in a live FTS search.
 *
 * A result counts as a hit when it is the anchor ref itself, or when its
 * `sourceRefs` contain the anchor (merge-following, ONE level only —
 * provenance lost on a second-generation merge is a miss by design; that IS
 * the information loss being measured). Only the first `k` results are
 * considered.
 *
 * @param indexDb index database handle passed to `searchFts`
 * @param canary  row with `query` and `anchor_ref`
 * @param k       result cutoff
 * @returns 0-based rank of the first hit, or -1 when there is none
 */
function scoreCanary(indexDb, canary, k) {
    const hits = searchFts(indexDb, canary.query, k);
    const limit = Math.min(hits.length, k);
    // findIndex already yields -1 on "not found"; the position guard keeps
    // anything past the cutoff from being inspected.
    return hits.findIndex((hit, position) => {
        if (!(position < limit))
            return false;
        if (makeAssetRef(hit.entry.type, hit.entry.name) === canary.anchor_ref)
            return true;
        return Boolean(hit.entry.sourceRefs?.includes(canary.anchor_ref));
    });
}
194
/**
 * Take the store-health snapshot for one qualifying cycle.
 *
 * Performs a single `getAllEntries` scan (shared by the canary mint, if one
 * is needed, and the store-shape metrics) plus one FTS query per canary.
 *
 * Canary metrics: recall, MRR and single-relevant nDCG@k averaged over the
 * canary set. Store-shape metrics over the learning types: total count,
 * distinct-content ratio, mean bigram diversity over a deterministic sample,
 * and the number of entries whose generation exceeds `maxGeneration`. The
 * per-type count covers every entry type in the index.
 *
 * @param stateDb state database handle
 * @param indexDb index database handle
 * @param args    `{ cfg, runId, pass, acceptedActions, mergeFloorViolations, maxGeneration?, now? }`
 * @returns the metrics row (with `alerts_json` preset to "[]"), or `null`
 *          when no canary set exists and none can be minted — such a cycle
 *          is not measurable and must not be recorded
 */
export function computeCycleMetrics(stateDb, indexDb, args) {
    const cutoff = args.cfg.k ?? DEFAULT_CANARY_K;
    const generationCap = args.maxGeneration ?? DEFAULT_MAX_GENERATION;
    const allEntries = getAllEntries(indexDb);
    const canarySet = ensureCanarySet(stateDb, indexDb, args.cfg, allEntries);
    if (canarySet === null) {
        return null;
    }
    // ── Canary retrieval metrics ───────────────────────────────────────────────
    const totals = { recall: 0, ndcg: 0, mrr: 0 };
    const rankPairs = [];
    for (const canary of canarySet.canaries) {
        const rank = scoreCanary(indexDb, canary, cutoff);
        rankPairs.push([canary.id, rank]);
        if (rank < 0)
            continue;
        totals.recall += 1;
        totals.mrr += 1 / (rank + 1);
        // With exactly one relevant document the ideal DCG is 1, so nDCG@k
        // reduces to the discount at the hit position.
        totals.ndcg += 1 / Math.log2(rank + 2);
    }
    // Guard the division for an empty canary list.
    const divisor = Math.max(1, canarySet.canaries.length);
    // ── Store-shape metrics (same single entries scan) ────────────────────────
    const typeCounts = new Map();
    const distinctHashes = new Set();
    const samples = [];
    let overGeneration = 0;
    for (const { entry, entryKey } of allEntries) {
        typeCounts.set(entry.type, (typeCounts.get(entry.type) ?? 0) + 1);
        if (LEARNING_TYPES.has(entry.type)) {
            const text = contentFingerprint(entry);
            distinctHashes.add(normHash(text));
            if ((entry.generation ?? 0) > generationCap) {
                overGeneration += 1;
            }
            samples.push({ key: entryKey, text });
        }
    }
    // Every learning entry contributed exactly one sample.
    const learningTotal = samples.length;
    // Deterministic diversity sample: order by entryKey, then take every
    // ⌈N/cap⌉-th row.
    samples.sort((a, b) => (a.key < b.key ? -1 : 1));
    const stride = Math.max(1, Math.ceil(samples.length / DIVERSITY_SAMPLE_CAP));
    let diversityTotal = 0;
    let sampled = 0;
    for (let i = 0; i < samples.length; i += stride) {
        diversityTotal += computeBigramDiversity(samples[i].text);
        sampled += 1;
    }
    const timestamp = (args.now ?? new Date()).toISOString();
    const byTypeJson = JSON.stringify(Object.fromEntries([...typeCounts.entries()].sort()));
    return {
        run_id: args.runId,
        ts: timestamp,
        pass: args.pass,
        canary_set_id: canarySet.canarySetId,
        mean_recall: totals.recall / divisor,
        mean_ndcg: totals.ndcg / divisor,
        mean_mrr: totals.mrr / divisor,
        canary_ranks_json: JSON.stringify(rankPairs),
        store_total: learningTotal,
        store_by_type_json: byTypeJson,
        distinct_content_ratio: learningTotal === 0 ? 1 : distinctHashes.size / learningTotal,
        mean_bigram_diversity: sampled === 0 ? 1 : diversityTotal / sampled,
        over_generation_count: overGeneration,
        accepted_actions: args.acceptedActions,
        merge_floor_violations: args.mergeFloorViolations,
        alerts_json: "[]",
    };
}
272
+ // ── Alert evaluation (pure) ───────────────────────────────────────────────────
273
/**
 * Median of a list of numbers. The input is copied before sorting, so the
 * caller's array is left untouched. An even-length list yields the mean of
 * the two middle values; an empty list yields NaN.
 */
function median(values) {
    const ordered = Array.from(values).sort((x, y) => x - y);
    const middle = Math.trunc(ordered.length / 2);
    if (ordered.length % 2 !== 0) {
        return ordered[middle];
    }
    return (ordered[middle - 1] + ordered[middle]) / 2;
}
278
/**
 * Evaluate the §1 alert definitions. PURE — history rows in (oldest-first,
 * NOT including `current`) plus the current row, alerts out.
 *
 * Per-cycle rule (fires regardless of window depth):
 *   - merge-floor: `current.merge_floor_violations` reached the alert minimum.
 *
 * Window rules (need a full baseline of `windowCycles` history rows):
 *   - collapse-recall:  current recall is at least the drop threshold below
 *     the window median.
 *   - collapse-entropy: distinct-content ratio never rose across the window
 *     and fell by at least the entropy threshold in total.
 *   - collapse-shrink:  store is below 80% of the window maximum while the
 *     over-generation count rose since the window start.
 *   - churn: accepted actions over the window reached the minimum while nDCG
 *     and distinct-content ratio both stayed within 0.02 of their medians.
 *
 * A window shorter than `windowCycles` never fires. A `windowCycles` value
 * that is not ≥ 1 (0, negative, NaN, a fraction below 1) is treated the same
 * way — no baseline is possible, so only the per-cycle alerts are returned.
 * Without that guard `history.slice(-0)` would select the WHOLE history and
 * an empty history would throw on `hist[0]`.
 *
 * @param history prior metric rows, oldest first
 * @param current the row for the cycle being evaluated
 * @param cfg     detector config (`windowCycles`, `recallDropThreshold`,
 *                `entropyDropThreshold`, `churnMinAcceptedActions`)
 * @returns array of `{ kind, detail, metrics }` alerts (possibly empty)
 */
export function evaluateCollapseAlerts(history, current, cfg) {
    const alerts = [];
    // MERGE-FLOOR advisory: per-cycle, window-independent. Gated on a minimum
    // count — the specificity floor is deliberately strict pre-tuning, and one
    // or two borderline merges per cycle must not generate alert fatigue.
    if (current.merge_floor_violations >= MERGE_FLOOR_ALERT_MIN) {
        alerts.push({
            kind: "merge-floor",
            detail: `${current.merge_floor_violations} merge(s) failed the information floor this cycle (provenance shrank or specificity below threshold)`,
            metrics: { mergeFloorViolations: current.merge_floor_violations },
        });
    }
    const W = cfg.windowCycles ?? DEFAULT_WINDOW_CYCLES;
    // Written as a negated comparison so NaN is rejected too.
    if (!(W >= 1))
        return alerts; // degenerate window — no baseline can exist
    const hist = history.slice(-W);
    if (hist.length < W)
        return alerts; // no baseline yet
    const recallDrop = cfg.recallDropThreshold ?? DEFAULT_RECALL_DROP_THRESHOLD;
    const entropyDrop = cfg.entropyDropThreshold ?? DEFAULT_ENTROPY_DROP_THRESHOLD;
    const churnMin = cfg.churnMinAcceptedActions ?? DEFAULT_CHURN_MIN_ACCEPTED;
    // COLLAPSE 1 — canary recall drop vs window median (median, not previous
    // cycle, so one noisy cycle can neither fire nor mask the alert).
    const medianRecall = median(hist.map((h) => h.mean_recall));
    if (current.mean_recall <= medianRecall - recallDrop) {
        alerts.push({
            kind: "collapse-recall",
            detail: `mean canary recall ${current.mean_recall.toFixed(3)} dropped ≥${recallDrop} below the ${W}-cycle median ${medianRecall.toFixed(3)}`,
            metrics: { currentRecall: current.mean_recall, medianRecall, threshold: recallDrop },
        });
    }
    // COLLAPSE 2 — monotonic distinct-content-ratio decline over the window.
    const series = [...hist.map((h) => h.distinct_content_ratio), current.distinct_content_ratio];
    const monotonicNonIncreasing = series.every((v, i) => i === 0 || v <= series[i - 1]);
    const totalDecline = hist[0].distinct_content_ratio - current.distinct_content_ratio;
    if (monotonicNonIncreasing && totalDecline >= entropyDrop) {
        alerts.push({
            kind: "collapse-entropy",
            detail: `distinct-content ratio declined monotonically by ${totalDecline.toFixed(3)} (≥${entropyDrop}) over ${W} cycles — store content is converging`,
            metrics: {
                windowStart: hist[0].distinct_content_ratio,
                current: current.distinct_content_ratio,
                decline: totalDecline,
            },
        });
    }
    // COLLAPSE 3 — store shrinking BECAUSE of re-merging (not deletion hygiene).
    const maxStore = Math.max(...hist.map((h) => h.store_total));
    if (current.store_total < 0.8 * maxStore && current.over_generation_count > hist[0].over_generation_count) {
        alerts.push({
            kind: "collapse-shrink",
            detail: `store shrank >20% (${current.store_total} vs window max ${maxStore}) while over-generation count rose (${hist[0].over_generation_count} → ${current.over_generation_count})`,
            metrics: {
                storeTotal: current.store_total,
                windowMax: maxStore,
                overGeneration: current.over_generation_count,
            },
        });
    }
    // CHURN — real write volume, zero retrieval- or shape-visible movement.
    // Flatness is measured against the window MEDIAN (consistent with the
    // recall rule): endpoint-only comparison would call a window that swung
    // wildly but happened to land near its start "flat".
    const acceptedSum = hist.reduce((a, h) => a + h.accepted_actions, 0);
    const scoreFlat = Math.abs(current.mean_ndcg - median(hist.map((h) => h.mean_ndcg))) < 0.02;
    const entropyFlat = Math.abs(current.distinct_content_ratio - median(hist.map((h) => h.distinct_content_ratio))) < 0.02;
    if (acceptedSum >= churnMin && scoreFlat && entropyFlat) {
        alerts.push({
            kind: "churn",
            detail: `${acceptedSum} accepted actions over ${W} cycles with flat canary score and flat entropy — write volume with no retrieval-visible effect`,
            metrics: { acceptedSum, ndcgDelta: current.mean_ndcg - hist[0].mean_ndcg },
        });
    }
    return alerts;
}
357
+ // ── Orchestrator ─────────────────────────────────────────────────────────────
358
/**
 * Orchestrate the detector for one qualifying cycle.
 *
 * Steps: open the existing index database → compute the cycle metrics
 * (which ensures a canary set) → load recent history for that canary set →
 * evaluate alerts → store the alert kinds on the row and persist it → append
 * one `collapse_detector_alert` event per alert → close the index database.
 *
 * FAIL-OPEN: every error — including one thrown while closing the index
 * database — is turned into a warning and an `undefined` return, so an
 * improve run is never broken by its own instrumentation.
 *
 * @param args `{ config, indexDbPath, runId, pass, acceptedActions,
 *             mergeFloorViolations, eventsCtx? }`
 * @returns the persisted metrics row, or `undefined` when the detector is
 *          disabled, the cycle is not measurable, or an error occurred
 */
export function runCollapseDetector(args) {
    const cfg = args.config.improve?.collapseDetector ?? {};
    if (cfg.enabled === false) {
        return undefined;
    }
    // The over-generation threshold mirrors the guard actually in effect —
    // reading the same config key keeps the two aligned when tuned.
    const resolveMaxGeneration = () => {
        const antiCollapse = args.config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse;
        return antiCollapse?.maxGeneration ?? DEFAULT_MAX_GENERATION;
    };
    // Everything that needs the state database for a given open index handle.
    const measure = (db, maxGeneration) => withStateDb((stateDb) => {
        const row = computeCycleMetrics(stateDb, db, {
            runId: args.runId,
            pass: args.pass,
            acceptedActions: args.acceptedActions,
            mergeFloorViolations: args.mergeFloorViolations,
            cfg,
            maxGeneration,
        });
        if (row === null) {
            // Empty index — nothing to measure.
            return undefined;
        }
        const windowCycles = cfg.windowCycles ?? DEFAULT_WINDOW_CYCLES;
        const history = queryRecentCycleMetrics(stateDb, row.canary_set_id, windowCycles);
        const alerts = evaluateCollapseAlerts(history, row, cfg);
        row.alerts_json = JSON.stringify(alerts.map((a) => a.kind));
        insertCycleMetrics(stateDb, row);
        for (const { kind, detail, metrics } of alerts) {
            appendEvent({
                eventType: "collapse_detector_alert",
                ref: undefined,
                metadata: {
                    kind,
                    detail,
                    metrics,
                    canarySetId: row.canary_set_id,
                    runId: args.runId,
                },
            }, args.eventsCtx);
        }
        return row;
    }, { path: args.eventsCtx?.dbPath, borrowed: args.eventsCtx?.db });
    try {
        let indexDb;
        // The inner try/finally stays nested inside the outer catch so that
        // a failing close is also swallowed (fail-open).
        try {
            indexDb = openExistingDatabase(args.indexDbPath);
            return measure(indexDb, resolveMaxGeneration());
        }
        finally {
            if (indexDb) {
                closeDatabase(indexDb);
            }
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        warn(`[collapse-detector] skipped (fail-open): ${reason}`);
        return undefined;
    }
}
@@ -19,7 +19,7 @@ import { detectTruncatedDescription } from "../../core/text-truncation.js";
19
19
  import { hasSupersededStatus, MERGE_ABSOLUTE_FLOOR_CHARS, MERGE_SHRINK_RATIO_MIN, validateProposalFrontmatter, } from "../proposal/validators/proposal-quality-validators.js";
20
20
  import { createProposal, isProposalSkipped, listProposals } from "../proposal/validators/proposals.js";
21
21
  import { cacheHash, runDeterministicDedup, stripFrontmatterBody } from "./dedup.js";
22
- import { checkGenerationGuard, checkLexicalDiversity, computeMergedGeneration, readAssetGeneration, runHomeostaticDemotion, shouldSkipHotProbationInLlm, } from "./homeostatic.js";
22
+ import { checkGenerationGuard, checkLexicalDiversity, checkMergeInformationFloor, computeMergedGeneration, readAssetGeneration, shouldSkipHotProbationInLlm, } from "./homeostatic.js";
23
23
  import { writeContradictEdge } from "./memory/memory-belief.js";
24
24
  // Re-export the moved helpers so existing test imports continue to resolve.
25
25
  export { hasSupersededStatus, validateProposalFrontmatter };
@@ -410,20 +410,24 @@ function backupFile(filePath, backupDir, name) {
410
410
  }
411
411
  // ── WS-3b: Generation frontmatter injection ───────────────────────────────────
412
412
  /**
413
- * Inject `generation` (and optionally `source_refs`) into merged content.
413
+ * Inject `generation` and `source_refs` into merged content.
414
414
  * generation = max(sourceGenerations) + 1.
415
+ * source_refs = UNION of the provided provenance refs (participants + their
416
+ * cited sources) with anything already present in the merged frontmatter —
417
+ * R5 §4.2: the old set-if-absent behavior dropped second-generation
418
+ * provenance whenever the LLM emitted its own (partial) source_refs.
415
419
  * Fails open — returns original content if frontmatter can't be parsed.
416
420
  */
417
- function injectGenerationFrontmatter(mergedContent, sourceGenerations, allParticipants) {
421
+ function injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceRefs) {
418
422
  try {
419
423
  const parsed = parseFrontmatter(mergedContent);
424
+ const existingFm = parsed.data;
425
+ const existingRefs = Array.isArray(existingFm.source_refs) ? existingFm.source_refs.map(String) : [];
420
426
  const updatedFm = {
421
- ...parsed.data,
427
+ ...existingFm,
422
428
  generation: computeMergedGeneration(sourceGenerations),
429
+ source_refs: [...new Set([...existingRefs, ...provenanceRefs])],
423
430
  };
424
- if (!updatedFm.source_refs) {
425
- updatedFm.source_refs = allParticipants;
426
- }
427
431
  return assembleAssetFromString(serializeFrontmatter(updatedFm), parsed.content);
428
432
  }
429
433
  catch {
@@ -513,17 +517,27 @@ function resolveConsolidateLlmConfig(config) {
513
517
  // ── Judged-state cache (#581) ────────────────────────────────────────────────
514
518
  /**
515
519
  * Stable content hash for a memory file used by the judged-state cache (#581)
516
- * and the body-embedding cache (WS-3a). Uses `cacheHash` from dedup.ts:
517
- * sha256 of the case-preserving stripped body. Two memories that differ only
518
- * in frontmatter (`updated:`, `inferenceProcessed:`) hash identically, so a
519
- * cosmetic frontmatter touch never forces a needless re-judge — only a body
520
- * change does. Returns `undefined` on any read/parse error so callers fail
521
- * open (treat the memory as un-cached → it stays in the LLM pool).
520
+ * and the body-embedding cache (WS-3a). Uses `cacheHash` from dedup.ts
521
+ * (sha256 of the case-preserving stripped body) plus the sorted `tags` list,
522
+ * so semantic-metadata drift re-enters the judge while cosmetic frontmatter
523
+ * touches (`updated:`, `inferenceProcessed:`) still hash identically and never
524
+ * force a needless re-judge. Returns `undefined` on any read/parse error so
525
+ * callers fail open (treat the memory as un-cached → it stays in the LLM pool).
522
526
  */
523
527
  function computeMemoryContentHash(filePath) {
524
528
  try {
525
529
  const raw = fs.readFileSync(filePath, "utf8");
526
- return cacheHash(raw);
530
+ let tagSuffix = "";
531
+ try {
532
+ const { data } = parseFrontmatter(raw);
533
+ const tags = Array.isArray(data?.tags) ? data.tags.map(String).sort() : [];
534
+ if (tags.length > 0)
535
+ tagSuffix = `\n\u0000tags:${tags.join(",")}`;
536
+ }
537
+ catch {
538
+ // Unparseable frontmatter → body-only hash (prior behaviour).
539
+ }
540
+ return cacheHash(raw + tagSuffix);
527
541
  }
528
542
  catch {
529
543
  return undefined;
@@ -589,19 +603,10 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
589
603
  warnings.push(`Pre-flight: filtered ${staleCount} stale DB entr${staleCount === 1 ? "y" : "ies"} (file absent on disk) from memory pool before chunking.`);
590
604
  }
591
605
  memories = memories.filter((m) => fs.existsSync(m.filePath));
592
- // ── WS-3b Step 0a: Homeostatic demotion ────────────────────────────────────
593
- // DEFAULT OFF. Before any LLM merge, demote retrievalSalience in state.db
594
- // for stale/low-value assets so the merge pool is bounded and high-SNR.
595
- // Demotion is state.db-only (file content untouched); re-promotable on
596
- // re-retrieval. Only fires when `homeostaticDemotion.enabled === true`.
597
- const homeostaticConfig = config.profiles?.improve?.default?.processes?.consolidate?.homeostaticDemotion ?? {};
598
- if (homeostaticConfig.enabled && sharedStateDb) {
599
- const demotionResult = runHomeostaticDemotion(sharedStateDb, homeostaticConfig);
600
- if (demotionResult.demoted > 0) {
601
- warnings.push(`Homeostatic demotion: demoted retrievalSalience for ${demotionResult.demoted} stale asset(s) before merge pool assembly.`);
602
- }
603
- warnings.push(...demotionResult.warnings);
604
- }
606
+ // (The former WS-3b Step 0a homeostatic demotion pass was removed — R4:
607
+ // it was default-off and self-undoing (the next salience recompute
608
+ // unconditionally overwrote the demoted values). Continuous decay now lives
609
+ // in computeSalience's recency term, whose floor decays on a long half-life.)
605
610
  // ── WS-3b Step 0c: Filter hot-probation assets from LLM merge pool ─────────
606
611
  // Hot-probation assets (system-generated, not yet graduated from intake pass)
607
612
  // are processed by the dedup pre-pass but excluded from the LLM clustering.
@@ -842,11 +847,11 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
842
847
  // A small fraction (default 5%) of the pool is shuffled into random positions
843
848
  // so the pipeline isn't PURELY similarity-driven. This prevents rich-get-richer
844
849
  // entrenchment where only the most-retrieved assets ever get consolidated.
845
- // DEFAULT OFF — gated on antiCollapse.enabled.
850
+ // DEFAULT ON since R5 — opt out via antiCollapse.enabled: false.
846
851
  let finalClusteredMemories = clusteredMemories;
847
852
  {
848
853
  const antiCollapseForCluster = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ?? {};
849
- if (antiCollapseForCluster.enabled && clusteredMemories.length > 2) {
854
+ if (antiCollapseForCluster.enabled !== false && clusteredMemories.length > 2) {
850
855
  const fraction = antiCollapseForCluster.randomClusterFraction ?? 0.05;
851
856
  const randomCount = Math.max(1, Math.floor(clusteredMemories.length * fraction));
852
857
  // Pick `randomCount` positions to inject random (un-clustered) members.
@@ -1296,6 +1301,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1296
1301
  let deleted = 0;
1297
1302
  const promoted = [];
1298
1303
  let contradicted = 0; // C-3 / #382: count of contradiction edges written
1304
+ let mergeFloorViolations = 0; // R5 §4.2: advisory merge-information-floor failures
1299
1305
  // Within-run dedup: track source refs for which a promote proposal was
1300
1306
  // already created this run. The LLM can return multiple promote ops for
1301
1307
  // different source memories that happen to have identical content (all are
@@ -1453,27 +1459,32 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1453
1459
  continue;
1454
1460
  }
1455
1461
  // WS-3b: Anti-collapse generation guard (step 8a).
1456
- // DEFAULT OFF. When antiCollapse.enabled, refuse to merge two assets both
1457
- // above generation N (default 2). This prevents the pipeline from
1458
- // building ever-deeper LLM-merged trees that lose the source fidelity
1459
- // of the original episodes.
1462
+ // DEFAULT ON since R5 (opt out via antiCollapse.enabled: false). Refuses
1463
+ // to merge two assets both above generation N (default 2) — prevents the
1464
+ // pipeline from building ever-deeper LLM-merged trees that lose the
1465
+ // source fidelity of the original episodes.
1460
1466
  const antiCollapseConfig = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ??
1461
1467
  {};
1462
- if (antiCollapseConfig.enabled) {
1468
+ if (antiCollapseConfig.enabled !== false) {
1463
1469
  const allParticipants = [op.primary, ...op.secondaries];
1464
- const sourceGenerations = allParticipants.map((ref) => {
1470
+ // One read per participant: generation counter, stripped body (for the
1471
+ // information floor), and existing source_refs (for the provenance union).
1472
+ const participantInfo = allParticipants.map((ref) => {
1465
1473
  const e = memoryByRef.get(ref);
1466
1474
  if (!e)
1467
- return 0;
1475
+ return { ref, generation: 0, body: "", sourceRefs: [] };
1468
1476
  try {
1469
1477
  const raw = fs.readFileSync(e.filePath, "utf8");
1470
1478
  const parsed = parseFrontmatter(raw);
1471
- return readAssetGeneration(parsed.data);
1479
+ const fm = parsed.data;
1480
+ const sourceRefs = Array.isArray(fm.source_refs) ? fm.source_refs.map(String) : [];
1481
+ return { ref, generation: readAssetGeneration(fm), body: stripFrontmatterBody(raw), sourceRefs };
1472
1482
  }
1473
1483
  catch {
1474
- return 0;
1484
+ return { ref, generation: 0, body: "", sourceRefs: [] };
1475
1485
  }
1476
1486
  });
1487
+ const sourceGenerations = participantInfo.map((p) => p.generation);
1477
1488
  const generationCheck = checkGenerationGuard(sourceGenerations, antiCollapseConfig);
1478
1489
  if (generationCheck.refused) {
1479
1490
  warnings.push(`Merge: ${generationCheck.reason}`);
@@ -1483,20 +1494,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1483
1494
  // WS-3b: Lexical diversity check (step 8b).
1484
1495
  // Low n-gram diversity ⇒ likely correlated-extraction artifact; raise merge threshold.
1485
1496
  if (antiCollapseConfig.lexicalDiversityCheck !== false) {
1486
- const bodies = allParticipants
1487
- .map((ref) => {
1488
- const e = memoryByRef.get(ref);
1489
- if (!e)
1490
- return "";
1491
- try {
1492
- const raw = fs.readFileSync(e.filePath, "utf8");
1493
- return stripFrontmatterBody(raw);
1494
- }
1495
- catch {
1496
- return "";
1497
- }
1498
- })
1499
- .filter((b) => b.length > 0);
1497
+ const bodies = participantInfo.map((p) => p.body).filter((b) => b.length > 0);
1500
1498
  const diversityCheck = checkLexicalDiversity(bodies, antiCollapseConfig);
1501
1499
  if (diversityCheck.lowDiversity) {
1502
1500
  // Low-diversity cluster: just warn (don't refuse merge since the dedup
@@ -1505,8 +1503,27 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1505
1503
  }
1506
1504
  }
1507
1505
  // Inject generation counter into merged content frontmatter (step 8a).
1508
- // merged.generation = max(sourceGenerations) + 1.
1509
- mergedContent = injectGenerationFrontmatter(mergedContent, sourceGenerations, allParticipants);
1506
+ // merged.generation = max(sourceGenerations) + 1. source_refs is the
1507
+ // UNION of participants + everything they already cited (R5 §4.2 —
1508
+ // the old set-if-absent behavior dropped second-generation provenance).
1509
+ const provenanceUnion = [...new Set([...allParticipants, ...participantInfo.flatMap((p) => p.sourceRefs)])];
1510
+ mergedContent = injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceUnion);
1511
+ // R5 §4.2: merge-information floor — ADVISORY in v1. A merge that
1512
+ // shrinks provenance or genericizes below the retention floor is
1513
+ // counted + warned, never refused (promotion path: design doc §7).
1514
+ try {
1515
+ const mergedParsed = parseFrontmatter(mergedContent);
1516
+ const mergedFm = mergedParsed.data;
1517
+ const mergedSourceRefs = Array.isArray(mergedFm.source_refs) ? mergedFm.source_refs.map(String) : [];
1518
+ const floorCheck = checkMergeInformationFloor(mergedParsed.content, mergedSourceRefs, participantInfo, antiCollapseConfig);
1519
+ if (!floorCheck.passed) {
1520
+ mergeFloorViolations++;
1521
+ warnings.push(`Merge: information floor advisory for ${op.primary}: ${floorCheck.reason ?? "unspecified"} — merge proceeds (v1 observe-only).`);
1522
+ }
1523
+ }
1524
+ catch {
1525
+ // Floor measurement is best-effort; never blocks the merge path.
1526
+ }
1510
1527
  }
1511
1528
  // Backup secondaries before deleting
1512
1529
  for (const secRef of op.secondaries) {
@@ -1876,6 +1893,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
1876
1893
  deleted: deleted + dedupCollapsed,
1877
1894
  promoted,
1878
1895
  contradicted,
1896
+ mergeFloorViolations,
1879
1897
  failedChunks: totalChunksFailed,
1880
1898
  totalChunks: chunks.length,
1881
1899
  judgedNoAction,