akm-cli 0.9.0-beta.52 → 0.9.0-beta.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/hints/cli-hints-full.md +6 -5
- package/dist/cli.js +0 -7
- package/dist/commands/env/env-cli.js +3 -2
- package/dist/commands/env/env.js +14 -67
- package/dist/commands/health/checks.js +28 -15
- package/dist/commands/health.js +68 -1
- package/dist/commands/improve/collapse-detector.js +419 -0
- package/dist/commands/improve/consolidate.js +72 -54
- package/dist/commands/improve/distill.js +79 -13
- package/dist/commands/improve/extract.js +13 -6
- package/dist/commands/improve/homeostatic.js +109 -79
- package/dist/commands/improve/improve-cli.js +67 -1
- package/dist/commands/improve/improve.js +10 -0
- package/dist/commands/improve/loop-stages.js +39 -1
- package/dist/commands/improve/outcome-loop.js +15 -3
- package/dist/commands/improve/preparation.js +17 -8
- package/dist/commands/improve/salience.js +49 -32
- package/dist/commands/read/curate.js +5 -9
- package/dist/commands/read/knowledge.js +4 -0
- package/dist/commands/read/search.js +5 -2
- package/dist/commands/read/show.js +3 -3
- package/dist/core/asset/asset-spec.js +3 -2
- package/dist/core/config/config-schema.js +39 -17
- package/dist/core/eval/rank-metrics.js +113 -0
- package/dist/core/state/migrations.js +56 -0
- package/dist/core/state-db.js +146 -19
- package/dist/indexer/ensure-index.js +33 -90
- package/dist/indexer/index-writer-lock.js +0 -11
- package/dist/indexer/index-written-assets.js +105 -0
- package/dist/indexer/passes/metadata.js +20 -0
- package/dist/indexer/search/db-search.js +29 -1
- package/dist/indexer/search/ranking-contributors.js +33 -1
- package/dist/indexer/search/ranking.js +66 -0
- package/dist/indexer/search/search-fields.js +6 -0
- package/dist/llm/feature-gate.js +6 -2
- package/dist/output/renderers.js +8 -13
- package/dist/output/shapes/helpers.js +0 -3
- package/dist/output/shapes/passthrough.js +1 -0
- package/dist/scripts/migrate-storage.js +152 -33
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
- package/dist/storage/repositories/index-db.js +10 -1
- package/package.json +2 -4
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* R5 — Longitudinal collapse/churn detector
|
|
6
|
+
* (docs/design/improve-collapse-churn-detector-design.md).
|
|
7
|
+
*
|
|
8
|
+
* Detects the two measured failure modes of LLM-consolidated memory stores:
|
|
9
|
+
*
|
|
10
|
+
* COLLAPSE — repeated merges destroy information: canary retrieval recall
|
|
11
|
+
* downtrends, distinct-content entropy downtrends, or the store shrinks
|
|
12
|
+
* while generation counts rise.
|
|
13
|
+
* CHURN — real accepted-change volume with zero retrieval-visible or
|
|
14
|
+
* shape-visible movement (LLM budget burned rewriting to no effect).
|
|
15
|
+
*
|
|
16
|
+
* Hard invariants: deterministic only (FTS BM25 + hashing — never an LLM,
|
|
17
|
+
* never an embedding model); bounded storage (< 2 KB per qualifying cycle,
|
|
18
|
+
* 365-day retention); fail-open (an error warns and skips, never breaks an
|
|
19
|
+
* improve run); runs only on cycles where consolidate/recombine did work.
|
|
20
|
+
*
|
|
21
|
+
* Observe-only in v1: alerts land in `improve_cycle_metrics.alerts_json`, the
|
|
22
|
+
* events log (`collapse_detector_alert`), and the `akm health` advisory —
|
|
23
|
+
* nothing is ever blocked.
|
|
24
|
+
*
|
|
25
|
+
* @module collapse-detector
|
|
26
|
+
*/
|
|
27
|
+
import { randomBytes } from "node:crypto";
|
|
28
|
+
import { makeAssetRef } from "../../core/asset/asset-ref.js";
|
|
29
|
+
import { appendEvent } from "../../core/events.js";
|
|
30
|
+
import { deactivateCanarySet, getActiveCanaries, getCanariesBySetId, insertCanaries, insertCycleMetrics, listActiveCanarySetIds, queryRecentCycleMetrics, withStateDb, } from "../../core/state-db.js";
|
|
31
|
+
import { warn } from "../../core/warn.js";
|
|
32
|
+
import { closeDatabase, getAllEntries, openExistingDatabase, searchFts, } from "../../indexer/db/db.js";
|
|
33
|
+
import { computeBigramDiversity, DEFAULT_MAX_GENERATION } from "./homeostatic.js";
|
|
34
|
+
import { getAllRankScores } from "./salience.js";
|
|
35
|
+
// ── Defaults (mirrored in config-schema.ts ImproveCollapseDetectorSchema) ────

/** Number of canaries minted per set (owner-approved 30–50 range). */
export const DEFAULT_CANARY_COUNT = 40;

/** Result depth (the "k" in recall@k / nDCG@k) requested for each canary query. */
export const DEFAULT_CANARY_K = 10;

/** Number of prior qualifying cycles that form the trend window. */
export const DEFAULT_WINDOW_CYCLES = 5;

/** Recall drop below the window median that fires the collapse-recall alert. */
export const DEFAULT_RECALL_DROP_THRESHOLD = 0.15;

/** Total distinct-content-ratio decline over the window that fires the collapse-entropy alert. */
export const DEFAULT_ENTROPY_DROP_THRESHOLD = 0.05;

/** Minimum accepted actions summed over the window before churn can be alerted. */
export const DEFAULT_CHURN_MIN_ACCEPTED = 25;

/** Retention period, in days, for stored cycle metrics. */
export const DEFAULT_RETENTION_DAYS = 365;

/** Deterministic bigram-diversity sample cap (cost bound at 10k assets). */
const DIVERSITY_SAMPLE_CAP = 2000;

/**
 * Smallest per-cycle merge-floor violation count that raises the advisory.
 * The specificity floor is intentionally strict (Phase-1 tuning pending), so
 * one or two borderline merges in a cycle must not flip `akm health` to
 * warn — the resulting alert fatigue would bury the real collapse signals.
 */
const MERGE_FLOOR_ALERT_MIN = 3;

/** Asset types that make up the learning store measured by the detector. */
const LEARNING_TYPES = new Set(["memory", "lesson", "knowledge"]);
|
|
54
|
+
// ── Canary set ────────────────────────────────────────────────────────────────
|
|
55
|
+
/**
 * Derive the deterministic search query for one anchor entry.
 *
 * Tokens are collected in a fixed order — name fragments (split on `-`, `_`,
 * `/`, `.`; single-character fragments dropped), then the first three tags,
 * then the first six whitespace-separated words of the description — and
 * de-duplicated while keeping first-seen order.
 *
 * @param {{ entry: { name: string, tags?: string[], description?: string } }} entry
 *   Index row wrapping the asset entry.
 * @returns {string} Space-joined unique tokens; empty when nothing qualifies.
 */
function buildCanaryQuery(entry) {
    const { name, tags, description } = entry.entry;
    const unique = new Set();
    const keep = (token) => {
        if (token.length > 0) {
            unique.add(token);
        }
    };
    for (const fragment of name.split(/[-_/.]+/)) {
        if (fragment.length > 1) {
            keep(fragment);
        }
    }
    for (const tag of (tags ?? []).slice(0, 3)) {
        keep(tag);
    }
    for (const word of (description ?? "").split(/\s+/).slice(0, 6)) {
        keep(word);
    }
    return Array.from(unique).join(" ");
}
|
|
63
|
+
/**
 * Total ordering for mint candidates: higher score first, then ascending ref,
 * then ascending entry key. Returns 0 only for fully identical sort keys, so
 * the comparator is consistent (the previous inline comparator returned 1 for
 * two candidates sharing score AND ref, which makes the result depend on the
 * engine's sort internals and the input order).
 */
function compareMintCandidates(a, b) {
    const byScore = b.score - a.score;
    if (byScore) {
        // Non-zero, non-NaN difference decides; NaN falls through to the ref order.
        return byScore;
    }
    if (a.ref !== b.ref) {
        return a.ref < b.ref ? -1 : 1;
    }
    // Same bare ref: break the tie on the entry key so the surviving duplicate
    // is chosen deterministically. (Assumes rows carry `entryKey`, as the
    // cycle-metrics scan in this module reads it — missing keys compare as "".)
    const aKey = String(a.e.entryKey ?? "");
    const bKey = String(b.e.entryKey ?? "");
    if (aKey === bKey) {
        return 0;
    }
    return aKey < bKey ? -1 : 1;
}
/**
 * Build the mint candidate list (deterministic given index + salience tables).
 *
 * Learning-type entries are ranked by their rank score (0 when absent), a
 * type-stratified slice of roughly one third per learning type is taken, and
 * any remaining capacity is backfilled from the global ranking. Candidates
 * whose generated query is empty are dropped from the result.
 *
 * @param stateDb State database handle passed to `getAllRankScores`.
 * @param {Array} entries Index rows (`{ entry, entryKey }`).
 * @param {{ canaryCount?: number }} cfg Detector config; `canaryCount`
 *   defaults to `DEFAULT_CANARY_COUNT`.
 * @returns {Array<{ anchorRef: string, query: string }>} Canaries to mint.
 */
function buildMintList(stateDb, entries, cfg) {
    const canaryCount = cfg.canaryCount ?? DEFAULT_CANARY_COUNT;
    const rankScores = getAllRankScores(stateDb);
    // NOTE: entryKey is stash-prefixed ("<stashDir>:type:name"); asset_salience
    // and the canary scoring both key on the bare "type:name" ref.
    const candidates = entries
        .filter((e) => LEARNING_TYPES.has(e.entry.type))
        .map((e) => {
            const ref = makeAssetRef(e.entry.type, e.entry.name);
            return { e, ref, score: rankScores.get(ref) ?? 0 };
        })
        .sort(compareMintCandidates);
    // Type-stratified top slice: ⅓ per learning type, backfill from global order.
    const perType = Math.ceil(canaryCount / LEARNING_TYPES.size);
    const picked = new Map();
    for (const type of LEARNING_TYPES) {
        let taken = 0;
        for (const c of candidates) {
            if (taken >= perType || picked.size >= canaryCount) {
                break;
            }
            if (c.e.entry.type === type && !picked.has(c.ref)) {
                picked.set(c.ref, c);
                taken++;
            }
        }
    }
    // Backfill: a type that was short leaves room for the best remaining refs.
    for (const c of candidates) {
        if (picked.size >= canaryCount) {
            break;
        }
        if (!picked.has(c.ref)) {
            picked.set(c.ref, c);
        }
    }
    return [...picked.values()]
        .map((c) => ({ anchorRef: c.ref, query: buildCanaryQuery(c.e) }))
        .filter((c) => c.query.length > 0);
}
|
|
100
|
+
/**
 * Generate a fresh canary-set identifier of the form
 * `canary-<base36 millisecond timestamp>-<4 hex chars>`. The random suffix
 * keeps ids apart when several sets are minted within the same millisecond
 * (tests and concurrent runs).
 *
 * @returns {string} New canary set id.
 */
function newCanarySetId() {
    const stamp = Date.now().toString(36);
    const nonce = randomBytes(2).toString("hex");
    return ["canary", stamp, nonce].join("-");
}
|
|
104
|
+
/**
 * Return the active canary set, minting one first if none exists.
 *
 * When active canaries are already stored they are returned untouched — this
 * function never re-mints over an existing set, because silently
 * re-baselining is how a slow collapse would hide; the explicit refresh path
 * is `refreshCanarySet`. Otherwise a mint list is built from the supplied
 * (or freshly loaded) index entries, stored under a new set id, and read back
 * BY THAT id rather than "newest active", so a concurrent mint in another
 * process cannot relabel this run's metrics.
 *
 * @param stateDb State database handle.
 * @param indexDb Index database handle; only read when `preloadedEntries` is absent.
 * @param cfg Collapse-detector config forwarded to the mint-list builder.
 * @param {Array} [preloadedEntries] Already-scanned index rows to reuse.
 * @returns {{ canarySetId: string, canaries: Array } | null} The set, or
 *   `null` when nothing is mintable (such a cycle must not be recorded).
 */
export function ensureCanarySet(stateDb, indexDb, cfg, preloadedEntries) {
    const active = getActiveCanaries(stateDb);
    if (active.length > 0) {
        // Frozen baseline: hand back exactly what is stored.
        return { canarySetId: active[0].canary_set_id, canaries: active };
    }
    const sourceEntries = preloadedEntries ?? getAllEntries(indexDb);
    const mintList = buildMintList(stateDb, sourceEntries, cfg);
    if (mintList.length === 0) {
        return null;
    }
    const canarySetId = newCanarySetId();
    insertCanaries(stateDb, canarySetId, mintList);
    const canaries = getCanariesBySetId(stateDb, canarySetId);
    return { canarySetId, canaries };
}
|
|
131
|
+
/**
 * Explicitly re-mint the canary set (the refresh path behind
 * `akm improve canary --refresh`).
 *
 * Ordering is mint-first, deactivate-after: if nothing is mintable the
 * function returns before touching the existing baseline. After a successful
 * mint, EVERY other active set id is deactivated — not only the newest — so
 * leftovers from an interrupted refresh cannot come back.
 *
 * @param stateDb State database handle.
 * @param indexDb Index database handle the entries are loaded from.
 * @param cfg Collapse-detector config forwarded to the mint-list builder.
 * @returns {{ canarySetId: string, canaries: Array } | null} The new set, or
 *   `null` when nothing was mintable and the old baseline was kept.
 */
export function refreshCanarySet(stateDb, indexDb, cfg) {
    const mintList = buildMintList(stateDb, getAllEntries(indexDb), cfg);
    if (mintList.length === 0) {
        // Nothing mintable — keep the old baseline.
        return null;
    }
    const canarySetId = newCanarySetId();
    insertCanaries(stateDb, canarySetId, mintList);
    for (const activeId of listActiveCanarySetIds(stateDb)) {
        if (activeId === canarySetId) {
            continue;
        }
        deactivateCanarySet(stateDb, activeId);
    }
    const canaries = getCanariesBySetId(stateDb, canarySetId);
    return { canarySetId, canaries };
}
|
|
150
|
+
// ── Cycle metrics ─────────────────────────────────────────────────────────────
|
|
151
|
+
/**
 * Build the name-free text used for the entropy metrics.
 *
 * The entry name is deliberately left out: the indexed search_text embeds the
 * (unique) name, which would hold the distinct-content ratio at 1.0 forever.
 * Convergence shows up in description, tags and TOC headings, so only those
 * are joined here. (The raw body is not in the index — v1 entropy is measured
 * over the searchable surface.)
 *
 * @param {{ description?: string, tags?: string[], toc?: Array<{ text: string }> }} entry
 * @returns {string} Non-empty segments joined by single spaces.
 */
function contentFingerprint(entry) {
    const description = entry.description ?? "";
    const tagText = (entry.tags ?? []).join(" ");
    const headingText = (entry.toc ?? []).map((heading) => heading.text).join(" ");
    const segments = [];
    for (const segment of [description, tagText, headingText]) {
        if (segment.length > 0) {
            segments.push(segment);
        }
    }
    return segments.join(" ");
}
|
|
164
|
+
/**
 * FNV-1a 64-bit hash of normalized text, used for distinct-content hashing.
 *
 * Normalization lowercases the input, collapses every whitespace run to one
 * space and trims the ends. The hash is folded over UTF-16 code units (not
 * UTF-8 bytes) and truncated to 64 bits after every multiply.
 *
 * @param {string} text Text to fingerprint.
 * @returns {string} Lowercase hexadecimal digest without zero padding.
 */
export function normHash(text) {
    const FNV_OFFSET_BASIS = 0xcbf29ce484222325n;
    const FNV_PRIME = 0x100000001b3n;
    const MASK_64 = 0xffffffffffffffffn;
    const canonical = text.toLowerCase().replace(/\s+/g, " ").trim();
    let digest = FNV_OFFSET_BASIS;
    // split("") yields UTF-16 code units, matching an index-based charCodeAt walk.
    for (const unit of canonical.split("")) {
        digest = ((digest ^ BigInt(unit.charCodeAt(0))) * FNV_PRIME) & MASK_64;
    }
    return digest.toString(16);
}
|
|
175
|
+
/**
 * Score a single canary against the live index.
 *
 * Runs the canary's query through FTS and walks at most `k` results. A result
 * counts as a hit when it IS the anchor ref, or when its `sourceRefs` list
 * contains the anchor (merge-following, ONE level only — provenance lost on a
 * second-generation merge is a miss by design; that is the information loss
 * being measured).
 *
 * @param indexDb Index database handle passed to `searchFts`.
 * @param {{ query: string, anchor_ref: string }} canary Stored canary row.
 * @param {number} k Maximum number of results to inspect.
 * @returns {number} 0-based rank of the first hit, or -1 when there is none.
 */
function scoreCanary(indexDb, canary, k) {
    const hits = searchFts(indexDb, canary.query, k);
    const limit = Math.min(hits.length, k);
    let position = 0;
    while (position < limit) {
        const { entry } = hits[position];
        const isAnchor = makeAssetRef(entry.type, entry.name) === canary.anchor_ref;
        if (isAnchor || entry.sourceRefs?.includes(canary.anchor_ref)) {
            return position;
        }
        position += 1;
    }
    return -1;
}
|
|
194
|
+
/**
 * Compute the store-health snapshot row for one qualifying cycle.
 *
 * Performs a single `getAllEntries` scan (shared by the canary mint, when one
 * is needed, and by the store-shape metrics) plus one FTS query per canary.
 * No LLM, embedding model or filesystem read is involved here.
 *
 * @param stateDb State database handle.
 * @param indexDb Index database handle.
 * @param {object} args
 * @param args.cfg Collapse-detector config (`k` defaults to `DEFAULT_CANARY_K`).
 * @param [args.maxGeneration] Generation ceiling; defaults to `DEFAULT_MAX_GENERATION`.
 * @param args.runId Identifier of the improve run.
 * @param args.pass Pass label recorded on the row.
 * @param args.acceptedActions Accepted-action count recorded on the row.
 * @param args.mergeFloorViolations Merge-floor violation count recorded on the row.
 * @param {Date} [args.now] Timestamp override; defaults to the current time.
 * @returns {object | null} The metrics row (with `alerts_json` initialised to
 *   "[]"), or `null` when no canary set exists and none is mintable — such a
 *   cycle is not measurable and must not be recorded.
 */
export function computeCycleMetrics(stateDb, indexDb, args) {
    const topK = args.cfg.k ?? DEFAULT_CANARY_K;
    const generationCeiling = args.maxGeneration ?? DEFAULT_MAX_GENERATION;
    // One scan of the index feeds both the (optional) mint and the shape metrics.
    const indexEntries = getAllEntries(indexDb);
    const canarySet = ensureCanarySet(stateDb, indexDb, args.cfg, indexEntries);
    if (canarySet === null) {
        return null;
    }
    const { canarySetId, canaries } = canarySet;

    // ── Canary retrieval metrics ───────────────────────────────────────────────
    const canaryRanks = [];
    const totals = { recall: 0, ndcg: 0, mrr: 0 };
    for (const canary of canaries) {
        const hitRank = scoreCanary(indexDb, canary, topK);
        canaryRanks.push([canary.id, hitRank]);
        if (!(hitRank >= 0)) {
            continue;
        }
        totals.recall += 1;
        totals.mrr += 1 / (hitRank + 1);
        // With a single relevant document the ideal DCG is 1, so nDCG@k reduces
        // to the discount at the hit position.
        totals.ndcg += 1 / Math.log2(hitRank + 2);
    }
    // Guard the averages against an empty canary list.
    const denominator = Math.max(1, canaries.length);

    // ── Store-shape metrics (same single entries scan) ────────────────────────
    const typeCounts = new Map();
    const distinctHashes = new Set();
    const samples = [];
    let learningTotal = 0;
    let overGeneration = 0;
    for (const row of indexEntries) {
        const { type } = row.entry;
        typeCounts.set(type, (typeCounts.get(type) ?? 0) + 1);
        if (LEARNING_TYPES.has(type)) {
            learningTotal += 1;
            const fingerprint = contentFingerprint(row.entry);
            distinctHashes.add(normHash(fingerprint));
            if ((row.entry.generation ?? 0) > generationCeiling) {
                overGeneration += 1;
            }
            samples.push({ key: row.entryKey, text: fingerprint });
        }
    }

    // Deterministic diversity sample: order by entryKey, keep every ⌈N/cap⌉-th row.
    samples.sort((a, b) => (a.key < b.key ? -1 : 1));
    const stride = Math.max(1, Math.ceil(samples.length / DIVERSITY_SAMPLE_CAP));
    const sampled = samples.filter((_, idx) => idx % stride === 0);
    const diversitySum = sampled.reduce((sum, sample) => sum + computeBigramDiversity(sample.text), 0);
    const diversityCount = sampled.length;

    return {
        run_id: args.runId,
        ts: (args.now ?? new Date()).toISOString(),
        pass: args.pass,
        canary_set_id: canarySetId,
        mean_recall: totals.recall / denominator,
        mean_ndcg: totals.ndcg / denominator,
        mean_mrr: totals.mrr / denominator,
        canary_ranks_json: JSON.stringify(canaryRanks),
        store_total: learningTotal,
        store_by_type_json: JSON.stringify(Object.fromEntries([...typeCounts.entries()].sort())),
        distinct_content_ratio: learningTotal === 0 ? 1 : distinctHashes.size / learningTotal,
        mean_bigram_diversity: diversityCount === 0 ? 1 : diversitySum / diversityCount,
        over_generation_count: overGeneration,
        accepted_actions: args.acceptedActions,
        merge_floor_violations: args.mergeFloorViolations,
        alerts_json: "[]",
    };
}
|
|
272
|
+
// ── Alert evaluation (pure) ───────────────────────────────────────────────────
|
|
273
|
+
/**
 * Median of a list of numbers. The input is copied before sorting, so the
 * caller's array is left untouched. For an even count the two middle values
 * are averaged; an empty list yields NaN.
 *
 * @param {number[]} values Numbers to summarise.
 * @returns {number} The median value.
 */
function median(values) {
    const ordered = Array.from(values);
    ordered.sort((lo, hi) => lo - hi);
    const half = Math.floor(ordered.length / 2);
    if (ordered.length % 2 !== 0) {
        return ordered[half];
    }
    return (ordered[half - 1] + ordered[half]) / 2;
}
|
|
278
|
+
/**
 * Evaluate the §1 alert definitions. PURE — history rows (oldest-first, NOT
 * including `current`) plus the current row in, alerts out.
 *
 * The merge-floor advisory is per-cycle and fires regardless of window depth.
 * Every other alert needs a full baseline: when fewer than `windowCycles`
 * history rows are available — or the configured window is not a positive
 * number — only the merge-floor advisory (if any) is returned.
 *
 * @param {Array<object>} history Prior cycle-metric rows, oldest first.
 * @param {object} current The cycle-metric row being evaluated.
 * @param {object} cfg Detector config; `windowCycles`, `recallDropThreshold`,
 *   `entropyDropThreshold` and `churnMinAcceptedActions` fall back to the
 *   module defaults when absent.
 * @returns {Array<{ kind: string, detail: string, metrics: object }>} Fired alerts.
 */
export function evaluateCollapseAlerts(history, current, cfg) {
    const alerts = [];
    // MERGE-FLOOR advisory: per-cycle, window-independent. Gated on a minimum
    // count — the specificity floor is deliberately strict pre-tuning, and one
    // or two borderline merges per cycle must not generate alert fatigue.
    if (current.merge_floor_violations >= MERGE_FLOOR_ALERT_MIN) {
        alerts.push({
            kind: "merge-floor",
            detail: `${current.merge_floor_violations} merge(s) failed the information floor this cycle (provenance shrank or specificity below threshold)`,
            metrics: { mergeFloorViolations: current.merge_floor_violations },
        });
    }
    const W = cfg.windowCycles ?? DEFAULT_WINDOW_CYCLES;
    // A zero, negative or NaN window cannot form a baseline. It must be
    // rejected explicitly: `slice(-0)` / `slice(NaN)` return the WHOLE array,
    // which would slip past the length check below and, on an empty history,
    // dereference `hist[0]` of an empty array.
    if (!(W >= 1)) {
        return alerts;
    }
    const hist = history.slice(-W);
    if (hist.length < W) {
        return alerts; // no baseline yet
    }
    const recallDrop = cfg.recallDropThreshold ?? DEFAULT_RECALL_DROP_THRESHOLD;
    const entropyDrop = cfg.entropyDropThreshold ?? DEFAULT_ENTROPY_DROP_THRESHOLD;
    const churnMin = cfg.churnMinAcceptedActions ?? DEFAULT_CHURN_MIN_ACCEPTED;
    // COLLAPSE 1 — canary recall drop vs window median (median, not previous
    // cycle, so one noisy cycle can neither fire nor mask the alert).
    const medianRecall = median(hist.map((h) => h.mean_recall));
    if (current.mean_recall <= medianRecall - recallDrop) {
        alerts.push({
            kind: "collapse-recall",
            detail: `mean canary recall ${current.mean_recall.toFixed(3)} dropped ≥${recallDrop} below the ${W}-cycle median ${medianRecall.toFixed(3)}`,
            metrics: { currentRecall: current.mean_recall, medianRecall, threshold: recallDrop },
        });
    }
    // COLLAPSE 2 — monotonic distinct-content-ratio decline over the window.
    const series = [...hist.map((h) => h.distinct_content_ratio), current.distinct_content_ratio];
    const monotonicNonIncreasing = series.every((v, i) => i === 0 || v <= series[i - 1]);
    const totalDecline = hist[0].distinct_content_ratio - current.distinct_content_ratio;
    if (monotonicNonIncreasing && totalDecline >= entropyDrop) {
        alerts.push({
            kind: "collapse-entropy",
            detail: `distinct-content ratio declined monotonically by ${totalDecline.toFixed(3)} (≥${entropyDrop}) over ${W} cycles — store content is converging`,
            metrics: {
                windowStart: hist[0].distinct_content_ratio,
                current: current.distinct_content_ratio,
                decline: totalDecline,
            },
        });
    }
    // COLLAPSE 3 — store shrinking BECAUSE of re-merging (not deletion hygiene):
    // the shrink only counts when the over-generation count rose as well.
    const maxStore = Math.max(...hist.map((h) => h.store_total));
    if (current.store_total < 0.8 * maxStore && current.over_generation_count > hist[0].over_generation_count) {
        alerts.push({
            kind: "collapse-shrink",
            detail: `store shrank >20% (${current.store_total} vs window max ${maxStore}) while over-generation count rose (${hist[0].over_generation_count} → ${current.over_generation_count})`,
            metrics: {
                storeTotal: current.store_total,
                windowMax: maxStore,
                overGeneration: current.over_generation_count,
            },
        });
    }
    // CHURN — real write volume, zero retrieval- or shape-visible movement.
    // Flatness is measured against the window MEDIAN (consistent with the
    // recall rule): endpoint-only comparison would call a window that swung
    // wildly but happened to land near its start "flat".
    const acceptedSum = hist.reduce((a, h) => a + h.accepted_actions, 0);
    const scoreFlat = Math.abs(current.mean_ndcg - median(hist.map((h) => h.mean_ndcg))) < 0.02;
    const entropyFlat = Math.abs(current.distinct_content_ratio - median(hist.map((h) => h.distinct_content_ratio))) < 0.02;
    if (acceptedSum >= churnMin && scoreFlat && entropyFlat) {
        alerts.push({
            kind: "churn",
            detail: `${acceptedSum} accepted actions over ${W} cycles with flat canary score and flat entropy — write volume with no retrieval-visible effect`,
            // NOTE: ndcgDelta is reported against the window START, whereas the
            // flatness test above uses the window median.
            metrics: { acceptedSum, ndcgDelta: current.mean_ndcg - hist[0].mean_ndcg },
        });
    }
    return alerts;
}
|
|
357
|
+
// ── Orchestrator ─────────────────────────────────────────────────────────────
|
|
358
|
+
/**
 * Run the detector for one qualifying cycle: ensure canaries → compute →
 * evaluate against stored history → persist the row → append one
 * `collapse_detector_alert` event per fired alert.
 *
 * FAIL-OPEN: any error — including a missing or malformed `args.config` —
 * is reported through `warn` and the function returns undefined, so an
 * improve run is never broken by its own instrumentation.
 *
 * @param {object} args
 * @param args.config Resolved configuration; `improve.collapseDetector` holds
 *   the detector settings and `enabled: false` turns the detector off.
 * @param args.indexDbPath Path of the index database to open.
 * @param args.runId Identifier of the improve run.
 * @param args.pass Pass label recorded on the metrics row.
 * @param args.acceptedActions Accepted-action count for this cycle.
 * @param args.mergeFloorViolations Merge-floor violation count for this cycle.
 * @param [args.eventsCtx] Events context; its `dbPath` / `db` are forwarded to
 *   `withStateDb` and the context itself to `appendEvent`.
 * @returns The persisted metrics row, or undefined when the detector is
 *   disabled, the cycle is not measurable, or an error was swallowed.
 */
export function runCollapseDetector(args) {
    try {
        // Resolved inside the try so a bad config object is also fail-open.
        const cfg = args.config.improve?.collapseDetector ?? {};
        if (cfg.enabled === false) {
            return undefined;
        }
        let indexDb;
        try {
            indexDb = openExistingDatabase(args.indexDbPath);
            const db = indexDb;
            // Over-generation threshold mirrors the guard actually in effect —
            // reading the same config key keeps the two aligned when tuned.
            const antiCollapse = args.config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse;
            const maxGeneration = antiCollapse?.maxGeneration ?? DEFAULT_MAX_GENERATION;
            return withStateDb((stateDb) => {
                const row = computeCycleMetrics(stateDb, db, {
                    runId: args.runId,
                    pass: args.pass,
                    acceptedActions: args.acceptedActions,
                    mergeFloorViolations: args.mergeFloorViolations,
                    cfg,
                    maxGeneration,
                });
                if (row === null) {
                    return undefined; // empty index — nothing to measure
                }
                const windowCycles = cfg.windowCycles ?? DEFAULT_WINDOW_CYCLES;
                // History is fetched BEFORE the current row is inserted, so it
                // never contains the row being evaluated.
                const history = queryRecentCycleMetrics(stateDb, row.canary_set_id, windowCycles);
                const alerts = evaluateCollapseAlerts(history, row, cfg);
                row.alerts_json = JSON.stringify(alerts.map((a) => a.kind));
                insertCycleMetrics(stateDb, row);
                for (const alert of alerts) {
                    appendEvent({
                        eventType: "collapse_detector_alert",
                        ref: undefined,
                        metadata: {
                            kind: alert.kind,
                            detail: alert.detail,
                            metrics: alert.metrics,
                            canarySetId: row.canary_set_id,
                            runId: args.runId,
                        },
                    }, args.eventsCtx);
                }
                return row;
            }, { path: args.eventsCtx?.dbPath, borrowed: args.eventsCtx?.db });
        }
        finally {
            // Only close what was actually opened (the open itself may have thrown).
            if (indexDb) {
                closeDatabase(indexDb);
            }
        }
    }
    catch (err) {
        warn(`[collapse-detector] skipped (fail-open): ${err instanceof Error ? err.message : String(err)}`);
        return undefined;
    }
}
|
|
@@ -19,7 +19,7 @@ import { detectTruncatedDescription } from "../../core/text-truncation.js";
|
|
|
19
19
|
import { hasSupersededStatus, MERGE_ABSOLUTE_FLOOR_CHARS, MERGE_SHRINK_RATIO_MIN, validateProposalFrontmatter, } from "../proposal/validators/proposal-quality-validators.js";
|
|
20
20
|
import { createProposal, isProposalSkipped, listProposals } from "../proposal/validators/proposals.js";
|
|
21
21
|
import { cacheHash, runDeterministicDedup, stripFrontmatterBody } from "./dedup.js";
|
|
22
|
-
import { checkGenerationGuard, checkLexicalDiversity, computeMergedGeneration, readAssetGeneration,
|
|
22
|
+
import { checkGenerationGuard, checkLexicalDiversity, checkMergeInformationFloor, computeMergedGeneration, readAssetGeneration, shouldSkipHotProbationInLlm, } from "./homeostatic.js";
|
|
23
23
|
import { writeContradictEdge } from "./memory/memory-belief.js";
|
|
24
24
|
// Re-export the moved helpers so existing test imports continue to resolve.
|
|
25
25
|
export { hasSupersededStatus, validateProposalFrontmatter };
|
|
@@ -410,20 +410,24 @@ function backupFile(filePath, backupDir, name) {
|
|
|
410
410
|
}
|
|
411
411
|
// ── WS-3b: Generation frontmatter injection ───────────────────────────────────
|
|
412
412
|
/**
|
|
413
|
-
* Inject `generation`
|
|
413
|
+
* Inject `generation` and `source_refs` into merged content.
|
|
414
414
|
* generation = max(sourceGenerations) + 1.
|
|
415
|
+
* source_refs = UNION of the provided provenance refs (participants + their
|
|
416
|
+
* cited sources) with anything already present in the merged frontmatter —
|
|
417
|
+
* R5 §4.2: the old set-if-absent behavior dropped second-generation
|
|
418
|
+
* provenance whenever the LLM emitted its own (partial) source_refs.
|
|
415
419
|
* Fails open — returns original content if frontmatter can't be parsed.
|
|
416
420
|
*/
|
|
417
|
-
function injectGenerationFrontmatter(mergedContent, sourceGenerations,
|
|
421
|
+
function injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceRefs) {
|
|
418
422
|
try {
|
|
419
423
|
const parsed = parseFrontmatter(mergedContent);
|
|
424
|
+
const existingFm = parsed.data;
|
|
425
|
+
const existingRefs = Array.isArray(existingFm.source_refs) ? existingFm.source_refs.map(String) : [];
|
|
420
426
|
const updatedFm = {
|
|
421
|
-
...
|
|
427
|
+
...existingFm,
|
|
422
428
|
generation: computeMergedGeneration(sourceGenerations),
|
|
429
|
+
source_refs: [...new Set([...existingRefs, ...provenanceRefs])],
|
|
423
430
|
};
|
|
424
|
-
if (!updatedFm.source_refs) {
|
|
425
|
-
updatedFm.source_refs = allParticipants;
|
|
426
|
-
}
|
|
427
431
|
return assembleAssetFromString(serializeFrontmatter(updatedFm), parsed.content);
|
|
428
432
|
}
|
|
429
433
|
catch {
|
|
@@ -513,17 +517,27 @@ function resolveConsolidateLlmConfig(config) {
|
|
|
513
517
|
// ── Judged-state cache (#581) ────────────────────────────────────────────────
|
|
514
518
|
/**
|
|
515
519
|
* Stable content hash for a memory file used by the judged-state cache (#581)
|
|
516
|
-
* and the body-embedding cache (WS-3a). Uses `cacheHash` from dedup.ts
|
|
517
|
-
* sha256 of the case-preserving stripped body
|
|
518
|
-
*
|
|
519
|
-
*
|
|
520
|
-
*
|
|
521
|
-
* open (treat the memory as un-cached → it stays in the LLM pool).
|
|
520
|
+
* and the body-embedding cache (WS-3a). Uses `cacheHash` from dedup.ts
|
|
521
|
+
* (sha256 of the case-preserving stripped body) plus the sorted `tags` list,
|
|
522
|
+
* so semantic-metadata drift re-enters the judge while cosmetic frontmatter
|
|
523
|
+
* touches (`updated:`, `inferenceProcessed:`) still hash identically and never
|
|
524
|
+
* force a needless re-judge. Returns `undefined` on any read/parse error so
|
|
525
|
+
* callers fail open (treat the memory as un-cached → it stays in the LLM pool).
|
|
522
526
|
*/
|
|
523
527
|
function computeMemoryContentHash(filePath) {
|
|
524
528
|
try {
|
|
525
529
|
const raw = fs.readFileSync(filePath, "utf8");
|
|
526
|
-
|
|
530
|
+
let tagSuffix = "";
|
|
531
|
+
try {
|
|
532
|
+
const { data } = parseFrontmatter(raw);
|
|
533
|
+
const tags = Array.isArray(data?.tags) ? data.tags.map(String).sort() : [];
|
|
534
|
+
if (tags.length > 0)
|
|
535
|
+
tagSuffix = `\n\u0000tags:${tags.join(",")}`;
|
|
536
|
+
}
|
|
537
|
+
catch {
|
|
538
|
+
// Unparseable frontmatter → body-only hash (prior behaviour).
|
|
539
|
+
}
|
|
540
|
+
return cacheHash(raw + tagSuffix);
|
|
527
541
|
}
|
|
528
542
|
catch {
|
|
529
543
|
return undefined;
|
|
@@ -589,19 +603,10 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
589
603
|
warnings.push(`Pre-flight: filtered ${staleCount} stale DB entr${staleCount === 1 ? "y" : "ies"} (file absent on disk) from memory pool before chunking.`);
|
|
590
604
|
}
|
|
591
605
|
memories = memories.filter((m) => fs.existsSync(m.filePath));
|
|
592
|
-
//
|
|
593
|
-
//
|
|
594
|
-
//
|
|
595
|
-
//
|
|
596
|
-
// re-retrieval. Only fires when `homeostaticDemotion.enabled === true`.
|
|
597
|
-
const homeostaticConfig = config.profiles?.improve?.default?.processes?.consolidate?.homeostaticDemotion ?? {};
|
|
598
|
-
if (homeostaticConfig.enabled && sharedStateDb) {
|
|
599
|
-
const demotionResult = runHomeostaticDemotion(sharedStateDb, homeostaticConfig);
|
|
600
|
-
if (demotionResult.demoted > 0) {
|
|
601
|
-
warnings.push(`Homeostatic demotion: demoted retrievalSalience for ${demotionResult.demoted} stale asset(s) before merge pool assembly.`);
|
|
602
|
-
}
|
|
603
|
-
warnings.push(...demotionResult.warnings);
|
|
604
|
-
}
|
|
606
|
+
// (The former WS-3b Step 0a homeostatic demotion pass was removed — R4:
|
|
607
|
+
// it was default-off and self-undoing (the next salience recompute
|
|
608
|
+
// unconditionally overwrote the demoted values). Continuous decay now lives
|
|
609
|
+
// in computeSalience's recency term, whose floor decays on a long half-life.)
|
|
605
610
|
// ── WS-3b Step 0c: Filter hot-probation assets from LLM merge pool ─────────
|
|
606
611
|
// Hot-probation assets (system-generated, not yet graduated from intake pass)
|
|
607
612
|
// are processed by the dedup pre-pass but excluded from the LLM clustering.
|
|
@@ -842,11 +847,11 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
842
847
|
// A small fraction (default 5%) of the pool is shuffled into random positions
|
|
843
848
|
// so the pipeline isn't PURELY similarity-driven. This prevents rich-get-richer
|
|
844
849
|
// entrenchment where only the most-retrieved assets ever get consolidated.
|
|
845
|
-
// DEFAULT OFF — opt in via antiCollapse.enabled: true.
|
|
850
|
+
// DEFAULT ON since R5 — opt out via antiCollapse.enabled: false.
|
|
846
851
|
let finalClusteredMemories = clusteredMemories;
|
|
847
852
|
{
|
|
848
853
|
const antiCollapseForCluster = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ?? {};
|
|
849
|
-
if (antiCollapseForCluster.enabled && clusteredMemories.length > 2) {
|
|
854
|
+
if (antiCollapseForCluster.enabled !== false && clusteredMemories.length > 2) {
|
|
850
855
|
const fraction = antiCollapseForCluster.randomClusterFraction ?? 0.05;
|
|
851
856
|
const randomCount = Math.max(1, Math.floor(clusteredMemories.length * fraction));
|
|
852
857
|
// Pick `randomCount` positions to inject random (un-clustered) members.
|
|
@@ -1296,6 +1301,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1296
1301
|
let deleted = 0;
|
|
1297
1302
|
const promoted = [];
|
|
1298
1303
|
let contradicted = 0; // C-3 / #382: count of contradiction edges written
|
|
1304
|
+
let mergeFloorViolations = 0; // R5 §4.2: advisory merge-information-floor failures
|
|
1299
1305
|
// Within-run dedup: track source refs for which a promote proposal was
|
|
1300
1306
|
// already created this run. The LLM can return multiple promote ops for
|
|
1301
1307
|
// different source memories that happen to have identical content (all are
|
|
@@ -1453,27 +1459,32 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1453
1459
|
continue;
|
|
1454
1460
|
}
|
|
1455
1461
|
// WS-3b: Anti-collapse generation guard (step 8a).
|
|
1456
|
-
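// DEFAULT OFF (opt in via antiCollapse.enabled: true). Refuses to merge
|
|
1457
|
-
// two assets both above generation N (default 2) — prevents the pipeline from
|
|
1458
|
-
// building ever-deeper LLM-merged trees that lose the source fidelity
|
|
1459
|
-
// of the original episodes.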
|
|
1462
|
+
// DEFAULT ON since R5 (opt out via antiCollapse.enabled: false). Refuses
|
|
1463
|
+
// to merge two assets both above generation N (default 2) — prevents the
|
|
1464
|
+
// pipeline from building ever-deeper LLM-merged trees that lose the
|
|
1465
|
+
// source fidelity of the original episodes.
|
|
1460
1466
|
const antiCollapseConfig = config.profiles?.improve?.default?.processes?.consolidate?.antiCollapse ??
|
|
1461
1467
|
{};
|
|
1462
|
-
if (antiCollapseConfig.enabled) {
|
|
1468
|
+
if (antiCollapseConfig.enabled !== false) {
|
|
1463
1469
|
const allParticipants = [op.primary, ...op.secondaries];
|
|
1464
|
-
|
|
1470
|
+
// One read per participant: generation counter, stripped body (for the
|
|
1471
|
+
// information floor), and existing source_refs (for the provenance union).
|
|
1472
|
+
const participantInfo = allParticipants.map((ref) => {
|
|
1465
1473
|
const e = memoryByRef.get(ref);
|
|
1466
1474
|
if (!e)
|
|
1467
|
-
return 0;
|
|
1475
|
+
return { ref, generation: 0, body: "", sourceRefs: [] };
|
|
1468
1476
|
try {
|
|
1469
1477
|
const raw = fs.readFileSync(e.filePath, "utf8");
|
|
1470
1478
|
const parsed = parseFrontmatter(raw);
|
|
1471
|
-
|
|
1479
|
+
const fm = parsed.data;
|
|
1480
|
+
const sourceRefs = Array.isArray(fm.source_refs) ? fm.source_refs.map(String) : [];
|
|
1481
|
+
return { ref, generation: readAssetGeneration(fm), body: stripFrontmatterBody(raw), sourceRefs };
|
|
1472
1482
|
}
|
|
1473
1483
|
catch {
|
|
1474
|
-
return 0;
|
|
1484
|
+
return { ref, generation: 0, body: "", sourceRefs: [] };
|
|
1475
1485
|
}
|
|
1476
1486
|
});
|
|
1487
|
+
const sourceGenerations = participantInfo.map((p) => p.generation);
|
|
1477
1488
|
const generationCheck = checkGenerationGuard(sourceGenerations, antiCollapseConfig);
|
|
1478
1489
|
if (generationCheck.refused) {
|
|
1479
1490
|
warnings.push(`Merge: ${generationCheck.reason}`);
|
|
@@ -1483,20 +1494,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1483
1494
|
// WS-3b: Lexical diversity check (step 8b).
|
|
1484
1495
|
// Low n-gram diversity ⇒ likely correlated-extraction artifact; raise merge threshold.
|
|
1485
1496
|
if (antiCollapseConfig.lexicalDiversityCheck !== false) {
|
|
1486
|
-
const bodies =
|
|
1487
|
-
.map((ref) => {
|
|
1488
|
-
const e = memoryByRef.get(ref);
|
|
1489
|
-
if (!e)
|
|
1490
|
-
return "";
|
|
1491
|
-
try {
|
|
1492
|
-
const raw = fs.readFileSync(e.filePath, "utf8");
|
|
1493
|
-
return stripFrontmatterBody(raw);
|
|
1494
|
-
}
|
|
1495
|
-
catch {
|
|
1496
|
-
return "";
|
|
1497
|
-
}
|
|
1498
|
-
})
|
|
1499
|
-
.filter((b) => b.length > 0);
|
|
1497
|
+
const bodies = participantInfo.map((p) => p.body).filter((b) => b.length > 0);
|
|
1500
1498
|
const diversityCheck = checkLexicalDiversity(bodies, antiCollapseConfig);
|
|
1501
1499
|
if (diversityCheck.lowDiversity) {
|
|
1502
1500
|
// Low-diversity cluster: just warn (don't refuse merge since the dedup
|
|
@@ -1505,8 +1503,27 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1505
1503
|
}
|
|
1506
1504
|
}
|
|
1507
1505
|
// Inject generation counter into merged content frontmatter (step 8a).
|
|
1508
|
-
// merged.generation = max(sourceGenerations) + 1.
|
|
1509
|
-
|
|
1506
|
+
// merged.generation = max(sourceGenerations) + 1. source_refs is the
|
|
1507
|
+
// UNION of participants + everything they already cited (R5 §4.2 —
|
|
1508
|
+
// the old set-if-absent behavior dropped second-generation provenance).
|
|
1509
|
+
const provenanceUnion = [...new Set([...allParticipants, ...participantInfo.flatMap((p) => p.sourceRefs)])];
|
|
1510
|
+
mergedContent = injectGenerationFrontmatter(mergedContent, sourceGenerations, provenanceUnion);
|
|
1511
|
+
// R5 §4.2: merge-information floor — ADVISORY in v1. A merge that
|
|
1512
|
+
// shrinks provenance or genericizes below the retention floor is
|
|
1513
|
+
// counted + warned, never refused (promotion path: design doc §7).
|
|
1514
|
+
try {
|
|
1515
|
+
const mergedParsed = parseFrontmatter(mergedContent);
|
|
1516
|
+
const mergedFm = mergedParsed.data;
|
|
1517
|
+
const mergedSourceRefs = Array.isArray(mergedFm.source_refs) ? mergedFm.source_refs.map(String) : [];
|
|
1518
|
+
const floorCheck = checkMergeInformationFloor(mergedParsed.content, mergedSourceRefs, participantInfo, antiCollapseConfig);
|
|
1519
|
+
if (!floorCheck.passed) {
|
|
1520
|
+
mergeFloorViolations++;
|
|
1521
|
+
warnings.push(`Merge: information floor advisory for ${op.primary}: ${floorCheck.reason ?? "unspecified"} — merge proceeds (v1 observe-only).`);
|
|
1522
|
+
}
|
|
1523
|
+
}
|
|
1524
|
+
catch {
|
|
1525
|
+
// Floor measurement is best-effort; never blocks the merge path.
|
|
1526
|
+
}
|
|
1510
1527
|
}
|
|
1511
1528
|
// Backup secondaries before deleting
|
|
1512
1529
|
for (const secRef of op.secondaries) {
|
|
@@ -1876,6 +1893,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1876
1893
|
deleted: deleted + dedupCollapsed,
|
|
1877
1894
|
promoted,
|
|
1878
1895
|
contradicted,
|
|
1896
|
+
mergeFloorViolations,
|
|
1879
1897
|
failedChunks: totalChunksFailed,
|
|
1880
1898
|
totalChunks: chunks.length,
|
|
1881
1899
|
judgedNoAction,
|