npm - akm-cli - Versions diffs - 0.9.0-beta.52 → 0.9.0-beta.54 - Mend

akm-cli 0.9.0-beta.52 → 0.9.0-beta.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

package/dist/assets/hints/cli-hints-full.md +6 -5
package/dist/cli/clack.js +56 -0
package/dist/cli/confirm.js +1 -1
package/dist/cli.js +0 -7
package/dist/commands/env/env-cli.js +3 -2
package/dist/commands/env/env.js +14 -67
package/dist/commands/health/checks.js +28 -15
package/dist/commands/health/html-report.js +33 -10
package/dist/commands/health.js +222 -22
package/dist/commands/improve/collapse-detector.js +419 -0
package/dist/commands/improve/consolidate.js +72 -54
package/dist/commands/improve/distill.js +79 -13
package/dist/commands/improve/extract.js +13 -6
package/dist/commands/improve/homeostatic.js +109 -79
package/dist/commands/improve/improve-cli.js +67 -1
package/dist/commands/improve/improve.js +10 -0
package/dist/commands/improve/loop-stages.js +39 -1
package/dist/commands/improve/outcome-loop.js +33 -19
package/dist/commands/improve/preparation.js +36 -11
package/dist/commands/improve/salience.js +49 -32
package/dist/commands/read/curate.js +9 -13
package/dist/commands/read/knowledge.js +4 -0
package/dist/commands/read/search-cli.js +6 -4
package/dist/commands/read/search.js +12 -5
package/dist/commands/read/show.js +6 -8
package/dist/commands/sources/add-cli.js +1 -1
package/dist/commands/sources/init.js +12 -0
package/dist/commands/sources/stash-cli.js +1 -1
package/dist/commands/tasks/default-tasks.js +12 -0
package/dist/core/asset/asset-spec.js +3 -2
package/dist/core/config/config-schema.js +39 -17
package/dist/core/config/config.js +12 -0
package/dist/core/eval/rank-metrics.js +113 -0
package/dist/core/state/migrations.js +56 -0
package/dist/core/state-db.js +146 -19
package/dist/core/warn.js +21 -0
package/dist/indexer/db/db.js +6 -0
package/dist/indexer/ensure-index.js +36 -92
package/dist/indexer/index-writer-lock.js +9 -11
package/dist/indexer/index-written-assets.js +105 -0
package/dist/indexer/indexer.js +16 -4
package/dist/indexer/passes/metadata.js +20 -0
package/dist/indexer/read-preflight.js +23 -0
package/dist/indexer/search/db-search.js +29 -1
package/dist/indexer/search/ranking-contributors.js +33 -1
package/dist/indexer/search/ranking.js +66 -0
package/dist/indexer/search/search-fields.js +6 -0
package/dist/indexer/walk/walker.js +21 -13
package/dist/integrations/agent/detect.js +9 -0
package/dist/integrations/agent/index.js +1 -1
package/dist/llm/client.js +12 -0
package/dist/llm/embedder.js +26 -2
package/dist/llm/embedders/local.js +7 -1
package/dist/llm/feature-gate.js +6 -2
package/dist/output/renderers.js +8 -13
package/dist/output/shapes/helpers.js +0 -3
package/dist/output/shapes/passthrough.js +1 -0
package/dist/scripts/migrate-storage.js +178 -35
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +46 -19
package/dist/setup/detect.js +9 -0
package/dist/setup/registry-stash-loader.js +12 -0
package/dist/setup/setup.js +1 -1
package/dist/storage/repositories/index-db.js +10 -1
package/dist/tasks/backends/index.js +9 -0
package/dist/tasks/runner.js +9 -0
package/package.json +2 -4

package/dist/commands/improve/distill.js CHANGED Viewed

@@ -581,6 +581,43 @@ function writeQualityRejection(stash, inputRef, lessonRef, content, score, reaso
         ...extraMeta,
     };
 }
+/**
+ * G4 — content-score a distilled OUTPUT (lesson/knowledge proposal body) and
+ * persist it to state.db :: asset_salience with `encoding_source: "content"`.
+ *
+ * Lessons are refused as distill INPUTS (`DISTILL_REFUSED_INPUT_TYPES`), so
+ * this creation-time write is their only chance to earn a real content-derived
+ * encoding score instead of sitting on the type-weight stub forever. Best-effort:
+ * never blocks or fails the proposal flow.
+ *
+ * Flow: parse `ref` to obtain the asset type, score `body` with
+ * `scoreEncodingSalience`, then inside `withStateDb` build a salience vector
+ * via `computeSalience` and store it with `upsertAssetSalience`. The whole
+ * sequence sits in a single try/catch, so any exception thrown along the way
+ * (ref parsing, scoring, or the DB callback) is swallowed.
+ *
+ * @param ref - Asset ref of the distilled output; parsed for its `type` and
+ *   passed to both `computeSalience` and `upsertAssetSalience`.
+ * @param body - Text of the distilled output that gets content-scored.
+ * @param existingRefVocabulary - Ref vocabulary forwarded unchanged to
+ *   `scoreEncodingSalience` (presumably feeds the novelty signal — confirm in
+ *   the scorer).
+ * @param outcomeWeightEnabled - Forwarded unchanged to `computeSalience`; the
+ *   note on the parameter itself explains why the opt-out must reach this call.
+ * @returns Nothing; the function exists only for its best-effort DB write.
+ */
+function persistOutputEncodingSalience(ref, body, existingRefVocabulary,
+// Operator opt-out (improve.salience.outcomeWeightEnabled: false) must apply
+// here too, or distill-written rank_score rows would use WS-2 weights while
+// preparation uses parity weights — inconsistent salience semantics.
+outcomeWeightEnabled) {
+    try {
+        const parsedRef = parseAssetRef(ref);
+        const salienceResult = scoreEncodingSalience({
+            body,
+            type: parsedRef.type,
+            existingRefVocabulary,
+            revisionCount: 0, // a freshly distilled output IS a first encounter
+        });
+        withStateDb((stateDb) => {
+            const vector = computeSalience({
+                ref,
+                type: parsedRef.type,
+                retrievalFreq: 0, // NOTE(review): presumably zero because a brand-new output has no retrievals yet — confirm
+                encodingSalience: salienceResult.score,
+                outcomeWeightEnabled,
+            });
+            upsertAssetSalience(stateDb, ref, vector);
+        });
+    }
+    catch {
+        // Best-effort — scoring must never block proposal creation. The error
+        // is dropped without logging, so a failed write leaves no trace here.
+    }
+}
 // ── Main entry point ────────────────────────────────────────────────────────
 /**
  * Run a single bounded distillation pass for `ref`. Always emits exactly one
@@ -639,6 +676,9 @@ export async function akmDistill(options) {
     const chat = options.chat ?? chatCompletion;
     const lookup = options.lookupFn ?? defaultLookup;
     const readEventsImpl = options.readEventsFn ?? readEvents;
+    // R1 opt-out must flow into every computeSalience call this command makes so
+    // distill-written rank_score rows use the same weights as preparation's.
+    const outcomeWeightEnabled = config.improve?.salience?.outcomeWeightEnabled !== false;
     // D-4 / #390: similar-lessons retrieval seam (test-injectable).
     const fetchSimilarLessonsFn = options.fetchSimilarLessonsFn ?? ((query, n) => fetchTopSimilarLessons(query, n, options.stashDir));
     // Best-effort load: when the asset is not yet indexed we still proceed —
@@ -661,30 +701,42 @@ export async function akmDistill(options) {
     //   1. The asset's frontmatter (human-readable mirror; idempotent delta gate).
     //   2. state.db :: asset_salience (canonical; feeds improve's high-salience gate).
     // Both writes are best-effort — a DB error never blocks distillation.
+    //
+    // The bigram ref vocabulary is built ONCE per invocation — the novelty signal
+    // reuses it when scoring the distilled OUTPUT at proposal creation (G4).
+    let existingRefVocabulary = new Set();
+    try {
+        const embCfg = config?.embedding;
+        const indexDb = openIndexDatabase(getDbPath(), embCfg?.dimension ? { embeddingDim: embCfg.dimension } : undefined);
+        try {
+            const allRefs = getAllEntries(indexDb).map((e) => e.entryKey);
+            existingRefVocabulary = buildRefVocabulary(allRefs);
+        }
+        finally {
+            closeDatabase(indexDb);
+        }
+    }
+    catch {
+        // Index not available — novelty defaults to type-floor.
+    }
     if (assetContent && assetFilePath) {
         try {
             const parsedRef = parseAssetRef(inputRef);
-            // Build bigram vocabulary from currently-indexed refs for novelty signal.
-            let existingRefVocabulary = new Set();
+            // G4: predictionError decays with revision count — the prior hardcoded
+            // `revisionCount: 0` made it a dead constant 1.0. Use the number of
+            // proposals ever raised against this ref as the revision proxy.
+            let revisionCount = 0;
             try {
-                const embCfg = config?.embedding;
-                const indexDb = openIndexDatabase(getDbPath(), embCfg?.dimension ? { embeddingDim: embCfg.dimension } : undefined);
-                try {
-                    const allRefs = getAllEntries(indexDb).map((e) => e.entryKey);
-                    existingRefVocabulary = buildRefVocabulary(allRefs);
-                }
-                finally {
-                    closeDatabase(indexDb);
-                }
+                revisionCount = listProposals(stash, { ref: inputRef, includeArchive: true }).length;
             }
             catch {
-                // Index not available — novelty defaults to type-floor.
+                // best-effort: unknown history scores as a first encounter
             }
             const salienceResult = scoreEncodingSalience({
                 body: assetContent,
                 type: parsedRef.type,
                 existingRefVocabulary,
-                revisionCount: 0,
+                revisionCount,
             });
             // 1. Write salience to the source asset frontmatter (idempotent).
             const updatedContent = writeSalienceToFrontmatter(assetContent, salienceResult.score, salienceResult);
@@ -700,6 +752,7 @@ export async function akmDistill(options) {
                         type: parsedRef.type,
                         retrievalFreq: 0,
                         encodingSalience: salienceResult.score,
+                        outcomeWeightEnabled,
                     });
                     upsertAssetSalience(stateDb, inputRef, vector);
                 });
@@ -892,6 +945,9 @@ export async function akmDistill(options) {
             };
         }
         const proposal = proposalResult;
+        // G4: content-score the distilled OUTPUT so it carries a real encoding
+        // salience (encoding_source='content') from creation.
+        persistOutputEncodingSalience(promotion.knowledgeRef, resolvedPromotionContent, existingRefVocabulary, outcomeWeightEnabled);
         appendEvent({
             eventType: "distill_invoked",
             ref: inputRef,
@@ -901,6 +957,9 @@ export async function akmDistill(options) {
                 proposalRef: promotion.knowledgeRef,
                 proposalKind: "knowledge",
                 proposalId: proposal.id,
+                // R3: judge verdicts are longitudinally queryable, not just a one-shot
+                // proposal.confidence write (normalized 1–5 score / 5).
+                ...(knowledgeJudgeConfidence !== undefined ? { judgeConfidence: knowledgeJudgeConfidence } : {}),
                 ...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
                 ...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
                 ...eligMeta,
@@ -1367,6 +1426,10 @@ export async function akmDistill(options) {
         };
     }
     const proposal2 = proposalResult2;
+    // G4: content-score the distilled OUTPUT so it carries a real encoding
+    // salience (encoding_source='content') from creation — lessons never get
+    // another chance (they are refused as distill inputs).
+    persistOutputEncodingSalience(effectiveLessonRef, content, existingRefVocabulary, outcomeWeightEnabled);
     appendEvent({
         eventType: "distill_invoked",
         ref: inputRef,
@@ -1376,6 +1439,9 @@ export async function akmDistill(options) {
             proposalRef: effectiveLessonRef,
             proposalKind: effectiveProposalKind,
             proposalId: proposal2.id,
+            // R3: judge verdicts are longitudinally queryable, not just a one-shot
+            // proposal.confidence write (normalized 1–5 score / 5).
+            ...(lessonJudgeConfidence !== undefined ? { judgeConfidence: lessonJudgeConfidence } : {}),
             ...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
             ...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
             ...(descriptionSwapped > 0 ? { descriptionSwapped } : {}),

package/dist/commands/improve/extract.js CHANGED Viewed

@@ -35,6 +35,7 @@ import { resolveStashStandards } from "../../core/standards/resolve-stash-standa
 import { getExtractedSessionsMap, getLastExtractRunAt, getStateDbPath, openStateDatabase, shouldSkipAlreadyExtractedSession, upsertExtractedSession, withStateDb, } from "../../core/state-db.js";
 import { repairTruncatedDescription } from "../../core/text-truncation.js";
 import { warn } from "../../core/warn.js";
+import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
 import { resolveImproveProcessRunnerFromProfile, runnerIsLlm } from "../../integrations/agent/runner.js";
 import { normalizeHarnessId } from "../../integrations/harnesses/index.js";
 import { getAvailableHarnesses } from "../../integrations/session-logs/index.js";
@@ -370,6 +371,10 @@ standardsContext) {
         try {
             const result = await writeSessionAsset(data, stashDir, sessionIndexing.generate);
             if (result.written) {
+                // Write-path indexing (itself fail-open): standalone `akm extract`
+                // (session-end hook) has no post-loop reindex to pick this file up.
+                if (result.filePath)
+                    await indexWrittenAssets(stashDir, [result.filePath]);
                 return {
                     ...(result.ref ? { sessionAssetRef: result.ref } : {}),
                     ...(result.logPath ? { sessionLogPath: result.logPath } : {}),
@@ -752,16 +757,18 @@ export async function akmExtract(options) {
         }
     }
     // WS-3b Step-0b: schema-similarity intake gate.
-    // Load derived-layer (lesson/knowledge) embeddings once per run, but ONLY
-    // when the gate is enabled in config. When disabled (the default) this block
-    // is fully skipped and schemaSimilarityCtx stays null → byte-identical to
-    // prior behaviour.
+    // DEFAULT ON since R3 (docs/design/improve-self-learning-analysis.md G5):
+    // extract is the highest-volume acquisition path with no LLM judge, so the
+    // cheap embedding-dedup check (one embed per lesson/knowledge candidate,
+    // fail-open) is the intake quality gate. Opt out via
+    // processes.extract.schemaSimilarity.enabled: false. The gate is inert in
+    // practice when no derived-layer embeddings exist (empty ctx → no penalty).
     const schemaSimilarityCfg = extractProcess?.schemaSimilarity;
     let schemaSimilarityCtx = null;
-    if (schemaSimilarityCfg?.enabled === true) {
+    if (schemaSimilarityCfg?.enabled !== false) {
         const derivedEmbeddings = options.schemaSimilarityEmbeddings ?? loadDerivedLayerEmbeddings();
         schemaSimilarityCtx = {
-            config: schemaSimilarityCfg,
+            config: { ...schemaSimilarityCfg, enabled: true },
             derivedEmbeddings,
             embeddingConfig: config.embedding,
             embedFn: options.schemaSimilarityEmbedFn,

package/dist/commands/improve/homeostatic.js CHANGED Viewed

@@ -1,13 +1,51 @@
 // This Source Code Form is subject to the terms of the Mozilla Public
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
+/**
+ * WS-3b Step 0 — Intake + homeostatic tier.
+ *
+ * Sub-features (0b is default-ON for extract since R3; the rest default-OFF):
+ *
+ * (The former **0a homeostatic demotion** pass was removed (R4,
+ * docs/design/improve-self-learning-analysis.md G3): it was default-off and
+ * self-undoing — the next `upsertAssetSalience` recompute unconditionally
+ * overwrote the demoted values. SHY-style continuous downscaling now lives in
+ * `computeSalience`'s always-applied recency decay, whose 0.1 floor itself
+ * decays on a long half-life so unreviewed-forever assets keep drifting down.)
+ *
+ * **0b Schema-similarity gate**
+ *   At intake, if a new candidate's body embedding is within ε of an existing
+ *   derived-layer lesson/knowledge node, mark `schema-consistent` and lower
+ *   its priority; only schema-inconsistent/contradicting candidates get full
+ *   `encodingSalience`. One embedding lookup via body_embeddings cache; relieves
+ *   dedup pressure before it accumulates.
+ *
+ * **0c Hot-probation intake buffer (#604)**
+ *   New system-generated extractions enter `captureMode: hot-probation` and
+ *   spend ONE consolidation cycle in probation before promotion to the main
+ *   stash; dedup + quality second-pass runs against them. Stops noisy
+ *   extractions from polluting the stash at the source. Reuses shared
+ *   dedupHash + body_embeddings. Default OFF.
+ *
+ * **Anti-collapse guards (step 8)**
+ *   (a) Generation counter: merged.generation = max(sources)+1; refuse merge
+ *       of two assets both above generation N (default 2); merges cite sources.
+ *   (b) Lexical-diversity check: low n-gram diversity ⇒ raise merge threshold.
+ *   (c) Occasional random non-similar cluster in the pool.
+ *
+ * **CLS interleaving (step 9)**
+ *   distill/memoryInference prompts include embedding-retrieved adjacent
+ *   lessons/knowledge so the pipeline doesn't overwrite prior generalizations.
+ *
+ * **Distill→source fidelity (step 10)**
+ *   After a distill proposal, check it against cited source memories; a
+ *   contradiction flag routes to human review.
+ *
+ * @module homeostatic
+ */
 import { warn } from "../../core/warn.js";
 import { closeDatabase, openExistingDatabase } from "../../indexer/db/db.js";
 // ── Constants ─────────────────────────────────────────────────────────────────
-/** Default days-since-last-retrieval threshold to consider an asset stale. */
-export const DEFAULT_STALE_DAYS = 30;
-/** Default retrievalSalience demotion factor for stale assets. */
-export const DEFAULT_DEMOTION_FACTOR = 0.5;
 /** Default epsilon for schema-similarity gate (looser than dedup's 0.97). */
 export const DEFAULT_SCHEMA_SIMILARITY_EPSILON = 0.85;
 /** Default multiplicative confidence penalty applied to schema-consistent candidates. */
@@ -18,79 +56,6 @@ export const DEFAULT_MAX_GENERATION = 2;
 export const DEFAULT_RANDOM_CLUSTER_FRACTION = 0.05;
 /** Default number of adjacent lessons/knowledge for CLS interleaving. */
 export const DEFAULT_CLS_ADJACENT_COUNT = 3;
-/**
- * Demote `retrievalSalience` in state.db for stale/low-value assets.
- *
- * "Stale" = the asset has a salience row with `updated_at` older than
- * `staleDays` AND `retrieval_salience > 0`. Demotion multiplies the current
- * `retrieval_salience` by `demotionFactor` (default 0.5) and records
- * `homeostatic_demoted_at` so the pass can be observed.
- *
- * Pure state.db operation — no file I/O, no LLM calls. Idempotent: running
- * twice in the same run only demotes the already-demoted value a second time,
- * which is bounded (0.5 × 0.5 = 0.25) and corrected on re-retrieval.
- *
- * Called BEFORE the dedup/LLM-merge pool is assembled so the merge pool
- * already reflects the updated scores.
- */
-export function runHomeostaticDemotion(db, config, now) {
-    const warnings = [];
-    if (!config.enabled)
-        return { demoted: 0, warnings };
-    const staleDays = config.staleDays ?? DEFAULT_STALE_DAYS;
-    const demotionFactor = config.demotionFactor ?? DEFAULT_DEMOTION_FACTOR;
-    const nowMs = now ?? Date.now();
-    const staleThresholdMs = nowMs - staleDays * 86_400_000;
-    try {
-        // Find assets whose salience row is stale AND has non-zero retrievalSalience.
-        // updated_at reflects the last time salience was computed (i.e. the last run
-        // that touched this asset). If the asset hasn't been seen recently, its
-        // retrieval_salience is stale.
-        const staleRows = db
-            .prepare(`SELECT asset_ref, retrieval_salience, rank_score, encoding_salience, outcome_salience
-         FROM asset_salience
-         WHERE updated_at < ? AND retrieval_salience > 0`)
-            .all(staleThresholdMs);
-        if (staleRows.length === 0)
-            return { demoted: 0, warnings };
-        // Batch update in a transaction for atomicity and performance.
-        const updateStmt = db.prepare(`UPDATE asset_salience
-       SET retrieval_salience = ?,
-           rank_score = ?,
-           homeostatic_demoted_at = ?,
-           updated_at = ?
-       WHERE asset_ref = ?`);
-        let demoted = 0;
-        db.exec("BEGIN");
-        try {
-            for (const row of staleRows) {
-                const newRetrieval = row.retrieval_salience * demotionFactor;
-                // Recompute rank_score with the demoted retrieval value.
-                // We use simplified WS-1 parity weights here (no outcome weight by
-                // default) so the demotion is consistent with what salience.ts computes.
-                // The next full computeSalience call will overwrite with the exact value.
-                const newRank = Math.min(1, Math.max(0, (0.3 * row.encoding_salience + 0.0 * row.outcome_salience + 0.7 * newRetrieval) *
-                    // Apply a mild size penalty assumption (200 bytes floor gives 1/log10(200)≈0.43)
-                    0.43));
-                updateStmt.run(newRetrieval, newRank, nowMs, nowMs, row.asset_ref);
-                demoted++;
-            }
-            db.exec("COMMIT");
-        }
-        catch (e) {
-            db.exec("ROLLBACK");
-            throw e;
-        }
-        warn(`[homeostatic] demoted retrievalSalience for ${demoted} stale asset(s) (staleDays=${staleDays}, factor=${demotionFactor})`);
-        return { demoted, warnings };
-    }
-    catch (err) {
-        const msg = `[homeostatic] demotion failed: ${err instanceof Error ? err.message : String(err)}`;
-        warn(msg);
-        warnings.push(msg);
-        return { demoted: 0, warnings };
-    }
-}
 /**
  * Check whether a candidate body embedding is schema-consistent with an existing
  * derived-layer lesson/knowledge node. Returns `true` when the candidate is
@@ -270,7 +235,8 @@ export function computeMergedGeneration(sourceGenerations) {
  * @param config - Anti-collapse config.
  */
 export function checkGenerationGuard(sourceGenerations, config) {
-    if (!config.enabled)
+    // R5: default ON — only an explicit opt-out disables the guard.
+    if (config.enabled === false)
         return { refused: false };
     const maxGen = config.maxGeneration ?? DEFAULT_MAX_GENERATION;
     const highGenCount = sourceGenerations.filter((g) => g > maxGen).length;
@@ -282,6 +248,69 @@ export function checkGenerationGuard(sourceGenerations, config) {
     }
     return { refused: false };
 }
+/** Distinct-token retention floor default (R5 §4.2). */
+export const DEFAULT_MIN_SPECIFICITY_RETENTION = 0.6;
+function distinctTokens(text) {
+    /* Returns the Set of distinct tokens in `text`: the string is lowercased,
+     * split on runs of whitespace, and empty fragments (produced by leading
+     * or trailing whitespace) are dropped. No punctuation stripping is done,
+     * so "merge" and "merge." count as different tokens.
+     * Same lowercase whitespace tokenization computeBigramDiversity uses. */
+    return new Set(text
+        .toLowerCase()
+        .split(/\s+/)
+        .filter((w) => w.length > 0));
+}
+/**
+ * A merge must strictly increase information (R5 §4.2):
+ *  1. Provenance: the merged asset's `source_refs` must be a superset of the
+ *     union of all participants' `source_refs` plus the participant refs
+ *     themselves — provenance never shrinks through a merge.
+ *  2. Specificity: distinctTokens(mergedBody) ≥ minSpecificityRetention ×
+ *     |union(distinctTokens(participant bodies))| — a merge that only
+ *     shortens/genericizes fails.
+ *
+ * Pure and deterministic; ADVISORY in v1 (the caller counts violations, it
+ * does not refuse the merge). Returns `passed: true` immediately when the
+ * anti-collapse suite or the floor itself is opted out.
+ *
+ * The same immediate pass is returned when `participants` is empty. In every
+ * short-circuit case the counters are placeholders (0 / 0 / 1), not measured
+ * values.
+ *
+ * @param mergedBody - Text of the merged asset; tokenized with
+ *   `distinctTokens`.
+ * @param mergedSourceRefs - Iterable of refs the merged asset cites.
+ * @param participants - Array of merge inputs; each entry is read for `ref`,
+ *   `sourceRefs` (iterable of refs) and `body` (text).
+ * @param config - Anti-collapse config. Reads `enabled`,
+ *   `mergeInformationFloor` (each disables the check only when exactly
+ *   `false`) and `minSpecificityRetention` (falls back to
+ *   `DEFAULT_MIN_SPECIFICITY_RETENTION` when null/undefined).
+ * @returns `{ passed, provenanceBefore, provenanceAfter,
+ *   specificityRetention }`, plus a `reason` string (failures joined with
+ *   "; ") only when at least one of the two checks failed.
+ */
+export function checkMergeInformationFloor(mergedBody, mergedSourceRefs, participants, config) {
+    if (config.enabled === false || config.mergeInformationFloor === false || participants.length === 0) {
+        return { passed: true, provenanceBefore: 0, provenanceAfter: 0, specificityRetention: 1 };
+    }
+    // 1. Provenance union: participants + everything they already cited.
+    const required = new Set();
+    for (const p of participants) {
+        required.add(p.ref);
+        for (const sr of p.sourceRefs)
+            required.add(sr);
+    }
+    const after = new Set(mergedSourceRefs);
+    const missing = [...required].filter((r) => !after.has(r)); // required refs the merged asset no longer cites
+    // 2. Specificity retention over the union of source tokens.
+    const sourceTokens = new Set();
+    for (const p of participants) {
+        for (const t of distinctTokens(p.body))
+            sourceTokens.add(t);
+    }
+    const mergedTokens = distinctTokens(mergedBody);
+    // Clamped at computation so the pass/fail decision, the reason string, and
+    // the reported field all describe the same value. An empty source token
+    // set yields 1 rather than dividing by zero.
+    const specificityRetention = Math.min(1, sourceTokens.size === 0 ? 1 : mergedTokens.size / sourceTokens.size);
+    const minRetention = config.minSpecificityRetention ?? DEFAULT_MIN_SPECIFICITY_RETENTION;
+    const provenanceOk = missing.length === 0;
+    const specificityOk = specificityRetention >= minRetention;
+    const reasons = [];
+    if (!provenanceOk) {
+        reasons.push(`provenance shrank: merged source_refs missing ${missing.length} ref(s) (e.g. ${missing[0]})`);
+    }
+    if (!specificityOk) {
+        reasons.push(`specificity retention ${specificityRetention.toFixed(2)} < ${minRetention} (merge genericized/shortened)`);
+    }
+    return {
+        passed: provenanceOk && specificityOk,
+        provenanceBefore: required.size, // distinct refs the merge was required to carry forward
+        provenanceAfter: after.size, // every distinct merged ref, including ones that were not required
+        specificityRetention,
+        ...(reasons.length > 0 ? { reason: reasons.join("; ") } : {}),
+    };
+}
 /**
  * Compute the bigram n-gram diversity of a text string.
  * Returns a value in [0, 1] where 0 = all identical bigrams, 1 = all unique.
@@ -312,7 +341,8 @@ export function computeBigramDiversity(text) {
  *   below the 0.3 threshold; `{ lowDiversity: false }` otherwise.
  */
 export function checkLexicalDiversity(bodies, config) {
-    if (!config.enabled || config.lexicalDiversityCheck === false) {
+    // R5: default ON — only an explicit opt-out disables the check.
+    if (config.enabled === false || config.lexicalDiversityCheck === false) {
         return { lowDiversity: false };
     }
     if (bodies.length === 0)

package/dist/commands/improve/improve-cli.js CHANGED Viewed

@@ -8,16 +8,70 @@ import { output, runWithJsonErrors } from "../../cli/shared.js";
 import { loadConfig } from "../../core/config/config.js";
 import { UsageError } from "../../core/errors.js";
 import { getCacheDir } from "../../core/paths.js";
+import { getActiveCanaries, queryRecentCycleMetrics, withStateDb } from "../../core/state-db.js";
 import { clearLogFile, setLogFile } from "../../core/warn.js";
+import { closeDatabase, openExistingDatabase } from "../../indexer/db/db.js";
 import { resolveSourceEntries } from "../../indexer/search/search-source.js";
 import { getHyphenatedArg, getHyphenatedBoolean, parseFlagValue } from "../../output/context.js";
+import { refreshCanarySet } from "./collapse-detector.js";
 import { akmImprove } from "./improve.js";
 import { buildImproveRunId, recordTerminatedImproveRun, relativeImproveResultPath, writeImproveResultFile, } from "./improve-result-file.js";
 import { runImproveSession } from "./improve-session.js";
+// R5 — collapse-detector canary set inspection / explicit refresh. Emits an
+// "improve-canary" payload holding the active canaries and the recent cycle
+// metrics for their set; when `refresh` is true a new set is minted first.
+// The detector NEVER auto-refreshes the canary set (silent re-baselining is
+// how a slow collapse hides); this verb is the only refresh path.
+// Dispatched from the parent improve run() on `scope === "canary"` — NOT a
+// citty subCommand: registering subCommands makes citty treat EVERY first
+// positional as a subcommand name, breaking `akm improve <type|ref>` outright
+// (citty throws "Unknown command memory"), and re-runs the parent run() after.
+async function runCanaryInspection(refresh) {
+    const config = loadConfig();
+    const cfg = config.improve?.collapseDetector ?? {}; // empty config when the section is absent
+    const result = withStateDb((stateDb) => { // relies on withStateDb handing back the callback's return value
+        let refreshOutcome; // stays undefined unless a refresh was requested
+        if (refresh) {
+            const indexDb = openExistingDatabase();
+            try {
+                // Mint-first, deactivate-after (refreshCanarySet): an empty/unreadable
+                // index keeps the old baseline instead of destroying it. A `null`
+                // result is reported as "kept-old-set", anything else as "refreshed".
+                refreshOutcome = refreshCanarySet(stateDb, indexDb, cfg) === null ? "kept-old-set" : "refreshed";
+            }
+            finally {
+                closeDatabase(indexDb);
+            }
+        }
+        const canaries = getActiveCanaries(stateDb);
+        const canarySetId = canaries[0]?.canary_set_id; // set id read off the first active canary; undefined when there are none
+        const recentCycles = canarySetId ? queryRecentCycleMetrics(stateDb, canarySetId, cfg.windowCycles ?? 5) : []; // falsy id → no history lookup
+        return {
+            schemaVersion: 1,
+            ok: true,
+            refreshed: refreshOutcome === "refreshed",
+            ...(refreshOutcome === "kept-old-set"
+                ? { warning: "refresh skipped: no mintable learning entries in the index — existing canary set kept" }
+                : {}),
+            canarySetId: canarySetId ?? null,
+            canaries: canaries.map((c) => ({ id: c.id, anchorRef: c.anchor_ref, query: c.query })),
+            recentCycles: recentCycles.map((r) => ({
+                ts: r.ts,
+                pass: r.pass,
+                meanRecall: r.mean_recall,
+                meanNdcg: r.mean_ndcg,
+                distinctContentRatio: r.distinct_content_ratio,
+                acceptedActions: r.accepted_actions,
+                mergeFloorViolations: r.merge_floor_violations,
+                alerts: JSON.parse(r.alerts_json), // parsed from the row's JSON text; a malformed value would throw here
+            })),
+        };
+    });
+    output("improve-canary", result);
+}
 export const improveCommand = defineCommand({
     meta: {
         name: "improve",
-        description: "Analyze existing AKM assets and generate improvement proposals; also consolidates memories when profiles.improve.default.processes.consolidate.enabled is true",
+        description: "Analyze existing AKM assets and generate improvement proposals; also consolidates memories when profiles.improve.default.processes.consolidate.enabled is true. `akm improve canary [--refresh]` inspects the collapse-detector canary set.",
     },
     args: {
         scope: {
@@ -26,6 +80,11 @@ export const improveCommand = defineCommand({
             required: false,
         },
         task: { type: "string", description: "Add extra guidance for this improvement pass" },
+        refresh: {
+            type: "boolean",
+            description: "(canary scope only) Mint a new collapse-detector canary set and deactivate the old one; old rows and their cycle history are retained",
+            default: false,
+        },
         "dry-run": { type: "boolean", description: "Show planned actions without writing", default: false },
         target: { type: "string", description: "Override the write target for accepted proposals" },
         "auto-accept": {
@@ -74,6 +133,13 @@ export const improveCommand = defineCommand({
         },
     },
     async run({ args }) {
+        // "canary" is a reserved scope word (never a valid asset type, and refs
+        // contain ":"): dispatch to the detector inspection verb instead of an
+        // improve run.
+        if (args.scope === "canary") {
+            await runWithJsonErrors(() => runCanaryInspection(getHyphenatedBoolean(args, "refresh") === true));
+            return;
+        }
         await runWithJsonErrors(async () => {
             const formatFlagValue = parseFlagValue(process.argv, "--format");
             if (formatFlagValue !== undefined) {

package/dist/commands/improve/improve.js CHANGED Viewed

@@ -491,6 +491,7 @@ export async function akmImprove(options = {}) {
         let stalenessDetection;
         let recombination;
         let proceduralCompilation;
+        let cycleMetrics;
         // Summed counters/durations.
         let prepGateCount = 0;
         let prepGateFailedCount = 0;
@@ -655,6 +656,10 @@ export async function akmImprove(options = {}) {
                 budgetSignal: budgetAbortController.signal,
                 improveProfile,
                 consolidationRan: preparation.consolidationRan,
+                // R5: floor violations from this run's consolidate pass + the
+                // auto-accepted volume so far (prep + loop gates) for churn detection.
+                consolidationMergeFloorViolations: preparation.consolidation.mergeFloorViolations ?? 0,
+                acceptedActions: preparation.gateAutoAcceptedCount + loopGateCountThisCycle,
             }));
             const postLoopGateCountThisCycle = postLoopResult.gateAutoAcceptedCount;
             // Last-wins point-in-time objects.
@@ -663,6 +668,10 @@ export async function akmImprove(options = {}) {
             stalenessDetection = postLoopResult.stalenessDetection;
             recombination = postLoopResult.recombination;
             proceduralCompilation = postLoopResult.proceduralCompilation;
+            // Keep the last QUALIFYING cycle's snapshot — a later non-qualifying
+            // cycle in a maxCycles>1 run must not clobber it with undefined.
+            if (postLoopResult.cycleMetrics)
+                cycleMetrics = postLoopResult.cycleMetrics;
             // Summed counters/durations.
             postLoopGateCount += postLoopResult.gateAutoAcceptedCount;
             postLoopGateFailedCount += postLoopResult.gateAutoAcceptFailedCount;
@@ -754,6 +763,7 @@ export async function akmImprove(options = {}) {
             ...(stalenessDetection ? { stalenessDetection } : {}),
             ...(recombination ? { recombination } : {}),
             ...(proceduralCompilation ? { proceduralCompilation } : {}),
+            ...(cycleMetrics ? { cycleMetrics } : {}),
             ...(orphansPurged !== undefined ? { orphansPurged } : {}),
             ...(proposalsExpired !== undefined && proposalsExpired > 0 ? { proposalsExpired } : {}),
             reflectCooldownActions: finalActions.filter((a) => a.mode === "reflect-cooldown").length,

package/dist/commands/improve/loop-stages.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { UsageError } from "../../core/errors.js";
 import { appendEvent } from "../../core/events.js";
 import { openLogsDatabase, purgeOldTaskLogs } from "../../core/logs-db.js";
 import { getDbPath } from "../../core/paths.js";
-import { purgeOldEvents, purgeOldImproveRuns, withStateDb } from "../../core/state-db.js";
+import { purgeOldCycleMetrics, purgeOldEvents, purgeOldImproveRuns, withStateDb, } from "../../core/state-db.js";
 import { info, warn } from "../../core/warn.js";
 import { closeDatabase, openIndexDatabase } from "../../indexer/db/db.js";
 import { runGraphExtractionPass } from "../../indexer/graph/graph-extraction.js";
@@ -22,6 +22,7 @@ import { isProcessEnabled } from "../../llm/feature-gate.js";
 import { withLlmStage } from "../../llm/usage-telemetry.js";
 import { createProposal, expireStaleProposals, isProposalSkipped, listProposals, purgeOrphanProposals, } from "../proposal/validators/proposals.js";
 import { checkDeadUrls } from "../url-checker.js";
+import { DEFAULT_RETENTION_DAYS as CYCLE_METRICS_RETENTION_DAYS, runCollapseDetector } from "./collapse-detector.js";
 import { deriveLessonRef } from "./distill.js";
 import { deriveKnowledgeRef } from "./distill-promotion-policy.js";
 // Eligibility / candidate-selection predicates live in ./eligibility.
@@ -694,9 +695,31 @@ export async function runImprovePostLoopStage(args) {
             allWarnings.push(`procedural: ${String(e)}`);
         }
     }
+    // ── R5: collapse/churn detector ────────────────────────────────────────────
+    // One snapshot per QUALIFYING cycle: consolidate processed work and/or
+    // recombine formed clusters. Runs AFTER the maintenance reindex so FTS sees
+    // the post-merge index; one call site covers both passes. Deterministic,
+    // observe-only, fail-open (the orchestrator catches everything) — and inert
+    // on the ~9-in-10 default-profile runs that touch no merges.
+    let cycleMetrics;
+    const recombineWorked = (recombination?.clustersFormed ?? 0) > 0;
+    if (!options.dryRun && (consolidationRan || recombineWorked)) {
+        cycleMetrics = runCollapseDetector({
+            runId: options.runId ?? "improve-adhoc",
+            pass: consolidationRan && recombineWorked ? "both" : consolidationRan ? "consolidate" : "recombine",
+            // prep+loop gate accepts, PLUS recombine's confirmed-lesson promotions —
+            // recombine churn is the historically observed failure mode and its
+            // promotions never flow through the prep/loop gates.
+            acceptedActions: (args.acceptedActions ?? 0) + (recombination?.lessonsPromoted ?? 0),
+            mergeFloorViolations: args.consolidationMergeFloorViolations ?? 0,
+            config: options.config ?? loadConfig(),
+            ...(eventsCtx ? { eventsCtx } : {}),
+        });
+    }
     return {
         allWarnings,
         deadUrls,
+        ...(cycleMetrics ? { cycleMetrics } : {}),
         ...(recombination ? { recombination } : {}),
         ...(proceduralCompilation ? { proceduralCompilation } : {}),
         ...(maintenanceResult.memoryInference ? { memoryInference: maintenanceResult.memoryInference } : {}),
@@ -1007,6 +1030,21 @@ export async function runImproveMaintenancePasses(args) {
                                 ref: "improve_runs:_purge",
                                 metadata: { purgedCount: improveRunsPurged, retentionDays },
                             }, eventsCtx);
+                            // R5: improve_cycle_metrics has its OWN retention window
+                            // (default 365d — a slow collapse needs a longer trend than
+                            // the 90d events window). canary_queries rows are never purged.
+                            const cycleRetention = config.improve?.collapseDetector?.retentionDays ?? CYCLE_METRICS_RETENTION_DAYS;
+                            const cycleMetricsPurged = purgeOldCycleMetrics(stateDb, cycleRetention);
+                            if (cycleMetricsPurged > 0) {
+                                info(`[improve] cycle-metrics purge: ${cycleMetricsPurged} row(s) older than ${cycleRetention}d removed from state.db`);
+                                appendEvent({
+                                    // Dedicated type (mirrors improve_runs_purged) so consumers
+                                    // never have to disambiguate purge targets via the ref string.
+                                    eventType: "improve_cycle_metrics_purged",
+                                    ref: "improve_cycle_metrics:_purge",
+                                    metadata: { purgedCount: cycleMetricsPurged, retentionDays: cycleRetention },
+                                }, eventsCtx);
+                            }
                         }, { path: eventsCtx?.dbPath, borrowed: eventsCtx?.db });
                     }
                     catch (err) {