npm - akm-cli - Versions diffs - 0.9.0-beta.52 → 0.9.0-beta.53 - Mend

akm-cli 0.9.0-beta.52 → 0.9.0-beta.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/dist/assets/hints/cli-hints-full.md +6 -5
package/dist/cli.js +0 -7
package/dist/commands/env/env-cli.js +3 -2
package/dist/commands/env/env.js +14 -67
package/dist/commands/health/checks.js +28 -15
package/dist/commands/health.js +68 -1
package/dist/commands/improve/collapse-detector.js +419 -0
package/dist/commands/improve/consolidate.js +72 -54
package/dist/commands/improve/distill.js +79 -13
package/dist/commands/improve/extract.js +13 -6
package/dist/commands/improve/homeostatic.js +109 -79
package/dist/commands/improve/improve-cli.js +67 -1
package/dist/commands/improve/improve.js +10 -0
package/dist/commands/improve/loop-stages.js +39 -1
package/dist/commands/improve/outcome-loop.js +15 -3
package/dist/commands/improve/preparation.js +17 -8
package/dist/commands/improve/salience.js +49 -32
package/dist/commands/read/curate.js +5 -9
package/dist/commands/read/knowledge.js +4 -0
package/dist/commands/read/search.js +5 -2
package/dist/commands/read/show.js +3 -3
package/dist/core/asset/asset-spec.js +3 -2
package/dist/core/config/config-schema.js +39 -17
package/dist/core/eval/rank-metrics.js +113 -0
package/dist/core/state/migrations.js +56 -0
package/dist/core/state-db.js +146 -19
package/dist/indexer/ensure-index.js +33 -90
package/dist/indexer/index-writer-lock.js +0 -11
package/dist/indexer/index-written-assets.js +105 -0
package/dist/indexer/passes/metadata.js +20 -0
package/dist/indexer/search/db-search.js +29 -1
package/dist/indexer/search/ranking-contributors.js +33 -1
package/dist/indexer/search/ranking.js +66 -0
package/dist/indexer/search/search-fields.js +6 -0
package/dist/llm/feature-gate.js +6 -2
package/dist/output/renderers.js +8 -13
package/dist/output/shapes/helpers.js +0 -3
package/dist/output/shapes/passthrough.js +1 -0
package/dist/scripts/migrate-storage.js +152 -33
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +41 -18
package/dist/storage/repositories/index-db.js +10 -1
package/package.json +2 -4

package/dist/commands/improve/outcome-loop.js CHANGED

@@ -33,6 +33,15 @@ export const WARM_START_CAP = 0.3;
  * very-negative run can't send the score to −∞.
  */
 export const OUTCOME_SCORE_MIN = -1.0;
+/**
+ * Saturation ceiling: the maximum outcome_score. Biological RPE saturates —
+ * a fully predicted reward produces zero response, not an ever-growing one —
+ * so a long-lived popular asset must not accrue unbounded outcome mass that
+ * would dominate ranking once the outcome weight is enabled (analysis G2).
+ * 1.5 comfortably exceeds the max plausible single-cycle raw update while
+ * keeping the normalised outcomeSalience spread meaningful.
+ */
+export const OUTCOME_SCORE_MAX = 1.5;
 /**
  * Diversity floor: `outcomeSalience` for any asset is at least this fraction
  * of the maximum observed `outcome_score` in the table, so rare-but-correct
@@ -107,9 +116,12 @@ export function updateAssetOutcome(db, inputs) {
         // so the score tracks the moving signal, not the cumulative sum.
         const rawUpdate = predictionError - penalty + valence;
         const newScore = OUTCOME_EMA_ALPHA * rawUpdate + (1 - OUTCOME_EMA_ALPHA) * existing.outcome_score;
-        // Clip to [OUTCOME_SCORE_MIN, +Infinity) — no upper cap so that very-active
-        // useful assets can accumulate a high positive score.
-        outcomeScore = Math.max(OUTCOME_SCORE_MIN, newScore);
+        // Clip to [OUTCOME_SCORE_MIN, OUTCOME_SCORE_MAX] — the ceiling is the RPE
+        // saturation analog (G2): without it, long-lived popular assets accumulate
+        // unbounded positive mass (live max was 3.13) and would dominate rank_score
+        // the moment the outcome weight is enabled. Stored legacy scores above the
+        // ceiling converge back under it on their next differential update.
+        outcomeScore = Math.min(OUTCOME_SCORE_MAX, Math.max(OUTCOME_SCORE_MIN, newScore));
         // ── review_pressure (#613) ─────────────────────────────────────────────
         // New negatives this cycle.
         const newNegatives = Math.max(0, inputs.negativeFeedbackCount - existing.negative_feedback_count);

package/dist/commands/improve/preparation.js CHANGED

@@ -1145,23 +1145,32 @@ export async function runImprovePreparationStage(args) {
     const proactiveAndRetrievalSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
     try {
         withStateDb((dbForHighSalience) => {
-            const effectiveLimit = options.limit ?? 10;
+            // Derive the cap from the resolved reflect limit (mirrors improve.ts's
+            // options.limit resolution) so an unbounded whole-stash run does not
+            // collapse the lane to exactly 1 ref via the bare `?? 10` fallback.
+            const effectiveLimit = options.limit ?? improveProfile?.processes?.reflect?.limit ?? improveProfile.limit ?? 10;
             const highSalienceCap = Math.max(1, Math.floor(effectiveLimit * 0.1));
             // #632/#4 — session-capture telemetry (checkpoints) must never consume
             // the scarce high-salience budget. Even with a content-scored row, these
             // are pipeline bookkeeping, not assets worth reflecting/rewriting.
             const candidates = noFeedbackCandidates.filter((r) => !proactiveAndRetrievalSet.has(r.ref) && !isSessionCaptureMemoryName(parseAssetRef(r.ref).name));
+            // Collect ALL qualifying candidates, then take the top-N BY SCORE — the
+            // previous first-N-in-scan-order break meant a higher-salience candidate
+            // found later in the scan lost its slot to an earlier lower-scoring one.
+            const qualifying = [];
             for (const r of candidates) {
-                if (highSalienceRefs.length >= highSalienceCap)
-                    break;
                 const row = getAssetSalience(dbForHighSalience, r.ref);
                 if (row &&
                     isContentEncodingRow(row, parseAssetRef(r.ref).type) &&
                     row.encoding_salience >= salienceThreshold &&
                     !lastReflectProposalTs.has(r.ref)) {
-                    highSalienceRefs.push(r);
+                    qualifying.push({ ref: r, score: row.encoding_salience });
                 }
             }
+            qualifying.sort((a, b) => b.score - a.score);
+            for (const q of qualifying.slice(0, highSalienceCap)) {
+                highSalienceRefs.push(q.ref);
+            }
         }, { path: eventsCtx?.dbPath });
     }
     catch (err) {
@@ -1392,11 +1401,11 @@ export async function runImprovePreparationStage(args) {
     // so feedback refs get their genuine retrieval frequency, not a 0-floor fallback.
     // outcomeSalienceByRef is populated by WS-2 above (or empty on first run).
     //
-    // Part-V gate: read the operator opt-in flag from config. Default false
-    // (WS-1 parity weights) until the maintainer runs scripts/akm-eval and sets
-    // improve.salience.outcomeWeightEnabled: true in the config.
+    // R1 loop closure: the outcome weight is ON by default (the G2 saturation
+    // cap makes it safe). Operators opt out with
+    // improve.salience.outcomeWeightEnabled: false in the config.
     const salienceConfig = (options.config ?? loadConfig()).improve?.salience;
-    const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled === true;
+    const outcomeWeightEnabled = salienceConfig?.outcomeWeightEnabled !== false;
     const salienceMap = new Map();
     const nowForSalience = Date.now();
     // #644 — preserve content-derived encoding scores across runs.

package/dist/commands/improve/salience.js CHANGED

@@ -21,12 +21,12 @@
  *
  * `rankScore = (w_e·encoding + w_o·outcome + w_r·retrieval) × sizePenalty`, normalized [0,1].
  *
- * **WS-2 default-off (Part-V gate):**
- * `w_o = 0.15` is the target but is applied only when `outcomeWeightEnabled=true`
- * (set via `improve.salience.outcomeWeightEnabled: true` in config after running
- * Part-V T0 baseline). Default: WS-1 parity weights `w_e=0.30, w_r=0.70, w_o=0`.
- * `outcomeSalience` is populated from `asset_outcome.outcome_score` (WS-2) for
- * observability regardless of the flag.
+ * **WS-2 default-ON (R1 loop closure):**
+ * `w_o = 0.15` is applied by default now that `outcome_score` saturates at
+ * `OUTCOME_SCORE_MAX` (G2). Operators can opt out via
+ * `improve.salience.outcomeWeightEnabled: false`, which restores the WS-1
+ * parity weights `w_e=0.30, w_r=0.70, w_o=0`. `outcomeSalience` is populated
+ * from `asset_outcome.outcome_score` regardless of the flag.
  *
  * ## Plasticity
  *
@@ -52,16 +52,30 @@ import { WARM_START_CAP } from "./outcome-loop.js";
 const DAY_MS = 86_400_000;
 // ── Recency decay half-life (mirrors the proactive-maintenance prototype) ─────
 const RECENCY_HALFLIFE_DAYS = 21;
+// ── Recency-floor half-life (R4 — SHY-style continuous downscaling) ──────────
+//
+// The recency floor itself decays on this (much longer) half-life so an
+// unreviewed-forever asset keeps drifting down instead of parking at the 0.1
+// floor. This replaces the deleted homeostatic demotion pass (which was
+// default-off and self-undoing — every salience recompute clobbered it);
+// folding the decay into the always-applied recency term makes it persist by
+// construction. At 180 days the floor halves; a 1-year-stale asset sits at
+// ~0.025 instead of 0.1.
+const RECENCY_FLOOR_HALFLIFE_DAYS = 180;
+// Absolute epsilon under the decaying floor. Keeps the frequency term ordinal
+// for assets whose last-use timestamp is unknown (utility_scores has no
+// last_used_at) — without it their retrieval salience collapses to exactly 0
+// and frequency ordering is lost for maintenance selection.
+const RECENCY_EPSILON = 0.01;
 // ── Size proxy floor (avoids log10(0)) ────────────────────────────────────────
 const SIZE_FLOOR_BYTES = 200;
 // ── Projection weights ────────────────────────────────────────────────────────
 //
-// These constants reflect the WS-2 TARGET values (used when outcomeWeightEnabled=true).
-// Default ranking uses WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) until the
-// maintainer opts in via `improve.salience.outcomeWeightEnabled: true` after running
-// the Part-V T0 baseline (scripts/akm-eval + health report).
+// These constants are the DEFAULT ranking weights (R1 loop closure). Operators
+// can opt back out to the WS-1 parity weights (w_e=0.30, w_r=0.70, w_o=0) via
+// `improve.salience.outcomeWeightEnabled: false`.
 //
-// WS-2 opt-in split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
+// WS-2 split (w_e=0.25, w_o=0.15, w_r=0.60, sum = 1.0):
 // [exp] Expert recommendation: encoding should be moderate so a type-importance
 // stub does not completely dominate; retrieval should be strong since it directly
 // measures use; outcome provides a quality signal proportional to usefulness.
@@ -78,10 +92,10 @@ if (Math.abs(W_ENCODING + W_OUTCOME + W_RETRIEVAL - 1.0) > 1e-9) {
 }
 // ── WS-1 parity weights ───────────────────────────────────────────────────────
 //
-// These constants reflect the default WS-1 parity weights used when
-// `outcomeWeightEnabled` is false/absent (the default). They preserve the
+// These constants reflect the WS-1 parity weights used when the operator
+// explicitly opts out (`outcomeWeightEnabled: false`). They preserve the
 // WS-1 two-way split (w_e=0.30, w_r=0.70) with w_o=0 so outcome does not
-// affect rankScore until the operator opts in after the Part-V baseline run.
+// affect rankScore in the opt-out mode.
 //
 // Named here (rather than inline literals in the else branch) so a future
 // re-tune has a single source of truth and the sum-to-1 guard below catches
@@ -155,15 +169,19 @@ export function computeSalience(inputs) {
     //
     // Formula: log(1 + freq) × recencyDecay
     //   log(1+freq): sub-linear frequency term (same as proactive-maintenance prototype).
-    //   recencyDecay: 0.1 + 0.5^(useAgeDays/halflife) — decays to 0.1 floor when stale.
-    //     lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay≈0.1 (floor).
+    //   recencyDecay: max(ε, 0.1·0.5^(useAgeDays/180) + 0.5^(useAgeDays/21)) —
+    //     the fast term halves every 21 days; the 0.1 floor itself halves every
+    //     180 days (R4: SHY-style continuous downscaling — an unreviewed-forever
+    //     asset keeps drifting down instead of parking at the floor). The ε=0.01
+    //     epsilon keeps the frequency term ordinal for unknown-last-use assets.
+    //     lastUseMs=0/undefined → useAgeDays=9999 → recencyDecay=ε.
     //
     // The recency term is MANDATORY (plan requirement §WS-1 step 2). Without it
-    // retrievalSalience degenerates to a non-decaying frequency count and the WS-3
-    // homeostatic step-0 demotion has nothing to act on.
+    // retrievalSalience degenerates to a non-decaying frequency count. This
+    // always-applied decay replaces the deleted homeostatic demotion pass.
     const lastUseMs = inputs.lastUseMs ?? 0;
     const useAgeDays = lastUseMs > 0 ? (now - lastUseMs) / DAY_MS : 9999;
-    const recencyDecay = 0.1 + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS);
+    const recencyDecay = Math.max(RECENCY_EPSILON, 0.1 * 0.5 ** (useAgeDays / RECENCY_FLOOR_HALFLIFE_DAYS) + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS));
     const rawRetrieval = Math.log(1 + inputs.retrievalFreq) * recencyDecay;
     // ── Size penalty ─────────────────────────────────────────────────────────────
     // 1/log10(size): larger assets are slightly deprioritized (same as proactive prototype).
@@ -184,29 +202,28 @@ export function computeSalience(inputs) {
     // which asymptotes to 1 and equals 0.5 at rawRetrieval=1. This is the same
     // formula used for MemRL utility updates.
     const retrieval = rawRetrieval / (rawRetrieval + 1);
-    // ── Weight selection (Part-V gate) ────────────────────────────────────────
-    //
-    // When `outcomeWeightEnabled` is false/absent (default): use WS-1 parity
-    // weights (w_e=0.30, w_r=0.70, w_o=0) so ranking is unchanged from the WS-1
-    // baseline. The `outcome` sub-score is still computed and stored in the
-    // salience vector for observability, but it does not affect rankScore.
+    // ── Weight selection (R1 — outcome loop closed by default) ───────────────
     //
-    // When `outcomeWeightEnabled` is true (operator opt-in after Part-V run):
-    // use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60).
+    // When `outcomeWeightEnabled` is true/absent (DEFAULT ON since the G2
+    // saturation cap landed): use WS-2 weights (w_e=0.25, w_o=0.15, w_r=0.60)
+    // so the prediction-error outcome signal actually shapes rankScore — this
+    // is the R1 loop-closure from docs/design/improve-self-learning-analysis.md.
     //
-    // The constants W_ENCODING, W_OUTCOME, W_RETRIEVAL always reflect the
-    // WS-2 target values for documentation and re-tune reference.
+    // When `outcomeWeightEnabled` is explicitly false (operator opt-out via
+    // `improve.salience.outcomeWeightEnabled: false`): fall back to the WS-1
+    // parity weights (w_e=0.30, w_r=0.70, w_o=0). The `outcome` sub-score is
+    // still computed and stored for observability in that mode.
     let we;
     let wo;
     let wr;
-    if (inputs.outcomeWeightEnabled === true) {
-        // WS-2 active: three-way split from Part-V operator opt-in.
+    if (inputs.outcomeWeightEnabled !== false) {
+        // WS-2 active (default): three-way split.
         we = W_ENCODING; // 0.25
         wo = W_OUTCOME; // 0.15
         wr = W_RETRIEVAL; // 0.60
     }
     else {
-        // WS-1 parity (default): w_o=0, redistribute to WS-1 proportions.
+        // WS-1 parity (opt-out): w_o=0, redistribute to WS-1 proportions.
         // Original WS-1 split was w_e=0.30, w_r=0.70.
         we = W_ENCODING_PARITY;
         wo = W_OUTCOME_PARITY;

package/dist/commands/read/curate.js CHANGED

@@ -19,12 +19,12 @@ import { parseFrontmatter } from "../../core/asset/frontmatter.js";
 import { getIndexPassConfig, loadConfig } from "../../core/config/config.js";
 import { rethrowIfTestIsolationError, UsageError } from "../../core/errors.js";
 import { appendEvent } from "../../core/events.js";
-import { closeDatabase, computeBodyHash, openExistingDatabase } from "../../indexer/db/db.js";
+import { computeBodyHash } from "../../indexer/db/db.js";
 import { enqueueGraphExtraction, hasGraphData } from "../../indexer/db/graph-db.js";
 import { findSourceForPath, resolveSourceEntries } from "../../indexer/search/search-source.js";
 import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
 import { truncateDescription } from "../../output/shapes.js";
-import { withIndexDb } from "../../storage/repositories/index-db.js";
+import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
 import { akmSearch, parseSearchSource } from "./search.js";
 import { akmShowUnified } from "./show.js";
 const CURATE_FALLBACK_FILTER_WORDS = new Set([
@@ -65,8 +65,7 @@ function logCurateEvent(query, result) {
         metadata: { query, itemCount: result.items.length, itemRefs },
     });
     try {
-        const db = openExistingDatabase();
-        try {
+        withIndexDb((db) => {
             insertUsageEvent(db, {
                 event_type: "curate",
                 query,
@@ -86,10 +85,7 @@ function logCurateEvent(query, result) {
                     source: "user",
                 });
             }
-        }
-        finally {
-            closeDatabase(db);
-        }
+        }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
     }
     catch (err) {
         rethrowIfTestIsolationError(err);
@@ -207,7 +203,7 @@ function maybeEnqueueLazyGraph(assetPath) {
             if (!hasGraphData(db, stashRoot, assetPath)) {
                 enqueueGraphExtraction(db, stashRoot, assetPath, bodyHash, 0);
             }
-        });
+        }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
     }
     catch (err) {
         rethrowIfTestIsolationError(err);

package/dist/commands/read/knowledge.js CHANGED

@@ -16,6 +16,7 @@ import { isHttpUrl, isWithin, tryReadStdinText } from "../../core/common.js";
 import { loadConfig } from "../../core/config/config.js";
 import { UsageError } from "../../core/errors.js";
 import { commitWriteTargetBoundary, formatRefForMessage, resolveWriteTarget, writeAssetToSource, } from "../../core/write-source.js";
+import { indexWrittenAssets } from "../../indexer/index-written-assets.js";
 import { fetchWebsiteMarkdownSnapshot, shouldAllowPrivateWebsiteUrlForTests } from "../../sources/website-ingest.js";
 const MAX_CAPTURED_ASSET_SLUG_LENGTH = 64;
 // ── Asset-name normalisation ─────────────────────────────────────────────────
@@ -144,6 +145,9 @@ export async function writeMarkdownAsset(options) {
     // 0.9.0 (issue #507): single batch commit at the write boundary for git
     // targets. No-op for filesystem/primary-stash targets.
     commitWriteTargetBoundary(target, `Update ${formatRefForMessage(ref)}`);
+    // Write-path indexing: the asset is searchable immediately. Fail-open; reads
+    // no longer trigger reindexes, so keeping the index current is the writer's job.
+    await indexWrittenAssets(source.path, [result.path]);
     return {
         ref: result.ref,
         path: result.path,

package/dist/commands/read/search.js CHANGED

@@ -23,7 +23,7 @@ import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.
 // indexer or path-resolution code runs.
 import "../../sources/providers/index.js";
 import { insertUsageEvent } from "../../indexer/usage/usage-events.js";
-import { withIndexDb } from "../../storage/repositories/index-db.js";
+import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
 import { searchRegistry } from "./registry-search.js";
 const DEFAULT_LIMIT = 20;
 export async function akmSearch(input) {
@@ -227,6 +227,9 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
         metadata: { query, hitCount: stashHits.length, resultRefs: allResultRefs, mode },
     });
     try {
+        // Short busy timeout: telemetry must never stall the search result behind
+        // a background reindex holding the index.db write lock (30s default wait).
+        // Under contention these usage hints are skipped, not waited for.
         withIndexDb((db) => {
             const resolved = resolveEntryIds(db, stashHits.slice(0, 50));
             for (const { entryId, ref } of resolved) {
@@ -269,7 +272,7 @@ function logSearchEvent(query, response, mode = "keyword", eventSource = "user",
                 }),
                 source: eventSource,
             });
-        });
+        }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
     }
     catch (err) {
         rethrowIfTestIsolationError(err);

package/dist/commands/read/show.js CHANGED

@@ -39,7 +39,7 @@ import { resolveAssetPath } from "../../indexer/walk/path-resolver.js";
 import { resolveIndexPassLLM } from "../../llm/index-passes.js";
 import { resolveSourcesForOrigin } from "../../registry/origin-resolve.js";
 import { resolveStorageLocations } from "../../storage/locations.js";
-import { withIndexDb } from "../../storage/repositories/index-db.js";
+import { TELEMETRY_BUSY_TIMEOUT_MS, withIndexDb } from "../../storage/repositories/index-db.js";
 // Eagerly import source providers to trigger self-registration.
 import "../../sources/providers/index.js";
 import { getCurrentWorkflowScopeKey } from "../../workflows/authoring/scope-key.js";
@@ -301,7 +301,7 @@ function logShowEvent(ref, eventSource = "user") {
                 entry_id: findEntryIdByRef(db, ref),
                 source: eventSource,
             });
-        });
+        }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
     }
     catch (err) {
         rethrowIfTestIsolationError(err);
@@ -431,7 +431,7 @@ async function maybeExtractGraphInline(config, sourceStashDir, assetPath) {
         }
         withIndexDb((db) => {
             alreadyGraphed = hasGraphData(db, sourceStashDir, assetPath);
-        });
+        }, { busyTimeoutMs: TELEMETRY_BUSY_TIMEOUT_MS });
         if (alreadyGraphed)
             return;
         // Open the db for the async extraction ourselves: `withIndexDb` is

package/dist/core/asset/asset-spec.js CHANGED

@@ -69,8 +69,9 @@ const ASSET_SPECS_INTERNAL = {
     script: { stashDir: "scripts", ...scriptSpec },
     memory: { stashDir: "memories", ...markdownSpec },
     // Environment assets — whole `.env` files sourced/injected wholesale. Replaced
-    // the deprecated `vault` type (removed in 0.9.0). Key NAMES + start-of-line
-    // comments are surfaced as metadata; values are never read for indexing.
+    // the deprecated `vault` type (removed in 0.9.0). Only key NAMES are surfaced
+    // as metadata; values and comment text are never read for indexing (comments
+    // routinely contain commented-out credentials).
     env: {
         stashDir: "env",
         isRelevantFile: (fileName) => fileName === ".env" || fileName.endsWith(".env"),

package/dist/core/config/config-schema.js CHANGED

@@ -171,6 +171,9 @@ export const ImproveProcessConfigSchema = z
     // byte-identically to today (the incrementalSince path is unaffected). Only
     // meaningful on the `consolidate` process.
     judgedCache: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
+    // Distill process: LLM-as-judge lesson quality gate. Default ON (R3);
+    // fail-open — judge failure/timeout/parse errors pass through. Set
+    // `enabled: false` on the distill process to opt out.
     qualityGate: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
     contradictionDetection: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
     // Extract process config (only meaningful for extract process)
@@ -250,24 +253,13 @@ export const ImproveProcessConfigSchema = z
     // once sufficient history accumulates; this value is only used on the very
     // first run. Default 30 s. Only meaningful on the `consolidate` process.
     p90ChunkSecondsDefault: z.number().finite().positive().optional(),
-    // WS-3b: Homeostatic demotion (step 0a). Before any LLM merge, demote
-    // retrievalSalience for stale/low-value assets so the merge pool is bounded
-    // and high-SNR. Demotion is state.db-only (file content untouched);
-    // re-promotable on re-retrieval. Default OFF. Only meaningful on the
-    // `consolidate` process.
-    homeostaticDemotion: z
-        .object({
-        enabled: z.boolean().optional(),
-        // Minimum days since last retrieval to consider an asset stale (default 30).
-        staleDays: z.number().int().min(0).optional(),
-        // Demotion factor: multiply retrievalSalience by this when stale (default 0.5).
-        demotionFactor: z.number().min(0).max(1).optional(),
-    })
-        .passthrough()
-        .optional(),
+    // (WS-3b step 0a `homeostaticDemotion` was removed — R4. The key is
+    // tolerated via passthrough if an old config still carries it; continuous
+    // decay is now part of the always-applied salience recency term.)
     // WS-3b: Schema-similarity gate (step 0b). At intake, if a new candidate's
     // body embedding is within epsilon of an existing derived-layer lesson/knowledge
-    // node, mark it schema-consistent and lower its priority. Default OFF.
+    // node, mark it schema-consistent and lower its priority. Default ON for
+    // the `extract` process since R3 (fail-open; set `enabled: false` to opt out).
     // Only meaningful on the `consolidate` and `extract` processes.
     schemaSimilarity: z
         .object({
@@ -297,13 +289,19 @@ export const ImproveProcessConfigSchema = z
     //   - maxGeneration: refuse to merge two assets both above this generation (default 2).
     //   - lexicalDiversityCheck: low n-gram diversity ⇒ raise merge threshold.
     //   - randomClusterFraction: occasional random (non-similar) cluster in pool (default 0.05).
-    // Default OFF. Only meaningful on the `consolidate` process.
+    //   - mergeInformationFloor: measure that merges keep provenance + specificity
+    //     (R5 §4.2; ADVISORY in v1 — counted, never refused).
+    //   - minSpecificityRetention: distinct-token retention floor for merges (default 0.6).
+    // Default ON since R5 (opt out via enabled: false). Only meaningful on the
+    // `consolidate` process.
     antiCollapse: z
         .object({
         enabled: z.boolean().optional(),
         maxGeneration: z.number().int().min(1).optional(),
         lexicalDiversityCheck: z.boolean().optional(),
         randomClusterFraction: z.number().min(0).max(1).optional(),
+        mergeInformationFloor: z.boolean().optional(),
+        minSpecificityRetention: z.number().min(0).max(1).optional(),
     })
         .passthrough()
         .optional(),
@@ -633,6 +631,29 @@ const ImproveSalienceSchema = z
     replayBudget: z.number().int().min(0).optional(),
 })
     .passthrough();
+// R5 — longitudinal collapse/churn detector (observe-only in v1; deterministic,
+// fail-open, runs only on cycles where consolidate/recombine did work).
+// Default ON; opt out via `improve.collapseDetector.enabled: false`.
+// See docs/design/improve-collapse-churn-detector-design.md.
+const ImproveCollapseDetectorSchema = z
+    .object({
+    enabled: z.boolean().optional(),
+    // Canary set size minted on first run (owner-approved 30–50 range; default 40).
+    canaryCount: z.number().int().min(3).max(200).optional(),
+    // Top-K cutoff for canary recall/nDCG (default 10).
+    k: z.number().int().min(1).max(100).optional(),
+    // Trend window in qualifying cycles (default 5).
+    windowCycles: z.number().int().min(2).max(50).optional(),
+    // Absolute mean-recall drop vs window median that fires collapse (default 0.15).
+    recallDropThreshold: z.number().min(0).max(1).optional(),
+    // distinct-content-ratio decline over the window that fires collapse (default 0.05).
+    entropyDropThreshold: z.number().min(0).max(1).optional(),
+    // Accepted-action volume over the window below which churn never fires (default 25).
+    churnMinAcceptedActions: z.number().int().min(1).optional(),
+    // improve_cycle_metrics retention (default 365 days, owner-approved).
+    retentionDays: z.number().int().min(1).optional(),
+})
+    .passthrough();
 export const ImproveConfigSchema = z
     .object({
     utilityDecay: ImproveUtilityDecaySchema.optional(),
@@ -640,6 +661,7 @@ export const ImproveConfigSchema = z
     calibration: ImproveCalibrationSchema.optional(),
     exploration: ImproveExplorationSchema.optional(),
     salience: ImproveSalienceSchema.optional(),
+    collapseDetector: ImproveCollapseDetectorSchema.optional(),
 })
     .passthrough();
 // ── Index / per-pass ────────────────────────────────────────────────────────

package/dist/core/eval/rank-metrics.js ADDED

@@ -0,0 +1,113 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+export const DEFAULT_CURATE_WEIGHTS = {
+    ndcg: 0.5,
+    recall: 0.2,
+    mrr: 0.1,
+    noBannedAboveRequired: 0.2,
+};
+/** nDCG@k with binary relevance: gain 1 for relevant refs, 0 otherwise. */
+export function ndcgAtK(returned, relevant, k) {
+    const top = returned.slice(0, k);
+    let dcg = 0;
+    for (let i = 0; i < top.length; i++) {
+        if (relevant.has(top[i]))
+            dcg += 1 / Math.log2(i + 2);
+    }
+    const idealCount = Math.min(k, relevant.size);
+    let idcg = 0;
+    for (let i = 0; i < idealCount; i++)
+        idcg += 1 / Math.log2(i + 2);
+    return idcg === 0 ? 1 : dcg / idcg;
+}
+export function recallAtK(returned, relevant, k) {
+    if (relevant.size === 0)
+        return 1;
+    const top = new Set(returned.slice(0, k));
+    let hit = 0;
+    for (const r of relevant)
+        if (top.has(r))
+            hit += 1;
+    return hit / relevant.size;
+}
+export function mrr(returned, relevant) {
+    for (let i = 0; i < returned.length; i++) {
+        if (relevant.has(returned[i]))
+            return 1 / (i + 1);
+    }
+    return 0;
+}
+/**
+ * Leapfrog gate. A banned ref "leapfrogs" when it appears ABOVE at least one
+ * present relevant ref. Returns the fraction of present banned refs that do
+ * NOT leapfrog (1.0 when no banned ref is present, or none leapfrog), plus the
+ * raw violation count.
+ */
+export function noBannedAboveRequired(returned, relevant, banned) {
+    const rankOf = new Map();
+    returned.forEach((ref, i) => {
+        if (!rankOf.has(ref))
+            rankOf.set(ref, i);
+    });
+    const relevantRanks = returned.map((ref, i) => (relevant.has(ref) ? i : -1)).filter((i) => i >= 0);
+    if (relevantRanks.length === 0) {
+        // No relevant ref present to be leapfrogged — gate is vacuously satisfied.
+        return { score: 1, leapfrogCount: 0 };
+    }
+    const worstRelevantRank = Math.max(...relevantRanks);
+    const bannedPresent = returned.filter((ref) => banned.has(ref));
+    if (bannedPresent.length === 0)
+        return { score: 1, leapfrogCount: 0 };
+    let leapfrog = 0;
+    for (const b of bannedPresent) {
+        const rb = rankOf.get(b);
+        if (rb !== undefined && rb < worstRelevantRank)
+            leapfrog += 1;
+    }
+    return { score: 1 - leapfrog / bannedPresent.length, leapfrogCount: leapfrog };
+}
+/** Score a single curate result (ordered refs) against its judgment. */
+export function scoreCurateCase(returned, judgment, weights = DEFAULT_CURATE_WEIGHTS) {
+    const k = judgment.limit;
+    const relevant = new Set(judgment.relevant);
+    const banned = new Set(judgment.banned);
+    const ndcg = ndcgAtK(returned, relevant, k);
+    const recall = recallAtK(returned, relevant, k);
+    const rr = mrr(returned, relevant);
+    const gate = noBannedAboveRequired(returned, relevant, banned);
+    const score = ndcg * weights.ndcg + recall * weights.recall + rr * weights.mrr + gate.score * weights.noBannedAboveRequired;
+    return {
+        ndcg,
+        recall,
+        mrr: rr,
+        noBannedAboveRequired: gate.score,
+        bannedLeapfrogCount: gate.leapfrogCount,
+        score,
+    };
+}
+/** Aggregate per-case metrics into a suite summary. */
+export function summarizeCurateMetrics(metrics) {
+    const n = metrics.length;
+    if (n === 0) {
+        return {
+            caseCount: 0,
+            meanScore: 0,
+            meanNdcg: 0,
+            meanRecall: 0,
+            meanMrr: 0,
+            meanNoBannedAboveRequired: 1,
+            totalBannedLeapfrog: 0,
+        };
+    }
+    const sum = (sel) => metrics.reduce((a, m) => a + sel(m), 0);
+    return {
+        caseCount: n,
+        meanScore: sum((m) => m.score) / n,
+        meanNdcg: sum((m) => m.ndcg) / n,
+        meanRecall: sum((m) => m.recall) / n,
+        meanMrr: sum((m) => m.mrr) / n,
+        meanNoBannedAboveRequired: sum((m) => m.noBannedAboveRequired) / n,
+        totalBannedLeapfrog: sum((m) => m.bannedLeapfrogCount),
+    };
+}

package/dist/core/state/migrations.js CHANGED

@@ -700,6 +700,62 @@ const MIGRATIONS = [
       ALTER TABLE asset_salience ADD COLUMN encoding_source TEXT DEFAULT NULL;
     `,
     },
+    // ── Migration 016 — collapse/churn detector (R5) ─────────────────────────────
+    //
+    // Longitudinal store-health history for the improve pipeline
+    // (docs/design/improve-collapse-churn-detector-design.md).
+    //
+    //   canary_queries — the fixed canary set, minted deterministically from the
+    //     live stash on first detector run and NEVER auto-refreshed (silent
+    //     re-baselining is how a slow collapse hides). `canary_set_id` groups one
+    //     mint; deactivated sets keep their rows (active = 0) so historical cycle
+    //     rows stay interpretable. Tens of rows; never purged.
+    //
+    //   improve_cycle_metrics — one row per qualifying improve cycle (a run where
+    //     consolidate processed ≥1 op or recombine evaluated ≥1 cluster). Every
+    //     column is a scalar or a size-capped JSON blob (< 2 KB/row by
+    //     construction — the result_json lesson applied). Retention: 365 days via
+    //     purgeOldCycleMetrics. Trend queries drive the collapse/churn alert
+    //     evaluation and the health advisory; `canary_set_id` scoping prevents
+    //     comparing across canary re-mints.
+    {
+        id: "016-collapse-churn-detector",
+        up: `
+      CREATE TABLE IF NOT EXISTS canary_queries (
+        id            INTEGER PRIMARY KEY AUTOINCREMENT,
+        canary_set_id TEXT    NOT NULL,
+        anchor_ref    TEXT    NOT NULL,
+        query         TEXT    NOT NULL,
+        source        TEXT    NOT NULL DEFAULT 'auto',
+        active        INTEGER NOT NULL DEFAULT 1,
+        created_at    TEXT    NOT NULL
+      );
+      CREATE INDEX IF NOT EXISTS idx_canary_queries_active
+        ON canary_queries(active, canary_set_id);
+      CREATE TABLE IF NOT EXISTS improve_cycle_metrics (
+        id                      INTEGER PRIMARY KEY AUTOINCREMENT,
+        run_id                  TEXT    NOT NULL,
+        ts                      TEXT    NOT NULL,
+        pass                    TEXT    NOT NULL,
+        canary_set_id           TEXT    NOT NULL,
+        mean_recall             REAL    NOT NULL,
+        mean_ndcg               REAL    NOT NULL,
+        mean_mrr                REAL    NOT NULL,
+        canary_ranks_json       TEXT    NOT NULL,
+        store_total             INTEGER NOT NULL,
+        store_by_type_json      TEXT    NOT NULL,
+        distinct_content_ratio  REAL    NOT NULL,
+        mean_bigram_diversity   REAL    NOT NULL,
+        over_generation_count   INTEGER NOT NULL,
+        accepted_actions        INTEGER NOT NULL,
+        merge_floor_violations  INTEGER NOT NULL DEFAULT 0,
+        alerts_json             TEXT    NOT NULL DEFAULT '[]'
+      );
+      CREATE INDEX IF NOT EXISTS idx_improve_cycle_metrics_ts
+        ON improve_cycle_metrics(ts);
+    `,
+    },
 ];
 /**
  * Apply every pending migration in a single transaction per migration.