npm - akm-cli - Versions diffs - 0.9.0-beta.5 → 0.9.0-beta.9 - Mend

akm-cli 0.9.0-beta.5 → 0.9.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/CHANGELOG.md +119 -0
package/dist/cli.js +7 -0
package/dist/commands/feedback-cli.js +42 -37
package/dist/commands/graph/graph.js +75 -71
package/dist/commands/health.js +10 -2
package/dist/commands/improve/consolidate.js +24 -4
package/dist/commands/improve/distill.js +26 -5
package/dist/commands/improve/extract-prompt.js +1 -1
package/dist/commands/improve/improve-auto-accept.js +6 -0
package/dist/commands/improve/improve-profiles.js +4 -0
package/dist/commands/improve/improve.js +753 -465
package/dist/commands/improve/proactive-maintenance.js +113 -0
package/dist/commands/improve/reflect.js +6 -0
package/dist/commands/proposal/proposal.js +5 -0
package/dist/commands/proposal/validators/proposals.js +67 -54
package/dist/commands/read/curate.js +17 -0
package/dist/commands/sources/stash-cli.js +10 -2
package/dist/core/config/config-schema.js +25 -0
package/dist/core/paths.js +3 -0
package/dist/core/state-db.js +46 -1
package/dist/indexer/db/db.js +97 -11
package/dist/indexer/ensure-index.js +152 -17
package/dist/indexer/index-writer-lock.js +99 -0
package/dist/indexer/indexer.js +114 -111
package/dist/integrations/harnesses/claude/session-log.js +1 -1
package/dist/llm/client.js +23 -4
package/dist/scripts/migrate-storage.js +90 -13
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8 -1
package/dist/sources/providers/tar-utils.js +16 -8
package/package.json +2 -2

package/dist/commands/improve/improve.js CHANGED Viewed

@@ -19,8 +19,9 @@ import { info, warn } from "../../core/warn.js";
 import { closeDatabase, getAllEntries, getEntryCount, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../../indexer/db/db.js";
 import { ensureIndex } from "../../indexer/ensure-index.js";
 import { runGraphExtractionPass } from "../../indexer/graph/graph-extraction.js";
+import { withIndexWriterLease } from "../../indexer/index-writer-lock.js";
 import { akmIndex } from "../../indexer/indexer.js";
-import { runMemoryInferencePass } from "../../indexer/passes/memory-inference.js";
+import { collectPendingMemories, runMemoryInferencePass, } from "../../indexer/passes/memory-inference.js";
 import { runStalenessDetectionPass } from "../../indexer/passes/staleness-detect.js";
 import { getWritableStashDirs, resolveSourceEntries } from "../../indexer/search/search-source.js";
 import { countUsageEventsByType } from "../../indexer/usage/usage-events.js";
@@ -46,7 +47,105 @@ import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./i
 import { isProfileFilteredForAllPasses, resolveImproveProfile, resolveProcessEnabled, shouldSkipRef, } from "./improve-profiles.js";
 import { detectAndWriteContradictions } from "./memory/memory-contradiction-detect.js";
 import { analyzeMemoryCleanup, applyMemoryCleanup } from "./memory/memory-improve.js";
+import { DEFAULT_DUE_DAYS, DEFAULT_MAX_PER_RUN, selectProactiveMaintenanceRefs } from "./proactive-maintenance.js";
 import { akmReflect } from "./reflect.js";
+// #607 Lock Decomposition: fine-grained per-process locks replace the single
+// `improve.lock`. Three independent locks allow concurrent improve runs when
+// they touch different subsystems (e.g. quick-shredder consolidate can run
+// alongside daily reflect+distill).
+//
+//   consolidate.lock   — protects consolidate + memoryInference + graphExtraction (index.db writers)
+//   reflect-distill.lock — protects reflect + distill (both write state.db proposals)
+//   triage.lock         — protects triage (writes proposal promotions)
+//
+// Stale timeouts are per-lock, tuned to the expected runtime of the protected
+// processes: consolidate is disk-bound (1h), reflect+distill is GPU-bound (2h),
+// triage is fast (30min).
+const PROCESS_LOCK_DEFS = {
+    consolidate: { fileName: "consolidate.lock", staleAfterMs: 60 * 60 * 1000 },
+    reflectDistill: { fileName: "reflect-distill.lock", staleAfterMs: 2 * 60 * 60 * 1000 },
+    triage: { fileName: "triage.lock", staleAfterMs: 30 * 60 * 1000 },
+};
+const heldProcessLocks = new Set();
+export function resetHeldProcessLocks() {
+    heldProcessLocks.clear();
+}
+function processLockPath(lockBaseDir, lockName) {
+    return path.join(lockBaseDir, PROCESS_LOCK_DEFS[lockName].fileName);
+}
+function tryAcquireProcessLock(lockPath, staleAfterMs, skipIfLocked, lockLabel) {
+    fs.mkdirSync(path.dirname(lockPath), { recursive: true });
+    const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
+    if (tryAcquireLockSync(lockPath, lockPayload())) {
+        heldProcessLocks.add(lockPath);
+        return "acquired";
+    }
+    const probe = probeLock(lockPath, { staleAfterMs });
+    const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
+    const lock = rawContent
+        ? (() => {
+            try {
+                return JSON.parse(rawContent);
+            }
+            catch {
+                return null;
+            }
+        })()
+        : null;
+    if (probe.state === "stale") {
+        try {
+            appendEvent({
+                eventType: "improve_lock_recovered",
+                metadata: {
+                    lockName: lockLabel,
+                    stalePid: lock?.pid ?? null,
+                    lockedAt: lock?.startedAt ?? null,
+                    recoveredAt: new Date().toISOString(),
+                    lockAgeMs: probe.ageMs ?? null,
+                    reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
+                },
+            });
+        }
+        catch {
+            /* event emission is best-effort; never block lock recovery */
+        }
+        releaseLock(lockPath);
+        if (tryAcquireLockSync(lockPath, lockPayload())) {
+            heldProcessLocks.add(lockPath);
+            return "acquired";
+        }
+        if (skipIfLocked) {
+            warn(`[improve] ${lockLabel} lock acquired by another run during stale recovery; skipping (--skip-if-locked)`);
+            return "skipped";
+        }
+        throw new ConfigError(`akm improve ${lockLabel} is already running. Delete ${lockPath} to force.`, "INVALID_CONFIG_FILE");
+    }
+    if (skipIfLocked) {
+        warn(`[improve] ${lockLabel} lock held by another run (PID ${lock?.pid}, started ${lock?.startedAt}); skipping (--skip-if-locked)`);
+        return "skipped";
+    }
+    throw new ConfigError(`akm improve ${lockLabel} is already running (PID ${lock?.pid}, started ${lock?.startedAt}). Delete ${lockPath} to force.`, "INVALID_CONFIG_FILE");
+}
+function releaseProcessLock(lockPath) {
+    try {
+        fs.unlinkSync(lockPath);
+    }
+    catch {
+        // ignore
+    }
+    heldProcessLocks.delete(lockPath);
+}
+function releaseAllProcessLocks() {
+    for (const p of heldProcessLocks) {
+        try {
+            fs.unlinkSync(p);
+        }
+        catch {
+            // ignore
+        }
+    }
+    heldProcessLocks.clear();
+}
 function resolveImproveScope(scope) {
     const trimmed = scope?.trim();
     if (!trimmed)
@@ -102,6 +201,22 @@ export function renderSyncCommitMessage(template, result, nowMs) {
     };
     return template.replace(/\{(\w+)\}/g, (match, key) => (Object.hasOwn(tokens, key) ? tokens[key] : match));
 }
+/**
+ * Dedupe a list of eligible refs by `ref`, preserving first-seen order. Used to
+ * merge the three eligibility sources (feedback-signal, P0-A high-retrieval,
+ * Layer-2 proactive-maintenance) without admitting a ref into the loop twice.
+ */
+function dedupeRefs(refs) {
+    const seen = new Set();
+    const out = [];
+    for (const r of refs) {
+        if (seen.has(r.ref))
+            continue;
+        seen.add(r.ref);
+        out.push(r);
+    }
+    return out;
+}
 async function collectEligibleRefs(scope, stashDir, improveProfile) {
     if (scope.mode === "ref" && scope.value) {
         const parsed = parseAssetRef(scope.value);
@@ -471,7 +586,9 @@ export async function akmImprove(options = {}) {
     options = {
         ...options,
         autoAccept: options.autoAccept ?? improveProfile.autoAccept,
-        limit: options.limit ?? improveProfile.limit,
+        // CLI --limit takes precedence; otherwise the process-level reflect.limit
+        // applies, with the profile-level limit as the final fallback.
+        limit: options.limit ?? improveProfile?.processes?.reflect?.limit ?? improveProfile.limit,
     };
     let primaryStashDir;
     try {
@@ -489,103 +606,16 @@ export async function akmImprove(options = {}) {
     // timeout root cause). Because beforeEach runs synchronously, env is still the
     // calling test's own at this point; we capture it before yielding the loop.
     const resolvedStateDbPath = getStateDbPathInDataDir();
-    // Phase 4 lock hoist (§7): the `improve.lock` setup is hoisted ABOVE
-    // ensureIndex/collectEligibleRefs so the triage pre-pass (and improve's own
-    // queue writes) run fully serialized under the lock. The dry-run early-return
-    // below still skips the lock and triage (the lock+triage block is gated on
-    // `!options.dryRun`); contradiction-detection and memory-cleanup analysis,
-    // which previously ran before the lock, now sit after it for free.
-    const resolvedLockPath = primaryStashDir
-        ? path.join(primaryStashDir, ".akm", "improve.lock")
-        : path.join(options.stashDir ?? ".", ".akm", "improve.lock");
-    const MAX_LOCK_AGE_MS = 4 * 60 * 60 * 1000; // 4 hours
-    const acquireLock = () => {
-        fs.mkdirSync(path.dirname(resolvedLockPath), { recursive: true });
-        const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
-        if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
-            return "acquired";
-        // Lock file already exists — probe to determine whether it's still held
-        // or whether the prior run died without cleaning up.
-        const probe = probeLock(resolvedLockPath, { staleAfterMs: MAX_LOCK_AGE_MS });
-        const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
-        const lock = rawContent
-            ? (() => {
-                try {
-                    return JSON.parse(rawContent);
-                }
-                catch {
-                    return null;
-                }
-            })()
-            : null;
-        if (probe.state === "stale") {
-            // O-7 / #394: Emit improve_lock_recovered event before recovery so the
-            // audit trail records the abnormal prior-run exit (Temporal/Airflow pattern).
-            try {
-                appendEvent({
-                    eventType: "improve_lock_recovered",
-                    metadata: {
-                        stalePid: lock?.pid ?? null,
-                        lockedAt: lock?.startedAt ?? null,
-                        recoveredAt: new Date().toISOString(),
-                        lockAgeMs: probe.ageMs ?? null,
-                        reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
-                    },
-                });
-            }
-            catch {
-                /* event emission is best-effort; never block lock recovery */
-            }
-            releaseLock(resolvedLockPath);
-            if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
-                return "acquired";
-            // Lost the race to another run that grabbed the freed stale lock.
-            if (options.skipIfLocked) {
-                warn("[improve] another run acquired the lock during stale recovery; skipping (--skip-if-locked)");
-                return "skipped";
-            }
-            throw new ConfigError(`akm improve is already running. Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
-        }
-        // Lock is held by a live run within the staleness window.
-        if (options.skipIfLocked) {
-            warn(`[improve] another improve run holds the lock (PID ${lock?.pid}, started ${lock?.startedAt}); skipping (--skip-if-locked)`);
-            return "skipped";
-        }
-        throw new ConfigError(`akm improve is already running (PID ${lock?.pid}, started ${lock?.startedAt}). Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
-    };
-    // Phase 4 lock-leak guard (§7 ordering hazard): hoisting `improve.lock` above
-    // the pre-index region (so the triage pre-pass runs under it) means the lock is
-    // held while ensureIndex / collectEligibleRefs / contradiction-detection /
-    // memory-cleanup analysis run — but the main protecting `try { … } finally {
-    // unlinkSync(resolvedLockPath) }` does not begin until after them. A throw in
-    // any of those steps would leak the lock. We close that window by wrapping the
-    // whole region in a try whose catch releases the lock (when held) and
-    // re-throws. The values this region computes are declared in the outer scope so
-    // they remain visible to the main run below. The dry-run path never sets
-    // `lockAcquired`, so its early return releases nothing.
-    let lockAcquired = false;
-    const releaseLockOnError = () => {
-        if (!lockAcquired)
-            return;
-        try {
-            fs.unlinkSync(resolvedLockPath);
-        }
-        catch {
-            // best-effort release on the error path
-        }
-        lockAcquired = false;
-    };
-    // Signal-safe lock release. The SIGTERM/SIGINT/SIGHUP handler in improve-cli.ts
-    // calls `process.exit()`, which does NOT run the `finally` below that owns lock
-    // release — so a cron-timeout SIGTERM leaked `improve.lock` every run.
-    // `process.exit()` DOES fire `'exit'` listeners, so we release the lock from
-    // one. `releaseLockIfOwned` only unlinks a lock still owned by this PID, so it
-    // is safe even if a later run re-acquired it. The listener is removed in the
-    // `finally` so the normal path stays single-release and repeated in-process
-    // `akmImprove` calls (tests) do not accumulate listeners.
-    const releaseLockOnExit = () => {
-        releaseLockIfOwned(resolvedLockPath, process.pid);
-    };
+    // #607 Lock decomposition: three per-process locks replace the single
+    // `improve.lock`. Each process acquires only the lock(s) it needs, so
+    // quick-shredder consolidate can run alongside daily reflect+distill.
+    //
+    //   consolidate.lock     — protects consolidate + memoryInference + graphExtraction (index.db writers)
+    //   reflect-distill.lock — protects reflect + distill (state.db proposal writers)
+    //   triage.lock          — protects triage pre-pass (state.db proposal promotions)
+    //
+    // Lock base directory — same `.akm/` under the primary stash dir.
+    const lockBaseDir = primaryStashDir ? path.join(primaryStashDir, ".akm") : path.join(options.stashDir ?? ".", ".akm");
     const preEnsureCleanupWarnings = [];
     let plannedRefs;
     let memorySummary;
@@ -594,65 +624,59 @@ export async function akmImprove(options = {}) {
     let guidance;
     let triageDrain;
     try {
-        // Acquire the lock and run the triage pre-pass for non-dry-run executions.
-        // The dry-run branch below produces plannedRefs/memorySummary WITHOUT the lock
-        // or triage (decision: dry-run never mutates the queue).
+        // #607: Per-process lock acquisition. Each process acquires only the lock(s)
+        // it needs. The dry-run branch produces plannedRefs/memorySummary WITHOUT any
+        // locks (decision: dry-run never mutates the queue).
         if (!options.dryRun) {
-            if (acquireLock() === "skipped") {
-                // Another improve holds the lock and the caller asked to skip rather
-                // than fail. Return a clean no-op result (exit 0) before any index/DB
-                // work — never registered the exit listener, never set lockAcquired,
-                // so we release nothing belonging to the run that owns the lock.
-                return {
-                    schemaVersion: 1,
-                    ok: true,
-                    scope,
-                    dryRun: false,
-                    skipped: { reason: "lock-held" },
-                    memorySummary: { eligible: 0, derived: 0 },
-                    plannedRefs: [],
-                };
-            }
-            lockAcquired = true;
             // Backstop release on process.exit() (signal handler / budget watchdog),
             // which skips the finally below. Removed in that finally on the normal path.
-            process.on("exit", releaseLockOnExit);
-            // Phase 4 triage pre-pass (§7, §13): drain the standing pending backlog
-            // BEFORE ensureIndex so improve generates fresh proposals against a cleared
-            // queue (no `duplicate_pending` collisions) and ensureIndex absorbs triage's
-            // promotions for free. Gated on the triage process being enabled (opt-in,
-            // defaults off) and on a whole-stash / type-scoped run — a single-ref
-            // `akm improve skill:x` must never drain the whole queue. Best-effort: a
-            // triage failure is a non-fatal warning, never an abort (mirrors the
-            // contradiction-detection pass below).
+            const releaseAllOnExit = () => {
+                for (const p of heldProcessLocks) {
+                    releaseLockIfOwned(p, process.pid);
+                }
+            };
+            process.on("exit", releaseAllOnExit);
+            // #607 triage pre-pass: acquire triage.lock, drain the standing pending
+            // backlog BEFORE ensureIndex so improve generates fresh proposals against
+            // a cleared queue (no `duplicate_pending` collisions) and ensureIndex
+            // absorbs triage's promotions for free. Release immediately after —
+            // triage.lock is not needed again until the next improve run.
             if (primaryStashDir && resolveProcessEnabled("triage", improveProfile)) {
                 if (scope.mode === "ref") {
                     warn("[improve] triage pre-pass skipped (single-ref scope never drains the whole queue)");
                 }
                 else {
-                    try {
-                        const triageConfig = improveProfile.processes?.triage;
-                        const policy = resolveDrainPolicy(triageConfig?.policy);
-                        const applyMode = triageConfig?.applyMode ?? "queue";
-                        const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
-                        const judgment = triageConfig?.judgment
-                            ? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
-                            : null;
-                        triageDrain = await drainProposalsFn({
-                            stashDir: primaryStashDir,
-                            policy,
-                            applyMode,
-                            maxAccepts,
-                            dryRun: false,
-                            // No fresh ids exist yet — triage runs before improve generates any.
-                            excludeIds: new Set(),
-                            ...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
-                            judgment,
-                        });
+                    const triageLPath = processLockPath(lockBaseDir, "triage");
+                    const triageResult = tryAcquireProcessLock(triageLPath, PROCESS_LOCK_DEFS.triage.staleAfterMs, options.skipIfLocked, "triage");
+                    if (triageResult === "skipped") {
+                        triageDrain = undefined;
                     }
-                    catch (err) {
-                        // Non-fatal: triage is a best-effort pre-pass and must never abort improve.
-                        warn(`[improve] triage pre-pass failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
+                    else {
+                        try {
+                            const triageConfig = improveProfile.processes?.triage;
+                            const policy = resolveDrainPolicy(triageConfig?.policy);
+                            const applyMode = triageConfig?.applyMode ?? "queue";
+                            const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
+                            const judgment = triageConfig?.judgment
+                                ? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
+                                : null;
+                            triageDrain = await drainProposalsFn({
+                                stashDir: primaryStashDir,
+                                policy,
+                                applyMode,
+                                maxAccepts,
+                                dryRun: false,
+                                excludeIds: new Set(),
+                                ...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
+                                judgment,
+                            });
+                        }
+                        catch (err) {
+                            warn(`[improve] triage pre-pass failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
+                        }
+                        finally {
+                            releaseProcessLock(triageLPath);
+                        }
                     }
                 }
             }
@@ -684,7 +708,7 @@ export async function akmImprove(options = {}) {
                 // best-effort; leave preEnsureEntryCount undefined
             }
             try {
-                await ensureIndexFn(primaryStashDir);
+                await ensureIndexFn(primaryStashDir, { mode: "blocking" });
             }
             catch (err) {
                 preEnsureCleanupWarnings.push(`ensureIndex failed: ${err instanceof Error ? err.message : String(err)}`);
@@ -752,17 +776,14 @@ export async function akmImprove(options = {}) {
         }
     }
     catch (err) {
-        releaseLockOnError();
+        releaseAllProcessLocks();
         throw err;
     }
-    // FIX 2 (lock-leak window): everything from here on runs UNDER the lock that
-    // `acquireLock()` just took. The single `try { … } finally { unlinkSync(lock) }`
-    // below now spans the budget-timer setup, `openStateDatabase()`, and the
-    // `profileFilteredRefs` audit-event loop too — regions that previously sat in
-    // the gap between the lock-acquire catch (above) and the main try. A throw in
-    // any of them used to leak the lock (blocking the next improve up to 4h);
-    // now the finally releases it exactly once. The dry-run path already returned
-    // above without acquiring the lock, so it never reaches this finally; the
+    // #607: per-process locks are acquired/released around each stage below.
+    // The triage pre-pass already ran under triage.lock (released). The
+    // preparation stage runs under consolidate.lock, the loop stage under
+    // reflect-distill.lock, and the post-loop stage under consolidate.lock again.
+    // Each stage acquires its lock just before starting and releases it right after; the outer finally is the backstop.
     // best-effort `unlinkSync` is a no-op when no lock file exists.
     const startMs = Date.now();
     const budgetMs = options.timeoutMs ?? 2 * 60 * 60 * 1000; // default 2 hours
@@ -826,6 +847,10 @@ export async function akmImprove(options = {}) {
                 },
             }, eventsCtx);
         }
+        // #607: acquire consolidate.lock for the preparation stage (consolidate,
+        // ensureIndex, extract all write index.db). Released immediately after.
+        const consolidateLPath = processLockPath(lockBaseDir, "consolidate");
+        const consolidatePrepAcquired = tryAcquireProcessLock(consolidateLPath, PROCESS_LOCK_DEFS.consolidate.staleAfterMs, options.skipIfLocked, "consolidate") === "acquired";
         const preparation = await runImprovePreparationStage({
             scope,
             options,
@@ -840,6 +865,8 @@ export async function akmImprove(options = {}) {
             initialCleanupWarnings: preEnsureCleanupWarnings,
             improveProfile,
         });
+        if (consolidatePrepAcquired)
+            releaseProcessLock(consolidateLPath);
         // D6: pre-load all proposal_rejected events from the last 30 days once,
         // so the per-asset loop can use a Map lookup instead of N DB round trips.
         const REJECTED_PROPOSAL_WINDOW_MS = daysToMs(30);
@@ -851,6 +878,10 @@ export async function akmImprove(options = {}) {
                 rejectedProposalsByRef.set(e.ref, e);
             }
         }
+        // #607: acquire reflect-distill.lock for the loop stage (reflect + distill
+        // both write proposals to state.db). Released immediately after.
+        const reflectDistillLPath = processLockPath(lockBaseDir, "reflectDistill");
+        const reflectDistillAcquired = tryAcquireProcessLock(reflectDistillLPath, PROCESS_LOCK_DEFS.reflectDistill.staleAfterMs, options.skipIfLocked, "reflect-distill") === "acquired";
         const { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount: loopGateCount, gateAutoAcceptFailedCount: loopGateFailedCount, } = await runImproveLoopStage({
             scope,
             options,
@@ -870,9 +901,15 @@ export async function akmImprove(options = {}) {
             eventsCtx,
             improveProfile,
         });
+        if (reflectDistillAcquired)
+            releaseProcessLock(reflectDistillLPath);
         // #551: consolidation now runs in the preparation stage (before extract);
         // its result and run-flag are read from `preparation`, not the post-loop.
         const consolidation = preparation.consolidation;
+        // #607: acquire consolidate.lock for the post-loop stage (memoryInference +
+        // graphExtraction both write index.db). Released immediately after.
+        const consolidatePostLPath = processLockPath(lockBaseDir, "consolidate");
+        const consolidatePostAcquired = tryAcquireProcessLock(consolidatePostLPath, PROCESS_LOCK_DEFS.consolidate.staleAfterMs, options.skipIfLocked, "consolidate") === "acquired";
         const { allWarnings, deadUrls, memoryInference, graphExtraction, stalenessDetection, maintenanceActions, memoryInferenceDurationMs, graphExtractionDurationMs, orphansPurged, proposalsExpired, gateAutoAcceptedCount: postLoopGateCount, gateAutoAcceptFailedCount: postLoopGateFailedCount, } = await runImprovePostLoopStage({
             scope,
             options,
@@ -883,11 +920,12 @@ export async function akmImprove(options = {}) {
             memoryRefsForInference,
             reindexFn,
             eventsCtx,
-            // O-1 (#364): propagate wall-clock budget signal to post-loop maintenance.
             budgetSignal: budgetAbortController.signal,
             improveProfile,
             consolidationRan: preparation.consolidationRan,
         });
+        if (consolidatePostAcquired)
+            releaseProcessLock(consolidatePostLPath);
         const finalActions = maintenanceActions && maintenanceActions.length > 0
             ? [...preparation.actions, ...maintenanceActions]
             : preparation.actions;
@@ -972,6 +1010,7 @@ export async function akmImprove(options = {}) {
                     },
                 }
                 : {}),
+            ...(preparation.proactiveMaintenance ? { proactiveMaintenance: preparation.proactiveMaintenance } : {}),
             ...(options.runId !== undefined ? { runId: options.runId } : {}),
         };
         if (!result.dryRun)
@@ -1054,15 +1093,12 @@ export async function akmImprove(options = {}) {
         // O-1 (#364): Clear the budget abort timer so it does not keep the event
         // loop alive after the run completes.
         clearBudgetTimer();
-        try {
-            fs.unlinkSync(resolvedLockPath);
-        }
-        catch {
-            // ignore
-        }
-        // The normal path released the lock above; drop the process.exit backstop so
-        // it does not fire later (or accumulate across repeated in-process calls).
-        process.removeListener("exit", releaseLockOnExit);
+        // #607: release any per-process locks still held (backstop for error paths;
+        // the normal path already released each lock after its stage completed).
+        releaseAllProcessLocks();
+        // Drop the process.exit backstop so it does not fire later (or accumulate
+        // across repeated in-process calls).
+        process.removeAllListeners("exit");
         // I1: close the long-lived state.db connection opened at the top of the run.
         try {
             eventsDb?.close();
@@ -1175,6 +1211,11 @@ function emitImproveCompletedEvent(result, durations, eventsCtx) {
             memoryInferenceDurationMs: durations.memoryInferenceDurationMs,
             graphExtractionExtractedFiles: result.graphExtraction?.quality.extractedFiles ?? 0,
             graphExtractionDurationMs: durations.graphExtractionDurationMs,
+            // Layer-2 proactive-maintenance coverage (0 when the process is disabled
+            // or the run was ref-scoped) so a scheduled sweep's reach is trackable.
+            proactiveSelected: result.proactiveMaintenance?.selected ?? 0,
+            proactiveDueTotal: result.proactiveMaintenance?.dueTotal ?? 0,
+            proactiveNeverReflected: result.proactiveMaintenance?.neverReflected ?? 0,
             // New metrics for tuning the improve loop.
             ...(durations.totalDurationMs !== undefined ? { durationMs: durations.totalDurationMs } : {}),
             ...(durations.warningCount !== undefined ? { warningCount: durations.warningCount } : {}),
@@ -1385,13 +1426,13 @@ async function runConsolidationPass(args) {
             // Tie consolidate proposals back to this improve invocation so
             // accept-rate-per-run aggregation works. Mirrors reflect/propose/extract.
             sourceRun: `consolidate-${Date.now()}`,
-            // Full-pool sweep: consolidation only runs on the nightly default-profile
-            // pass (quick/frequent disable it), so a complete re-cluster is correct and
-            // affordable here. Do NOT pass incrementalSince — the time-window narrowing
-            // it triggers permanently excludes stale-but-unmerged duplicate clusters,
-            // starving merge recall and letting the pool grow unbounded. (The narrowing
-            // was a band-aid for an every-30-min consolidation cadence that the profile
-            // split has since eliminated.) lastConsolidateTs still gates whether we run.
+            // Pass profile-configured options. incrementalSince narrows the pool to
+            // recently-changed memories + graph neighbours — use this for frequent
+            // passes (quick-shredder). Leave absent in the nightly default profile for
+            // a full-pool sweep that catches stale-but-unmerged duplicates.
+            incrementalSince: improveProfile?.processes?.consolidate?.incrementalSince,
+            limit: improveProfile?.processes?.consolidate?.limit,
+            neighborsPerChanged: improveProfile?.processes?.consolidate?.neighborsPerChanged,
             maxChunkSize: improveProfile?.processes?.consolidate?.maxChunkSize,
             // Honor profile.autoAccept (already merged into options.autoAccept at the
             // top of akmImprove). The CLI parser always supplies 90 when --auto-accept
@@ -1420,7 +1461,14 @@ async function runConsolidationPass(args) {
             appendEvent({
                 eventType: "consolidate_completed",
                 ref: "memory:_consolidation",
-                metadata: { processed: consolidation.processed, merged: consolidation.merged },
+                metadata: {
+                    processed: consolidation.processed,
+                    merged: consolidation.merged,
+                    deleted: consolidation.deleted,
+                    contradicted: consolidation.contradicted,
+                    failedChunks: consolidation.failedChunks ?? 0,
+                    durationMs: consolidation.durationMs,
+                },
             }, eventsCtx);
         }
     }
@@ -1791,10 +1839,19 @@ async function runImprovePreparationStage(args) {
     //                         refs that fail the distill signal-delta gate).
     //   distillOnlyRefs     — reflect blocked but distill signal-delta passes
     //                         AND ref is a distill candidate.
-    //   fullySkippedCount   — neither gate passes → synthetic skip action
-    //                         + improve_skipped event, excluded from sort.
+    //   noFeedbackPool      — neither signal-delta gate passes *and* the ref has
+    //                         no recent feedback signal at all. These are NOT
+    //                         skipped here: they are handed to the high-retrieval
+    //                         fallback (P0-A) below so frequently-retrieved but
+    //                         never-rated assets can still be improved. Only refs
+    //                         that P0-A declines are ultimately fully skipped.
+    //   fullySkippedCount   — has stale feedback but no signal delta → genuine
+    //                         skip (counted, aggregated event emitted post-loop),
+    //                         excluded from sort.
     const eligibleRefs = [];
     const distillOnlyRefs = [];
+    // Zero-(recent-)feedback refs deferred to the P0-A high-retrieval fallback.
+    const noFeedbackPool = [];
     let fullySkippedCount = 0;
     // O-2 (#365): explicit --scope <ref> bypasses every gate (user intent wins).
     const scopeRefBypass = scope.mode === "ref";
@@ -1832,22 +1889,59 @@ async function runImprovePreparationStage(args) {
             // Reflect blocked but distill passes → distill-only bucket.
             distillOnlyRefs.push(r);
         }
+        else if (!latestFeedbackTs.has(r.ref)) {
+            // Neither signal-delta gate passes AND there is no recent feedback signal
+            // at all. Rather than skip outright, defer to the high-retrieval fallback
+            // (P0-A) below: a never-rated-but-frequently-retrieved asset is exactly
+            // what that path is meant to rescue. Refs P0-A declines are skipped there.
+            noFeedbackPool.push(r);
+        }
         else {
-            // Neither gate passes — fully skipped.
+            // Has feedback on record but no signal delta since the last proposal —
+            // genuinely fully skipped. Counted here; a single aggregated
+            // improve_skipped event is emitted after the loop (mirrors
+            // profile_filtered_all_passes) instead of one event per ref.
             fullySkippedCount++;
             actions.push({
                 ref: r.ref,
                 mode: "distill-skipped",
                 result: { ok: true, reason: "no new signal since last proposal" },
             });
-            appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "no_new_signal" } }, eventsCtx);
         }
     }
+    // Emit ONE aggregated skip event for the fully-skipped bucket rather than one
+    // improve_skipped event per ref (#592 pattern, mirrors
+    // profile_filtered_all_passes above). The per-ref loop previously produced
+    // ~11K state.db writes per run on a large stash, the dominant contributor to
+    // 900 s timeouts. The in-memory `actions` log keeps the per-ref detail for the
+    // run summary; no downstream consumer needs a per-ref DB audit trail (health's
+    // skip histogram reads the `no_new_signal` counter from the count field).
+    if (fullySkippedCount > 0) {
+        appendEvent({
+            eventType: "improve_skipped",
+            ref: undefined,
+            metadata: {
+                reason: "no_new_signal",
+                count: fullySkippedCount,
+            },
+        }, eventsCtx);
+    }
     // ── Phase 4: signal/feedback/utility/sort on the reduced set ──────────────
-    // Everything from here works only on (eligibleRefs ∪ distillOnlyRefs). The
-    // fully-skipped bucket has already been routed and emitted; we deliberately
-    // avoid spending DB/CPU on refs that cannot enter the loop.
+    // Everything from here works on (eligibleRefs ∪ distillOnlyRefs) plus the
+    // deferred noFeedbackPool that may be rescued by the high-retrieval fallback
+    // (P0-A). The fully-skipped bucket has already been routed and its aggregated
+    // event emitted; we deliberately avoid spending DB/CPU on refs that the
+    // signal-delta gate rejected with feedback already on record.
     const processableRefs = [...eligibleRefs, ...distillOnlyRefs];
+    // Refs eligible for the high-retrieval fallback (P0-A): the signal-delta
+    // partition above could not place these in a reflect/distill bucket, but they
+    // may still qualify if they have been retrieved often enough. Two disjoint
+    // sources feed this set:
+    //   1. noFeedbackPool — refs with no recent feedback that the partition loop
+    //      deliberately deferred here (otherwise they would never reach P0-A).
+    //   2. processableRefs entries that turn out to carry no recent feedback
+    //      *signal* once feedbackSummary is computed below.
+    // Both are merged into noFeedbackCandidates after feedbackSummary is built.
     // Gap 6: only surface feedback signals from the last 30 days so that
     // ancient one-off feedback events don't permanently lock an asset into
     // every improve run. Assets with only stale signals fall through to the
@@ -1857,8 +1951,12 @@ async function runImprovePreparationStage(args) {
     // Pre-compute feedback summary per ref in a single pass so we don't issue
     // two readEvents({type:"feedback", ref}) per asset (one for signal filtering,
     // one for ratio computation).
+    // Cover processableRefs *and* the deferred noFeedbackPool so utility/feedback
+    // ratios are available for any noFeedbackPool ref that P0-A rescues below.
     const feedbackSummary = new Map();
-    for (const candidate of processableRefs) {
+    for (const candidate of [...processableRefs, ...noFeedbackPool]) {
+        if (feedbackSummary.has(candidate.ref))
+            continue;
         const { events } = readEvents({ type: "feedback", ref: candidate.ref });
         let hasSignal = false;
         let positive = 0;
@@ -1881,8 +1979,21 @@ async function runImprovePreparationStage(args) {
     // P0-A: also surface zero-feedback assets that have been retrieved many times.
     const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
     const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
-    const noFeedbackCandidates = processableRefs.filter((r) => !signalBearingSet.has(r.ref));
+    // Zero-feedback candidates for P0-A: processableRefs without a recent signal,
+    // plus the deferred noFeedbackPool. Dedupe by ref (the two sources are
+    // disjoint by construction, but guard against overlap defensively).
+    const noFeedbackSeen = new Set();
+    const noFeedbackCandidates = [];
+    for (const r of [...processableRefs.filter((r) => !signalBearingSet.has(r.ref)), ...noFeedbackPool]) {
+        if (noFeedbackSeen.has(r.ref))
+            continue;
+        noFeedbackSeen.add(r.ref);
+        noFeedbackCandidates.push(r);
+    }
     let highRetrievalRefs = [];
+    // Retrieval counts for the zero-feedback pool, hoisted so the Layer-2
+    // proactive-maintenance selector below can reuse them without a second DB pass.
+    let retrievalCounts = new Map();
     let dbForRetrieval;
     try {
         dbForRetrieval = openExistingDatabase();
@@ -1890,15 +2001,21 @@ async function runImprovePreparationStage(args) {
         if (showEventCount === 0) {
             warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
         }
-        const retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
+        retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
         // High-retrieval signal-delta (simplified rule, 0.8.0): a no-feedback
-        // ref qualifies exactly once — when retrievalCount ≥ threshold AND no
-        // prior reflect proposal exists for it. Once a reflect proposal is on
-        // record, subsequent re-eligibility requires explicit feedback (which
-        // flows through the normal signal-delta gate above). Tracking growth in
-        // retrieval count would require persisting the count in proposal
-        // metadata; deferred to a follow-up.
-        highRetrievalRefs = noFeedbackCandidates.filter((r) => (retrievalCounts.get(r.ref) ?? 0) >= RETRIEVAL_COUNT_THRESHOLD && !lastReflectProposalTs.has(r.ref));
+        // ref qualifies exactly once — when it has actually been retrieved
+        // (retrievalCount ≥ 1) AND retrievalCount ≥ threshold AND no prior reflect
+        // proposal exists for it. Once a reflect proposal is on record, subsequent
+        // re-eligibility requires explicit feedback (which flows through the normal
+        // signal-delta gate above). The explicit `> 0` guard keeps a threshold of 0
+        // from rescuing genuinely never-retrieved assets — the fallback is for
+        // *retrieved* assets, not silent ones. Tracking growth in retrieval count
+        // would require persisting the count in proposal metadata; deferred to a
+        // follow-up.
+        highRetrievalRefs = noFeedbackCandidates.filter((r) => {
+            const count = retrievalCounts.get(r.ref) ?? 0;
+            return count > 0 && count >= RETRIEVAL_COUNT_THRESHOLD && !lastReflectProposalTs.has(r.ref);
+        });
     }
     catch (err) {
         rethrowIfTestIsolationError(err);
@@ -1908,6 +2025,91 @@ async function runImprovePreparationStage(args) {
         if (dbForRetrieval)
             closeDatabase(dbForRetrieval);
     }
+    // ── Layer 2: PROACTIVE MAINTENANCE SELECTOR (third eligibility source) ─────
+    // The signal-delta gate and P0-A only surface assets with fresh feedback or a
+    // raw-retrieval spike. Neither revisits a stable, high-value asset on a
+    // schedule, so on a quiet stash useful assets drift stale and are never
+    // refreshed. When the `proactiveMaintenance` process is enabled (DEFAULT OFF)
+    // and the run is whole-stash / type scope, this selector ranks the eligible
+    // population by a composite maintenance priority, gates on staleness ("due"),
+    // bounds to top-N, and folds the winners into the SAME candidate set the other
+    // two sources feed — so they flow through the existing #580 empty-diff /
+    // cosmetic suppression and additive-distill gates. It adds no new mutation
+    // logic of its own. The due gate doubles as the rotation cooldown: a freshly
+    // reflected asset is excluded until it ages back past `dueDays`, so successive
+    // runs rotate through the due pool rather than re-selecting the same heads.
+    let proactiveRefs = [];
+    let proactiveMaintenanceSummary;
+    const proactiveEnabled = scope.mode !== "ref" && resolveProcessEnabled("proactiveMaintenance", improveProfile);
+    if (proactiveEnabled) {
+        const pmCfg = improveProfile.processes?.proactiveMaintenance;
+        const dueDays = pmCfg?.dueDays ?? DEFAULT_DUE_DAYS;
+        const maxPerRun = pmCfg?.maxPerRun ?? pmCfg?.limit ?? DEFAULT_MAX_PER_RUN;
+        const importanceWeights = pmCfg?.importanceWeights;
+        // Candidate population: the zero-feedback / non-signal pool — exactly the
+        // assets the other two sources would NOT pick this run. Exclude any P0-A
+        // rescued this run so we never double-select the same ref.
+        const alreadySelected = new Set(highRetrievalRefs.map((r) => r.ref));
+        const pmCandidates = noFeedbackCandidates.filter((r) => !alreadySelected.has(r.ref));
+        const selection = selectProactiveMaintenanceRefs({
+            candidates: pmCandidates,
+            lastReflectTs: lastReflectProposalTs,
+            lastDistillTs: lastDistillProposalTs,
+            retrievalCounts,
+            sizeBytesOf: (r) => {
+                const fp = r.filePath;
+                if (!fp)
+                    return undefined;
+                try {
+                    return fs.statSync(fp).size;
+                }
+                catch {
+                    return undefined;
+                }
+            },
+            dueDays,
+            maxPerRun,
+            importanceWeights,
+        });
+        proactiveRefs = selection.selected;
+        proactiveMaintenanceSummary = {
+            selected: selection.selected.length,
+            dueTotal: selection.dueTotal,
+            neverReflected: selection.neverReflected,
+        };
+        // Aggregated observability event (never per-ref — avoids the event flood the
+        // Layer-1 work eliminated). Mirrors the `no_new_signal` aggregation pattern.
+        appendEvent({
+            eventType: "proactive_selected",
+            ref: undefined,
+            metadata: {
+                count: selection.selected.length,
+                dueTotal: selection.dueTotal,
+                neverReflected: selection.neverReflected,
+            },
+        }, eventsCtx);
+        if (selection.selected.length > 0) {
+            info(`[improve] proactive maintenance selected ${selection.selected.length}/${selection.dueTotal} due refs ` +
+                `(${selection.neverReflected} never reflected, dueDays=${dueDays}, maxPerRun=${maxPerRun})`);
+        }
+    }
+    // Record an in-memory skip action for every zero-feedback ref the partition
+    // loop deferred that neither P0-A (retrievalCount below threshold, or a prior
+    // reflect proposal on record) nor proactive maintenance rescued. These never make
+    // it into mergedRefs, so without this they would silently vanish from the run
+    // summary. No DB event is written here — these refs carry no signal at all, so
+    // there is nothing for the skip histogram to aggregate; the action log alone
+    // preserves the per-ref audit trail (mirrors the fully-skipped action above).
+    const rescuedSet = new Set([...highRetrievalRefs, ...proactiveRefs].map((r) => r.ref));
+    for (const r of noFeedbackPool) {
+        if (rescuedSet.has(r.ref))
+            continue;
+        actions.push({
+            ref: r.ref,
+            mode: "distill-skipped",
+            result: { ok: true, reason: "no new signal since last proposal" },
+        });
+    }
     // If the user explicitly scoped to a single ref, always act on it —
     // skip the signal/retrieval filter entirely. The filter exists to avoid
     // noisy "improve everything" runs; it should not gate an intentional
@@ -1917,8 +2119,48 @@ async function runImprovePreparationStage(args) {
     // or sufficient retrievals). A stash with no signals has 0 eligible refs —
     // usage is the gate. Run `akm feedback <ref> --positive` or retrieve assets
     // to bring them into the eligible pool.
-    const signalAndRetrievalRefs = [...signalFiltered, ...highRetrievalRefs];
+    // Layer-2 proactive refs join the eligible set alongside feedback-signal and
+    // high-retrieval (P0-A) refs. The three sources are disjoint by construction
+    // (proactive draws from noFeedbackCandidates with the P0-A picks removed), but
+    // dedupe defensively so a ref can never enter the loop twice. `requireFeedbackSignal`
+    // still suppresses both fallback sources for callers that want feedback-only runs.
+    const signalAndRetrievalRefs = dedupeRefs([...signalFiltered, ...highRetrievalRefs, ...proactiveRefs]);
     const mergedRefs = scope.mode === "ref" ? processableRefs : options.requireFeedbackSignal ? signalFiltered : signalAndRetrievalRefs;
+    // ── Attribution tagging: stamp each ref with the eligibility lane that
+    // selected it ──────────────────────────────────────────────────────────────
+    // Every reflect/distill proposal must record WHICH lane chose its source asset
+    // so downstream accept/reject/revert/retrieval outcomes can be sliced by lane
+    // (does the PROACTIVE lane produce value vs the reactive lanes?). We build the
+    // lane map here — the one place all four lanes are known — and stamp it onto
+    // each ImproveEligibleRef object. Because the ref objects are shared by
+    // reference across buckets, the stamp travels with the ref through the sort,
+    // disk-check, and loop stages down to the reflect/distill event emit sites and
+    // createProposal calls. See EligibilitySource for the lane vocabulary.
+    //
+    // Precedence (prefer the most specific reactive signal):
+    //   scope > signal-delta > high-retrieval > proactive
+    // A ref with real feedback is attributed to feedback even if it was also due
+    // for proactive maintenance. We apply lanes weakest-first so the strongest
+    // overwrites; the explicit --scope <ref> bypass wins outright (user intent).
+    const eligibilitySourceByRef = new Map();
+    for (const r of proactiveRefs)
+        eligibilitySourceByRef.set(r.ref, "proactive");
+    for (const r of highRetrievalRefs)
+        eligibilitySourceByRef.set(r.ref, "high-retrieval");
+    for (const r of signalFiltered)
+        eligibilitySourceByRef.set(r.ref, "signal-delta");
+    if (scope.mode === "ref") {
+        // O-2 (#365): explicit --scope <ref> bypass — every ref in processableRefs
+        // arrived via the scopeRefBypass branch, so attribute the whole set to scope.
+        for (const r of processableRefs)
+            eligibilitySourceByRef.set(r.ref, "scope");
+    }
+    for (const r of mergedRefs) {
+        // "unknown" is a genuine fallback, never a silent alias for signal-delta:
+        // only refs we truly cannot attribute land here (none in practice, since
+        // mergedRefs is always a subset of the four lanes above).
+        r.eligibilitySource = eligibilitySourceByRef.get(r.ref) ?? "unknown";
+    }
     const utilityMap = buildUtilityMap(mergedRefs);
     // Load feedback ratio per ref from the pre-computed summary (no extra DB pass).
     const feedbackRatios = new Map();
@@ -2059,6 +2301,7 @@ async function runImprovePreparationStage(args) {
         gateAutoAcceptFailedCount,
         consolidation: consolidationPass.consolidation,
         consolidationRan: consolidationPass.consolidationRan,
+        ...(proactiveMaintenanceSummary ? { proactiveMaintenance: proactiveMaintenanceSummary } : {}),
     };
 }
 async function runImproveLoopStage(args) {
@@ -2067,6 +2310,14 @@ async function runImproveLoopStage(args) {
     // receives only its fair share of the wall-clock budget.
     const remainingBudgetMs = () => Math.max(0, budgetMs - (Date.now() - startMs));
     const RECENT_ERRORS_CAP = 3;
+    // requirePlannedRefs guard: when the distill profile sets this flag, skip
+    // distill for distill-only refs if the reflect phase produced no planned refs.
+    // Prevents the distill loop from generating hundreds of distill-skipped events
+    // on quiet passes (all refs on reflect cooldown, no new signal to distill).
+    const requirePlannedRefs = improveProfile?.processes?.distill?.requirePlannedRefs === true;
+    const _distillOnlyRefNames = new Set(distillOnlyRefs.map((r) => r.ref));
+    const hasReflectEligibleRefs = loopRefs.some((r) => !_distillOnlyRefNames.has(r.ref));
+    const skipDistillDueToRequirePlannedRefs = requirePlannedRefs && !hasReflectEligibleRefs;
     // R-2 / #389: Self-Consistency multi-sample voting helpers.
     // Wang et al. arXiv:2203.11171 — N=3 samples beat single-shot on reasoning tasks.
     const SC_THRESHOLD = options.selfConsistencyThreshold ?? 0.7;
@@ -2227,6 +2478,9 @@ async function runImproveLoopStage(args) {
                         eventSource: "improve",
                         ...(reflectBudgetMs > 0 ? { timeoutMs: reflectBudgetMs } : {}),
                         ...(reflectProfileRunner ? { runner: reflectProfileRunner } : {}),
+                        // Attribution: carry the eligibility lane so reflect stamps it on
+                        // the reflect_invoked event and the persisted proposal.
+                        ...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
                     };
                     // R-2 / #389: Self-consistency multi-sample voting for high-utility refs.
                     // Self-Consistency arXiv:2203.11171 — N=3 samples beat single-shot quality.
@@ -2251,6 +2505,9 @@ async function runImproveLoopStage(args) {
                                 source: "reflect",
                                 sourceRun: `reflect-sc-${Date.now()}`,
                                 payload: winner.proposal.payload,
+                                // Attribution: the self-consistency path persists the winner here
+                                // (draftMode skips reflect's own createProposal), so stamp the lane.
+                                ...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
                             });
                             reflectResult = isProposalSkipped(persistResult)
                                 ? {
@@ -2364,6 +2621,18 @@ async function runImproveLoopStage(args) {
                 info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
                 continue;
             }
+            // requirePlannedRefs guard: skip distill for distill-only refs when no
+            // reflect-eligible refs were planned this run, preventing mass skip events.
+            if (skipDistillDueToRequirePlannedRefs && isDistillOnly) {
+                actions.push({
+                    ref: planned.ref,
+                    mode: "distill-skipped",
+                    result: { ok: true, reason: "require_planned_refs" },
+                });
+                completedCount++;
+                info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
+                continue;
+            }
             // See `isDistillCandidateRef` — excludes `lesson:*` (and anything else in
             // DISTILL_REFUSED_INPUT_TYPES) so distill never gets queued for an input
             // it will refuse.
@@ -2437,6 +2706,9 @@ async function runImproveLoopStage(args) {
                     ref: planned.ref,
                     ...(parsedPlannedRef.type === "memory" ? { proposalKind: "auto" } : {}),
                     ...(options.stashDir ? { stashDir: options.stashDir } : {}),
+                    // Attribution: carry the eligibility lane so distill stamps it on the
+                    // distill_invoked event and the persisted proposal.
+                    ...(planned.eligibilitySource ? { eligibilitySource: planned.eligibilitySource } : {}),
                 }));
                 actions.push({ ref: planned.ref, mode: "distill", result: distillResult });
                 if (distillResult.outcome === "queued" && distillResult.proposal) {
@@ -2618,309 +2890,325 @@ export async function runImproveMaintenancePasses(args) {
             db = openIndexDb();
         }
     };
-    try {
-        db = openIndexDb();
-        // Memory inference candidate-discovery (post-Item 9 fix from
-        // memory:akm-improve-critical-review-2026-05-20). Previously this pass
-        // was gated on memoryRefsForInference.size > 0 AND passed those refs as a
-        // candidateRefs filter. But memoryRefsForInference is populated from refs
-        // distilled THIS RUN — by the time that happens, those parents are
-        // already split (`inferenceProcessed: true`) and `isPendingMemory` excludes
-        // them. The genuinely-pending parents in the stash never entered the
-        // filter. Result: 0/0/0 for 25 consecutive runs.
-        //
-        // Fix: always run the pass when the feature is enabled; let the pass's
-        // own `collectPendingMemories` + `isPendingMemory` predicate find
-        // candidates from the filesystem-of-truth. The this-run set is still
-        // logged as a hint but no longer used as a filter.
-        const memoryInferenceDisabledByProfile = improveProfile?.processes?.memoryInference?.enabled === false;
-        if (memoryInferenceDisabledByProfile) {
-            info("[improve] memory inference skipped (disabled by improve profile)");
-        }
-        else {
-            const hintRefs = memoryRefsForInference.size;
-            info(hintRefs > 0
-                ? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
-                : "[improve] memory inference starting (discovering pending parents)");
-            const inferenceStart = Date.now();
-            try {
-                // O-1 (#364): pass budget signal so a hung inference call is cancelled.
-                memoryInference = await withLlmStage("memory-inference", () => memoryInferenceFn({
-                    config,
-                    sources,
-                    signal: budgetSignal,
-                    db,
-                    reEnrich: false,
-                    onProgress: (event) => {
-                        const current = event.currentRef ? ` ${event.currentRef}` : "";
-                        info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
-                    },
-                }));
-                memoryInferenceDurationMs = Date.now() - inferenceStart;
-                actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
-                info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
-            }
-            catch (err) {
-                memoryInferenceDurationMs = Date.now() - inferenceStart;
-                allWarnings.push(`memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
-            }
-        }
-        if (memoryInference && (memoryInference.splitParents > 0 || memoryInference.writtenFacts > 0)) {
-            info("[improve] reindexing after memory inference writes");
-            try {
-                await reindexWithIndexDbReleased(primaryStashDir);
-                reindexedAfterInference = true;
-                info("[improve] reindex after memory inference complete");
+    await withIndexWriterLease({ purpose: "improve-maintenance", signal: budgetSignal }, async () => {
+        try {
+            db = openIndexDb();
+            // Memory inference candidate-discovery (post-Item 9 fix from
+            // memory:akm-improve-critical-review-2026-05-20). Previously this pass
+            // was gated on memoryRefsForInference.size > 0 AND passed those refs as a
+            // candidateRefs filter. But memoryRefsForInference is populated from refs
+            // distilled THIS RUN — by the time that happens, those parents are
+            // already split (`inferenceProcessed: true`) and `isPendingMemory` excludes
+            // them. The genuinely-pending parents in the stash never entered the
+            // filter. Result: 0/0/0 for 25 consecutive runs.
+            //
+            // Fix: always run the pass when the feature is enabled; let the pass's
+            // own `collectPendingMemories` + `isPendingMemory` predicate find
+            // candidates from the filesystem-of-truth. The this-run set is still
+            // logged as a hint but no longer used as a filter.
+            const memoryInferenceDisabledByProfile = improveProfile?.processes?.memoryInference?.enabled === false;
+            const minPendingCount = improveProfile?.processes?.memoryInference?.minPendingCount;
+            const pendingBelowMinCount = (() => {
+                if (!primaryStashDir || minPendingCount === undefined || minPendingCount <= 0)
+                    return false;
+                const pending = collectPendingMemories(primaryStashDir).length;
+                if (pending < minPendingCount) {
+                    info(`[improve] memory inference skipped (${pending} pending < minPendingCount ${minPendingCount})`);
+                    return true;
+                }
+                return false;
+            })();
+            if (memoryInferenceDisabledByProfile) {
+                info("[improve] memory inference skipped (disabled by improve profile)");
             }
-            catch (err) {
-                allWarnings.push(`reindex after memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
+            else if (pendingBelowMinCount) {
+                // skipped — message already emitted above
             }
-        }
-        const graphEnabled = isProcessEnabled("index", "graph_extraction", config);
-        const graphExtractionDisabledByProfile = improveProfile?.processes?.graphExtraction?.enabled === false;
-        const graphExtractionFullScan = improveProfile?.processes?.graphExtraction?.fullScan === true;
-        // Build the set of refs actually touched this run.
-        const touchedRefs = new Set();
-        for (const r of args.actionableRefs)
-            touchedRefs.add(r.ref);
-        for (const r of memoryRefsForInference)
-            touchedRefs.add(r);
-        // INVARIANT: graph extraction normally runs only on files touched by
-        // actionable refs (candidatePaths). Full-corpus scans are opt-in via
-        // profile.processes.graphExtraction.fullScan = true (used by the
-        // `graph-refresh` built-in profile and its weekly scheduled task).
-        // The empty-Set fallback is intentional when no refs were touched —
-        // the extractor's filter rejects every file and returns empty, keeping
-        // the pass invoked so the action is recorded and tests stay exercised.
-        if (graphExtractionDisabledByProfile) {
-            info("[improve] graph extraction skipped (disabled by improve profile)");
-        }
-        else if (sources.length > 0 && graphEnabled) {
-            info(`[improve] graph extraction starting${graphExtractionFullScan ? " (full-corpus scan)" : ""}`);
-            const extractionStart = Date.now();
-            try {
-                // D9: if consolidation ran but memory inference did not reindex, force a reindex
-                // so graph extraction sees current DB state after consolidation writes.
-                if (consolidationRan && !reindexedAfterInference) {
-                    info("[improve] reindexing after consolidation (graph extraction needs current state)");
-                    try {
-                        await reindexWithIndexDbReleased(primaryStashDir);
-                        reindexedAfterInference = true;
-                        info("[improve] reindex after consolidation complete");
-                    }
-                    catch (err) {
-                        allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
-                    }
+            else {
+                const hintRefs = memoryRefsForInference.size;
+                info(hintRefs > 0
+                    ? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
+                    : "[improve] memory inference starting (discovering pending parents)");
+                const inferenceStart = Date.now();
+                try {
+                    // O-1 (#364): pass budget signal so a hung inference call is cancelled.
+                    memoryInference = await withLlmStage("memory-inference", () => memoryInferenceFn({
+                        config,
+                        sources,
+                        signal: budgetSignal,
+                        db,
+                        reEnrich: false,
+                        onProgress: (event) => {
+                            const current = event.currentRef ? ` ${event.currentRef}` : "";
+                            info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
+                        },
+                    }));
+                    memoryInferenceDurationMs = Date.now() - inferenceStart;
+                    actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
+                    info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
                 }
-                // #584: no close/reopen needed here — reindexWithIndexDbReleased
-                // already swapped in a fresh post-reindex handle.
-                // Resolve touched refs to absolute file paths. Skipped for fullScan
-                // (candidatePaths stays undefined → extractor processes all files).
-                let candidatePaths;
-                if (!graphExtractionFullScan) {
-                    candidatePaths = new Set();
-                    if (primaryStashDir && touchedRefs.size > 0) {
-                        const writableDirSet = new Set(getWritableStashDirs(primaryStashDir).map((d) => path.resolve(d)));
-                        const resolved = await Promise.all([...touchedRefs].map((ref) => findAssetFilePath(ref, primaryStashDir, writableDirSet).catch(() => null)));
-                        for (const p of resolved) {
-                            if (typeof p === "string" && p.length > 0)
-                                candidatePaths.add(p);
-                        }
-                    }
+                catch (err) {
+                    memoryInferenceDurationMs = Date.now() - inferenceStart;
+                    allWarnings.push(`memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                const progressHandler = (event) => {
-                    const current = event.currentPath ? ` ${path.basename(event.currentPath)}` : "";
-                    info(`[improve] graph extraction ${event.processed}/${event.total}${current} (extracted ${event.extracted}, entities ${event.totalEntities}, relations ${event.totalRelations})`);
-                };
-                // O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
-                graphExtraction = await withLlmStage("graph-extraction", () => graphExtractionFn({
-                    config,
-                    sources,
-                    signal: budgetSignal,
-                    db,
-                    reEnrich: false,
-                    onProgress: progressHandler,
-                    options: { candidatePaths },
-                }));
-                graphExtractionDurationMs = Date.now() - extractionStart;
-                actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
-                info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
-            }
-            catch (err) {
-                graphExtractionDurationMs = Date.now() - extractionStart;
-                allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
             }
-        }
-        else if (sources.length > 0 && !graphEnabled) {
-            info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
-        }
-        // Orphan proposal purge — reject pending reflect proposals whose target
-        // asset no longer exists on disk. Runs after graph extraction so newly
-        // promoted assets from accept flows during this run are already present.
-        if (primaryStashDir) {
-            try {
-                const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
-                orphansPurged = purgeResult.rejected;
-                if (purgeResult.rejected > 0) {
-                    info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
+            if (memoryInference && (memoryInference.splitParents > 0 || memoryInference.writtenFacts > 0)) {
+                info("[improve] reindexing after memory inference writes");
+                try {
+                    await reindexWithIndexDbReleased(primaryStashDir);
+                    reindexedAfterInference = true;
+                    info("[improve] reindex after memory inference complete");
+                }
+                catch (err) {
+                    allWarnings.push(`reindex after memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                appendEvent({
-                    eventType: "proposal_orphan_purge",
-                    ref: "proposals:_orphan-purge",
-                    metadata: {
-                        checked: purgeResult.checked,
-                        rejected: purgeResult.rejected,
-                        durationMs: purgeResult.durationMs,
-                        byType: purgeResult.byType,
-                        orphans: purgeResult.orphans.map((o) => o.ref),
-                    },
-                }, eventsCtx);
             }
-            catch (err) {
-                allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
+            const graphEnabled = isProcessEnabled("index", "graph_extraction", config);
+            const graphExtractionDisabledByProfile = improveProfile?.processes?.graphExtraction?.enabled === false;
+            const graphExtractionFullScan = improveProfile?.processes?.graphExtraction?.fullScan === true;
+            // Build the set of refs actually touched this run.
+            const touchedRefs = new Set();
+            for (const r of args.actionableRefs)
+                touchedRefs.add(r.ref);
+            for (const r of memoryRefsForInference)
+                touchedRefs.add(r);
+            // INVARIANT: graph extraction normally runs only on files touched by
+            // actionable refs (candidatePaths). Full-corpus scans are opt-in via
+            // profile.processes.graphExtraction.fullScan = true (used by the
+            // `graph-refresh` built-in profile and its weekly scheduled task).
+            // The empty-Set fallback is intentional when no refs were touched —
+            // the extractor's filter rejects every file and returns empty, keeping
+            // the pass invoked so the action is recorded and tests stay exercised.
+            if (graphExtractionDisabledByProfile) {
+                info("[improve] graph extraction skipped (disabled by improve profile)");
             }
-            // Phase 6B (Advantage D6b): expire pending proposals that have aged past
-            // the retention window. Runs AFTER orphan purge so we never double-archive
-            // a proposal that orphan-purge already moved. `expireStaleProposals` emits
-            // its own per-proposal `proposal_expired` events; we additionally emit a
-            // single roll-up event here for parity with the orphan-purge surface.
-            try {
-                const expireResult = expireStaleProposals(primaryStashDir, config);
-                proposalsExpired = expireResult.expired;
-                if (expireResult.expired > 0) {
-                    info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
-                        `(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
+            else if (sources.length > 0 && graphEnabled) {
+                info(`[improve] graph extraction starting${graphExtractionFullScan ? " (full-corpus scan)" : ""}`);
+                const extractionStart = Date.now();
+                try {
+                    // D9: if consolidation ran but memory inference did not reindex, force a reindex
+                    // so graph extraction sees current DB state after consolidation writes.
+                    if (consolidationRan && !reindexedAfterInference) {
+                        info("[improve] reindexing after consolidation (graph extraction needs current state)");
+                        try {
+                            await reindexWithIndexDbReleased(primaryStashDir);
+                            reindexedAfterInference = true;
+                            info("[improve] reindex after consolidation complete");
+                        }
+                        catch (err) {
+                            allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
+                        }
+                    }
+                    // #584: no close/reopen needed here — reindexWithIndexDbReleased
+                    // already swapped in a fresh post-reindex handle.
+                    // Resolve touched refs to absolute file paths. Skipped for fullScan
+                    // (candidatePaths stays undefined → extractor processes all files).
+                    let candidatePaths;
+                    if (!graphExtractionFullScan) {
+                        candidatePaths = new Set();
+                        if (primaryStashDir && touchedRefs.size > 0) {
+                            const writableDirSet = new Set(getWritableStashDirs(primaryStashDir).map((d) => path.resolve(d)));
+                            const resolved = await Promise.all([...touchedRefs].map((ref) => findAssetFilePath(ref, primaryStashDir, writableDirSet).catch(() => null)));
+                            for (const p of resolved) {
+                                if (typeof p === "string" && p.length > 0)
+                                    candidatePaths.add(p);
+                            }
+                        }
+                    }
+                    const progressHandler = (event) => {
+                        const current = event.currentPath ? ` ${path.basename(event.currentPath)}` : "";
+                        info(`[improve] graph extraction ${event.processed}/${event.total}${current} (extracted ${event.extracted}, entities ${event.totalEntities}, relations ${event.totalRelations})`);
+                    };
+                    // O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
+                    graphExtraction = await withLlmStage("graph-extraction", () => graphExtractionFn({
+                        config,
+                        sources,
+                        signal: budgetSignal,
+                        db,
+                        reEnrich: false,
+                        onProgress: progressHandler,
+                        options: { candidatePaths },
+                    }));
+                    graphExtractionDurationMs = Date.now() - extractionStart;
+                    actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
+                    info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
+                }
+                catch (err) {
+                    graphExtractionDurationMs = Date.now() - extractionStart;
+                    allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                appendEvent({
-                    eventType: "proposal_expiration_pass",
-                    ref: "proposals:_expiration",
-                    metadata: {
-                        checked: expireResult.checked,
-                        expired: expireResult.expired,
-                        durationMs: expireResult.durationMs,
-                        retentionDays: expireResult.retentionDays,
-                        expiredProposals: expireResult.expiredProposals,
-                    },
-                }, eventsCtx);
             }
-            catch (err) {
-                allWarnings.push(`proposal expiration failed: ${err instanceof Error ? err.message : String(err)}`);
+            else if (sources.length > 0 && !graphEnabled) {
+                info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
             }
-        }
-        // Fix #2 (observability 0.8.0): trim the events table in state.db so it
-        // doesn't grow unbounded. `akm health` writes a `health_probe` row on every
-        // invocation, and every command surface emits at least one event besides —
-        // without this trim, state.db is a permanent append-only log. Config key
-        // `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
-        // window. The purge runs against state.db (a different SQLite file from
-        // the index `db` above).
-        {
-            const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
-            if (retentionDays > 0) {
-                // #585: reuse the long-lived eventsCtx.db connection when akmImprove
-                // opened one — opening a second state.db write connection while
-                // eventsDb is still live made two simultaneous writers contend on the
-                // same WAL file ("database is locked"). Only the eventsCtx.dbPath
-                // fallback path (state.db failed to open up-front) opens — and then
-                // owns and closes — its own handle. C2 still holds: the fallback uses
-                // the boundary-pinned path, never a live `process.env` re-read.
-                const ownsStateDb = !eventsCtx?.db;
-                let stateDb;
+            // Orphan proposal purge — reject pending reflect proposals whose target
+            // asset no longer exists on disk. Runs after graph extraction so newly
+            // promoted assets from accept flows during this run are already present.
+            if (primaryStashDir) {
                 try {
-                    stateDb = eventsCtx?.db ?? openStateDatabase(eventsCtx?.dbPath);
-                    const purgedCount = purgeOldEvents(stateDb, retentionDays);
-                    if (purgedCount > 0) {
-                        info(`[improve] events purge: ${purgedCount} event(s) older than ${retentionDays}d removed from state.db`);
-                    }
-                    appendEvent({
-                        eventType: "events_purged",
-                        ref: "events:_purge",
-                        metadata: { purgedCount, retentionDays },
-                    }, eventsCtx);
-                    // improve_runs uses the same retention window as events — both are
-                    // observability/audit data, both grow append-only, both have a
-                    // dedicated purge helper. Mirroring the events purge here means a
-                    // single retention knob (improve.eventRetentionDays) governs both.
-                    const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
-                    if (improveRunsPurged > 0) {
-                        info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
+                    const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
+                    orphansPurged = purgeResult.rejected;
+                    if (purgeResult.rejected > 0) {
+                        info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
                     }
                     appendEvent({
-                        eventType: "improve_runs_purged",
-                        ref: "improve_runs:_purge",
-                        metadata: { purgedCount: improveRunsPurged, retentionDays },
+                        eventType: "proposal_orphan_purge",
+                        ref: "proposals:_orphan-purge",
+                        metadata: {
+                            checked: purgeResult.checked,
+                            rejected: purgeResult.rejected,
+                            durationMs: purgeResult.durationMs,
+                            byType: purgeResult.byType,
+                            orphans: purgeResult.orphans.map((o) => o.ref),
+                        },
                     }, eventsCtx);
                 }
                 catch (err) {
-                    allWarnings.push(`events purge failed: ${err instanceof Error ? err.message : String(err)}`);
+                    allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                finally {
-                    if (ownsStateDb && stateDb) {
-                        try {
-                            stateDb.close();
-                        }
-                        catch {
-                            // best-effort
-                        }
-                    }
-                }
-                // task_logs in logs.db (#579) shares the same retention window as
-                // events/improve_runs — all three are observability data governed by
-                // the single improve.eventRetentionDays knob. Separate try/finally
-                // because logs.db is a different file: a locked/missing logs.db must
-                // not block the state.db purges above.
-                let logsDb;
+                // Phase 6B (Advantage D6b): expire pending proposals that have aged past
+                // the retention window. Runs AFTER orphan purge so we never double-archive
+                // a proposal that orphan-purge already moved. `expireStaleProposals` emits
+                // its own per-proposal `proposal_expired` events; we additionally emit a
+                // single roll-up event here for parity with the orphan-purge surface.
                 try {
-                    logsDb = openLogsDatabase();
-                    const taskLogsPurged = purgeOldTaskLogs(logsDb, retentionDays);
-                    if (taskLogsPurged > 0) {
-                        info(`[improve] task_logs purge: ${taskLogsPurged} log line(s) older than ${retentionDays}d removed from logs.db`);
+                    const expireResult = expireStaleProposals(primaryStashDir, config);
+                    proposalsExpired = expireResult.expired;
+                    if (expireResult.expired > 0) {
+                        info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
+                            `(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
                     }
                     appendEvent({
-                        eventType: "task_logs_purged",
-                        ref: "task_logs:_purge",
-                        metadata: { purgedCount: taskLogsPurged, retentionDays },
+                        eventType: "proposal_expiration_pass",
+                        ref: "proposals:_expiration",
+                        metadata: {
+                            checked: expireResult.checked,
+                            expired: expireResult.expired,
+                            durationMs: expireResult.durationMs,
+                            retentionDays: expireResult.retentionDays,
+                            expiredProposals: expireResult.expiredProposals,
+                        },
                     }, eventsCtx);
                 }
                 catch (err) {
-                    allWarnings.push(`task_logs purge failed: ${err instanceof Error ? err.message : String(err)}`);
+                    allWarnings.push(`proposal expiration failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                finally {
-                    if (logsDb) {
-                        try {
-                            logsDb.close();
+            }
+            // Fix #2 (observability 0.8.0): trim the events table in state.db so it
+            // doesn't grow unbounded. `akm health` writes a `health_probe` row on every
+            // invocation, and every command surface emits at least one event besides —
+            // without this trim, state.db is a permanent append-only log. Config key
+            // `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
+            // window. The purge runs against state.db (a different SQLite file from
+            // the index `db` above).
+            {
+                const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
+                if (retentionDays > 0) {
+                    // #585: reuse the long-lived eventsCtx.db connection when akmImprove
+                    // opened one — opening a second state.db write connection while
+                    // eventsDb is still live made two simultaneous writers contend on the
+                    // same WAL file ("database is locked"). Only the eventsCtx.dbPath
+                    // fallback path (state.db failed to open up-front) opens — and then
+                    // owns and closes — its own handle. C2 still holds: the fallback uses
+                    // the boundary-pinned path, never a live `process.env` re-read.
+                    const ownsStateDb = !eventsCtx?.db;
+                    let stateDb;
+                    try {
+                        stateDb = eventsCtx?.db ?? openStateDatabase(eventsCtx?.dbPath);
+                        const purgedCount = purgeOldEvents(stateDb, retentionDays);
+                        if (purgedCount > 0) {
+                            info(`[improve] events purge: ${purgedCount} event(s) older than ${retentionDays}d removed from state.db`);
                         }
-                        catch {
-                            // best-effort
+                        appendEvent({
+                            eventType: "events_purged",
+                            ref: "events:_purge",
+                            metadata: { purgedCount, retentionDays },
+                        }, eventsCtx);
+                        // improve_runs uses the same retention window as events — both are
+                        // observability/audit data, both grow append-only, both have a
+                        // dedicated purge helper. Mirroring the events purge here means a
+                        // single retention knob (improve.eventRetentionDays) governs both.
+                        const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
+                        if (improveRunsPurged > 0) {
+                            info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
+                        }
+                        appendEvent({
+                            eventType: "improve_runs_purged",
+                            ref: "improve_runs:_purge",
+                            metadata: { purgedCount: improveRunsPurged, retentionDays },
+                        }, eventsCtx);
+                    }
+                    catch (err) {
+                        allWarnings.push(`events purge failed: ${err instanceof Error ? err.message : String(err)}`);
+                    }
+                    finally {
+                        if (ownsStateDb && stateDb) {
+                            try {
+                                stateDb.close();
+                            }
+                            catch {
+                                // best-effort
+                            }
+                        }
+                    }
+                    // task_logs in logs.db (#579) shares the same retention window as
+                    // events/improve_runs — all three are observability data governed by
+                    // the single improve.eventRetentionDays knob. Separate try/finally
+                    // because logs.db is a different file: a locked/missing logs.db must
+                    // not block the state.db purges above.
+                    let logsDb;
+                    try {
+                        logsDb = openLogsDatabase();
+                        const taskLogsPurged = purgeOldTaskLogs(logsDb, retentionDays);
+                        if (taskLogsPurged > 0) {
+                            info(`[improve] task_logs purge: ${taskLogsPurged} log line(s) older than ${retentionDays}d removed from logs.db`);
+                        }
+                        appendEvent({
+                            eventType: "task_logs_purged",
+                            ref: "task_logs:_purge",
+                            metadata: { purgedCount: taskLogsPurged, retentionDays },
+                        }, eventsCtx);
+                    }
+                    catch (err) {
+                        allWarnings.push(`task_logs purge failed: ${err instanceof Error ? err.message : String(err)}`);
+                    }
+                    finally {
+                        if (logsDb) {
+                            try {
+                                logsDb.close();
+                            }
+                            catch {
+                                // best-effort
+                            }
                         }
                     }
                 }
             }
-        }
-        // Phase 4A (staleness detection). Activates the `deprecated` belief-state
-        // machinery shipped in Phase 1A. Default OFF — gated by
-        // `features.index.staleness_detection.enabled`. Runs after orphan purge
-        // and before the URL check (which lives in the outer caller).
-        if (sources.length > 0) {
-            try {
-                stalenessDetection = await withLlmStage("staleness-detection", () => stalenessDetectionFn({ config, sources, signal: budgetSignal, db }));
-                if (stalenessDetection.considered > 0) {
-                    info(`[improve] staleness detection complete (considered ${stalenessDetection.considered}, ` +
-                        `deprecated ${stalenessDetection.deprecated}, confirmed ${stalenessDetection.confirmed}, ` +
-                        `skipped ${stalenessDetection.skipped}, ${stalenessDetection.durationMs}ms)`);
+            // Phase 4A (staleness detection). Activates the `deprecated` belief-state
+            // machinery shipped in Phase 1A. Default OFF — gated by
+            // `features.index.staleness_detection.enabled`. Runs after orphan purge
+            // and before the URL check (which lives in the outer caller).
+            if (sources.length > 0) {
+                try {
+                    stalenessDetection = await withLlmStage("staleness-detection", () => stalenessDetectionFn({ config, sources, signal: budgetSignal, db }));
+                    if (stalenessDetection.considered > 0) {
+                        info(`[improve] staleness detection complete (considered ${stalenessDetection.considered}, ` +
+                            `deprecated ${stalenessDetection.deprecated}, confirmed ${stalenessDetection.confirmed}, ` +
+                            `skipped ${stalenessDetection.skipped}, ${stalenessDetection.durationMs}ms)`);
+                    }
+                    for (const w of stalenessDetection.warnings)
+                        allWarnings.push(`[improve] staleness detection: ${w}`);
+                }
+                catch (err) {
+                    allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
                 }
-                for (const w of stalenessDetection.warnings)
-                    allWarnings.push(`[improve] staleness detection: ${w}`);
-            }
-            catch (err) {
-                allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
             }
         }
-    }
-    finally {
-        if (db)
-            closeDatabase(db);
-    }
+        finally {
+            if (db)
+                closeDatabase(db);
+        }
+    });
     return {
         ...(memoryInference ? { memoryInference } : {}),
         ...(graphExtraction ? { graphExtraction } : {}),