npm - kc-beta - Versions diffs - 0.7.5 → 0.8.1 - Mend

kc-beta 0.7.5 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/README.md +47 -0
package/package.json +3 -2
package/src/agent/engine.js +390 -100
package/src/agent/pipelines/_advance-hints.js +92 -0
package/src/agent/pipelines/_milestone-derive.js +247 -13
package/src/agent/pipelines/skill-authoring.js +30 -1
package/src/agent/tools/agent-tool.js +2 -2
package/src/agent/tools/consult-skill.js +15 -0
package/src/agent/tools/dashboard-render.js +48 -1
package/src/agent/tools/document-parse.js +31 -2
package/src/agent/tools/phase-advance.js +17 -13
package/src/agent/tools/release.js +250 -7
package/src/agent/tools/sandbox-exec.js +65 -8
package/src/agent/tools/worker-llm-call.js +95 -15
package/src/agent/workspace.js +25 -4
package/src/cli/components.js +4 -1
package/src/cli/index.js +97 -1
package/src/config.js +19 -2
package/src/marathon/driver.js +217 -0
package/src/marathon/prompts.js +93 -0
package/template/.env.template +16 -0
package/template/skills/en/bootstrap-workspace/SKILL.md +14 -0
package/template/skills/en/quality-control/SKILL.md +9 -0
package/template/skills/en/skill-authoring/SKILL.md +39 -0
package/template/skills/en/skill-to-workflow/SKILL.md +53 -0
package/template/skills/en/work-decomposition/SKILL.md +34 -0
package/template/skills/phase_skills.yaml +5 -0
package/template/skills/zh/bootstrap-workspace/SKILL.md +14 -0
package/template/skills/zh/compliance-judgment/SKILL.md +37 -37
package/template/skills/zh/document-chunking/SKILL.md +21 -14
package/template/skills/zh/document-parsing/SKILL.md +65 -65
package/template/skills/zh/entity-extraction/SKILL.md +68 -68
package/template/skills/zh/quality-control/SKILL.md +9 -0
package/template/skills/zh/skill-authoring/SKILL.md +39 -0
package/template/skills/zh/skill-creator/SKILL.md +204 -200
package/template/skills/zh/skill-to-workflow/SKILL.md +53 -0
package/template/skills/zh/tree-processing/SKILL.md +67 -63
package/template/skills/zh/work-decomposition/SKILL.md +34 -0
package/template/workflows/common/llm_client.py +168 -0
package/template/workflows/common/utils.py +132 -0

package/src/agent/engine.js CHANGED Viewed

@@ -5,6 +5,9 @@ import {
   deriveSkillAuthoringMilestones,
   deriveSkillTestingMilestones,
 } from "./pipelines/_milestone-derive.js";
+import { getPrescriptiveHint } from "./pipelines/_advance-hints.js";
+import { loadEnvFile } from "../config.js";
+import { MarathonDriver } from "../marathon/driver.js";
 import { ContextAssembler } from "./context.js";
 import { ConversationHistory } from "./history.js";
 import { findSafeSplitPoint } from "./message-utils.js";
@@ -166,6 +169,10 @@ export class AgentEngine {
       { gitAutoCommit: config.gitAutoCommit !== false },
     );
+    // v0.8 P1-B: workspace .env overlay deferred until after eventLog
+    // init (see _overlayWorkspaceEnv call below). Workspace dir is
+    // known here, but the overlay's audit event needs eventLog.
     // For sub-agents, persistence (history/events/state) lives under
     // sub_agents/<scope>/ instead of the workspace root. Workspace files
     // (rules/, rule_skills/, workflows/) stay shared.
@@ -203,6 +210,26 @@ export class AgentEngine {
     // Event log (append-only JSONL, source of truth)
     this.eventLog = new EventLog(this.workspace.cwd, { logDir });
+    // v0.8 P1-B: overlay workspace .env onto this.config. cli/index.js
+    // calls loadSettings() without a workspace path because the path
+    // isn't known until this constructor runs. Result: workspace .env's
+    // VLM_TIER1 / OCR_MODEL_TIER1 / TIER1..4 / LANGUAGE were silently
+    // ignored, with gc defaults (~/.kc_agent/config.json) winning.
+    // 资管 audit § 9.2 finding 7: user's OCR_MODEL_TIER1=zai-org/GLM-4.6V
+    // never reached document_parse; error messages quoted gc's
+    // Qwen3-VL-235B default. Overlay reads workspace .env, fills in
+    // fields where current config came from gc fallback (penv-set values
+    // still win because loadSettings applied them).
+    try { this._overlayWorkspaceEnv(); } catch { /* best-effort */ }
+    // v0.8.1 P8-A: inline marathon driver. v0.8.0's separate-process
+    // kc-marathon CLI + filesystem-watcher IPC died silently when the
+    // launching terminal closed (E2E #11 audit). Redesigned as an inline
+    // state machine activated via /marathon slash command. No filesystem
+    // marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
+    // cleared by exitMarathonMode(). Query via this.marathonDriver.
+    this.marathonDriver = null;
     // Context windowing
     this.contextWindow = new ContextWindow({
       contextLimit: config.kcContextLimit || 200000,
@@ -216,8 +243,6 @@ export class AgentEngine {
     // so they don't get a TaskManager.
     this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
-    // Build all tool instances (but register phase-appropriate ones)
-    this._buildTools = this._createAllTools();
     this._phaseSummaries = [];
     // Pipeline system (meta-meta skills as code)
@@ -233,8 +258,15 @@ export class AgentEngine {
     };
     // Skill discovery (Claude Code pattern: index in context, full content on demand)
+    // v0.7.5 — must initialize BEFORE _createAllTools() because ConsultSkillTool
+    // takes this._skillLoader as a constructor arg. Was a v0.7.5 init-order bug:
+    // _createAllTools ran first, passed undefined skillLoader to ConsultSkillTool,
+    // calls to consult_skill threw "Cannot read properties of undefined".
     this._skillLoader = new SkillLoader(config.language);
+    // Build all tool instances (but register phase-appropriate ones)
+    this._buildTools = this._createAllTools();
     // v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
     // available skill set. Symlink with copy fallback. Re-populated on
     // every phase advance/retreat (see _advancePhase).
@@ -247,20 +279,26 @@ export class AgentEngine {
       });
     } catch { /* best-effort; skills/ population is not a critical-path failure */ }
+    // v0.8.1 P10-A: auto-populate <workspace>/workflows/common/llm_client.py
+    // from the template. Idempotent (skips if file already exists). Covers
+    // the bench-corpus flow where `kc-beta init` was bypassed. v0.8.0
+    // shipped this shim as embedded source in skill-to-workflow teaching;
+    // E2E #11 audits found BOTH agents ignored the teaching and wrote
+    // their own (non-canonical) llm_client.py. Shipping it as a template
+    // file the agent finds via filesystem walk is more robust.
+    try { this._populateWorkspaceCommonShims(); } catch { /* best-effort */ }
     // Register tools for initial phase
     this.toolRegistry = new ToolRegistry();
     this._registerToolsForPhase(this.currentPhase);
-    // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
-    // phase so the first real false→true flip inside onToolResult triggers an
-    // advance — even when the user launches from a pre-populated workspace
-    // whose exit criteria already happen to be met at boot.
-    // resume() re-primes this from the restored pipeline state (see ~L566),
-    // which is the correct behaviour there: resumed sessions that were already
-    // past this phase shouldn't re-fire.
-    this._lastReady = Object.fromEntries(
-      Object.keys(this.pipelines).map((p) => [p, false]),
-    );
+    // v0.8 P1-D: removed `_lastReady` edge-trigger state. It was the
+    // bookkeeping for `_maybeAutoAdvance`, which v0.7.4 G0b decommissioned
+    // (all call sites removed because v0.7.3's mid-session auto-advance
+    // chain regression was caused by it). The method definition itself
+    // is also gone in P1-D. Phase advance is now 100% explicit: agent's
+    // `phase_advance` tool or user re-prompt. Resume + rollback paths
+    // that previously re-primed `_lastReady` are no-ops now.
     // B0.1: Heap sampler. Parent engines only — sub-agents share a process
     // with the parent and would double-log. Writes a single JSONL line
@@ -271,6 +309,111 @@ export class AgentEngine {
     this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
   }
+  /**
+   * v0.8.1 P10-A: copy the canonical `workflows/common/*.py` shims from
+   * the bundled template into `<workspace>/workflows/common/` when they
+   * are missing there. Per the template this currently means
+   * `llm_client.py` (worker LLM HTTP shim) and `utils.py`; the code
+   * copies every non-hidden `*.py` entry it finds.
+   *
+   * Idempotent: an existing destination file is never overwritten, so
+   * agent edits stay intact. Returns nothing. A missing template
+   * directory is a silent no-op and individual copy failures are
+   * skipped (best-effort).
+   *
+   * Runs at engine init. Per the original note this covers bench-corpus
+   * mode where `kc-beta init` doesn't run.
+   */
+  _populateWorkspaceCommonShims() {
+    // Resolve the template directory as a file: URL and hand the URL to
+    // fs directly. `new URL(import.meta.url).pathname` stays
+    // percent-encoded (a space becomes "%20") and keeps a leading "/"
+    // before Windows drive letters, so an install path containing spaces
+    // or non-ASCII characters made existsSync() return false and the
+    // shims were silently never populated.
+    const templateRoot = new URL("../../template/workflows/common/", import.meta.url);
+    if (!fs.existsSync(templateRoot)) return;
+    const targetRoot = path.join(this.workspace.cwd, "workflows", "common");
+    fs.mkdirSync(targetRoot, { recursive: true });
+    const copied = [];
+    const skipped = [];
+    for (const entry of fs.readdirSync(templateRoot)) {
+      if (!entry.endsWith(".py") || entry.startsWith(".")) continue;
+      // Encode the name so characters such as "#", "?" or "%" are not
+      // interpreted as URL syntax when resolving against templateRoot.
+      const srcUrl = new URL(encodeURIComponent(entry), templateRoot);
+      const dstPath = path.join(targetRoot, entry);
+      if (fs.existsSync(dstPath)) {
+        skipped.push(entry);
+        continue;
+      }
+      try {
+        fs.copyFileSync(srcUrl, dstPath);
+        copied.push(entry);
+      } catch { /* best-effort */ }
+    }
+    // Only emit the audit event when something was actually copied.
+    if (copied.length > 0) {
+      try {
+        this.eventLog?.append?.("workflows_common_populated", { copied, skipped });
+      } catch { /* best-effort */ }
+    }
+  }
+  /**
+   * v0.8 P1-B: overlay workspace .env onto this.config now that
+   * this.workspace.cwd is known. Only fills in fields where the current
+   * config value was a gc fallback (empty OR the gc default) — does NOT
+   * override fields that came from process.env (those win at
+   * loadSettings() time and stay winning).
+   *
+   * Without this overlay, workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 /
+   * TIER1..4 / LANGUAGE are silently ignored — the v0.7.4 G1b OCR_MODEL_TIER1
+   * alias fix landed at the config layer but never reached the runtime
+   * because loadSettings() is called without a workspace path.
+   */
+  _overlayWorkspaceEnv() {
+    const cwd = this.workspace?.cwd;
+    if (!cwd) return;
+    const envPath = path.join(cwd, ".env");
+    if (!fs.existsSync(envPath)) return;
+    let wsEnv;
+    try {
+      wsEnv = loadEnvFile(envPath);
+    } catch {
+      // Unreadable workspace .env: leave this.config untouched.
+      return;
+    }
+    if (!wsEnv || typeof wsEnv !== "object") return;
+    // [config field, candidate .env keys in priority order]. The first
+    // candidate with a non-empty value in the workspace .env is used.
+    const overlayTable = [
+      ["vlmTier1", ["VLM_TIER1", "OCR_MODEL_TIER1"]],
+      ["vlmTier2", ["VLM_TIER2", "OCR_MODEL_TIER2"]],
+      ["vlmTier3", ["VLM_TIER3", "OCR_MODEL_TIER3"]],
+      ["tier1", ["TIER1"]],
+      ["tier2", ["TIER2"]],
+      ["tier3", ["TIER3"]],
+      ["tier4", ["TIER4"]],
+      ["language", ["LANGUAGE"]],
+    ];
+    const applied = [];
+    for (const [configKey, envKeys] of overlayTable) {
+      const hitKey = envKeys.find((k) => wsEnv[k]);
+      if (hitKey === undefined) continue;
+      const wsValue = wsEnv[hitKey];
+      // process.env keeps precedence: if any candidate key is set there
+      // to a different value, leave the config field alone.
+      const penvAgrees = envKeys.every(
+        (k) => !process.env[k] || process.env[k] === wsValue,
+      );
+      if (!penvAgrees) continue;
+      const previous = this.config[configKey];
+      if (previous === wsValue) continue;
+      applied.push({ key: configKey, from: previous || "(empty)", to: wsValue });
+      this.config[configKey] = wsValue;
+    }
+    // Audit visibility: one event listing every field that was overlaid.
+    if (applied.length === 0) return;
+    try {
+      this.eventLog?.append?.("workspace_env_overlay", {
+        envPath: path.relative(cwd, envPath),
+        fields: applied,
+      });
+    } catch { /* best-effort */ }
+  }
   /**
    * Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
    * Returns a stop fn. Timer is .unref()'d so it never keeps the process
@@ -280,11 +423,22 @@ export class AgentEngine {
   _startHeapSampler() {
     const logDir = path.join(this.workspace.cwd, "logs");
     const logPath = path.join(logDir, "heap.jsonl");
+    let stopped = false;
+    let lastSampleAt = 0;
     const sample = () => {
       try {
         const mem = process.memoryUsage();
+        const now = Date.now();
+        // v0.8 P1-C: track late ticks. skippedMs is how far this sample
+        // overshot the 60s cadence since the previous one (event loop
+        // sleep, GC pause, etc.); it is added to the row whenever the
+        // overshoot is positive, so the post-mortem audit can detect
+        // gaps without needing to compare adjacent timestamps.
+        const skippedMs = lastSampleAt > 0 ? (now - lastSampleAt - 60_000) : 0;
+        lastSampleAt = now;
         const row = {
-          t: new Date().toISOString(),
+          t: new Date(now).toISOString(),
           seq: this.eventLog?.currentSeq ?? 0,
           phase: this.currentPhase,
           rssMB: Math.round(mem.rss / 1024 / 1024),
@@ -301,17 +455,36 @@ export class AgentEngine {
           // and the row gets `componentsErr` instead.
           components: this._sampleComponents(),
         };
+        if (skippedMs > 0) row.skippedMs = skippedMs;
         fs.mkdirSync(logDir, { recursive: true });
         fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
       } catch { /* never fatal */ }
     };
+    // v0.8 P1-C: self-rescheduling setTimeout instead of setInterval. The
+    // 资管 v0.7.5 session shows only 2 heap.jsonl entries (12:39:40 start
+    // + 12:40:40 first tick) across an 18-hour run — the unref'd
+    // setInterval was somehow dropped between event-loop idle phases.
+    // setTimeout reschedules from inside the sample callback, so the
+    // timer is re-registered every tick. unref'd so we don't block exit.
+    let timeoutHandle = null;
+    const scheduleNext = () => {
+      if (stopped) return;
+      timeoutHandle = setTimeout(() => {
+        sample();
+        scheduleNext();
+      }, 60_000);
+      timeoutHandle.unref?.();
+    };
     // Record one sample at startup so we have a baseline even on short runs.
     sample();
-    const timer = setInterval(sample, 60_000);
-    timer.unref?.();
+    scheduleNext();
     return () => {
       try {
-        clearInterval(timer);
+        stopped = true;
+        if (timeoutHandle) clearTimeout(timeoutHandle);
         sample(); // one final sample on shutdown
       } catch { /* ignore */ }
     };
@@ -428,7 +601,10 @@ export class AgentEngine {
     return {
       // Always available (BUILD + DISTILL)
       core: [
-        new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
+        new SandboxExecTool(this.workspace, {
+          defaultTimeoutMs: this.config.kcExecDefaultTimeoutMs,
+          maxTimeoutMs: this.config.kcExecMaxTimeoutMs,
+        }),
         new WorkspaceFileTool(this.workspace, this.versionManager),
         new CopyToWorkspaceTool(this.workspace, {
           largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
@@ -468,7 +644,12 @@ export class AgentEngine {
           mineruApiKey: this.config.mineruApiKey,
           llmApiKey: workerApiKey,
           llmBaseUrl: workerBaseUrl,
+          // v0.8.1 P9-B: live-read vlmTier1 so workspace_env_overlay
+          // changes after tool construction (or mid-run .env edits)
+          // reach document_parse. The static `ocrModel` is the
+          // construction-time fallback; getOcrModel takes precedence.
           ocrModel: vlmModel,
+          getOcrModel: () => this.config.vlmTier1 || vlmModel,
         }),
         new DocumentSearchTool(this.workspace),
         // Group C — chunker/RAG infrastructure ported from AMC app. Core
@@ -968,16 +1149,9 @@ export class AgentEngine {
         }
       }
-      // Re-prime _lastReady AFTER importState so it reflects the restored
-      // pipeline milestones, not the empty defaults from constructor.
-      // (Bug 5 fix — without this, resume reignites auto-advance.)
-      for (const phase of Object.keys(engine.pipelines)) {
-        try {
-          engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
-        } catch {
-          engine._lastReady[phase] = false;
-        }
-      }
+      // v0.8 P1-D: removed `_lastReady` re-prime. Was the bookkeeping for
+      // `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase advance
+      // is explicit now; nothing to re-prime on resume.
       engine.eventLog.append("session_resume", {
         resumedPhase: engine.currentPhase,
@@ -1086,6 +1260,29 @@ export class AgentEngine {
       // budget. Better to lose some history than crash with HTTP 400.
       messages = this._enforceTokenBudget(messages);
+      // v0.8 P3-A: skill usage counter — emit one skill_byte_send event
+      // per always-loaded skill per LLM send. Captures the cost of having
+      // a skill body inlined in the system prompt (Layer B per design doc).
+      // Agent-blind: events go to events.jsonl only; never surfaced to the
+      // agent's context. consult_skill tool results emit their own
+      // skill_invoked events with via_tool="consult_skill" (already in
+      // place since v0.7.5 G-C4), so we don't double-count those here.
+      try {
+        const { alwaysLoaded } = this._skillLoader.getPhaseSkillSet(this.currentPhase) || {};
+        if (Array.isArray(alwaysLoaded)) {
+          for (const skill of alwaysLoaded) {
+            const body = this._skillLoader.loadSkillBody(skill);
+            if (!body) continue;
+            this.eventLog.append("skill_byte_send", {
+              skill,
+              via: "system_prompt_always_loaded",
+              byte_count: body.length,
+              phase: this.currentPhase,
+            });
+          }
+        }
+      } catch { /* counter is best-effort; never break the turn */ }
       this.eventLog.append("llm_start", {
         model: this.config.kcModel,
         messageCount: messages.length,
@@ -1335,11 +1532,17 @@ export class AgentEngine {
           // path-matching emission stays only as a fallback for any agent
           // that reads a SKILL.md path directly (out of pattern).
           try {
+            // v0.8 P1-E: heredoc detection. `cat << 'EOF' > /tmp/skill.md`
+            // matches the read-verb regex but is actually a WRITE — the
+            // heredoc operator `<<` means cat is consuming inline content
+            // (the heredoc body), not a file path. 资管 v0.7.5 audit § 5f
+            // confirmed 1 spurious skill_invoked event of this kind.
+            // Excluding any command with `<<` from the isRead classification.
+            const cmd = String(inputData?.command || "");
+            const isHeredoc = cmd.includes("<<");
             const isRead =
               (tc.name === "workspace_file" && inputData?.operation === "read") ||
-              (tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
-                String(inputData?.command || "")
-              ));
+              (tc.name === "sandbox_exec" && !isHeredoc && /\b(cat|head|tail|less|grep|view|read)\b/.test(cmd));
             if (
               !result.isError &&
               isRead &&
@@ -1349,9 +1552,13 @@ export class AgentEngine {
               // v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
               // OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
               // Deep layout backward-compat preserved for any stragglers.
+              // v0.8 P0-B: accept lowercase `skill.md` too — 资管 audit § 3.2
+              // found agents writing lowercase consistently (14/14 rule_skills/).
+              // Limited to exact uppercase OR exact lowercase (no mixed case)
+              // to avoid spurious matches on unrelated files (e.g., `Skill.md`).
               const skillMatch = p.match(
-                /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
-              ) || p.match(/\bSKILL\.md\b/);
+                /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/(?:SKILL|skill)\.md\b/
+              ) || p.match(/\b(?:SKILL|skill)\.md\b/);
               if (skillMatch) {
                 const skillName = skillMatch[1] || "(unknown)";
                 this.eventLog.append("skill_invoked", {
@@ -1452,10 +1659,12 @@ export class AgentEngine {
   }
   /**
-   * Centralized phase transition (Bug 4). All three triggers route through here:
+   * Centralized phase transition (Bug 4). Two triggers route through here
+   * after v0.7.4 G0b + v0.8 P1-D:
    * (1) pipeline.onToolResult returning phase_ready
-   * (2) post-turn auto-check via _maybeAutoAdvance
-   * (3) explicit user request via the phase_advance tool
+   * (2) explicit user request via the phase_advance tool
+   * (The historical (3) post-turn auto-check via `_maybeAutoAdvance` was
+   * removed; phase advance is 100% explicit.)
    *
    * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
    * Set `force: true` to allow non-adjacent or backward transitions (e.g. user
@@ -1533,9 +1742,17 @@ export class AgentEngine {
         try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
         if (!criteriaMet) {
           const counts = this._buildEngineCountsBlock(this.currentPhase);
+          // v0.8 P0-E: prescriptive hint in the event payload so post-mortem
+          // audits see what the agent was told (matches what phase-advance.js
+          // returns to the LLM).
+          let prescriptive = null;
+          try {
+            prescriptive = getPrescriptiveHint(this.currentPhase, null, counts || "");
+          } catch { /* hint generation is best-effort */ }
           this.eventLog.append("phase_advance_refused", {
             from: this.currentPhase, to: nextPhase, reason,
             hint: "exit criteria not met by engine telemetry",
+            prescriptive_hint: prescriptive,
             engineCounts: counts || null,
           });
           return false;
@@ -1610,23 +1827,16 @@ export class AgentEngine {
       });
     }
-    // v0.6.2 J2: on rollback, reset the rolled-FROM phase's lastReady
-    // edge-trigger so that if the agent revisits it and re-flips
-    // exit-criteria true, _maybeAutoAdvance will fire correctly. Without
-    // this, the auto-advance edge trigger stays latched true and the
-    // moment the agent returns to fromPhase the engine immediately
-    // bounces them back out — defeating the rollback.
-    if (direction === "rollback" && this._lastReady) {
-      this._lastReady[fromPhase] = false;
-    }
+    // v0.8 P1-D: removed `_lastReady` rollback reset. Was the bookkeeping
+    // for `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase
+    // advance is explicit now; rollback just needs to commit the new phase.
     this.saveState();
     // B8: Soft signal — surface any sub-agents left running from the prior
     // phase so the main agent's next turn can decide whether to kill them.
-    // NOT automated: phase_advance can fire from _maybeAutoAdvance on a
-    // criteria-flip, and auto-killing would couple lifecycle with blast
-    // radius. This just informs.
+    // NOT automated: auto-killing would couple lifecycle with blast radius.
+    // This just informs.
     try {
       const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
       const runningIds = agentTool?.getRunningTaskIds?.() || [];
@@ -1826,35 +2036,12 @@ export class AgentEngine {
     return false;
   }
-  /**
-   * Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
-   * fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
-   * already-met state do nothing; users iterating in a phase whose criteria
-   * have been met for a while do nothing. Real new evidence is required.
-   */
-  _maybeAutoAdvance() {
-    const phase = this.currentPhase;
-    const pipeline = this.pipelines[phase];
-    let nowReady = false;
-    try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
-    if (!nowReady) {
-      this._lastReady[phase] = false;
-      return null;
-    }
-    // Edge-trigger: nowReady && !wasReady
-    if (this._lastReady[phase]) return null;
-    this._lastReady[phase] = true;
-    const next = NEXT_PHASE[phase];
-    if (!next) return null;
-    const advanced = this._advancePhase(next, "exit criteria flipped to met");
-    if (!advanced) return null;
-    return new AgentEvent({
-      type: "pipeline_event",
-      data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
-    });
-  }
+  // v0.8 P1-D: `_maybeAutoAdvance()` deleted. The method auto-fired phase
+  // advance on a false→true flip of `exitCriteriaMet()`, but v0.7.3
+  // showed mid-session auto-advance chains were a regression hazard
+  // (user couldn't review between phases). v0.7.4 G0b removed all call
+  // sites; v0.8 P1-D removes the now-dead method definition + the
+  // `_lastReady` bookkeeping it relied on. Phase advance is 100% explicit.
   /**
    * Tool-call offloading. If the tool's content exceeds the threshold,
@@ -2163,27 +2350,38 @@ export class AgentEngine {
   /** B1: original serial ralph-loop path — one task at a time, shared
    *  conversation history. Unchanged from pre-v0.6.0 behavior. */
   async *_runTaskLoopSerial(userMessage) {
-    // Run the initial turn (user's request)
-    yield* this.runTurn(userMessage);
-    // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
-    // marathon test. The strict capture-BEFORE form lets every user
-    // prompt advance only one phase, which blocks unattended overnight
-    // sessions. v0.7.4-style capture-AFTER (below) allows the agent
-    // to chain multiple phase_advance calls within the initial runTurn,
-    // then exits the while loop on subsequent phase changes.
-    //
-    // TODO: after the overnight E2E results come in (2026-05-14), decide:
-    //   (a) re-enable F5 strict and build marathon as a separate mode
-    //       (external driver pattern, e.g., /loop-kc command) — locked
-    //       earlier decision per harness-research § 7
-    //   (b) keep capture-AFTER permanently and accept multi-phase prompts
+    // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
+    // - Interactive sessions (marathon NOT active): capture startingPhase
+    //   BEFORE the initial runTurn, and exit the loop on ANY phase change
+    //   (including within the initial runTurn). One user prompt = one
+    //   phase advance. Path (a) per design doc Q5 lean.
+    // - Marathon sessions: the inline MarathonDriver (this.marathonDriver)
+    //   supplies each follow-up prompt via decideNext() in the marathon
+    //   loop further down, so the engine doesn't need F5's checkpoint —
+    //   phase chaining is OK because each chained phase STILL gets its
+    //   own driver-emitted prompt on the next turn.
     //
-    // To re-enable F5: move `const startingPhase = this.currentPhase;`
-    // to BEFORE the `yield* this.runTurn(userMessage);` above, and add
-    // the matching `if (this.currentPhase !== startingPhase) { return; }`
-    // block between runTurn and the while loop.
+    // v0.7.3 demonstrated why F5 matters interactively: auto-chained
+    // phase advances skip the user check-in cycle and broke phase
+    // control in team testing. v0.7.4 G0c first fixed it via
+    // post-initial-runTurn exit; v0.7.5 added the strict capture-BEFORE
+    // refinement; v0.8 P5-A preserves both with the marathon escape;
+    // v0.8.1 P8-A switched marathon-active source from filesystem
+    // marker to inline driver instance.
+    const marathonActive = this.isMarathonActive();
     const startingPhase = this.currentPhase;
+    yield* this.runTurn(userMessage);
+    // F5 strict gate: if interactive AND phase changed during initial
+    // runTurn, exit immediately (don't auto-continue tasks in the new
+    // phase). Marathon bypasses — driver decides pacing.
+    if (!marathonActive && this.currentPhase !== startingPhase) {
+      this.eventLog.append("ralph_loop_exit", {
+        reason: "f5_strict_initial_turn",
+        from: startingPhase,
+        to: this.currentPhase,
+      });
+      return;
+    }
     // Auto-continue through pending tasks (within current phase only)
     while (this.taskManager.getNextPending()) {
@@ -2262,6 +2460,86 @@ export class AgentEngine {
         break;
       }
     }
+    // v0.8.1 P8-A: marathon mode — inline driver. After the F5 phase-
+    // boundary exit, if marathon is active, query the driver for the
+    // next continuation prompt and run additional turns until the driver
+    // signals stop (null return). State machine logic unchanged from
+    // v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
+    // direct method calls.
+    while (this.marathonDriver) {
+      const turnsSnapshot = this.marathonDriver.turnsThisPhase;
+      const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
+      const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
+      const decision = this.marathonDriver.decideNext({
+        currentPhase: this.currentPhase,
+        milestones,
+        phaseChanged,
+        errorSeen: false, // engine surfaces errors via tool_result.isError; not propagated here for v0.8.1 MVP
+        turnsThisPhase: turnsSnapshot + 1,
+      });
+      if (!decision) {
+        // Stop condition met — driver returned null
+        this.eventLog.append("marathon_detach", {
+          reason: this.marathonDriver.stopReason || "unknown",
+          decisions: this.marathonDriver.decisionCount,
+        });
+        this.marathonDriver = null;
+        break;
+      }
+      this.eventLog.append("marathon_decision", {
+        template: decision.template,
+        reason: decision.reason,
+        phase: this.currentPhase,
+      });
+      yield* this.runTurn(decision.prompt);
+      // Loop back: another turn just completed; driver gets another decideNext call.
+    }
+  }
+  /**
+   * v0.8.1 P8-A: activate marathon mode with a goal description.
+   * Creates a MarathonDriver, stores it on this.marathonDriver, and logs
+   * a `marathon_attach` event (goal truncated to 200 characters).
+   *
+   * Per the original note this is called from cli/index.js's /marathon
+   * slash command handler, and the next runTaskLoop is expected to use
+   * marathonDriver.getInitialPrompt() as the kickoff user message —
+   * neither is visible in this method; confirm at the call site.
+   *
+   * @param {string} goal — the marathon goal description (user-typed)
+   * @param {object} [opts] — {maxWallclockMs?, stuckAfterMs?}, passed to the driver
+   * @returns {object} the driver's getStatus() snapshot, for confirmation
+   * @throws {Error} if marathon mode is already active
+   * @throws {TypeError} if `goal` is not a string
+   */
+  enterMarathonMode(goal, opts = {}) {
+    if (this.marathonDriver) {
+      throw new Error("Marathon already active — use /marathon off to disengage first");
+    }
+    // Validate BEFORE mutating engine state. Previously a non-string goal
+    // only failed at `goal.slice(...)` below, after this.marathonDriver
+    // had been assigned — leaving the engine stuck in "Marathon already
+    // active" with no marathon_attach event logged.
+    if (typeof goal !== "string") {
+      throw new TypeError("Marathon goal must be a string");
+    }
+    const language = this.config.language || "en";
+    this.marathonDriver = new MarathonDriver({
+      goal,
+      language,
+      maxWallclockMs: opts.maxWallclockMs,
+      stuckAfterMs: opts.stuckAfterMs,
+    });
+    this.eventLog.append("marathon_attach", {
+      goal: goal.slice(0, 200),
+      language,
+    });
+    return this.marathonDriver.getStatus();
+  }
+  /**
+   * v0.8.1 P8-A: deactivate marathon mode.
+   * No-op returning null when no driver is attached. Otherwise takes the
+   * driver's status snapshot, stops the driver with `reason`, logs a
+   * `marathon_detach` event, clears this.marathonDriver, and returns
+   * the snapshot (captured before stop() was called).
+   */
+  exitMarathonMode(reason = "user_off") {
+    const driver = this.marathonDriver;
+    if (!driver) return null;
+    const status = driver.getStatus();
+    driver.stop(reason);
+    const decisions = driver.decisionCount;
+    this.eventLog.append("marathon_detach", { reason, decisions });
+    this.marathonDriver = null;
+    return status;
+  }
+  /**
+   * v0.8.1 P8-A: report whether marathon mode is currently active
+   * (for the TUI status bar). True only when a driver is attached and
+   * its `stopped` flag is falsy.
+   */
+  isMarathonActive() {
+    const driver = this.marathonDriver;
+    if (!driver) return false;
+    return !driver.stopped;
   }
   /**
@@ -2282,14 +2560,26 @@ export class AgentEngine {
    * amortized against the 2-4× wall-clock speedup.
    */
   async *_runTaskLoopParallel(userMessage, parallelism) {
+    // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
+    // Mirror _runTaskLoopSerial — capture startingPhase BEFORE initial
+    // runTurn so phase advance during the initial turn exits the loop
+    // unless marathon is active.
+    // v0.8.1 P8-A: marathon check now uses inline driver instance.
+    const marathonActive = this.isMarathonActive();
+    const startingPhase = this.currentPhase;
     // Initial turn: main agent reads user request, creates tasks.
     yield* this.runTurn(userMessage);
-    // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
-    // marathon test. See _runTaskLoopSerial above for full rationale.
-    // To re-enable F5: move `startingPhase` capture BEFORE the
-    // initial runTurn, add post-runTurn exit check matching serial.
-    const startingPhase = this.currentPhase;
+    if (!marathonActive && this.currentPhase !== startingPhase) {
+      this.eventLog.append("ralph_loop_exit", {
+        reason: "f5_strict_initial_turn",
+        from: startingPhase,
+        to: this.currentPhase,
+        mode: "parallel",
+      });
+      return;
+    }
     const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
     if (!agentTool) {