npm - @glrs-dev/cli - Versions diffs - 1.0.1 → 1.2.0 - Mend

@glrs-dev/cli 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/dist/vendor/harness-opencode/dist/agents/prompts/qa-reviewer.open.md ADDED Viewed

@@ -0,0 +1,58 @@
+---
+name: qa-reviewer
+description: Fast adversarial reviewer. Always re-runs verifiers. Returns [PASS] or [FAIL]. Default for typical diffs.
+mode: subagent
+model: anthropic/claude-sonnet-4-6
+temperature: 0.1
+---
+<!-- STRICT_EXECUTOR_VARIANT -->
+You are the QA Reviewer (fast variant, open-weights edition). Your job is to verify that the diff matches the plan **semantically**, detect **scope creep**, and detect **plan drift**.
+Do not ask the user questions. Return `[PASS]` or `[FAIL]` only. If you're tempted to ask, FAIL instead and let the build agent fix it.
+**Always re-run tests, lint, and typecheck.** Do not skip verification steps. Run every command yourself before returning `[PASS]`.
+# Process
+1. **Read the plan** at the path provided.
+2. **Inspect the diff.** Run `git diff` (against merge base — try `git merge-base HEAD origin/main` then `origin/master`) and `git diff --stat`. Also run `git status` to see untracked files.
+3. **Plan-drift check (AUTO-FAIL).** For each modified file in the diff, verify it appears in the plan's `## File-level changes`. A modified file NOT listed in `## File-level changes` is AUTO-FAIL. Report as `Plan drift: <path> modified but not in ## File-level changes`.
+4. **Scope-creep check.** For each UNTRACKED file (from `git status`) that is NOT in `## File-level changes`, run `git log --oneline -- <file>` to determine whether the file is pre-existing work or scope creep. If the file has no prior commits on this branch AND isn't in the plan, FAIL with `Scope creep: <path> untracked and not in plan`.
+5. **Semantic verification.** For each item in `## File-level changes`, verify the corresponding code change exists and matches the description by reading the code. For each `## Acceptance criteria` item, verify it is actually met — do NOT trust `[x]` checkboxes.
+6. **Plan-state verify commands.** Run `bunx @glrs-dev/harness-plugin-opencode plan-check --run <plan-path>` to get the list of verify commands for pending items. Execute each one via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), plan-check emits `legacy (no plan-state fence)` — skip this step.
+7. **Full-suite re-run.** Run the project's test / lint / typecheck commands (discover from `package.json` scripts / `Makefile` / `AGENTS.md`). Any failure → FAIL.
+8. **Scan for new tech debt.** Run `todo_scan` with `onlyChanged: true`. For every TODO / FIXME / HACK / XXX in the result, check whether the plan's `## Out of scope` or `## Open questions` section acknowledges it. Unacknowledged new debt → FAIL with the specific `file:line`.
+9. **AGENTS.md freshness (light check).** If the change shifts a convention documented in a local `AGENTS.md` in a touched directory, FAIL with `Update <path>/AGENTS.md to reflect <specific change>`.
+# Output
+Exactly one of these two formats. Nothing else.
+**If everything passes:**
+```
+[PASS]
+<2–3 sentence summary of verified changes.>
+```
+**If anything fails:**
+```
+[FAIL]
+1. <File:line> — <Specific issue>
+2. <File:line> — <Next issue>
+...
+```
+# Rules
+- Never suggest fixes. Report precisely; the build agent will fix.
+- Never trust the build agent's narrative. "Pre-existing work" requires `git log --oneline -- <file>` evidence.
+- A single failing item is enough to FAIL. Do not minimize.
+- **AUTO-FAIL on plan drift.** Modified file not in `## File-level changes` → FAIL, no exceptions.
+- **AUTO-FAIL on scope creep.** Untracked file not in plan with no prior commits → FAIL.
+- If the diff is large (>10 files or >500 lines) or touches high-risk paths (auth / crypto / billing / migrations), tell the PRIME to delegate to `@qa-thorough` instead.

package/dist/vendor/harness-opencode/dist/{chunk-WBBN7OVN.js → chunk-BWERBERN.js} RENAMED Viewed

@@ -257,7 +257,7 @@ async function requirePlugin() {
     );
     process.exit(1);
   }
-  const { install: install2 } = await import("./install-X5KEANRB.js");
+  const { install: install2 } = await import("./install-5JKWK6Z4.js");
   await install2({ nonInteractive: true });
 }
@@ -764,6 +764,25 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
         fast: [preset.fast]
       };
       ok(`Models configured`);
+      const midExecIdx = await promptChoice(
+        "  Use a strict executor for build agents? (recommended for Kimi/Qwen/DeepSeek)",
+        ["No (use mid model as reasoning builder)", "Yes (configure mid-execute model)"],
+        0
+      );
+      if (midExecIdx === 1) {
+        const { input } = await import("@inquirer/prompts");
+        const midExecModel = await input({
+          message: "  mid-execute model ID:",
+          default: preset.mid
+        });
+        if (midExecModel) {
+          pluginOpts.models["mid-execute"] = [midExecModel];
+          newModelsValue["mid-execute"] = [midExecModel];
+          info(`  mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
+        }
+      } else {
+        info(`  mid-execute: skipped (build agents use mid model with reasoning prompts)`);
+      }
     } else if (!pluginOpts._skipModels) {
       info("Enter model IDs in <provider>/<model-id> format (e.g. amazon-bedrock/global.anthropic.claude-opus-4-7)");
       const { input } = await import("@inquirer/prompts");
@@ -771,17 +790,26 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
       const midModel = await input({ message: "  mid (balanced):" });
       const fastModel = await input({ message: "  fast (cheapest):" });
       if (deepModel) {
+        const resolvedMid = midModel || deepModel;
         pluginOpts.models = {
           deep: [deepModel],
-          mid: [midModel || deepModel],
+          mid: [resolvedMid],
           fast: [fastModel || midModel || deepModel]
         };
         newModelsValue = {
           deep: [deepModel],
-          mid: [midModel || deepModel],
+          mid: [resolvedMid],
           fast: [fastModel || midModel || deepModel]
         };
         ok("Models: custom");
+        const midExecModel = await input({ message: "  mid-execute (optional strict executor, press Enter to skip):" });
+        if (midExecModel) {
+          pluginOpts.models["mid-execute"] = [midExecModel];
+          newModelsValue["mid-execute"] = [midExecModel];
+          info(`  mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
+        } else {
+          info(`  mid-execute: skipped (build agents use mid model with reasoning prompts)`);
+        }
       } else {
         ok("Models: OpenCode defaults");
       }

package/dist/vendor/harness-opencode/dist/{chunk-CZMAJISX.js → chunk-EK7K4NTV.js} RENAMED Viewed

@@ -47,7 +47,9 @@ function readPrompt(name) {
 var primePrompt = readPrompt("prime.md");
 var planPrompt = readPrompt("plan.md");
 var buildPrompt = readPrompt("build.md");
+var buildOpenPrompt = readPrompt("build.open.md");
 var qaReviewerPrompt = readPrompt("qa-reviewer.md");
+var qaReviewerOpenPrompt = readPrompt("qa-reviewer.open.md");
 var qaThoroughPrompt = readPrompt("qa-thorough.md");
 var planReviewerPrompt = readPrompt("plan-reviewer.md");
 var codeSearcherPrompt = readPrompt("code-searcher.md");
@@ -57,11 +59,24 @@ var docsMaintainerPrompt = readPrompt("docs-maintainer.md");
 var libReaderPrompt = readPrompt("lib-reader.md");
 var agentsMdWriterPrompt = readPrompt("agents-md-writer.md");
 var pilotBuilderPrompt = readPrompt("pilot-builder.md");
+var pilotBuilderOpenPrompt = readPrompt("pilot-builder.open.md");
 var pilotPlannerPrompt = readPrompt("pilot-planner.md");
 var researchPrompt = readPrompt("research.md");
 var researchWebPrompt = readPrompt("research-web.md");
 var researchLocalPrompt = readPrompt("research-local.md");
 var researchAutoPrompt = readPrompt("research-auto.md");
+var EXECUTOR_VARIANT_AGENTS = {
+  build: { reasoning: buildPrompt, strict: buildOpenPrompt },
+  "qa-reviewer": { reasoning: qaReviewerPrompt, strict: qaReviewerOpenPrompt },
+  "pilot-builder": { reasoning: pilotBuilderPrompt, strict: pilotBuilderOpenPrompt }
+};
+function getStrictPrompt(agentName) {
+  const variants = EXECUTOR_VARIANT_AGENTS[agentName];
+  if (!variants) {
+    throw new Error(`getStrictPrompt: no strict variant registered for agent "${agentName}"`);
+  }
+  return variants.strict;
+}
 function stripFrontmatter(md) {
   if (!md.startsWith("---")) return md;
   const end = md.indexOf("\n---", 3);
@@ -563,12 +578,12 @@ var AGENT_TIERS = {
   "research-web": "deep",
   "research-local": "deep",
   "research-auto": "deep",
-  build: "mid",
-  "qa-reviewer": "mid",
+  build: "mid-execute",
+  "qa-reviewer": "mid-execute",
+  "pilot-builder": "mid-execute",
   "docs-maintainer": "mid",
   "lib-reader": "mid",
   "agents-md-writer": "mid",
-  "pilot-builder": "mid",
   "code-searcher": "fast"
 };
 function createAgents() {
@@ -724,6 +739,7 @@ function formatModelOverrideWarning(id, source, suggestion) {
 }
 export {
+  getStrictPrompt,
   AGENT_TIERS,
   createAgents,
   validateModelOverride,

package/dist/vendor/harness-opencode/dist/cli.js CHANGED Viewed

@@ -2,7 +2,7 @@
 import {
   createAgents,
   validateModelOverride
-} from "./chunk-CZMAJISX.js";
+} from "./chunk-EK7K4NTV.js";
 import {
   getSessionsPath,
   registerSession,
@@ -11,7 +11,7 @@ import {
 import {
   install,
   requirePlugin
-} from "./chunk-WBBN7OVN.js";
+} from "./chunk-BWERBERN.js";
 import "./chunk-VJUETC6A.js";
 import {
   getPilotDir,
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
 CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
 CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
 `.trim();
+var V2_SQL = `
+CREATE TABLE IF NOT EXISTS workflows (
+  id            TEXT    NOT NULL PRIMARY KEY,
+  goal          TEXT    NOT NULL,
+  started_at    INTEGER NOT NULL,
+  finished_at   INTEGER,
+  status        TEXT    NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
+  current_phase TEXT
+);
+CREATE TABLE IF NOT EXISTS phases (
+  workflow_id   TEXT    NOT NULL,
+  name          TEXT    NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
+  status        TEXT    NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
+  started_at    INTEGER,
+  finished_at   INTEGER,
+  artifact_path TEXT,
+  PRIMARY KEY (workflow_id, name),
+  FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
+);
+CREATE TABLE IF NOT EXISTS artifacts (
+  id          INTEGER PRIMARY KEY AUTOINCREMENT,
+  workflow_id TEXT    NOT NULL,
+  phase       TEXT    NOT NULL,
+  kind        TEXT    NOT NULL,
+  path        TEXT    NOT NULL,
+  created_at  INTEGER NOT NULL,
+  sha256      TEXT,
+  FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
+ALTER TABLE events ADD COLUMN phase TEXT;
+INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
+SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
+INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
+SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
+UPDATE events SET phase = 'build' WHERE phase IS NULL;
+`.trim();
 var MIGRATIONS = [
   {
     version: 1,
     description: "initial pilot schema (runs/tasks/events)",
     sql: V1_SQL
+  },
+  {
+    version: 2,
+    description: "workflows/phases/artifacts tables + events.phase column",
+    sql: V2_SQL
   }
 ];
 function applyMigrations(db) {
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
     });
   }
   db.run(
-    `INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
-    [args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
+    `INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
+    [args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
   );
   if (eventSubscribers.length > 0) {
     const snapshot = eventSubscribers.slice();
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
           taskId: args.taskId ?? null,
           kind: args.kind,
           payload: args.payload,
+          phase: args.phase ?? null,
           ts
         });
       } catch {
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
   return sections.join("\n");
 }
-// src/pilot/verify/runner.ts
-import { spawn as spawn2 } from "child_process";
-var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
-var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
-var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
-async function runVerify(commands, options) {
+// src/pilot/gates/composite.ts
+async function evalAllGate(gate, ctx) {
+  const startedAt = Date.now();
   const results = [];
-  for (const command10 of commands) {
-    const result = await runOne(command10, options);
-    results.push(result);
-    if (!result.ok) {
-      return { ok: false, results, failure: result };
+  for (const sub of gate.gates) {
+    const subResult = await evalGate(sub, ctx);
+    results.push({ gate: sub, result: subResult });
+    if (!subResult.ok) {
+      const evidence2 = {
+        kind: "all",
+        results,
+        failure: subResult
+      };
+      return {
+        ok: false,
+        reason: subResult.reason,
+        evidence: evidence2,
+        durationMs: Date.now() - startedAt
+      };
     }
   }
+  const evidence = { kind: "all", results };
   return {
     ok: true,
-    results
+    evidence,
+    durationMs: Date.now() - startedAt
   };
 }
+async function evalAnyGate(gate, ctx) {
+  const startedAt = Date.now();
+  const results = [];
+  if (gate.gates.length === 0) {
+    const evidence2 = { kind: "any", results };
+    return {
+      ok: false,
+      reason: "any-gate has no sub-gates to satisfy",
+      evidence: evidence2,
+      durationMs: Date.now() - startedAt
+    };
+  }
+  let lastResult = null;
+  for (const sub of gate.gates) {
+    const subResult = await evalGate(sub, ctx);
+    results.push({ gate: sub, result: subResult });
+    lastResult = subResult;
+    if (subResult.ok) {
+      const evidence2 = { kind: "any", results };
+      return {
+        ok: true,
+        evidence: evidence2,
+        durationMs: Date.now() - startedAt
+      };
+    }
+  }
+  const evidence = {
+    kind: "any",
+    results,
+    failure: lastResult ?? void 0
+  };
+  return {
+    ok: false,
+    reason: `any-gate exhausted: all ${results.length} sub-gates failed`,
+    evidence,
+    durationMs: Date.now() - startedAt
+  };
+}
+// src/pilot/verify/spawn.ts
+import { spawn as spawn2 } from "child_process";
+var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
+var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
+var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
 async function runOne(command10, options) {
   if (typeof command10 !== "string" || command10.length === 0) {
     throw new TypeError(`runOne: command must be a non-empty string`);
@@ -2020,6 +2123,147 @@ function killTree(child) {
   }, 2e3).unref();
 }
+// src/pilot/gates/shell.ts
+async function evalShellGate(gate, ctx) {
+  const result = await runOne(gate.command, {
+    cwd: ctx.cwd,
+    env: ctx.env,
+    abortSignal: ctx.abortSignal,
+    onLine: ctx.onShellLine,
+    timeoutMs: gate.timeoutMs,
+    outputCapBytes: ctx.shellOutputCapBytes
+  });
+  return toGateResult(result);
+}
+function toGateResult(result) {
+  if (result.ok) {
+    return {
+      ok: true,
+      durationMs: result.durationMs,
+      evidence: { kind: "shell", result }
+    };
+  }
+  const reason = formatShellFailure(result);
+  return {
+    ok: false,
+    reason,
+    durationMs: result.durationMs,
+    evidence: { kind: "shell", result }
+  };
+}
+function formatShellFailure(result) {
+  const flags = [];
+  if (result.timedOut) flags.push("timed-out");
+  if (result.aborted) flags.push("aborted");
+  if (result.signal) flags.push(`signal=${result.signal}`);
+  const flagSuffix = flags.length > 0 ? ` [${flags.join(",")}]` : "";
+  return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${flagSuffix}`;
+}
+// src/pilot/gates/eval.ts
+async function evalGate(gate, ctx) {
+  switch (gate.kind) {
+    case "shell":
+      return evalShellGate(gate, ctx);
+    case "all":
+      return evalAllGate(gate, ctx);
+    case "any":
+      return evalAnyGate(gate, ctx);
+    default: {
+      const _exhaustive = gate;
+      throw new Error(
+        `evalGate: unknown gate kind ${_exhaustive.kind}`
+      );
+    }
+  }
+}
+// src/pilot/gates/types.ts
+function asShellEvidence(evidence) {
+  if (typeof evidence === "object" && evidence !== null && evidence.kind === "shell") {
+    return evidence;
+  }
+  return null;
+}
+function asCompositeEvidence(evidence) {
+  if (typeof evidence === "object" && evidence !== null && (evidence.kind === "all" || evidence.kind === "any")) {
+    return evidence;
+  }
+  return null;
+}
+// src/pilot/verify/runner.ts
+async function runVerify(commands, options) {
+  if (commands.length === 0) {
+    return { ok: true, results: [] };
+  }
+  const gate = {
+    kind: "all",
+    gates: commands.map((command10) => ({
+      kind: "shell",
+      command: command10,
+      timeoutMs: options.timeoutMs
+    }))
+  };
+  const ctx = {
+    cwd: options.cwd,
+    env: options.env,
+    abortSignal: options.abortSignal,
+    onShellLine: options.onLine,
+    shellOutputCapBytes: options.outputCapBytes
+  };
+  const gateResult = await evalGate(gate, ctx);
+  return toRunVerifyResult(gateResult);
+}
+function toRunVerifyResult(gateResult) {
+  const composite = asCompositeEvidence(gateResult.evidence);
+  if (composite === null || composite.kind !== "all") {
+    throw new Error(
+      `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
+    );
+  }
+  const results = composite.results.map((entry) => extractCommandResult(entry));
+  if (gateResult.ok) {
+    return {
+      ok: true,
+      results
+    };
+  }
+  const failingEntry = composite.results[composite.results.length - 1];
+  if (!failingEntry || failingEntry.result.ok) {
+    throw new Error(
+      "runVerify: all-gate failed but no failing sub-result was recorded"
+    );
+  }
+  const failureCommandResult = extractCommandResult(failingEntry);
+  if (failureCommandResult.ok) {
+    throw new Error(
+      "runVerify: failing sub-gate produced a successful CommandResult"
+    );
+  }
+  return {
+    ok: false,
+    results,
+    failure: failureCommandResult
+  };
+}
+function extractCommandResult(entry) {
+  const shell = asShellEvidence(entry.result.evidence);
+  if (shell === null) {
+    throw new Error(
+      `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(entry.result)}`
+    );
+  }
+  return shell.result;
+}
+function gateResultDescriptor(result) {
+  const evidence = result.evidence;
+  return JSON.stringify({
+    ok: result.ok,
+    evidenceKind: evidence?.kind ?? null
+  });
+}
 // src/pilot/verify/touches.ts
 import picomatch2 from "picomatch";
 import { execFile as execFile2 } from "child_process";
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
           command: f.command,
           exitCode: f.exitCode,
           output: f.output.slice(0, 4096),
-          reason: reason2
+          reason: reason2,
+          // Step 1 of pilot redesign: gate descriptor on every
+          // verify-derived event. Future LLM/approval gates emit
+          // identically-shaped events with a different `gate.kind`.
+          gate: { kind: "shell", command: f.command }
         }
       });
       return;
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
       runId: deps.runId,
       taskId: task.id,
       kind: "task.baseline.passed",
-      payload: { commands: allVerify.length }
+      payload: {
+        commands: allVerify.length,
+        gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
+      }
     });
   }
   let lastFailure = null;
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
           exitCode: lastFailure.exitCode,
           timedOut: verifyResult.failure.timedOut,
           aborted: verifyResult.failure.aborted,
-          output: verifyResult.failure.output.slice(-2048)
+          output: verifyResult.failure.output.slice(-2048),
+          gate: { kind: "shell", command: lastFailure.command }
         }
       });
       if (verifyResult.failure.aborted) {
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
       runId: deps.runId,
       taskId: task.id,
       kind: "task.verify.passed",
-      payload: { attempt }
+      payload: {
+        attempt,
+        gate: { kind: "all", subKind: "shell", count: allVerify.length }
+      }
     });
     const touches = await enforceTouches({
       cwd,
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
   const taskStart = /* @__PURE__ */ new Map();
   let succeeded = 0;
   let failed = 0;
-  const INLINE_BLOCKED_CAP = 5;
+  const INLINE_BLOCKED_CAP = 0;
   let blockedCount = 0;
   let blockedInlineEmitted = 0;
   let blockedOverflowEmitted = false;
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
         if (id !== null) taskStart.set(id, event.ts);
         write(`task.started ${id ?? "?"}`);
         break;
+      case "task.baseline.passed":
+        break;
+      case "task.baseline.failed": {
+        const bp = event.payload;
+        if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
+          write(
+            `task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
+          );
+          const output = typeof bp.output === "string" ? bp.output : null;
+          if (output !== null && output.trim().length > 0) {
+            const tail = output.trim().split("\n").slice(-6).map((l) => `    ${l}`).join("\n");
+            writeRaw(tail);
+          }
+        } else {
+          write(`task.baseline.failed ${id ?? "?"}`);
+        }
+        break;
+      }
       case "task.verify.passed":
         write(`task.verify.passed ${id ?? "?"}`);
         break;
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
       case "task.attempt": {
         const p = event.payload;
         if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
-          writeRaw(`  attempt ${p.attempt}/${p.of} (retry with fix prompt)`);
+          write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
         }
         break;
       }
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
     session:  ${session}
     worktree: ${worktree}
     elapsed:  ${elapsed}   attempts: ${t.attempts}
 `
         );
+        const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
+        if (baselineOutput !== null) {
+          const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => `      ${l}`).join("\n");
+          process.stdout.write(`    output:
+${tail}
+`);
+        }
+        process.stdout.write(`
+`);
       }
     }
   }
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
     reason: row.last_error ?? "(no reason recorded)"
   };
 }
+function resolveBaselineOutput(db, runId, taskId) {
+  const events = readEventsDecoded(db, { runId, taskId });
+  for (let i = events.length - 1; i >= 0; i--) {
+    const e = events[i];
+    if (e.kind !== "task.baseline.failed") continue;
+    const p = e.payload;
+    if (p !== null && typeof p === "object" && typeof p.output === "string") {
+      return p.output;
+    }
+  }
+  return null;
+}
 function truncateSummary(s, maxChars) {
   if (s.length <= maxChars) return s;
   return s.slice(0, maxChars - 1) + "\u2026";