npm - gsd-pi - Versions diffs - 2.47.0-dev.04be8c9 → 2.47.0-dev.f2e721d - Mend

gsd-pi 2.47.0-dev.04be8c9 → 2.47.0-dev.f2e721d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/src/resources/extensions/gsd/guided-flow.ts CHANGED Viewed

@@ -35,7 +35,7 @@ import { showProjectInit, offerMigration } from "./init-wizard.js";
 import { validateDirectory } from "./validate-directory.js";
 import { showConfirm } from "../shared/tui.js";
 import { debugLog } from "./debug-logger.js";
-import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds } from "./milestone-ids.js";
+import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js";
 import { parkMilestone, discardMilestone } from "./milestone-actions.js";
 import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
@@ -373,6 +373,9 @@ export async function showHeadlessMilestoneCreation(
   basePath: string,
   seedContext: string,
 ): Promise<void> {
+  // Clear stale reservations from previous cancelled sessions (#2488)
+  clearReservedMilestoneIds();
   // Ensure .gsd/ is bootstrapped
   bootstrapGsdProject(basePath);
@@ -511,9 +514,14 @@ export async function showDiscuss(
   const state = await deriveState(basePath);
-  // Guard: no active milestone
+  // No active milestone — check for pending milestones to discuss instead
   if (!state.activeMilestone) {
-    ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
+    const pendingMilestones = state.registry.filter(m => m.status === "pending");
+    if (pendingMilestones.length === 0) {
+      ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
+      return;
+    }
+    await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
     return;
   }
@@ -648,6 +656,17 @@ export async function showDiscuss(
       };
     });
+    // Offer access to queued milestones when any exist
+    const pendingMilestones = state.registry.filter(m => m.status === "pending");
+    if (pendingMilestones.length > 0) {
+      actions.push({
+        id: "discuss_queued_milestone",
+        label: "Discuss a queued milestone",
+        description: `Refine context for ${pendingMilestones.length} queued milestone(s). Does not affect current execution.`,
+        recommended: false,
+      });
+    }
     const choice = await showNextAction(ctx, {
       title: "GSD — Discuss a slice",
       summary: [
@@ -660,6 +679,11 @@ export async function showDiscuss(
     if (choice === "not_yet") return;
+    if (choice === "discuss_queued_milestone") {
+      await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
+      return;
+    }
     const chosen = pendingSlices.find(s => s.id === choice);
     if (!chosen) return;
@@ -689,6 +713,79 @@ export async function showDiscuss(
   }
 }
+// ─── Queued Milestone Discussion ─────────────────────────────────────────────
+/**
+ * Show a picker of queued (pending) milestones and dispatch a discuss flow for
+ * the chosen one. Discussing a queued milestone does NOT activate it — it only
+ * refines the CONTEXT.md artifact so it is better prepared when auto-mode
+ * eventually reaches it.
+ */
+async function showDiscussQueuedMilestone(
+  ctx: ExtensionCommandContext,
+  pi: ExtensionAPI,
+  basePath: string,
+  pendingMilestones: Array<{ id: string; title: string; status: string }>,
+): Promise<void> {
+  const actions = pendingMilestones.map((m, i) => {
+    const hasContext = !!resolveMilestoneFile(basePath, m.id, "CONTEXT");
+    const hasDraft = !hasContext && !!resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT");
+    const contextStatus = hasContext ? "context ✓" : hasDraft ? "draft context" : "no context yet";
+    return {
+      id: m.id,
+      label: `${m.id}: ${m.title}`,
+      description: `[queued] · ${contextStatus}`,
+      recommended: i === 0,
+    };
+  });
+  const choice = await showNextAction(ctx, {
+    title: "GSD — Discuss a queued milestone",
+    summary: [
+      "Select a queued milestone to discuss.",
+      "Discussing will update its context file. It will not be activated.",
+    ],
+    actions,
+    notYetMessage: "Run /gsd discuss when ready.",
+  });
+  if (choice === "not_yet") return;
+  const chosen = pendingMilestones.find(m => m.id === choice);
+  if (!chosen) return;
+  await dispatchDiscussForMilestone(ctx, pi, basePath, chosen.id, chosen.title);
+}
+/**
+ * Dispatch the guided-discuss-milestone prompt for a milestone without
+ * setting pendingAutoStart — so discussing a queued milestone does not
+ * implicitly activate it when the session ends.
+ */
+async function dispatchDiscussForMilestone(
+  ctx: ExtensionCommandContext,
+  pi: ExtensionAPI,
+  basePath: string,
+  mid: string,
+  milestoneTitle: string,
+): Promise<void> {
+  const draftFile = resolveMilestoneFile(basePath, mid, "CONTEXT-DRAFT");
+  const draftContent = draftFile ? await loadFile(draftFile) : null;
+  const discussMilestoneTemplates = inlineTemplate("context", "Context");
+  const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false";
+  const basePrompt = loadPrompt("guided-discuss-milestone", {
+    milestoneId: mid,
+    milestoneTitle,
+    inlinedTemplates: discussMilestoneTemplates,
+    structuredQuestionsAvailable,
+    commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`),
+  });
+  const prompt = draftContent
+    ? `${basePrompt}\n\n## Prior Discussion (Draft Seed)\n\n${draftContent}`
+    : basePrompt;
+  await dispatchWorkflow(pi, prompt, "gsd-discuss", ctx, "plan-milestone");
+}
 // ─── Smart Entry Point ────────────────────────────────────────────────────────
 /**
@@ -842,6 +939,11 @@ export async function showSmartEntry(
 ): Promise<void> {
   const stepMode = options?.step;
+  // ── Clear stale milestone ID reservations from previous cancelled sessions ──
+  // Reservations only need to survive within a single /gsd interaction.
+  // Without this, each cancelled session permanently bumps the next ID. (#2488)
+  clearReservedMilestoneIds();
   // ── Directory safety check — refuse to operate in system/home dirs ───
   const dirCheck = validateDirectory(basePath);
   if (dirCheck.severity === "blocked") {

package/src/resources/extensions/gsd/prompts/forensics.md CHANGED Viewed

@@ -36,6 +36,8 @@ GSD extension source code is at: `{{gsdSourceDir}}`
 ├── doctor-history.jsonl         — doctor check history
 ├── activity/                    — session activity logs (JSONL per unit)
 │   └── {seq}-{unitType}-{unitId}.jsonl
+├── journal/                     — structured event journal (JSONL per day)
+│   └── YYYY-MM-DD.jsonl
 ├── runtime/
 │   ├── paused-session.json      — serialized session when auto pauses
 │   └── headless-context.md      — headless resume context
@@ -60,6 +62,32 @@ GSD extension source code is at: `{{gsdSourceDir}}`
 - `usage` field on assistant messages: `input`, `output`, `cacheRead`, `cacheWrite`, `totalTokens`, `cost`
 - **To trace a failure**: find the last activity log, search for `isError: true` tool results, then read the agent's reasoning text preceding that error
+### Journal Format (`.gsd/journal/`)
+The journal is a structured event log for auto-mode iterations. Each daily file contains JSONL entries:
+```
+{ ts: "ISO-8601", flowId: "UUID", seq: 0, eventType: "iteration-start", rule?: "rule-name", causedBy?: { flowId, seq }, data?: { unitId, status, ... } }
+```
+**Key event types:**
+- `iteration-start` / `iteration-end` — marks loop iteration boundaries
+- `dispatch-match` / `dispatch-stop` — what the auto-mode decided to do (or not do)
+- `unit-start` / `unit-end` — lifecycle of individual work units
+- `terminal` — auto-mode reached a terminal state (all done, budget exceeded, etc.)
+- `guard-block` — dispatch was blocked by a guard condition (e.g. needs user input)
+- `stuck-detected` — the loop detected it was stuck (same unit repeatedly dispatched)
+- `milestone-transition` — a milestone was promoted or completed
+- `worktree-enter` / `worktree-create-failed` / `worktree-merge-start` / `worktree-merge-failed` — worktree operations
+**Key concepts:**
+- **flowId**: UUID grouping all events in one iteration. Use it to reconstruct what happened in a single loop pass.
+- **causedBy**: Cross-reference to a prior event (same or different flow). Enables causal chain tracing.
+- **seq**: Monotonically increasing within a flow. Use it to reconstruct event order within an iteration.
+**To trace a stuck loop**: filter for `stuck-detected` events, then follow `flowId` to see the surrounding dispatch and unit events.
+**To trace a guard block**: filter for `guard-block` events, check `data.reason` for why dispatch was blocked.
 ### Crash Lock Format (`auto.lock`)
 JSON with fields: `pid`, `startedAt`, `unitType`, `unitId`, `unitStartedAt`, `completedUnits`, `sessionFile`
@@ -78,20 +106,24 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a
 1. **Start with the pre-parsed forensic report** above. The anomaly section contains automated findings — treat these as leads, not conclusions.
-2. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
+2. **Check the journal timeline** if present. The journal events show the auto-mode's decision sequence (dispatches, guards, stuck detection, worktree operations). Use flow IDs to group related events and trace causal chains.
+3. **Cross-reference activity logs and journal**. Activity logs show *what the LLM did* (tool calls, reasoning, errors). Journal events show *what auto-mode decided* (dispatch rules, iteration boundaries, state transitions). Together they reveal the full picture.
+4. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
-3. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
+5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
-4. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
+6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
-5. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
+7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
    - Missing edge case / unhandled condition
    - Wrong boolean logic or comparison
    - Race condition or ordering issue
    - State corruption (e.g. completed-units.json out of sync with artifacts)
    - Timeout / recovery logic not triggering correctly
-6. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
+8. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
 ## Output

package/src/resources/extensions/gsd/session-forensics.ts CHANGED Viewed

@@ -172,7 +172,17 @@ export function extractTrace(entries: unknown[]): ExecutionTrace {
       }
       if (isError && resultText) {
-        errors.push(resultText.slice(0, 300));
+        // Filter out benign "errors" that are normal during code exploration:
+        // - a bash command that prints nothing and exits with code 1 is treated as a grep/rg-style "no matches" result (specified by POSIX for grep), not a failure
+        // - User interrupts (Escape/skip) are intentional, not failures
+        const trimmed = resultText.trim();
+        const isBenignNoMatch = pending?.name === "bash" &&
+          /^\(no output\)\s*\n\s*Command exited with code 1$/m.test(trimmed);
+        const isUserSkip = /^Skipped due to queued user message/i.test(trimmed);
+        if (!isBenignNoMatch && !isUserSkip) {
+          errors.push(resultText.slice(0, 300));
+        }
       }
     }
   }

package/src/resources/extensions/gsd/tests/discuss-queued-milestones.test.ts ADDED Viewed

@@ -0,0 +1,241 @@
+/**
+ * discuss-queued-milestones.test.ts — Tests for #2307.
+ *
+ * /gsd discuss was previously gated on state.activeMilestone, which prevented
+ * users from discussing queued (pending) milestones during roadmap grooming.
+ *
+ * These tests verify:
+ *   1. deriveState correctly identifies pending milestones (the set the picker
+ *      will show when no active milestone is present)
+ *   2. resolveMilestoneFile correctly resolves context artifacts for pending
+ *      milestones so the picker can report their discussion state
+ *   3. The guided-flow.ts source code no longer hard-exits when no active
+ *      milestone exists but pending milestones are present
+ *   4. The helper functions for queued discuss exist in the source
+ */
+import { describe, test, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { fileURLToPath } from "node:url";
+import { dirname } from "node:path";
+import { deriveState } from "../state.ts";
+import { invalidateAllCaches } from "../cache.ts";
+import { resolveMilestoneFile } from "../paths.ts";
+// ─── Fixture Helpers ──────────────────────────────────────────────────────────
+function createBase(): string {
+  const base = mkdtempSync(join(tmpdir(), "gsd-discuss-queued-"));
+  mkdirSync(join(base, ".gsd", "milestones"), { recursive: true });
+  return base;
+}
+function cleanup(base: string): void {
+  rmSync(base, { recursive: true, force: true });
+}
+function writeMilestoneDir(base: string, mid: string): void {
+  mkdirSync(join(base, ".gsd", "milestones", mid), { recursive: true });
+}
+function writeContext(base: string, mid: string, content: string): void {
+  writeMilestoneDir(base, mid);
+  writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-CONTEXT.md`), content);
+}
+function writeContextDraft(base: string, mid: string, content: string): void {
+  writeMilestoneDir(base, mid);
+  writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-CONTEXT-DRAFT.md`), content);
+}
+function writeRoadmap(base: string, mid: string, content: string): void {
+  writeMilestoneDir(base, mid);
+  writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-ROADMAP.md`), content);
+}
+function readGuidedFlowSource(): string {
+  const thisFile = fileURLToPath(import.meta.url);
+  const thisDir = dirname(thisFile);
+  return readFileSync(join(thisDir, "..", "guided-flow.ts"), "utf-8");
+}
+// ─── Tests ────────────────────────────────────────────────────────────────────
+describe("discuss-queued-milestones (#2307)", () => {
+  test("1. pending milestones appear in registry when active milestone exists", async () => {
+    const base = createBase();
+    try {
+      // M001: active — has context + roadmap with a slice
+      writeContext(base, "M001", "# M001: Active\nContext here.");
+      writeRoadmap(base, "M001",
+        "# M001: Active\n\n## Slices\n- [ ] **S01: Do work** `risk:low` `depends:[]`\n  > After this: works\n");
+      // M002: pending — context only, no roadmap
+      writeContext(base, "M002", "# M002: Queued\nFuture work.");
+      // M003: pending — draft context only
+      writeContextDraft(base, "M003", "# M003: Draft\nSeed material.");
+      invalidateAllCaches();
+      const state = await deriveState(base);
+      assert.ok(!!state.activeMilestone, "M001 should be the active milestone");
+      assert.strictEqual(state.activeMilestone?.id, "M001");
+      const pendingIds = state.registry
+        .filter(m => m.status === "pending")
+        .map(m => m.id);
+      assert.ok(pendingIds.includes("M002"), "M002 should be pending");
+      assert.ok(pendingIds.includes("M003"), "M003 should be pending");
+    } finally {
+      cleanup(base);
+    }
+  });
+  test("2. first context-only milestone is active, subsequent ones are pending", async () => {
+    const base = createBase();
+    try {
+      // M001: first milestone with context but no roadmap — deriveState marks it active
+      writeContext(base, "M001", "# M001: First\nContext here.");
+      // M002: will be pending since M001 is active
+      writeContext(base, "M002", "# M002: Second\nMore future work.");
+      invalidateAllCaches();
+      const state = await deriveState(base);
+      // deriveState makes the first unfinished milestone "active" even without a roadmap
+      assert.ok(!!state.activeMilestone, "first milestone should be active");
+      assert.strictEqual(state.activeMilestone?.id, "M001", "M001 is the active milestone");
+      const pendingIds = state.registry
+        .filter(m => m.status === "pending")
+        .map(m => m.id);
+      assert.ok(pendingIds.includes("M002"),
+        "M002 should be pending — it comes after the active M001");
+    } finally {
+      cleanup(base);
+    }
+  });
+  test("3. resolveMilestoneFile finds CONTEXT.md for pending milestone", (t) => {
+    const base = createBase();
+    try {
+      writeContext(base, "M002", "# M002: Queued\nContent.");
+      const contextFile = resolveMilestoneFile(base, "M002", "CONTEXT");
+      assert.ok(contextFile !== null, "resolveMilestoneFile should find CONTEXT.md for M002");
+      assert.ok(contextFile!.endsWith("M002-CONTEXT.md"),
+        "resolved path should point to M002-CONTEXT.md");
+    } finally {
+      cleanup(base);
+    }
+  });
+  test("4. resolveMilestoneFile finds CONTEXT-DRAFT.md for pending milestone", (t) => {
+    const base = createBase();
+    try {
+      writeContextDraft(base, "M003", "# M003: Draft\nSeed content.");
+      const draftFile = resolveMilestoneFile(base, "M003", "CONTEXT-DRAFT");
+      assert.ok(draftFile !== null, "resolveMilestoneFile should find CONTEXT-DRAFT.md for M003");
+      assert.ok(draftFile!.endsWith("M003-CONTEXT-DRAFT.md"),
+        "resolved path should point to M003-CONTEXT-DRAFT.md");
+    } finally {
+      cleanup(base);
+    }
+  });
+  test("5. resolveMilestoneFile returns null when pending milestone has no context", (t) => {
+    const base = createBase();
+    try {
+      writeMilestoneDir(base, "M004");
+      const contextFile = resolveMilestoneFile(base, "M004", "CONTEXT");
+      assert.strictEqual(contextFile, null,
+        "resolveMilestoneFile should return null when no CONTEXT.md exists");
+      const draftFile = resolveMilestoneFile(base, "M004", "CONTEXT-DRAFT");
+      assert.strictEqual(draftFile, null,
+        "resolveMilestoneFile should return null when no CONTEXT-DRAFT.md exists");
+    } finally {
+      cleanup(base);
+    }
+  });
+  test("6. guided-flow no longer hard-exits when no active milestone but pending exist", () => {
+    const source = readGuidedFlowSource();
+    // The old guard was a simple early-exit:
+    //   if (!state.activeMilestone) {
+    //     ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
+    //     return;
+    //   }
+    //
+    // The new guard should check for pending milestones and route instead.
+    const oldGuardPattern = /if\s*\(!state\.activeMilestone\)\s*\{\s*ctx\.ui\.notify\("No active milestone/;
+    assert.ok(
+      !oldGuardPattern.test(source),
+      "guided-flow must not unconditionally exit when activeMilestone is null",
+    );
+  });
+  test("7. showDiscussQueuedMilestone helper exists in guided-flow", () => {
+    const source = readGuidedFlowSource();
+    assert.ok(
+      source.includes("showDiscussQueuedMilestone"),
+      "guided-flow must export showDiscussQueuedMilestone helper",
+    );
+  });
+  test("8. dispatchDiscussForMilestone helper exists in guided-flow", () => {
+    const source = readGuidedFlowSource();
+    assert.ok(
+      source.includes("dispatchDiscussForMilestone"),
+      "guided-flow must export dispatchDiscussForMilestone helper",
+    );
+  });
+  test("9. dispatchDiscussForMilestone does not set pendingAutoStart", () => {
+    const source = readGuidedFlowSource();
+    // Extract the dispatchDiscussForMilestone function body
+    const fnMatch = source.match(
+      /async function dispatchDiscussForMilestone\s*\([^)]*\)[^{]*\{([\s\S]*?)\n\}/,
+    );
+    assert.ok(!!fnMatch, "dispatchDiscussForMilestone function body must be present");
+    if (fnMatch) {
+      assert.ok(
+        !fnMatch[1].includes("pendingAutoStart"),
+        "dispatchDiscussForMilestone must NOT set pendingAutoStart — discussing a queued milestone must not activate it",
+      );
+    }
+  });
+  test("10. slice picker includes queued milestone option when pending milestones exist", () => {
+    const source = readGuidedFlowSource();
+    assert.ok(
+      source.includes("discuss_queued_milestone"),
+      "slice picker must include a 'discuss_queued_milestone' action id for queued milestones",
+    );
+    assert.ok(
+      source.includes("Discuss a queued milestone"),
+      "slice picker must label the queued milestone action clearly",
+    );
+  });
+  test("11. queued milestone picker labels entries with [queued]", () => {
+    const source = readGuidedFlowSource();
+    assert.ok(
+      source.includes("[queued]"),
+      "queued milestone picker must label entries with [queued] to distinguish from active",
+    );
+  });
+});

package/src/resources/extensions/gsd/tests/forensics-error-filter.test.ts ADDED Viewed

@@ -0,0 +1,121 @@
+/**
+ * Regression test for #2539: extractTrace should not count benign bash
+ * exit-code-1 (grep no-match) or user skips as errors.
+ */
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+import { extractTrace } from "../session-forensics.ts";
+/**
+ * Build a minimal JSONL entry pair: assistant tool_use → toolResult.
+ * This is the shape extractTrace() expects from session activity files.
+ */
+function makeToolPair(
+  toolName: string,
+  input: Record<string, unknown>,
+  resultText: string,
+  isError: boolean,
+): unknown[] {
+  const toolCallId = `toolu_${Math.random().toString(36).slice(2, 10)}`;
+  return [
+    {
+      type: "message",
+      message: {
+        role: "assistant",
+        content: [
+          {
+            type: "toolCall",
+            id: toolCallId,
+            name: toolName,
+            arguments: input,
+          },
+        ],
+      },
+    },
+    {
+      type: "message",
+      message: {
+        role: "toolResult",
+        toolCallId,
+        toolName,
+        isError,
+        content: [{ type: "text", text: resultText }],
+      },
+    },
+  ];
+}
+describe("extractTrace error filtering (#2539)", () => {
+  test("grep exit-code-1 (no matches) is not counted as an error", () => {
+    const entries = makeToolPair(
+      "bash",
+      { command: "grep -rn 'nonexistent' src/" },
+      "(no output)\nCommand exited with code 1",
+      true,
+    );
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 0, "grep no-match should not be an error");
+  });
+  test("user skip is not counted as an error", () => {
+    const entries = makeToolPair(
+      "bash",
+      { command: "npm run test" },
+      "Skipped due to queued user message",
+      true,
+    );
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 0, "user skip should not be an error");
+  });
+  test("real bash error is still counted", () => {
+    const entries = makeToolPair(
+      "bash",
+      { command: "cat /nonexistent" },
+      "cat: /nonexistent: No such file or directory\nCommand exited with code 1",
+      true,
+    );
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 1, "real error should still be counted");
+    assert.match(trace.errors[0], /No such file or directory/);
+  });
+  test("non-bash tool error is still counted", () => {
+    const entries = makeToolPair(
+      "edit",
+      { path: "foo.ts", oldText: "x", newText: "y" },
+      "oldText not found in file",
+      true,
+    );
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 1, "non-bash tool errors should still be counted");
+  });
+  test("mixed entries: only real errors are counted", () => {
+    const entries = [
+      // benign grep no-match
+      ...makeToolPair("bash", { command: "grep -rn 'pattern' src/" }, "(no output)\nCommand exited with code 1", true),
+      // user skip
+      ...makeToolPair("bash", { command: "npm test" }, "Skipped due to queued user message", true),
+      // real error
+      ...makeToolPair("bash", { command: "node broken.js" }, "SyntaxError: Unexpected token\nCommand exited with code 1", true),
+      // successful command (not an error)
+      ...makeToolPair("bash", { command: "echo hello" }, "hello", false),
+    ];
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 1, "only the real error should be counted");
+    assert.match(trace.errors[0], /SyntaxError/);
+  });
+  test("exit code 1 with actual output is still an error", () => {
+    const entries = makeToolPair(
+      "bash",
+      { command: "npm run lint" },
+      "src/foo.ts:10:5 - error TS2304: Cannot find name 'x'\nCommand exited with code 1",
+      true,
+    );
+    const trace = extractTrace(entries);
+    assert.equal(trace.errors.length, 1, "lint error with output should be counted");
+  });
+});