npm - gsd-pi - Versions diffs - 2.22.0 → 2.24.0 - Mend

gsd-pi 2.22.0 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228)

package/src/resources/extensions/gsd/auto.ts CHANGED

@@ -108,6 +108,7 @@ import {
   autoWorktreeBranch,
 } from "./auto-worktree.js";
 import { pruneQueueOrder } from "./queue-order.js";
+import { consumeSignal } from "./session-status-io.js";
 import { showNextAction } from "../shared/next-action-ui.js";
 import { debugLog, debugTime, debugCount, debugPeak, enableDebug, isDebugEnabled, writeDebugSummary, getDebugLogPath } from "./debug-logger.js";
 import {
@@ -125,6 +126,18 @@ import {
   reconcileMergeState,
 } from "./auto-recovery.js";
 import { resolveDispatch, resetRewriteCircuitBreaker } from "./auto-dispatch.js";
+import {
+  buildResearchSlicePrompt,
+  buildResearchMilestonePrompt,
+  buildPlanSlicePrompt,
+  buildPlanMilestonePrompt,
+  buildExecuteTaskPrompt,
+  buildCompleteSlicePrompt,
+  buildCompleteMilestonePrompt,
+  buildReassessRoadmapPrompt,
+  buildRunUatPrompt,
+  buildReplanSlicePrompt,
+} from "./auto-prompts.js";
 import {
   type AutoDashboardData,
   updateProgressWidget as _updateProgressWidget,
@@ -183,6 +196,35 @@ function syncStateToProjectRoot(worktreePath: string, projectRoot: string, miles
       cpSync(srcMilestone, dstMilestone, { recursive: true, force: true });
     }
   } catch { /* non-fatal */ }
+  // 3. Merge completed-units.json (set-union of both locations)
+  // Prevents already-completed units from being re-dispatched after crash/restart.
+  const srcKeysFile = join(wtGsd, "completed-units.json");
+  const dstKeysFile = join(prGsd, "completed-units.json");
+  if (existsSync(srcKeysFile)) {
+    try {
+      const srcKeys: string[] = JSON.parse(readFileSync(srcKeysFile, "utf8"));
+      let dstKeys: string[] = [];
+      if (existsSync(dstKeysFile)) {
+        try { dstKeys = JSON.parse(readFileSync(dstKeysFile, "utf8")); } catch { /* ignore corrupt dst */ }
+      }
+      const merged = [...new Set([...dstKeys, ...srcKeys])];
+      writeFileSync(dstKeysFile, JSON.stringify(merged, null, 2));
+    } catch { /* non-fatal */ }
+  }
+  // 4. Runtime records — unit dispatch state used by selfHealRuntimeRecords().
+  // Without this, a crash during a unit leaves the runtime record only in the
+  // worktree. If the next session resolves basePath before worktree re-entry,
+  // selfHeal can't find or clear the stale record (#769).
+  try {
+    const srcRuntime = join(wtGsd, "runtime", "units");
+    const dstRuntime = join(prGsd, "runtime", "units");
+    if (existsSync(srcRuntime)) {
+      mkdirSync(dstRuntime, { recursive: true });
+      cpSync(srcRuntime, dstRuntime, { recursive: true, force: true });
+    }
+  } catch { /* non-fatal */ }
 }
 // ─── State ────────────────────────────────────────────────────────────────────
@@ -211,6 +253,11 @@ const MAX_LIFETIME_DISPATCHES = 6;
 /** Tracks recovery attempt count per unit for backoff and diagnostics. */
 const unitRecoveryCount = new Map<string, number>();
+/** Track consecutive skips per unit — catches infinite skip loops where deriveState
+ *  keeps returning the same already-completed unit. Reset on any real dispatch. */
+const unitConsecutiveSkips = new Map<string, number>();
+const MAX_CONSECUTIVE_SKIPS = 3;
 /** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */
 const completedKeySet = new Set<string>();
@@ -297,6 +344,9 @@ let currentUnit: { type: string; id: string; startedAt: number } | null = null;
 /** Track dynamic routing decision for the current unit (for metrics) */
 let currentUnitRouting: { tier: string; modelDowngraded: boolean } | null = null;
+/** Queue of quick-task captures awaiting dispatch after triage resolution */
+let pendingQuickTasks: import("./captures.js").CaptureEntry[] = [];
 /**
  * Model captured at auto-mode start. Used to prevent model bleed between
  * concurrent GSD instances sharing the same global settings.json (#650).
@@ -334,14 +384,19 @@ let lastBaselineCharCount: number | undefined;
 /** SIGTERM handler registered while auto-mode is active — cleared on stop/pause. */
 let _sigtermHandler: (() => void) | null = null;
-/** Tool calls currently being executed — prevents false idle detection during long-running tools. */
-const inFlightTools = new Set<string>();
+/**
+ * Tool calls currently being executed — prevents false idle detection during long-running tools.
+ * Maps toolCallId → start timestamp (ms) so the idle watchdog can detect tools that have been
+ * running suspiciously long (e.g., a Bash command hung because `&` kept stdout open).
+ */
+const inFlightTools = new Map<string, number>();
-type BudgetAlertLevel = 0 | 75 | 90 | 100;
+type BudgetAlertLevel = 0 | 75 | 80 | 90 | 100;
 export function getBudgetAlertLevel(budgetPct: number): BudgetAlertLevel {
   if (budgetPct >= 1.0) return 100;
   if (budgetPct >= 0.90) return 90;
+  if (budgetPct >= 0.80) return 80;
   if (budgetPct >= 0.75) return 75;
   return 0;
 }
@@ -414,11 +469,11 @@ export function isAutoPaused(): boolean {
 /**
  * Mark a tool execution as in-flight. Called from index.ts on tool_execution_start.
- * Prevents the idle watchdog from declaring the agent idle while tools are executing.
+ * Records start time so the idle watchdog can detect tools hung longer than the idle timeout.
  */
 export function markToolStart(toolCallId: string): void {
   if (!active) return;
-  inFlightTools.add(toolCallId);
+  inFlightTools.set(toolCallId, Date.now());
 }
 /**
@@ -428,6 +483,16 @@ export function markToolEnd(toolCallId: string): void {
   inFlightTools.delete(toolCallId);
 }
+/**
+ * Returns the age (ms) of the oldest currently in-flight tool, or 0 if none.
+ * Exported for testing.
+ */
+export function getOldestInFlightToolAgeMs(): number {
+  if (inFlightTools.size === 0) return 0;
+  const oldestStart = Math.min(...inFlightTools.values());
+  return Date.now() - oldestStart;
+}
 /**
  * Return the base path to use for the auto.lock file.
  * Always uses the original project root (not the worktree) so that
@@ -518,11 +583,7 @@ function startDispatchGapWatchdog(ctx: ExtensionContext, pi: ExtensionAPI): void
       await dispatchNextUnit(ctx, pi);
     } catch (retryErr) {
       const message = retryErr instanceof Error ? retryErr.message : String(retryErr);
-      ctx.ui.notify(
-        `Dispatch gap recovery failed: ${message}. Stopping auto-mode.`,
-        "error",
-      );
-      await stopAuto(ctx, pi);
+      await stopAuto(ctx, pi, `Dispatch gap recovery failed: ${message}`);
       return;
     }
@@ -530,17 +591,14 @@ function startDispatchGapWatchdog(ctx: ExtensionContext, pi: ExtensionAPI): void
     // (no sendMessage called → no timeout set), auto-mode is permanently
     // stalled. Stop cleanly instead of leaving it active but idle (#537).
     if (active && !unitTimeoutHandle && !wrapupWarningHandle) {
-      ctx.ui.notify(
-        "Auto-mode stalled — no dispatchable unit found after retry. Stopping. Run /gsd auto to restart.",
-        "warning",
-      );
-      await stopAuto(ctx, pi);
+      await stopAuto(ctx, pi, "Stalled — no dispatchable unit after retry");
     }
   }, DISPATCH_GAP_TIMEOUT_MS);
 }
-export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promise<void> {
+export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI, reason?: string): Promise<void> {
   if (!active && !paused) return;
+  const reasonSuffix = reason ? ` — ${reason}` : "";
   clearUnitTimeout();
   if (lockBase()) clearLock(lockBase());
   clearSkillSnapshot();
@@ -592,11 +650,11 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
   if (ledger && ledger.units.length > 0) {
     const totals = getProjectTotals(ledger.units);
     ctx?.ui.notify(
-      `Auto-mode stopped. Session: ${formatCost(totals.cost)} · ${formatTokenCount(totals.tokens.total)} tokens · ${ledger.units.length} units`,
+      `Auto-mode stopped${reasonSuffix}. Session: ${formatCost(totals.cost)} · ${formatTokenCount(totals.tokens.total)} tokens · ${ledger.units.length} units`,
       "info",
     );
   } else {
-    ctx?.ui.notify("Auto-mode stopped.", "info");
+    ctx?.ui.notify(`Auto-mode stopped${reasonSuffix}.`, "info");
   }
   // Sync disk state so next resume starts from accurate state
@@ -621,6 +679,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
   stepMode = false;
   unitDispatchCount.clear();
   unitRecoveryCount.clear();
+  unitConsecutiveSkips.clear();
   inFlightTools.clear();
   lastBudgetAlertLevel = 0;
   unitLifetimeDispatches.clear();
@@ -629,6 +688,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
   currentMilestoneId = null;
   originalBasePath = "";
   completedUnits = [];
+  pendingQuickTasks = [];
   clearSliceProgressCache();
   clearActivityLogState();
   resetProactiveHealing();
@@ -710,6 +770,7 @@ export async function startAuto(
     basePath = base;
     unitDispatchCount.clear();
     unitLifetimeDispatches.clear();
+    unitConsecutiveSkips.clear();
     // Re-initialize metrics in case ledger was lost during pause
     if (!getLedger()) initMetrics(base);
     // Ensure milestone ID is set on git service for integration branch resolution
@@ -782,6 +843,9 @@ export async function startAuto(
       pausedSessionFile = null;
     }
+    // Write lock on resume so cross-process status detection works (#723).
+    writeLock(lockBase(), "resuming", currentMilestoneId ?? "unknown", completedUnits.length);
     await dispatchNextUnit(ctx, pi);
     return;
   }
@@ -988,6 +1052,7 @@ export async function startAuto(
   basePath = base;
   unitDispatchCount.clear();
   unitRecoveryCount.clear();
+  unitConsecutiveSkips.clear();
   lastBudgetAlertLevel = 0;
   unitLifetimeDispatches.clear();
   completedKeySet.clear();
@@ -998,6 +1063,7 @@ export async function startAuto(
   autoStartTime = Date.now();
   resourceSyncedAtOnStart = readResourceSyncedAt();
   completedUnits = [];
+  pendingQuickTasks = [];
   currentUnit = null;
   currentMilestoneId = state.activeMilestone?.id ?? null;
   originalModelId = ctx.model?.id ?? null;
@@ -1052,6 +1118,13 @@ export async function startAuto(
       }
       // Re-register SIGTERM handler with the original basePath (lock lives there)
       registerSigtermHandler(originalBasePath);
+      // After worktree entry, load completed keys from BOTH locations (project root
+      // + worktree) so the in-memory set is the union. Prevents re-dispatch of units
+      // completed in either location after crash/restart (#769).
+      if (basePath !== originalBasePath) {
+        loadPersistedKeys(basePath, completedKeySet);
+      }
     } catch (err) {
       // Worktree creation is non-fatal — continue in the project root.
       ctx.ui.notify(
@@ -1088,11 +1161,12 @@ export async function startAuto(
     }
   }
-  // Initialize metrics — loads existing ledger from disk
-  initMetrics(base);
+  // Initialize metrics — loads existing ledger from disk.
+  // Use basePath (not base) so worktree-mode reads the worktree ledger (#769).
+  initMetrics(basePath);
   // Initialize routing history for adaptive learning
-  initRoutingHistory(base);
+  initRoutingHistory(basePath);
   // Capture the session's current model at auto-mode start (#650).
   // This prevents model bleed when multiple GSD instances share the
@@ -1116,6 +1190,11 @@ export async function startAuto(
     : "Will loop until milestone complete.";
   ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");
+  // Write initial lock file immediately so cross-process status detection
+  // works even before the first unit is dispatched (#723).
+  // The lock is updated with unit-specific info on each dispatch and cleared on stop.
+  writeLock(lockBase(), "starting", currentMilestoneId ?? "unknown", 0);
   // Secrets collection gate — collect pending secrets before first dispatch
   const mid = state.activeMilestone!.id;
   try {
@@ -1138,8 +1217,10 @@ export async function startAuto(
     );
   }
-  // Self-heal: clear stale runtime records where artifacts already exist
-  await selfHealRuntimeRecords(base, ctx, completedKeySet);
+  // Self-heal: clear stale runtime records where artifacts already exist.
+  // Use basePath (not base) — in worktree mode, basePath points to the worktree
+  // where runtime records and artifacts actually live (#769).
+  await selfHealRuntimeRecords(basePath, ctx, completedKeySet);
   // Self-heal: remove stale .git/index.lock from prior crash.
   // A stale lock file blocks all git operations (commit, merge, checkout).
@@ -1205,6 +1286,27 @@ export async function handleAgentEnd(
   // Unit completed — clear its timeout
   clearUnitTimeout();
+    // ── Parallel worker signal check ─────────────────────────────────────
+    // When running as a parallel worker (GSD_MILESTONE_LOCK set), check for
+    // coordinator signals before dispatching the next unit.
+    const milestoneLock = process.env.GSD_MILESTONE_LOCK;
+    if (milestoneLock) {
+      const signal = consumeSignal(basePath, milestoneLock);
+      if (signal) {
+        if (signal.signal === "stop") {
+          _handlingAgentEnd = false;
+          await stopAuto(ctx, pi);
+          return;
+        }
+        if (signal.signal === "pause") {
+          _handlingAgentEnd = false;
+          await pauseAuto(ctx, pi);
+          return;
+        }
+        // "resume" and "rebase" signals are handled elsewhere or no-op here
+      }
+    }
   // Invalidate all caches — the unit just completed and may have
   // written planning files (task summaries, roadmap checkboxes, etc.)
   invalidateAllCaches();
@@ -1297,6 +1399,53 @@ export async function handleAgentEnd(
       }
     }
+    // ── Post-triage: execute actionable resolutions (inject, replan, queue quick-tasks) ──
+    // After a triage-captures unit completes, the LLM has classified captures and
+    // updated CAPTURES.md. Now we execute those classifications: inject tasks into
+    // the plan, write replan triggers, and queue quick-tasks for dispatch.
+    if (currentUnit.type === "triage-captures") {
+      try {
+        const { executeTriageResolutions } = await import("./triage-resolution.js");
+        const state = await deriveState(basePath);
+        const mid = state.activeMilestone?.id;
+        const sid = state.activeSlice?.id;
+        if (mid && sid) {
+          const triageResult = executeTriageResolutions(basePath, mid, sid);
+          if (triageResult.injected > 0) {
+            ctx.ui.notify(
+              `Triage: injected ${triageResult.injected} task${triageResult.injected === 1 ? "" : "s"} into ${sid} plan.`,
+              "info",
+            );
+          }
+          if (triageResult.replanned > 0) {
+            ctx.ui.notify(
+              `Triage: replan trigger written for ${sid} — next dispatch will enter replanning.`,
+              "info",
+            );
+          }
+          if (triageResult.quickTasks.length > 0) {
+            // Queue quick-tasks for dispatch. They'll be picked up by the
+            // quick-task dispatch block below the triage check.
+            for (const qt of triageResult.quickTasks) {
+              pendingQuickTasks.push(qt);
+            }
+            ctx.ui.notify(
+              `Triage: ${triageResult.quickTasks.length} quick-task${triageResult.quickTasks.length === 1 ? "" : "s"} queued for execution.`,
+              "info",
+            );
+          }
+          for (const action of triageResult.actions) {
+            process.stderr.write(`gsd-triage: ${action}\n`);
+          }
+        }
+      } catch (err) {
+        // Non-fatal — triage resolution failure shouldn't block dispatch
+        process.stderr.write(`gsd-triage: resolution execution failed: ${(err as Error).message}\n`);
+      }
+    }
     // ── Path A fix: verify artifact and persist completion before re-entering dispatch ──
     // After doctor + rebuildState, check whether the just-completed unit actually
     // produced its expected artifact. If so, persist the completion key now so the
@@ -1391,7 +1540,7 @@ export async function handleAgentEnd(
       const result = await cmdCtx!.newSession();
       if (result.cancelled) {
         resetHookState();
-        await stopAuto(ctx, pi);
+        await stopAuto(ctx, pi, "Hook session cancelled");
         return;
       }
       const sessionFile = ctx.sessionManager.getSessionFile();
@@ -1521,7 +1670,7 @@ export async function handleAgentEnd(
               return;
             }
             const sessionFile = ctx.sessionManager.getSessionFile();
-            writeLock(basePath, triageUnitType, triageUnitId, completedUnits.length, sessionFile);
+            writeLock(lockBase(), triageUnitType, triageUnitId, completedUnits.length, sessionFile);
             // Start unit timeout for triage (use same supervisor config as hooks)
             clearUnitTimeout();
@@ -1551,6 +1700,85 @@ export async function handleAgentEnd(
     }
   }
+  // ── Quick-task dispatch: execute queued quick-tasks from triage resolution ──
+  // Quick-tasks are self-contained one-off tasks that don't modify the plan.
+  // They're queued during post-triage resolution and dispatched here one at a time.
+  if (
+    !stepMode &&
+    pendingQuickTasks.length > 0 &&
+    currentUnit &&
+    currentUnit.type !== "quick-task"
+  ) {
+    try {
+      const capture = pendingQuickTasks.shift()!;
+      const { buildQuickTaskPrompt } = await import("./triage-resolution.js");
+      const { markCaptureExecuted } = await import("./captures.js");
+      const prompt = buildQuickTaskPrompt(capture);
+      ctx.ui.notify(
+        `Executing quick-task: ${capture.id} — "${capture.text}"`,
+        "info",
+      );
+      // Close out previous unit metrics
+      if (currentUnit) {
+        const modelId = ctx.model?.id ?? "unknown";
+        snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId);
+        saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
+      }
+      // Dispatch quick-task as a new unit
+      const qtUnitType = "quick-task";
+      const qtUnitId = `${currentMilestoneId}/${capture.id}`;
+      const qtStartedAt = Date.now();
+      currentUnit = { type: qtUnitType, id: qtUnitId, startedAt: qtStartedAt };
+      writeUnitRuntimeRecord(basePath, qtUnitType, qtUnitId, qtStartedAt, {
+        phase: "dispatched",
+        wrapupWarningSent: false,
+        timeoutAt: null,
+        lastProgressAt: qtStartedAt,
+        progressCount: 0,
+        lastProgressKind: "dispatch",
+      });
+      const state = await deriveState(basePath);
+      updateProgressWidget(ctx, qtUnitType, qtUnitId, state);
+      const result = await cmdCtx!.newSession();
+      if (result.cancelled) {
+        await stopAuto(ctx, pi);
+        return;
+      }
+      const sessionFile = ctx.sessionManager.getSessionFile();
+      writeLock(lockBase(), qtUnitType, qtUnitId, completedUnits.length, sessionFile);
+      // Mark capture as executed now that the unit is dispatched
+      markCaptureExecuted(basePath, capture.id);
+      // Start unit timeout for quick-task
+      clearUnitTimeout();
+      const supervisor = resolveAutoSupervisorConfig();
+      const qtTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
+      unitTimeoutHandle = setTimeout(async () => {
+        unitTimeoutHandle = null;
+        if (!active) return;
+        ctx.ui.notify(
+          `Quick-task ${capture.id} exceeded timeout. Pausing auto-mode.`,
+          "warning",
+        );
+        await pauseAuto(ctx, pi);
+      }, qtTimeoutMs);
+      if (!active) return;
+      pi.sendMessage(
+        { customType: "gsd-auto", content: prompt, display: verbose },
+        { triggerTurn: true },
+      );
+      return; // handleAgentEnd will fire again when quick-task session completes
+    } catch {
+      // Non-fatal — proceed to normal dispatch
+    }
+  }
   // In step mode, pause and show a wizard instead of immediately dispatching
   if (stepMode) {
     await showStepWizard(ctx, pi);
@@ -1610,7 +1838,15 @@ async function showStepWizard(
   // If no active milestone or everything is complete, stop
   if (!mid || state.phase === "complete") {
-    await stopAuto(ctx, pi);
+    const incomplete = state.registry.filter(m => m.status !== "complete");
+    if (incomplete.length > 0 && state.phase !== "complete" && state.phase !== "blocked") {
+      const ids = incomplete.map(m => m.id).join(", ");
+      const diag = `basePath=${basePath}, milestones=[${state.registry.map(m => `${m.id}:${m.status}`).join(", ")}], phase=${state.phase}`;
+      ctx.ui.notify(`Unexpected: ${incomplete.length} incomplete milestone(s) (${ids}) but no active milestone.\n   Diagnostic: ${diag}`, "error");
+      await stopAuto(ctx, pi, `No active milestone — ${incomplete.length} incomplete (${ids})`);
+    } else {
+      await stopAuto(ctx, pi, state.phase === "complete" ? "All work complete" : "No active milestone");
+    }
     return;
   }
@@ -1733,8 +1969,7 @@ async function dispatchNextUnit(
   // doesn't provide. Stop gracefully instead of crashing.
   const staleMsg = checkResourcesStale();
   if (staleMsg) {
-    await stopAuto(ctx, pi);
-    ctx.ui.notify(staleMsg, "error");
+    await stopAuto(ctx, pi, staleMsg);
     return;
   }
@@ -1788,6 +2023,7 @@ async function dispatchNextUnit(
     // Reset stuck detection for new milestone
     unitDispatchCount.clear();
     unitRecoveryCount.clear();
+  unitConsecutiveSkips.clear();
     unitLifetimeDispatches.clear();
     // Clear completed-units.json for the finished milestone
     try {
@@ -1880,8 +2116,25 @@ async function dispatchNextUnit(
       snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
       saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
     }
-    sendDesktopNotification("GSD", "All milestones complete!", "success", "milestone");
-    await stopAuto(ctx, pi);
+    const incomplete = state.registry.filter(m => m.status !== "complete");
+    if (incomplete.length === 0) {
+      // Genuinely all complete
+      sendDesktopNotification("GSD", "All milestones complete!", "success", "milestone");
+      await stopAuto(ctx, pi, "All milestones complete");
+    } else if (state.phase === "blocked") {
+      // Milestones exist but are dependency-blocked
+      const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
+      await stopAuto(ctx, pi, blockerMsg);
+      ctx.ui.notify(`${blockerMsg}. Fix and run /gsd auto.`, "warning");
+      sendDesktopNotification("GSD", blockerMsg, "error", "attention");
+    } else {
+      // Milestones with remaining work exist but none became active — unexpected
+      const ids = incomplete.map(m => m.id).join(", ");
+      const diag = `basePath=${basePath}, milestones=[${state.registry.map(m => `${m.id}:${m.status}`).join(", ")}], phase=${state.phase}`;
+      ctx.ui.notify(`Unexpected: ${incomplete.length} incomplete milestone(s) (${ids}) but no active milestone.\n   Diagnostic: ${diag}`, "error");
+      await stopAuto(ctx, pi, `No active milestone — ${incomplete.length} incomplete (${ids}), see diagnostic above`);
+    }
     return;
   }
@@ -1889,8 +2142,8 @@ async function dispatchNextUnit(
   // The !mid check above returns early if mid is falsy; midTitle comes from
   // the same object so it should always be present when mid is.
   if (!midTitle) {
-    await stopAuto(ctx, pi);
-    return;
+    midTitle = mid; // Defensive fallback: use milestone ID as title
+    ctx.ui.notify(`Milestone ${mid} has no title in roadmap — using ID as fallback.`, "warning");
   }
   // ── Mid-merge safety check: detect leftover merge state from a prior session ──
@@ -1908,7 +2161,10 @@ async function dispatchNextUnit(
       snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
       saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
     }
-    await stopAuto(ctx, pi);
+    const noMilestoneReason = !mid
+      ? "No active milestone after merge reconciliation"
+      : `Milestone ${mid} has no title after reconciliation`;
+    await stopAuto(ctx, pi, noMilestoneReason);
     return;
   }
@@ -1983,7 +2239,7 @@ async function dispatchNextUnit(
       }
     }
     sendDesktopNotification("GSD", `Milestone ${mid} complete!`, "success", "milestone");
-    await stopAuto(ctx, pi);
+    await stopAuto(ctx, pi, `Milestone ${mid} complete`);
     return;
   }
@@ -1993,8 +2249,8 @@ async function dispatchNextUnit(
       snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
       saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
     }
-    await stopAuto(ctx, pi);
     const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
+    await stopAuto(ctx, pi, blockerMsg);
     ctx.ui.notify(`${blockerMsg}. Fix and run /gsd auto.`, "warning");
     sendDesktopNotification("GSD", blockerMsg, "error", "attention");
     return;
@@ -2020,9 +2276,8 @@ async function dispatchNextUnit(
       const msg = `Budget ceiling ${formatCost(budgetCeiling)} reached (spent ${formatCost(totalCost)}).`;
       lastBudgetAlertLevel = newBudgetAlertLevel;
       if (budgetEnforcementAction === "halt") {
-        ctx.ui.notify(`${msg} Stopping auto-mode.`, "error");
         sendDesktopNotification("GSD", msg, "error", "budget");
-        await stopAuto(ctx, pi);
+        await stopAuto(ctx, pi, "Budget ceiling reached");
         return;
       }
       if (budgetEnforcementAction === "pause") {
@@ -2037,6 +2292,10 @@ async function dispatchNextUnit(
       lastBudgetAlertLevel = newBudgetAlertLevel;
       ctx.ui.notify(`Budget 90%: ${formatCost(totalCost)} / ${formatCost(budgetCeiling)}`, "warning");
       sendDesktopNotification("GSD", `Budget 90%: ${formatCost(totalCost)} / ${formatCost(budgetCeiling)}`, "warning", "budget");
+    } else if (newBudgetAlertLevel === 80) {
+      lastBudgetAlertLevel = newBudgetAlertLevel;
+      ctx.ui.notify(`Approaching budget ceiling — 80%: ${formatCost(totalCost)} / ${formatCost(budgetCeiling)}`, "warning");
+      sendDesktopNotification("GSD", `Approaching budget ceiling — 80%: ${formatCost(totalCost)} / ${formatCost(budgetCeiling)}`, "warning", "budget");
     } else if (newBudgetAlertLevel === 75) {
       lastBudgetAlertLevel = newBudgetAlertLevel;
       ctx.ui.notify(`Budget 75%: ${formatCost(totalCost)} / ${formatCost(budgetCeiling)}`, "info");
@@ -2101,8 +2360,7 @@ async function dispatchNextUnit(
       snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId, { promptCharCount: lastPromptCharCount, baselineCharCount: lastBaselineCharCount, ...(currentUnitRouting ?? {}) });
       saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
     }
-    await stopAuto(ctx, pi);
-    ctx.ui.notify(dispatchResult.reason, dispatchResult.level);
+    await stopAuto(ctx, pi, dispatchResult.reason);
     return;
   }
@@ -2142,8 +2400,7 @@ async function dispatchNextUnit(
   const priorSliceBlocker = getPriorSliceCompletionBlocker(basePath, getMainBranch(basePath), unitType, unitId);
   if (priorSliceBlocker) {
-    await stopAuto(ctx, pi);
-    ctx.ui.notify(priorSliceBlocker, "error");
+    await stopAuto(ctx, pi, priorSliceBlocker);
     return;
   }
@@ -2155,6 +2412,26 @@ async function dispatchNextUnit(
     // Cross-validate: does the expected artifact actually exist?
     const artifactExists = verifyExpectedArtifact(unitType, unitId, basePath);
     if (artifactExists) {
+      // Guard against infinite skip loops: if deriveState keeps returning the
+      // same completed unit, consecutive skips will trip this breaker. Evict the
+      // key so the next dispatch forces full reconciliation instead of looping.
+      const skipCount = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
+      unitConsecutiveSkips.set(idempotencyKey, skipCount);
+      if (skipCount > MAX_CONSECUTIVE_SKIPS) {
+        unitConsecutiveSkips.delete(idempotencyKey);
+        completedKeySet.delete(idempotencyKey);
+        removePersistedKey(basePath, idempotencyKey);
+        invalidateStateCache();
+        ctx.ui.notify(
+          `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`,
+          "warning",
+        );
+        _skipDepth++;
+        await new Promise(r => setTimeout(r, 50));
+        await dispatchNextUnit(ctx, pi);
+        _skipDepth = Math.max(0, _skipDepth - 1);
+        return;
+      }
       ctx.ui.notify(
         `Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`,
         "info",
@@ -2184,6 +2461,24 @@ async function dispatchNextUnit(
     persistCompletedKey(basePath, idempotencyKey);
     completedKeySet.add(idempotencyKey);
     invalidateStateCache();
+    // Same consecutive-skip guard as the idempotency path above.
+    const skipCount2 = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
+    unitConsecutiveSkips.set(idempotencyKey, skipCount2);
+    if (skipCount2 > MAX_CONSECUTIVE_SKIPS) {
+      unitConsecutiveSkips.delete(idempotencyKey);
+      completedKeySet.delete(idempotencyKey);
+      removePersistedKey(basePath, idempotencyKey);
+      invalidateStateCache();
+      ctx.ui.notify(
+        `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount2} times without advancing. Evicting completion record and forcing reconciliation.`,
+        "warning",
+      );
+      _skipDepth++;
+      await new Promise(r => setTimeout(r, 50));
+      await dispatchNextUnit(ctx, pi);
+      _skipDepth = Math.max(0, _skipDepth - 1);
+      return;
+    }
     ctx.ui.notify(
       `Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`,
       "info",
@@ -2199,6 +2494,8 @@ async function dispatchNextUnit(
   // Pattern A→B→A→B would reset retryCount every time; this map catches it.
   const dispatchKey = `${unitType}/${unitId}`;
   const prevCount = unitDispatchCount.get(dispatchKey) ?? 0;
+  // Real dispatch reached — clear the consecutive-skip counter for this unit.
+  unitConsecutiveSkips.delete(dispatchKey);
   debugLog("dispatch-unit", {
     type: unitType,
@@ -2220,9 +2517,9 @@ async function dispatchNextUnit(
     }
     saveActivityLog(ctx, basePath, unitType, unitId);
     const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
-    await stopAuto(ctx, pi);
+    await stopAuto(ctx, pi, `Hard loop: ${unitType} ${unitId}`);
     ctx.ui.notify(
-      `Hard loop detected: ${unitType} ${unitId} dispatched ${lifetimeCount} times total (across reconciliation cycles). Stopping.${expected ? `\n   Expected artifact: ${expected}` : ""}\n   This may indicate deriveState() keeps returning the same unit despite artifacts existing.\n   Check .gsd/completed-units.json and the slice plan checkbox state.`,
+      `Hard loop detected: ${unitType} ${unitId} dispatched ${lifetimeCount} times total (across reconciliation cycles).${expected ? `\n   Expected artifact: ${expected}` : ""}\n   This may indicate deriveState() keeps returning the same unit despite artifacts existing.\n   Check .gsd/completed-units.json and the slice plan checkbox state.`,
       "error",
     );
     return;
@@ -2315,7 +2612,7 @@ async function dispatchNextUnit(
     const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
     const remediation = buildLoopRemediationSteps(unitType, unitId, basePath);
-    await stopAuto(ctx, pi);
+    await stopAuto(ctx, pi, `Loop: ${unitType} ${unitId}`);
     sendDesktopNotification("GSD", `Loop detected: ${unitType} ${unitId}`, "error", "error");
     ctx.ui.notify(
       `Loop detected: ${unitType} ${unitId} dispatched ${prevCount + 1} times total. Expected artifact not found.${expected ? `\n   Expected: ${expected}` : ""}${remediation ? `\n\n   Remediation steps:\n${remediation}` : "\n   Check branch state and .gsd/ artifacts."}`,
@@ -2456,8 +2753,7 @@ async function dispatchNextUnit(
   // Fresh session
   const result = await cmdCtx!.newSession();
   if (result.cancelled) {
-    await stopAuto(ctx, pi);
-    ctx.ui.notify("Auto-mode stopped.", "info");
+    await stopAuto(ctx, pi, "Session cancelled");
     return;
   }
@@ -2713,13 +3009,27 @@ async function dispatchNextUnit(
     if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
     // Agent has tool calls currently executing (await_job, long bash, etc.) —
-    // not idle, just waiting for tool completion.
+    // not idle, just waiting for tool completion. But only suppress recovery
+    // if the tool started recently. A tool in-flight for longer than the idle
+    // timeout is likely stuck — e.g., `python -m http.server 8080 &` keeps the
+    // shell's stdout/stderr open, causing the Bash tool to hang indefinitely.
     if (inFlightTools.size > 0) {
-      writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
-        lastProgressAt: Date.now(),
-        lastProgressKind: "tool-in-flight",
-      });
-      return;
+      const oldestStart = Math.min(...inFlightTools.values());
+      const toolAgeMs = Date.now() - oldestStart;
+      if (toolAgeMs < idleTimeoutMs) {
+        writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
+          lastProgressAt: Date.now(),
+          lastProgressKind: "tool-in-flight",
+        });
+        return;
+      }
+      // Oldest tool has been running >= idleTimeoutMs — treat as a stuck/hung
+      // tool (e.g., background process holding stdout open). Fall through to
+      // idle recovery without resetting the progress clock.
+      ctx.ui.notify(
+        `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`,
+        "warning",
+      );
     }
     // Before triggering recovery, check if the agent is actually producing
@@ -3144,6 +3454,14 @@ export {
   buildLoopRemediationSteps,
 } from "./auto-recovery.js";
+/**
+ * Test-only accessor for the skip-loop state.
+ * Returns the module's live consecutive-skip map itself (not a copy), so
+ * mutations made by a test are visible to the dispatcher.
+ * Not part of the public API.
+ */
+export function _getUnitConsecutiveSkips(): Map<string, number> {
+  return unitConsecutiveSkips;
+}
+/**
+ * Test-only reset for the skip-loop state: empties the consecutive-skip map
+ * in place. Not part of the public API.
+ */
+export function _resetUnitConsecutiveSkips(): void {
+  unitConsecutiveSkips.clear();
+}
+// Re-exported so unit tests can assert against the skip threshold.
+export { MAX_CONSECUTIVE_SKIPS };
 /**
  * Dispatch a hook unit directly, bypassing normal pre-dispatch hooks.
  * Used for manual hook triggers via /gsd run-hook.
@@ -3168,6 +3486,7 @@ export async function dispatchHookUnit(
     autoStartTime = Date.now();
     currentUnit = null;
     completedUnits = [];
+    pendingQuickTasks = [];
   }
   const hookUnitType = `hook/${hookName}`;
@@ -3248,3 +3567,192 @@ export async function dispatchHookUnit(
   return true;
 }
+// ─── Direct Phase Dispatch ────────────────────────────────────────────────────
+/**
+ * Dispatch one GSD phase on demand.
+ *
+ * Derives the current state from `base`, maps `phase` (compared after
+ * lower-casing) to a unit type, builds that unit's prompt, opens a new
+ * session and sends the prompt as a `gsd-dispatch` message with
+ * `display: false` that triggers a turn.
+ *
+ * Every failed precondition (no active milestone, slice or task; missing
+ * roadmap, completed slice or UAT file; unknown phase; cancelled session) is
+ * reported through `ctx.ui.notify` as a warning and ends the call without
+ * sending anything.
+ *
+ * Alias resolution: `research` and `plan` target the milestone while the
+ * derived phase is "pre-planning" and the active slice otherwise; `complete`
+ * targets the active slice only while the derived phase is "summarizing".
+ *
+ * @param ctx - Command context used for notifications and session creation.
+ * @param pi - Extension API used to send the dispatch message.
+ * @param phase - Phase name or alias, e.g. `plan`, `execute-task`, `uat`.
+ * @param base - Base path handed to state derivation, file lookups and prompt builders.
+ */
+export async function dispatchDirectPhase(
+  ctx: ExtensionCommandContext,
+  pi: ExtensionAPI,
+  phase: string,
+  base: string,
+): Promise<void> {
+  const state = await deriveState(base);
+  const mid = state.activeMilestone?.id;
+  if (!mid) {
+    ctx.ui.notify("Cannot dispatch: no active milestone.", "warning");
+    return;
+  }
+  const midTitle = state.activeMilestone?.title ?? "";
+  // Active slice is needed by most branches; read it once up front.
+  const sid = state.activeSlice?.id;
+  const sliceTitle = state.activeSlice?.title ?? "";
+  const requested = phase.toLowerCase();
+  // True when the requested phase is one of the given names/aliases.
+  const isOneOf = (...names: string[]): boolean => names.includes(requested);
+  let unitType: string;
+  let unitId: string;
+  let prompt: string;
+  if (isOneOf("research", "research-milestone", "research-slice")) {
+    // Bare "research" means milestone research only during pre-planning.
+    const milestoneLevel =
+      requested === "research-milestone" ||
+      (requested === "research" && state.phase === "pre-planning");
+    if (milestoneLevel) {
+      unitType = "research-milestone";
+      unitId = mid;
+      prompt = await buildResearchMilestonePrompt(mid, midTitle, base);
+    } else {
+      if (!sid) {
+        ctx.ui.notify("Cannot dispatch research-slice: no active slice.", "warning");
+        return;
+      }
+      unitType = "research-slice";
+      unitId = `${mid}/${sid}`;
+      prompt = await buildResearchSlicePrompt(mid, midTitle, sid, sliceTitle, base);
+    }
+  } else if (isOneOf("plan", "plan-milestone", "plan-slice")) {
+    // Bare "plan" means milestone planning only during pre-planning.
+    const milestoneLevel =
+      requested === "plan-milestone" ||
+      (requested === "plan" && state.phase === "pre-planning");
+    if (milestoneLevel) {
+      unitType = "plan-milestone";
+      unitId = mid;
+      prompt = await buildPlanMilestonePrompt(mid, midTitle, base);
+    } else {
+      if (!sid) {
+        ctx.ui.notify("Cannot dispatch plan-slice: no active slice.", "warning");
+        return;
+      }
+      unitType = "plan-slice";
+      unitId = `${mid}/${sid}`;
+      prompt = await buildPlanSlicePrompt(mid, midTitle, sid, sliceTitle, base);
+    }
+  } else if (isOneOf("execute", "execute-task")) {
+    const tid = state.activeTask?.id;
+    const taskTitle = state.activeTask?.title ?? "";
+    if (!sid) {
+      ctx.ui.notify("Cannot dispatch execute-task: no active slice.", "warning");
+      return;
+    }
+    if (!tid) {
+      ctx.ui.notify("Cannot dispatch execute-task: no active task.", "warning");
+      return;
+    }
+    unitType = "execute-task";
+    unitId = `${mid}/${sid}/${tid}`;
+    prompt = await buildExecuteTaskPrompt(mid, sid, sliceTitle, tid, taskTitle, base);
+  } else if (isOneOf("complete", "complete-slice", "complete-milestone")) {
+    // Bare "complete" means slice completion only while summarizing.
+    const milestoneLevel =
+      requested === "complete-milestone" ||
+      (requested === "complete" && state.phase !== "summarizing");
+    if (milestoneLevel) {
+      unitType = "complete-milestone";
+      unitId = mid;
+      prompt = await buildCompleteMilestonePrompt(mid, midTitle, base);
+    } else {
+      if (!sid) {
+        ctx.ui.notify("Cannot dispatch complete-slice: no active slice.", "warning");
+        return;
+      }
+      unitType = "complete-slice";
+      unitId = `${mid}/${sid}`;
+      prompt = await buildCompleteSlicePrompt(mid, midTitle, sid, sliceTitle, base);
+    }
+  } else if (isOneOf("reassess", "reassess-roadmap")) {
+    const roadmapPath = resolveMilestoneFile(base, mid, "ROADMAP");
+    const roadmapText = roadmapPath ? await loadFile(roadmapPath) : null;
+    if (!roadmapText) {
+      ctx.ui.notify("Cannot dispatch reassess-roadmap: no roadmap found.", "warning");
+      return;
+    }
+    const doneSlices = parseRoadmap(roadmapText).slices.filter(entry => entry.done);
+    if (doneSlices.length === 0) {
+      ctx.ui.notify("Cannot dispatch reassess-roadmap: no completed slices.", "warning");
+      return;
+    }
+    // Reassess against the last slice the roadmap lists as done.
+    const completedSliceId = doneSlices[doneSlices.length - 1].id;
+    unitType = "reassess-roadmap";
+    unitId = `${mid}/${completedSliceId}`;
+    prompt = await buildReassessRoadmapPrompt(mid, midTitle, completedSliceId, base);
+  } else if (isOneOf("uat", "run-uat")) {
+    if (!sid) {
+      ctx.ui.notify("Cannot dispatch run-uat: no active slice.", "warning");
+      return;
+    }
+    const uatFile = resolveSliceFile(base, mid, sid, "UAT");
+    if (!uatFile) {
+      ctx.ui.notify("Cannot dispatch run-uat: no UAT file found.", "warning");
+      return;
+    }
+    const uatContent = await loadFile(uatFile);
+    if (!uatContent) {
+      ctx.ui.notify("Cannot dispatch run-uat: UAT file is empty.", "warning");
+      return;
+    }
+    unitType = "run-uat";
+    unitId = `${mid}/${sid}`;
+    prompt = await buildRunUatPrompt(mid, sid, relSliceFile(base, mid, sid, "UAT"), uatContent, base);
+  } else if (isOneOf("replan", "replan-slice")) {
+    if (!sid) {
+      ctx.ui.notify("Cannot dispatch replan-slice: no active slice.", "warning");
+      return;
+    }
+    unitType = "replan-slice";
+    unitId = `${mid}/${sid}`;
+    prompt = await buildReplanSlicePrompt(mid, midTitle, sid, sliceTitle, base);
+  } else {
+    ctx.ui.notify(
+      `Unknown phase "${phase}". Valid phases: research, plan, execute, complete, reassess, uat, replan.`,
+      "warning",
+    );
+    return;
+  }
+  ctx.ui.notify(`Dispatching ${unitType} for ${unitId}...`, "info");
+  // The prompt is sent into a fresh session; bail out if the user cancels it.
+  const session = await ctx.newSession();
+  if (session.cancelled) {
+    ctx.ui.notify("Session creation cancelled.", "warning");
+    return;
+  }
+  pi.sendMessage(
+    { customType: "gsd-dispatch", content: prompt, display: false },
+    { triggerTurn: true },
+  );
+}