npm - gsd-pi - Versions diffs - 2.72.0-dev.de4c4b3 → 2.73.0-dev.1cfd50c - Mend

gsd-pi 2.72.0-dev.de4c4b3 → 2.73.0-dev.1cfd50c

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (259) hide show

package/README.md CHANGED Viewed

@@ -623,8 +623,10 @@ The best practice for working in teams is to ensure unique milestone names acros
 # ── GSD: Runtime / Ephemeral (per-developer, per-session) ──────────────────
 # Crash detection sentinel — PID lock, written per auto-mode session
 .gsd/auto.lock
-# Auto-mode dispatch tracker — prevents re-running completed units
-.gsd/completed-units.json
+# Auto-mode dispatch tracker — prevents re-running completed units (includes archived per-milestone files)
+.gsd/completed-units*.json
+# State manifest — workflow state for recovery
+.gsd/state-manifest.json
 # Derived state cache — regenerated from plan/roadmap files on disk
 .gsd/STATE.md
 # Per-developer token/cost accumulator
@@ -637,6 +639,14 @@ The best practice for working in teams is to ensure unique milestone names acros
 .gsd/worktrees/
 # Parallel orchestration IPC and worker status
 .gsd/parallel/
+# SQLite database and WAL sidecars — checkpoint state, forensics data
+.gsd/gsd.db*
+# Daily-rotated event journal — structured event log for forensics
+.gsd/journal/
+# Doctor run history — diagnostic check results
+.gsd/doctor-history.jsonl
+# Workflow event log — structured event stream
+.gsd/event-log.jsonl
 # Generated HTML reports (regenerable via /gsd export --html)
 .gsd/reports/
 # Session-specific interrupted-work markers

package/dist/cli.js CHANGED Viewed

@@ -5,8 +5,7 @@ import { agentDir, sessionsDir, authFilePath } from './app-paths.js';
 import { initResources, buildResourceLoader, getNewerManagedResourceVersion } from './resource-loader.js';
 import { ensureManagedTools } from './tool-bootstrap.js';
 import { loadStoredEnvKeys } from './wizard.js';
-import { migratePiCredentials } from './pi-migration.js';
-import { validateConfiguredModel } from './startup-model-validation.js';
+import { migratePiCredentials, getPiDefaultModelAndProvider } from './pi-migration.js';
 import { shouldMigrateAnthropicToClaudeCode } from './provider-migrations.js';
 import { shouldRunOnboarding, runOnboarding } from './onboarding.js';
 import chalk from 'chalk';
@@ -102,6 +101,41 @@ function parseCliArgs(argv) {
     }
     return flags;
 }
+/**
+ * Validate the configured default model against the registry and reset it if
+ * it no longer exists.  Must run AFTER extensions have registered their
+ * providers so that extension models (e.g. pi-claude-cli) are visible.
+ *
+ * A fallback is chosen only when no model is configured or the configured
+ * provider/model pair is absent from `modelRegistry.getAll()`; a model that is
+ * registered but merely missing from `getAvailable()` is left untouched. In
+ * the fallback case the default thinking level is also reset to 'off'.
+ *
+ * @param modelRegistry - registry exposing `getAll()` and `getAvailable()`
+ * @param settingsManager - reads and persists the default provider, model
+ *   and thinking level
+ */
+function validateConfiguredModel(modelRegistry, settingsManager) {
+    const configuredProvider = settingsManager.getDefaultProvider();
+    const configuredModel = settingsManager.getDefaultModel();
+    const allModels = modelRegistry.getAll();
+    const availableModels = modelRegistry.getAvailable();
+    // Falsy when either setting is missing, so no separate "not configured"
+    // check is needed below.
+    const configuredExists = Boolean(configuredProvider && configuredModel) &&
+        allModels.some((m) => m.provider === configuredProvider && m.id === configuredModel);
+    if (!configuredExists) {
+        // Model not configured at all, or removed from registry — pick a fallback.
+        // Only fires when the model is genuinely unknown (not just temporarily unavailable).
+        const piDefault = getPiDefaultModelAndProvider();
+        // Preference order: the Pi default, then OpenAI, then Anthropic, then
+        // whatever is first in the available list.
+        const preferred = (piDefault
+            ? availableModels.find((m) => m.provider === piDefault.provider && m.id === piDefault.model)
+            : undefined) ||
+            availableModels.find((m) => m.provider === 'openai' && m.id === 'gpt-5.4') ||
+            availableModels.find((m) => m.provider === 'openai') ||
+            availableModels.find((m) => m.provider === 'anthropic' && m.id === 'claude-opus-4-6') ||
+            availableModels.find((m) => m.provider === 'anthropic' && m.id.includes('opus')) ||
+            availableModels.find((m) => m.provider === 'anthropic') ||
+            availableModels[0];
+        if (preferred) {
+            settingsManager.setDefaultModelAndProvider(preferred.provider, preferred.id);
+        }
+    }
+    // NOTE(review): presumably the thinking level is cleared because the
+    // fallback model may not support the previous level — confirm intent.
+    if (settingsManager.getDefaultThinkingLevel() !== 'off' && !configuredExists) {
+        settingsManager.setDefaultThinkingLevel('off');
+    }
+}
 const cliFlags = parseCliArgs(process.argv);
 const isPrintMode = cliFlags.print || cliFlags.mode !== undefined;
 // Early resource-skew check — must run before TTY gate so version mismatch
@@ -320,8 +354,22 @@ if (!isPrintMode) {
 if (!isPrintMode && process.stdout.columns && process.stdout.columns < 40) {
     process.stderr.write(chalk.yellow(`[gsd] Terminal width is ${process.stdout.columns} columns (minimum recommended: 40). Output may be unreadable.\n`));
 }
-// --list-models: print available models and exit (no TTY needed)
+// --list-models: load extensions so that extension-registered providers (e.g.
+// pi-claude-cli) appear in the listing, then flush their pending registrations
+// into the model registry before printing.
 if (cliFlags.listModels !== undefined) {
+    exitIfManagedResourcesAreNewer(agentDir);
+    initResources(agentDir);
+    const listModelsLoader = new DefaultResourceLoader({
+        agentDir,
+        additionalExtensionPaths: cliFlags.extensions.length > 0 ? cliFlags.extensions : undefined,
+    });
+    await listModelsLoader.reload();
+    const listModelsExtensions = listModelsLoader.getExtensions();
+    for (const { name, config } of listModelsExtensions.runtime.pendingProviderRegistrations) {
+        modelRegistry.registerProvider(name, config);
+    }
+    listModelsExtensions.runtime.pendingProviderRegistrations = [];
     const models = modelRegistry.getAvailable();
     if (models.length === 0) {
         console.log('No models available. Set API keys in environment variables.');
@@ -461,6 +509,10 @@ if (isPrintMode) {
             process.stderr.write(`[gsd] ${prefix}: ${err.error}\n`);
         }
     }
+    // Validate configured model now that extension providers are registered.
+    // Must run after createAgentSession() which flushes pendingProviderRegistrations
+    // so extension models (e.g. pi-claude-cli) are visible in the registry.
+    validateConfiguredModel(modelRegistry, settingsManager);
     // Apply --model override if specified
     if (cliFlags.model) {
         const available = modelRegistry.getAvailable();
@@ -648,6 +700,10 @@ if (extensionsResult.errors.length > 0) {
         process.stderr.write(`[gsd] ${prefix}: ${err.error}\n`);
     }
 }
+// Validate configured model now that extension providers are registered.
+// Must run after createAgentSession() which flushes pendingProviderRegistrations
+// so extension models (e.g. pi-claude-cli) are visible in the registry.
+validateConfiguredModel(modelRegistry, settingsManager);
 // Restore scoped models from settings on startup.
 // The upstream InteractiveMode reads enabledModels from settings when /scoped-models is opened,
 // but doesn't apply them to the session at startup — so Ctrl+P cycles all models instead of

package/dist/onboarding.js CHANGED Viewed

@@ -277,6 +277,16 @@ async function runLlmStep(p, pc, authStorage) {
         p.log.info('Your Claude subscription will be used for inference. No API key needed.');
         // Store sentinel so hasAuth('claude-code') returns true on future boots
         authStorage.set('claude-code', { type: 'api_key', key: 'cli' });
+        // Persist claude-code as the default provider so the startup migration in
+        // cli.ts does not need to fire and the user is not left on "anthropic".
+        const settingsPath = join(agentDir, 'settings.json');
+        try {
+            const raw = existsSync(settingsPath) ? JSON.parse(readFileSync(settingsPath, 'utf-8')) : {};
+            raw.defaultProvider = 'claude-code';
+            mkdirSync(dirname(settingsPath), { recursive: true });
+            writeFileSync(settingsPath, JSON.stringify(raw, null, 2), 'utf-8');
+        }
+        catch { /* non-fatal — startup migration will catch it */ }
         return true;
     }
     // ── Step 2: Which provider? ──────────────────────────────────────────────

package/dist/resources/extensions/async-jobs/await-tool.js CHANGED Viewed

@@ -54,11 +54,14 @@ export function createAwaitTool(getManager) {
                     };
                 }
             }
-            // Mark all watched jobs as awaited upfront so the onJobComplete
-            // callback (which fires synchronously in the promise .then()) knows
-            // to suppress the follow-up message.
+            // Suppress follow-up notifications for all watched jobs upfront.
+            // suppressFollowUp() cancels the pending delivery timer (if any), which
+            // handles both the within-turn case (job completes while we await) and
+            // the cross-turn case (job already completed before await_job was called).
+            // Previously this only set j.awaited = true, which missed the cross-turn
+            // case because the queueMicrotask had already fired (#3787).
             for (const j of watched)
-                j.awaited = true;
+                manager.suppressFollowUp(j.id);
             // If all watched jobs are already done, return immediately
             const running = watched.filter((j) => j.status === "running");
             if (running.length === 0) {

package/dist/resources/extensions/async-jobs/job-manager.js CHANGED Viewed

@@ -118,13 +118,38 @@ export class AsyncJobManager {
         }
     }
     // ── Private ────────────────────────────────────────────────────────────
+    /**
+     * Mark a job as awaited and cancel its pending follow-up delivery, if one
+     * is scheduled. Unknown job ids are ignored, so this can be called whether
+     * or not the job has already completed (#3787).
+     *
+     * @param id - identifier of the job whose follow-up should be suppressed
+     */
+    suppressFollowUp(id) {
+        const target = this.jobs.get(id);
+        if (target) {
+            target.awaited = true;
+            const pendingTimer = target.deliveryTimer;
+            if (pendingTimer !== undefined) {
+                // Stop the scheduled delivery before it can fire.
+                clearTimeout(pendingTimer);
+                target.deliveryTimer = undefined;
+            }
+        }
+    }
     deliverResult(job) {
         if (!this.onJobComplete)
             return;
-        // Defer delivery by one microtask so await_job's .then() chain runs first
-        // and can set job.awaited = true before onJobComplete checks it (#2762).
+        // Use setTimeout(0) instead of queueMicrotask so the handle is cancellable.
+        // suppressFollowUp() can clear this timer even when await_job is called in
+        // a later LLM turn (after the job already completed). queueMicrotask ran
+        // immediately and could not be cancelled (#2762, #3787).
         const cb = this.onJobComplete;
-        queueMicrotask(() => cb(job));
+        job.deliveryTimer = setTimeout(() => {
+            job.deliveryTimer = undefined;
+            if (!job.awaited)
+                cb(job);
+        }, 0);
+        // Allow process to exit even if timer is pending
+        if (typeof job.deliveryTimer === "object" && "unref" in job.deliveryTimer) {
+            job.deliveryTimer.unref();
+        }
     }
     scheduleEviction(id) {
         const existing = this.evictionTimers.get(id);

package/dist/resources/extensions/claude-code-cli/partial-builder.js CHANGED Viewed

@@ -6,6 +6,44 @@
  */
 import { hasXmlParameterTags, repairToolJson } from "@gsd/pi-ai";
 // ---------------------------------------------------------------------------
+// MCP tool name parsing
+// ---------------------------------------------------------------------------
+/**
+ * Break a Claude Code MCP tool name of the form `mcp__<server>__<tool>` into
+ * `{ server, tool }`.
+ *
+ * The first `__` after the `mcp__` prefix separates the two parts, so server
+ * names may contain hyphens (`gsd-workflow`) and any later `__` stays in the
+ * tool name.
+ *
+ * @param name - raw tool name reported by the SDK
+ * @returns `{ server, tool }`, or null when the name lacks the `mcp__` prefix,
+ *   has no delimiter, or has an empty server or tool part, so callers can
+ *   fall through unchanged
+ */
+export function parseMcpToolName(name) {
+    const prefix = "mcp__";
+    const separator = "__";
+    if (!name.startsWith(prefix)) {
+        return null;
+    }
+    const remainder = name.slice(prefix.length);
+    const splitAt = remainder.indexOf(separator);
+    // splitAt is -1 when the delimiter is missing and 0 when the server is empty.
+    if (splitAt < 1) {
+        return null;
+    }
+    const tool = remainder.slice(splitAt + separator.length);
+    if (tool === "") {
+        return null;
+    }
+    const server = remainder.slice(0, splitAt);
+    return { server, tool };
+}
+/**
+ * Convert a Claude Code SDK tool_use block into a GSD ToolCall block.
+ *
+ * An `mcp__<server>__` prefix is removed from the name so registered
+ * extension renderers (which use the unprefixed canonical names) can match;
+ * the server name is kept on the block as `mcpServer` for diagnostics and
+ * rendering. Names without the prefix pass through untouched.
+ *
+ * @param id - tool call id from the SDK block
+ * @param rawName - tool name exactly as the SDK reported it
+ * @param input - arguments object to attach to the tool call
+ * @returns the ToolCall block
+ */
+function toolCallFromBlock(id, rawName, input) {
+    const mcpParts = parseMcpToolName(rawName);
+    if (!mcpParts) {
+        return { type: "toolCall", id, name: rawName, arguments: input };
+    }
+    return {
+        type: "toolCall",
+        id,
+        name: mcpParts.tool,
+        arguments: input,
+        mcpServer: mcpParts.server,
+    };
+}
+// ---------------------------------------------------------------------------
 // Content-block mapping helpers
 // ---------------------------------------------------------------------------
 /**
@@ -22,12 +60,7 @@ export function mapContentBlock(block) {
                 ...(block.signature ? { thinkingSignature: block.signature } : {}),
             };
         case "tool_use":
-            return {
-                type: "toolCall",
-                id: block.id,
-                name: block.name,
-                arguments: block.input,
-            };
+            return toolCallFromBlock(block.id, block.name, block.input);
         case "server_tool_use":
             return {
                 type: "serverToolUse",
@@ -149,12 +182,7 @@ export class PartialMessageBuilder {
                 }
                 if (block.type === "tool_use") {
                     this.toolJsonAccum.set(streamIndex, "");
-                    this.partial.content.push({
-                        type: "toolCall",
-                        id: block.id,
-                        name: block.name,
-                        arguments: {},
-                    });
+                    this.partial.content.push(toolCallFromBlock(block.id, block.name, {}));
                     return { type: "toolcall_start", contentIndex, partial: this.partial };
                 }
                 if (block.type === "server_tool_use") {

package/dist/resources/extensions/claude-code-cli/stream-adapter.js CHANGED Viewed

@@ -92,18 +92,34 @@ function extractMessageText(msg) {
  * call effectively stateless. This version serialises the complete
  * conversation history (system prompt + all user/assistant turns) so
  * Claude Code has full context for multi-turn continuity.
+ *
+ * History is wrapped in XML-tag structure rather than `[User]`/`[Assistant]`
+ * bracket headers. Bracket headers read to the model as an in-context
+ * demonstration of how turns are delimited, causing it to fabricate fake
+ * user turns in its own output. XML tags read as document structure and
+ * don't get mirrored in free text.
  */
 export function buildPromptFromContext(context) {
-    const parts = [];
+    const hasContent = Boolean(context.systemPrompt) || context.messages.some((m) => extractMessageText(m));
+    if (!hasContent)
+        return "";
+    const parts = [
+        "Respond only to the final user message below. " +
+            "Do not emit <user_message>, <assistant_message>, or <prior_system_context> tags in your response.",
+    ];
     if (context.systemPrompt) {
-        parts.push(`[System]\n${context.systemPrompt}`);
+        parts.push(`<prior_system_context>\n${context.systemPrompt}\n</prior_system_context>`);
     }
+    const turns = [];
     for (const msg of context.messages) {
         const text = extractMessageText(msg);
         if (!text)
             continue;
-        const label = msg.role === "user" ? "User" : msg.role === "assistant" ? "Assistant" : "System";
-        parts.push(`[${label}]\n${text}`);
+        const tag = msg.role === "user" ? "user_message" : msg.role === "assistant" ? "assistant_message" : "system_message";
+        turns.push(`<${tag}>\n${text}\n</${tag}>`);
+    }
+    if (turns.length > 0) {
+        parts.push(`<conversation_history>\n${turns.join("\n")}\n</conversation_history>`);
     }
     return parts.join("\n\n");
 }
@@ -389,32 +405,25 @@ export function makeAbortedMessage(model, lastTextContent) {
 /**
  * Resolve the Claude Code permission mode for the current run.
  *
- * - Auto-mode / headless runs bypass permissions so tool calls don't block
- *   on prompts the user isn't watching.
- * - Interactive runs default to `acceptEdits` so file/bash writes still
- *   land quickly but the SDK retains a permission gate.
- * - `GSD_CLAUDE_CODE_PERMISSION_MODE` forces a specific mode when set.
+ * GSD subagents run underneath a host Claude Code session the user has
+ * already consented to, and their work (edits, shell inspection, MCP calls)
+ * spans the full workflow toolset. Defaulting the inner SDK to
+ * `bypassPermissions` avoids per-tool approval prompts that offer no
+ * meaningful safety beyond what the host session and the subagent prompts
+ * already enforce. `GSD_CLAUDE_CODE_PERMISSION_MODE` lets security-conscious
+ * users opt into a stricter mode (`acceptEdits`, `default`, `plan`).
  *
- * Cross-extension coupling is kept minimal by dynamically importing
- * `isAutoActive` and falling back to the bypass default if the import
- * fails (e.g. in unit tests that load stream-adapter in isolation).
+ * Tradeoff: bypass means a prompt-injection payload read from an untrusted
+ * file could trigger tool calls without a second gate. Accepted for GSD
+ * because the workflow is explicit user intent and the alternative
+ * (#4099) is continuous approval fatigue that blocks real work.
  */
 export async function resolveClaudePermissionMode(env = process.env) {
     const override = env.GSD_CLAUDE_CODE_PERMISSION_MODE?.trim();
     if (override === "bypassPermissions" || override === "acceptEdits" || override === "default" || override === "plan") {
         return override;
     }
-    try {
-        const autoMod = (await import("../gsd/auto.js"));
-        if (typeof autoMod.isAutoActive === "function" && autoMod.isAutoActive()) {
-            return "bypassPermissions";
-        }
-        return "acceptEdits";
-    }
-    catch {
-        // auto.ts unavailable (tests, non-GSD contexts) — stay permissive.
-        return "bypassPermissions";
-    }
+    return "bypassPermissions";
 }
 /**
  * Build the options object passed to the Claude Agent SDK's `query()` call.
@@ -431,6 +440,21 @@ export function buildSdkOptions(modelId, prompt, overrides, extraOptions = {}) {
     const mcpServers = buildWorkflowMcpServers();
     const permissionMode = overrides?.permissionMode ?? "bypassPermissions";
     const disallowedTools = ["AskUserQuestion"];
+    // Pre-authorize the safe built-ins and every registered workflow MCP
+    // server's tools. `acceptEdits` mode (selectable via
+    // GSD_CLAUDE_CODE_PERMISSION_MODE) only auto-approves file edits —
+    // without this list, Read/Glob/Grep, basic shell inspection, and every
+    // `mcp__gsd-workflow__*` call still surface as "This command
+    // requires approval" and block GSD actions (#4099).
+    const allowedTools = [
+        "Read",
+        "Write",
+        "Edit",
+        "Glob",
+        "Grep",
+        "Bash(ls:*)",
+        "Bash(pwd)",
+        ...(mcpServers ? Object.keys(mcpServers).map((serverName) => `mcp__${serverName}__*`) : []),
+    ];
     return {
         pathToClaudeCodeExecutable: getClaudePath(),
         model: modelId,
@@ -442,6 +466,7 @@ export function buildSdkOptions(modelId, prompt, overrides, extraOptions = {}) {
         settingSources: ["project"],
         systemPrompt: { type: "preset", preset: "claude_code" },
         disallowedTools,
+        ...(allowedTools.length > 0 ? { allowedTools } : {}),
         ...(mcpServers ? { mcpServers } : {}),
         betas: modelId.includes("sonnet") ? ["context-1m-2025-08-07"] : [],
         ...extraOptions,

package/dist/resources/extensions/gsd/auto/loop.js CHANGED Viewed

@@ -13,6 +13,69 @@ import { runPreDispatch, runDispatch, runGuards, runUnitPhase, runFinalize, } fr
 import { debugLog } from "../debug-logger.js";
 import { isInfrastructureError, isTransientCooldownError, getCooldownRetryAfterMs, COOLDOWN_FALLBACK_WAIT_MS, MAX_COOLDOWN_RETRIES } from "./infra-errors.js";
 import { resolveEngine } from "../engine-resolver.js";
+import { logWarning } from "../workflow-logger.js";
+import { gsdRoot } from "../paths.js";
+import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+import { getHeapStatistics } from "node:v8";
+// ── Stuck detection persistence (#3704) ──────────────────────────────────
+// Persist stuck detection state to disk so it survives session restarts.
+// Without this, restarting auto-mode resets all counters, allowing the
+// same blocked unit to burn a full retry budget each session.
+/**
+ * Location of the persisted stuck-detection state file for a project.
+ *
+ * @param basePath - project base path handed to `gsdRoot`
+ * @returns path to `runtime/stuck-state.json` under the GSD root
+ */
+function stuckStatePath(basePath) {
+    const runtimeDir = join(gsdRoot(basePath), "runtime");
+    return join(runtimeDir, "stuck-state.json");
+}
+/**
+ * Read the persisted stuck-detection state from disk.
+ *
+ * Fields with an unexpected type are replaced by their defaults. Any failure
+ * (missing file, unreadable JSON, …) is reported through `debugLog` and
+ * yields a fresh empty state instead of throwing.
+ *
+ * @param basePath - project base path
+ * @returns `{ recentUnits, stuckRecoveryAttempts }`
+ */
+function loadStuckState(basePath) {
+    try {
+        const raw = readFileSync(stuckStatePath(basePath), "utf-8");
+        const parsed = JSON.parse(raw);
+        const recentUnits = Array.isArray(parsed.recentUnits) ? parsed.recentUnits : [];
+        const attempts = parsed.stuckRecoveryAttempts;
+        return {
+            recentUnits,
+            stuckRecoveryAttempts: typeof attempts === "number" ? attempts : 0,
+        };
+    }
+    catch (err) {
+        const error = err instanceof Error ? err.message : String(err);
+        debugLog("autoLoop", { phase: "load-stuck-state-failed", error });
+        return { recentUnits: [], stuckRecoveryAttempts: 0 };
+    }
+}
+/**
+ * Write the stuck-detection state to disk, creating the runtime directory
+ * when needed. Only the 20 most recent units are stored, together with an
+ * `updatedAt` timestamp. Failures are reported through `debugLog` and never
+ * thrown to the caller.
+ *
+ * @param basePath - project base path
+ * @param state - loop state providing `recentUnits` and `stuckRecoveryAttempts`
+ */
+function saveStuckState(basePath, state) {
+    try {
+        const target = stuckStatePath(basePath);
+        mkdirSync(join(gsdRoot(basePath), "runtime"), { recursive: true });
+        const snapshot = {
+            recentUnits: state.recentUnits.slice(-20), // keep last 20 entries
+            stuckRecoveryAttempts: state.stuckRecoveryAttempts,
+            updatedAt: new Date().toISOString(),
+        };
+        writeFileSync(target, `${JSON.stringify(snapshot)}\n`);
+    }
+    catch (err) {
+        const error = err instanceof Error ? err.message : String(err);
+        debugLog("autoLoop", { phase: "save-stuck-state-failed", error });
+    }
+}
+// ── Memory pressure monitoring (#3331) ──────────────────────────────────
+// Check heap usage every N iterations and trigger graceful shutdown before
+// the OS OOM killer sends SIGKILL. The threshold is 85% of the V8 heap
+// limit (--max-old-space-size or default ~1.5-4GB depending on platform).
+const MEMORY_CHECK_INTERVAL = 5; // check every 5 iterations
+const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% of heap limit
+/**
+ * Compare the current V8 heap usage with the heap size limit.
+ *
+ * The limit comes from `v8.getHeapStatistics().heap_size_limit`, obtained via
+ * the module's static `node:v8` import. This file uses ESM import syntax,
+ * where a bare `require` is not defined, so a `require("node:v8")` lookup
+ * would throw and silently leave the fallback value in place.
+ *
+ * @returns `{ pressured, heapMB, limitMB, pct }` — `heapMB` is the used heap
+ *   and `limitMB` the heap limit (both rounded to whole MB), `pct` is
+ *   `heapMB / limitMB`, and `pressured` is true when `pct` exceeds
+ *   MEMORY_PRESSURE_THRESHOLD
+ */
+function checkMemoryPressure() {
+    const fallbackLimitMB = 4096; // conservative default when V8 stats are unusable
+    const heapMB = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
+    let limitMB = fallbackLimitMB;
+    try {
+        const reportedMB = Math.round(getHeapStatistics().heap_size_limit / 1024 / 1024);
+        // Guard against a zero or non-numeric limit so pct stays meaningful.
+        if (Number.isFinite(reportedMB) && reportedMB > 0) {
+            limitMB = reportedMB;
+        }
+    }
+    catch {
+        limitMB = fallbackLimitMB; /* v8 stats unavailable — use conservative default */
+    }
+    const pct = heapMB / limitMB;
+    return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
+}
 /**
  * Main auto-mode execution loop. Iterates: derive → dispatch → guards →
  * runUnit → finalize → repeat. Exits when s.active becomes false or a
@@ -24,7 +87,13 @@ import { resolveEngine } from "../engine-resolver.js";
 export async function autoLoop(ctx, pi, s, deps) {
     debugLog("autoLoop", { phase: "enter" });
     let iteration = 0;
-    const loopState = { recentUnits: [], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 };
+    // Load persisted stuck state so counters survive session restarts (#3704)
+    const persisted = loadStuckState(s.basePath);
+    const loopState = {
+        recentUnits: persisted.recentUnits,
+        stuckRecoveryAttempts: persisted.stuckRecoveryAttempts,
+        consecutiveFinalizeTimeouts: 0,
+    };
     let consecutiveErrors = 0;
     let consecutiveCooldowns = 0;
     const recentErrorMessages = [];
@@ -44,6 +113,19 @@ export async function autoLoop(ctx, pi, s, deps) {
             await deps.stopAuto(ctx, pi, `Safety: loop exceeded ${MAX_LOOP_ITERATIONS} iterations — possible runaway`);
             break;
         }
+        // ── Memory pressure check (#3331) ──
+        // Graceful shutdown before OOM killer sends SIGKILL.
+        if (iteration % MEMORY_CHECK_INTERVAL === 0) {
+            const mem = checkMemoryPressure();
+            debugLog("autoLoop", { phase: "memory-check", ...mem });
+            if (mem.pressured) {
+                logWarning("dispatch", `Memory pressure: ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%) — stopping auto-mode to prevent OOM kill`);
+                await deps.stopAuto(ctx, pi, `Memory pressure: heap at ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%). ` +
+                    `Stopping gracefully to prevent OOM kill after ${iteration} iterations. ` +
+                    `Resume with /gsd auto to continue from where you left off.`);
+                break;
+            }
+        }
         if (!s.cmdCtx) {
             debugLog("autoLoop", { phase: "exit", reason: "no-cmdCtx" });
             break;
@@ -162,6 +244,7 @@ export async function autoLoop(ctx, pi, s, deps) {
                 consecutiveCooldowns = 0;
                 recentErrorMessages.length = 0;
                 deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
+                saveStuckState(s.basePath, loopState); // persist across session restarts (#3704)
                 debugLog("autoLoop", { phase: "iteration-complete", iteration });
                 if (reconcileResult.outcome === "milestone-complete") {
                     await deps.stopAuto(ctx, pi, "Workflow complete");

package/dist/resources/extensions/gsd/auto-post-unit.js CHANGED Viewed

@@ -16,6 +16,7 @@ import { loadFile, parseSummary, resolveAllOverrides } from "./files.js";
 import { loadPrompt } from "./prompt-loader.js";
 import { resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveMilestoneFile, resolveTasksDir, buildTaskFileName, } from "./paths.js";
 import { invalidateAllCaches } from "./cache.js";
+import { rebuildState } from "./doctor.js";
 import { parseUnitId } from "./unit-id.js";
 import { closeoutUnit } from "./auto-unit-closeout.js";
 import { autoCommitCurrentBranch, } from "./worktree.js";
@@ -288,6 +289,11 @@ export async function postUnitPreVerification(pctx, opts) {
                 debugLog("postUnit", { phase: "browser-teardown", status: "closed" });
             }
         });
+        // Keep the on-disk STATE.md aligned with the live derived state after
+        // ordinary unit completion, before any worktree state is synced back.
+        await runSafely("postUnit", "state-rebuild", async () => {
+            await rebuildState(s.basePath);
+        });
         // Sync worktree state back to project root (skipped for lightweight sidecars)
         if (!opts?.skipWorktreeSync && s.originalBasePath && s.originalBasePath !== s.basePath) {
             await runSafely("postUnit", "worktree-sync", () => {

package/dist/resources/extensions/gsd/auto-recovery.js CHANGED Viewed

@@ -224,6 +224,17 @@ export function verifyExpectedArtifact(unitType, unitId, base) {
         if (!isValidationTerminal(validationContent))
             return false;
     }
+    if (unitType === "plan-milestone") {
+        try {
+            const roadmap = parseLegacyRoadmap(readFileSync(absPath, "utf-8"));
+            if (roadmap.slices.length === 0)
+                return false;
+        }
+        catch (err) {
+            logWarning("recovery", `plan-milestone roadmap verification failed: ${err instanceof Error ? err.message : String(err)}`);
+            return false;
+        }
+    }
     // plan-slice must produce a plan with actual task entries, not just a scaffold.
     // The plan file may exist from a prior discussion/context step with only headings
     // but no tasks. Without this check the artifact is considered "complete" and the

package/dist/resources/extensions/gsd/auto.js CHANGED Viewed

@@ -425,9 +425,13 @@ function cleanupAfterLoopExit(ctx) {
         /* best-effort — mirror stopAuto cleanup */
         logWarning("session", `lock cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
     }
-    ctx.ui.setStatus("gsd-auto", undefined);
-    ctx.ui.setWidget("gsd-progress", undefined);
-    ctx.ui.setFooter(undefined);
+    // A transient provider-error pause intentionally leaves the paused badge
+    // visible so the user still has a resumable auto-mode signal on screen.
+    if (!s.paused) {
+        ctx.ui.setStatus("gsd-auto", undefined);
+        ctx.ui.setWidget("gsd-progress", undefined);
+        ctx.ui.setFooter(undefined);
+    }
     // Restore CWD out of worktree back to original project root
     if (s.originalBasePath) {
         s.basePath = s.originalBasePath;
@@ -529,7 +533,22 @@ export async function stopAuto(ctx, pi, reason) {
         catch (e) {
             debugLog("stop-cleanup-worktree", { error: e instanceof Error ? e.message : String(e) });
         }
-        // ── Step 5: DB cleanup ──
+        // ── Step 5: Rebuild state while DB is still open (#3599) ──
+        // rebuildState() calls deriveState() which needs the DB for authoritative
+        // state. Previously this ran after closeDatabase(), forcing a filesystem
+        // fallback that could disagree with the DB-backed dispatch decisions —
+        // a split-brain where dispatch says "blocked" but STATE.md shows work.
+        if (s.basePath) {
+            try {
+                await rebuildState(s.basePath);
+            }
+            catch (e) {
+                debugLog("stop-rebuild-state-failed", {
+                    error: e instanceof Error ? e.message : String(e),
+                });
+            }
+        }
+        // ── Step 6: DB cleanup ──
         if (isDbAvailable()) {
             try {
                 const { closeDatabase } = await import("./gsd-db.js");
@@ -541,7 +560,7 @@ export async function stopAuto(ctx, pi, reason) {
                 });
             }
         }
-        // ── Step 6: Restore basePath and chdir ──
+        // ── Step 7: Restore basePath and chdir ──
         try {
             if (s.originalBasePath) {
                 s.basePath = s.originalBasePath;
@@ -557,7 +576,7 @@ export async function stopAuto(ctx, pi, reason) {
         catch (e) {
             debugLog("stop-cleanup-basepath", { error: e instanceof Error ? e.message : String(e) });
         }
-        // ── Step 7: Ledger notification ──
+        // ── Step 8: Ledger notification ──
         try {
             const ledger = getLedger();
             if (ledger && ledger.units.length > 0) {
@@ -571,17 +590,6 @@ export async function stopAuto(ctx, pi, reason) {
         catch (e) {
             debugLog("stop-cleanup-ledger", { error: e instanceof Error ? e.message : String(e) });
         }
-        // ── Step 8: Rebuild state ──
-        if (s.basePath) {
-            try {
-                await rebuildState(s.basePath);
-            }
-            catch (e) {
-                debugLog("stop-rebuild-state-failed", {
-                    error: e instanceof Error ? e.message : String(e),
-                });
-            }
-        }
         // ── Step 9: Cmux sidebar / event log ──
         try {
             clearCmuxSidebar(loadedPreferences);
@@ -1294,8 +1302,6 @@ export async function dispatchHookUnit(ctx, pi, hookName, triggerUnitType, trigg
     pi.sendMessage({ customType: "gsd-auto", content: hookPrompt, display: true }, { triggerTurn: true });
     return true;
 }
-// Direct phase dispatch → auto-direct-dispatch.ts
-export { dispatchDirectPhase } from "./auto-direct-dispatch.js";
 // Re-export recovery functions for external consumers
 export { buildLoopRemediationSteps, } from "./auto-recovery.js";
 export { resolveExpectedArtifactPath } from "./auto-artifact-paths.js";