npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580 - Mend

@opengsd/gsd-pi 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (219) hide show

package/dist/resources/.managed-resources-content-hash CHANGED Viewed

	@@ -1 +1 @@
1	- ~~ba1c57462cf67a0e~~
1	+ f692671bcb7f8bc4

package/dist/resources/extensions/browser-tools/engine/managed-gsd-browser.js CHANGED Viewed

@@ -435,11 +435,27 @@ function formatManagedBrowserError(toolName, error) {
     return [
         `gsd-browser engine or tool unavailable for ${toolName}: ${message}`,
         "",
-        "GSD browser automation now uses the managed gsd-browser engine by default.",
+        "The managed gsd-browser engine is enabled for this session but is unavailable.",
         "Run /gsd doctor or reinstall dependencies so @opengsd/gsd-browser is available.",
-        "Set GSD_BROWSER_ENGINE=legacy only when you intentionally need the Playwright compatibility engine.",
+        "Unset GSD_BROWSER_ENGINE or set GSD_BROWSER_ENGINE=playwright to use the default Playwright engine.",
     ].join("\n");
 }
+/**
+ * Eagerly establish the managed gsd-browser connection so browser tools are
+ * ready before first use. Connecting only spawns the gsd-browser MCP daemon; it
+ * does not launch Chrome (that happens lazily on the first navigation).
+ * `ctx` and `signal` are passed straight through to getOrConnectManagedGsdBrowser.
+ * Best-effort: resolves to { ok: true } or { ok: false, error: <message string> } instead of throwing, so callers (e.g. session-start warm-up) can surface a warning without failing the session.
+ */
+export async function warmUpManagedGsdBrowser(ctx, signal) {
+    try {
+        await getOrConnectManagedGsdBrowser(ctx, signal);
+        return { ok: true };
+    }
+    catch (error) {
+        return { ok: false, error: error instanceof Error ? error.message : String(error) }; // non-Error throwables are stringified
+    }
+}
 export function registerManagedGsdBrowserTools(pi) {
     for (const tool of MANAGED_BROWSER_TOOLS) {
         pi.registerTool({

package/dist/resources/extensions/browser-tools/engine/selection.js CHANGED Viewed

@@ -1,4 +1,4 @@
-const DEFAULT_BROWSER_ENGINE = "gsd-browser";
+const DEFAULT_BROWSER_ENGINE = "legacy";
 export function resolveBrowserEngineMode(env = process.env) {
     const raw = env.GSD_BROWSER_ENGINE?.trim();
     if (!raw)

package/dist/resources/extensions/browser-tools/extension-manifest.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "browser-tools",
   "name": "Browser Tools",
   "version": "1.0.0",
-  "description": "GSD browser automation contract adapter backed by the managed gsd-browser engine",
+  "description": "GSD browser automation contract adapter backed by Playwright with optional managed gsd-browser support",
   "tier": "bundled",
   "requires": { "platform": ">=2.29.0" },
   "provides": {

package/dist/resources/extensions/browser-tools/index.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /** browser-tools — Pi Browser Automation Contract adapter. */
 import { importExtensionModule } from "@gsd/pi-coding-agent";
-import { closeManagedGsdBrowser, registerManagedGsdBrowserTools } from "./engine/managed-gsd-browser.js";
+import { closeManagedGsdBrowser, registerManagedGsdBrowserTools, warmUpManagedGsdBrowser } from "./engine/managed-gsd-browser.js";
 import { resolveBrowserEngineMode } from "./engine/selection.js";
+import { detectWebApp } from "./web-app-detect.js";
 let legacyRegistrationPromise = null;
 let managedRegistrationPromise = null;
 let registeredEngine = null;
@@ -147,6 +148,29 @@ async function registerBrowserTools(pi) {
         throw error;
     }
 }
+function isWarmUpDisabled() { // Opt-out switch for the eager warm-up, read from the GSD_BROWSER_WARMUP environment variable.
+    const value = process.env.GSD_BROWSER_WARMUP?.trim().toLowerCase(); // undefined when the variable is unset
+    return value === "0" || value === "false" || value === "off"; // any other value (or unset) leaves warm-up enabled
+}
+/**
+ * Fire-and-forget warm-up of the managed gsd-browser engine. Runs only when warm-up is not disabled, the
+ * resolved engine mode is "gsd-browser", and detectWebApp() accepts ctx.cwd (falling back to process.cwd()).
+ * Returns undefined immediately; a failed warm-up only produces a UI warning, and only when ctx.hasUI. `pi` is currently unused.
+ */
+function maybeWarmUpManagedEngine(pi, ctx) {
+    if (isWarmUpDisabled())
+        return;
+    if (resolveBrowserEngineMode() !== "gsd-browser")
+        return;
+    const projectRoot = ctx.cwd || process.cwd();
+    if (!detectWebApp(projectRoot))
+        return;
+    void warmUpManagedGsdBrowser(ctx).then((result) => { // deliberately not awaited: the caller does not wait for the connection
+        if (!result.ok && ctx.hasUI) {
+            ctx.ui.notify(`gsd-browser auto-init failed: ${result.error}. Browser UAT tools will retry on first use; run /gsd doctor if this persists.`, "warning");
+        }
+    });
+}
 async function closeActiveBrowserEngines() {
     await closeManagedGsdBrowser();
     if (legacyRegistrationPromise) {
@@ -157,12 +181,15 @@ async function closeActiveBrowserEngines() {
 export default function (pi) {
     pi.on("session_start", async (_event, ctx) => {
         if (ctx.hasUI) {
-            void registerBrowserTools(pi).catch((error) => {
+            void registerBrowserTools(pi)
+                .then(() => maybeWarmUpManagedEngine(pi, ctx))
+                .catch((error) => {
                 ctx.ui.notify(`browser-tools failed to load: ${error instanceof Error ? error.message : String(error)}`, "warning");
             });
             return;
         }
         await registerBrowserTools(pi);
+        maybeWarmUpManagedEngine(pi, ctx);
     });
     pi.on("session_shutdown", async () => {
         await closeActiveBrowserEngines();

package/dist/resources/extensions/browser-tools/web-app-detect.js ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * web-app-detect — lightweight, synchronous heuristic for deciding whether the
+ * project under development is a web app. Used only when the optional managed
+ * gsd-browser engine is selected and can be warmed before first use.
+ */
+import { existsSync, readFileSync } from "node:fs";
+import { resolve } from "node:path";
+// Dependency names (frontend frameworks / bundlers) whose presence indicates a browser-facing web app worth warming the optional managed engine for.
+// NOTE(review): anchored at the start only, so this is a prefix match — names such as "vitest", "react-native" or "next-auth" also match; confirm that is intended.
+const WEB_DEPENDENCY_RE = /^(react|react-dom|next|nuxt|vue|@vue\/|svelte|@sveltejs\/|solid-js|astro|@remix-run\/|gatsby|preact|@angular\/core|vite|@vitejs\/|@builder\.io\/qwik|@web\/dev-server|@11ty\/eleventy)/;
+// Commands inside package.json scripts that imply a dev server / browser-facing build (case-sensitive, matched on word boundaries anywhere in the script).
+const WEB_SCRIPT_RE = /\b(vite|next|nuxt|astro|remix|webpack(-dev-server)?|parcel|ng serve|serve\b|http-server|live-server|gatsby)\b/;
+function readPackageJson(projectRoot) { // Returns the parsed <projectRoot>/package.json, or null when it is missing, unreadable, invalid JSON, or not an object.
+    const packageJsonPath = resolve(projectRoot, "package.json");
+    if (!existsSync(packageJsonPath))
+        return null;
+    try {
+        const parsed = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
+        return parsed && typeof parsed === "object" ? parsed : null; // rejects null and primitives; arrays still pass this check
+    }
+    catch { // a read or parse failure is treated the same as "no package.json"
+        return null;
+    }
+}
+function dependencyNames(pkg) { // Package names from dependencies, devDependencies and peerDependencies, concatenated in that order (duplicates kept).
+    return [
+        ...Object.keys(pkg.dependencies ?? {}), // a missing section contributes nothing
+        ...Object.keys(pkg.devDependencies ?? {}),
+        ...Object.keys(pkg.peerDependencies ?? {}),
+    ];
+}
+/**
+ * Returns true when projectRoot looks like a browser-facing web app: a package.json dependency name or script command
+ * matches the web regexes, or index.html / public/index.html exists under projectRoot. Conservative and dependency-free: a
+ * false negative just means lazy connection (the prior behavior); a false positive only warms an idle engine connection.
+ */
+export function detectWebApp(projectRoot) {
+    const pkg = readPackageJson(projectRoot);
+    if (pkg) {
+        if (dependencyNames(pkg).some((name) => WEB_DEPENDENCY_RE.test(name)))
+            return true;
+        const scriptValues = Object.values(pkg.scripts ?? {}).filter((value) => typeof value === "string"); // ignore non-string script entries
+        if (scriptValues.some((script) => WEB_SCRIPT_RE.test(script)))
+            return true;
+    }
+    // No package.json signal — fall back to an index.html at the project root or under public/ (static sites).
+    if (existsSync(resolve(projectRoot, "index.html")))
+        return true;
+    if (existsSync(resolve(projectRoot, "public", "index.html")))
+        return true;
+    return false;
+}

package/dist/resources/extensions/gsd/auto/phases.js CHANGED Viewed

@@ -16,6 +16,8 @@ import { detectStuck } from "./detect-stuck.js";
 import { runUnit } from "./run-unit.js";
 import { debugLog } from "../debug-logger.js";
 import { resolveWorktreeProjectRoot, normalizeWorktreePathForCompare } from "../worktree-root.js";
+import { buildManualValidationGuidance } from "../worktree-manager.js";
+import { relSliceFile } from "../paths.js";
 import { classifyProject } from "../detection.js";
 import { MergeConflictError } from "../git-service.js";
 import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js";
@@ -47,6 +49,7 @@ import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
 import { getContextPauseAction } from "../auto-budget.js";
 import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, supportsStructuredQuestions, } from "../workflow-mcp.js";
 import { prepareWorkflowMcpForProject } from "../workflow-mcp-auto-prep.js";
+import { getToolBaselineSnapshot } from "../auto-model-selection.js";
 import { resolveManifest } from "../unit-context-manifest.js";
 import { createWorktreeSafetyModule } from "../worktree-safety.js";
 import { isSuspiciousGhostCompletion } from "../auto-unit-closeout.js";
@@ -302,6 +305,8 @@ async function validateSourceWriteWorktreeSafety(ic, unitType, unitId, milestone
 // ─── Session timeout auto-resume state ────────────────────────────────────────
 let consecutiveSessionTimeouts = 0;
 const MAX_SESSION_TIMEOUT_AUTO_RESUMES = 3;
+/** Maximum zero-tool-call retries before pausing — context exhaustion is deterministic. */
+const MAX_ZERO_TOOL_RETRIES = 1;
 export function resetSessionTimeoutState() { // Resets the module-level consecutive session-timeout counter to zero.
     consecutiveSessionTimeouts = 0;
 }
@@ -1070,7 +1075,13 @@ export async function runDispatch(ic, preData, loopState) {
     const authMode = provider && typeof ctx.modelRegistry?.getProviderAuthMode === "function"
         ? ctx.modelRegistry.getProviderAuthMode(provider)
         : undefined;
-    const activeTools = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : [];
+    // Use the baseline snapshot rather than the live active-tool set: a prior
+    // unit's per-provider narrowing (hook overrides, Groq 128-tool cap, etc.)
+    // can strip required MCP tools from the live set even though
+    // selectAndApplyModel will restore them before the unit is dispatched.
+    // Checking a stale-narrowed set causes false transport-preflight warnings
+    // that repeat on every /gsd auto resume (#477 follow-up).
+    const activeTools = getToolBaselineSnapshot(pi);
     // Deep planning intentionally keeps human checkpoints in plain chat. In
     // Claude Code/local MCP transports, structured question requests can be
     // cancelled outside the normal chat flow, which made approval gates easy to
@@ -1093,6 +1104,9 @@ export async function runDispatch(ic, preData, loopState) {
         sessionContextWindow: ctx.model?.contextWindow,
         sessionProvider: ctx.model?.provider,
         modelRegistry: ctx.modelRegistry,
+        activeTools,
+        sessionBaseUrl: ctx.model?.baseUrl,
+        sessionAuthMode: authMode,
     });
     if (isUnhandledPhaseWarning(dispatchResult)) {
         deps.invalidateAllCaches();
@@ -1116,6 +1130,9 @@ export async function runDispatch(ic, preData, loopState) {
             sessionContextWindow: ctx.model?.contextWindow,
             sessionProvider: ctx.model?.provider,
             modelRegistry: ctx.modelRegistry,
+            activeTools,
+            sessionBaseUrl: ctx.model?.baseUrl,
+            sessionAuthMode: authMode,
         });
     }
     if (dispatchResult.action === "stop") {
@@ -2059,13 +2076,23 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
                     });
                 }
                 else {
+                    const zeroToolKey = `${unitType}/${unitId}`;
+                    const attempt = (s.zeroToolRetryCount.get(zeroToolKey) ?? 0) + 1;
                     debugLog("runUnitPhase", {
                         phase: "zero-tool-calls",
                         unitType,
                         unitId,
+                        attempt,
                         warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
                     });
-                    ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`, "warning");
+                    if (attempt > MAX_ZERO_TOOL_RETRIES) {
+                        s.zeroToolRetryCount.delete(zeroToolKey);
+                        ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, pausing auto-mode after ${MAX_ZERO_TOOL_RETRIES} retry.`, "error");
+                        await deps.pauseAuto(ctx, pi);
+                        return { action: "break", reason: "zero-tool-calls-exhausted" };
+                    }
+                    s.zeroToolRetryCount.set(zeroToolKey, attempt);
+                    ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry (attempt ${attempt}/${MAX_ZERO_TOOL_RETRIES})`, "warning");
                     return {
                         action: "retry",
                         reason: "zero-tool-calls",
@@ -2087,6 +2114,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
     if (artifactVerified) {
         s.unitDispatchCount.delete(dispatchKey);
         s.unitRecoveryCount.delete(`${unitType}/${unitId}`);
+        s.zeroToolRetryCount.delete(dispatchKey);
     }
     // Write phase handoff anchor after successful research/planning completion
     const anchorPhases = new Set(["research-milestone", "research-slice", "plan-milestone", "plan-slice"]);
@@ -2232,7 +2260,21 @@ export async function runFinalize(ic, iterData, loopState, sidecarItem) {
         }
     }
     if (pauseAfterUatDispatch) {
-        ctx.ui.notify("UAT requires human execution. Auto-mode will pause after this unit writes the result file.", "info");
+        const pauseMid = iterData.mid;
+        const pauseSliceId = pauseMid && iterData.unitId.startsWith(`${pauseMid}/`)
+            ? iterData.unitId.slice(pauseMid.length + 1)
+            : undefined;
+        const guidance = pauseMid
+            ? buildManualValidationGuidance(s.basePath, pauseMid, {
+                uatPath: pauseSliceId
+                    ? relSliceFile(s.basePath, pauseMid, pauseSliceId, "UAT")
+                    : undefined,
+            })
+            : null;
+        const pauseMessage = guidance
+            ? `UAT requires human execution. Auto-mode will pause after this unit writes the result file.\n\n${guidance}`
+            : "UAT requires human execution. Auto-mode will pause after this unit writes the result file.";
+        ctx.ui.notify(pauseMessage, "info");
         await deps.pauseAuto(ctx, pi);
         debugLog("autoLoop", { phase: "exit", reason: "uat-pause" });
         clearFinalizingUnit();

package/dist/resources/extensions/gsd/auto/session.js CHANGED Viewed

@@ -94,6 +94,7 @@ export class AutoSession {
     verificationRetryCount = new Map();
     verificationRetryFailureHashes = new Map();
     exhaustedVerificationUnits = new Set();
+    zeroToolRetryCount = new Map();
     pausedSessionFile = null;
     pausedUnitType = null;
     pausedUnitId = null;
@@ -266,6 +267,7 @@ export class AutoSession {
         this.verificationRetryCount.clear();
         this.verificationRetryFailureHashes.clear();
         this.exhaustedVerificationUnits.clear();
+        this.zeroToolRetryCount.clear();
         this.pausedSessionFile = null;
         this.pausedUnitType = null;
         this.pausedUnitId = null;

package/dist/resources/extensions/gsd/auto-dispatch.js CHANGED Viewed

@@ -1,7 +1,7 @@
 // Project/App: gsd-pi
 // File Purpose: Declarative auto-mode dispatch rules and dispatch resolver.
 import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
-import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, insertAssessment, setSliceSketchFlag, transaction, getAssessment } from "./gsd-db.js";
+import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, insertAssessment, setSliceSketchFlag, transaction, getAssessment, } from "./gsd-db.js";
 import { isClosedStatus } from "./status-guards.js";
 import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
 import { gsdRoot, resolveGsdPathContract, resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, relTaskFile, relSliceFile, buildMilestoneFileName, buildSliceFileName, buildTaskFileName, gsdProjectionRoot, } from "./paths.js";
@@ -21,6 +21,7 @@ import { isAutoActive } from "./auto.js";
 import { markDepthVerified } from "./bootstrap/write-gate.js";
 import { ensureWorkflowPreferencesCaptured } from "./planning-depth.js";
 import { MILESTONE_ID_RE } from "./milestone-ids.js";
+import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, } from "./workflow-mcp.js";
 import { PROJECT_RESEARCH_INFLIGHT_MARKER, } from "./project-research-policy.js";
 import { isWorkflowPrefsCaptured, resolveDeepProjectSetupState, } from "./deep-project-setup-policy.js";
 import { annotateBackgroundable } from "./delegation-policy.js";
@@ -35,6 +36,7 @@ import { probeGitConflictState } from "./git-conflict-state.js";
 import { runTurnGitAction } from "./git-service.js";
 import { parseUnitId } from "./unit-id.js";
 import { resolveExpectedArtifactPath } from "./auto-artifact-paths.js";
+import { checkCloseoutConsistencyGate, formatCloseoutConsistencyBlock, } from "./closeout-consistency-gate.js";
 function resolveExistingExpectedArtifact(unitType, unitId, basePath) {
     const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
     return artifactPath && existsSync(artifactPath) ? artifactPath : null;
@@ -466,11 +468,18 @@ export const DISPATCH_RULES = [
     },
     {
         name: "run-uat (post-completion)",
-        match: async ({ state, mid, basePath, prefs }) => {
+        match: async ({ state, mid, basePath, prefs, sessionProvider, sessionAuthMode, activeTools, sessionBaseUrl }) => {
             const needsRunUat = await checkNeedsRunUat(basePath, mid, state, prefs);
             if (!needsRunUat)
                 return null;
             const { sliceId, uatType } = needsRunUat;
+            // Transport preflight: verify required MCP tools are actually connected
+            // before consuming a retry attempt. Fixes tool-starved sessions burning
+            // all MAX_UAT_ATTEMPTS before stopping (#477).
+            const transportError = getWorkflowTransportSupportError(sessionProvider, getRequiredWorkflowToolsForAutoUnit("run-uat"), { projectRoot: basePath, surface: "auto-mode", unitType: "run-uat", authMode: sessionAuthMode, baseUrl: sessionBaseUrl, activeTools });
+            if (transportError) {
+                return { action: "stop", reason: transportError, level: "warning" };
+            }
             // Cap run-uat dispatch attempts to prevent infinite replay (#3624).
             // Check before incrementing so an exhausted counter cannot create a
             // no-progress skip loop that starves later dispatch rules.
@@ -1355,6 +1364,16 @@ export const DISPATCH_RULES = [
                         prompt: await buildCompleteMilestonePrompt(mid, midTitle, basePath),
                     };
                 }
+                if (milestone) {
+                    const closeoutGate = checkCloseoutConsistencyGate(mid, { refreshFromDisk: true });
+                    if (!closeoutGate.ok) {
+                        return {
+                            action: "stop",
+                            reason: formatCloseoutConsistencyBlock(closeoutGate),
+                            level: "warning",
+                        };
+                    }
+                }
             }
             return {
                 action: "stop",

package/dist/resources/extensions/gsd/auto-model-selection.js CHANGED Viewed

@@ -63,6 +63,32 @@ const TOOL_BASELINE = new WeakMap();
 export function clearToolBaseline(pi) { // Removes the baseline entry recorded for this `pi` instance, if there is one.
     TOOL_BASELINE.delete(pi);
 }
+/**
+ * Return the de-duplicated union of the recorded baseline tool set and the current live active
+ * tools (baseline entries first), or just the live tools when no baseline has been recorded yet.
+ *
+ * Use this instead of `pi.getActiveTools()` anywhere you need the full tool
+ * surface for a preflight/routing check that runs BEFORE `selectAndApplyModel`
+ * restores the baseline — e.g. in `runDispatch` and `decideNextUnit`.
+ *
+ * The union is intentional:
+ *   - Baseline covers tools that a prior unit's per-provider narrowing (hook
+ *     overrides, Groq 128-tool cap, etc.) has removed from the live set.
+ *     Those tools will be restored by `selectAndApplyModel` before dispatch, so
+ *     dropping them from the preflight check would be a false negative.
+ *   - Live set covers tools connected after the baseline was first captured
+ *     (e.g. MCP servers attached mid-session or after a paused resume).
+ *     Without the live merge, a stale baseline permanently hides newly
+ *     connected MCP tools and prevents transport-preflight from clearing on
+ *     resume (#477 follow-up).
+ */
+export function getToolBaselineSnapshot(pi) {
+    const live = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : []; // a `pi` without getActiveTools contributes no live tools
+    const baseline = TOOL_BASELINE.get(pi);
+    if (baseline === undefined)
+        return live; // no baseline recorded: the live list is returned as-is, not copied
+    return [...new Set([...baseline, ...live])]; // Set drops duplicates while keeping first-seen order
+}
 /**
  * Models eligible for the pre-dispatch policy gate. Prefer registry-available
  * models; when that list is empty (common after worktree resume before registry

package/dist/resources/extensions/gsd/auto-prompts.js CHANGED Viewed

@@ -31,6 +31,7 @@ import { hasBrowserRequiredText } from "./browser-evidence.js";
 import { debugLog } from "./debug-logger.js";
 import { buildSkillActivationBlock, buildSkillDiscoveryVars } from "./skill-activation.js";
 import { findMilestoneIds } from "./milestone-ids.js";
+import { buildRunUatPresentationForType, RUN_UAT_TOOL_PRESENTATION_PLAN_ID } from "./tool-presentation-plan.js";
 export { buildSkillActivationBlock, buildSkillDiscoveryVars };
 // ─── Preamble Cap ─────────────────────────────────────────────────────────────
 /**
@@ -2939,6 +2940,7 @@ export async function buildRunUatPrompt(mid, sliceId, uatPath, uatContent, base)
     emitPromptContextTelemetry("run-uat", contextTelemetry, inlinedContext);
     const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "ASSESSMENT"));
     const uatType = resolveEffectiveUatType(uatContent);
+    const canonicalPresentation = JSON.stringify(buildRunUatPresentationForType(uatType), null, 2);
     return loadPrompt("run-uat", {
         workingDirectory: base,
         milestoneId: mid,
@@ -2946,6 +2948,8 @@ export async function buildRunUatPrompt(mid, sliceId, uatPath, uatContent, base)
         uatPath,
         uatResultPath,
         uatType,
+        toolPresentationPlanId: RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
+        canonicalPresentation,
         inlinedContext,
         skillActivation: buildSkillActivationBlock({
             base,

package/dist/resources/extensions/gsd/auto-recovery.js CHANGED Viewed

@@ -32,6 +32,7 @@ import { isGsdWorktreePath } from "./worktree-root.js";
 import { resolveCanonicalMilestoneRoot } from "./worktree-manager.js";
 import { hasImplementationArtifacts } from "./milestone-implementation-evidence.js";
 import { loadAllCaptures, loadPendingCaptures } from "./captures.js";
+import { checkCloseoutConsistencyGate } from "./closeout-consistency-gate.js";
 // Re-export so existing consumers of auto-recovery.ts keep working.
 export { resolveExpectedArtifactPath, diagnoseExpectedArtifact };
 export { classifyMilestoneSummaryContent, } from "./milestone-summary-classifier.js";
@@ -571,10 +572,8 @@ export function verifyExpectedArtifact(unitType, unitId, base) {
             return false;
         const { milestone: mid } = parseUnitId(unitId);
         if (mid && isDbAvailable()) {
-            const dbMilestone = getMilestone(mid);
-            if (!dbMilestone)
-                return false;
-            if (!isClosedStatus(dbMilestone.status) && summaryOutcome !== "success")
+            const closeoutGate = checkCloseoutConsistencyGate(mid, { refreshFromDisk: true });
+            if (!closeoutGate.ok)
                 return false;
         }
         if (hasImplementationArtifacts(base, mid) === "absent")

package/dist/resources/extensions/gsd/auto-timers.js CHANGED Viewed

@@ -104,6 +104,14 @@ export function startUnitSupervision(sctx) {
     const softTimeoutMs = supervisionTimeouts.softTimeoutMs;
     const idleTimeoutMs = supervisionTimeouts.idleTimeoutMs;
     const hardTimeoutMs = supervisionTimeouts.hardTimeoutMs;
+    // A single hung tool gets its own short budget, NOT the general idle window:
+    // a long-but-progressing session is not idle, but a tool stuck for minutes
+    // is. Falls back to the idle window only if misconfigured to zero. The
+    // hung-tool budget is intentionally not scaled by task estimate — a stuck
+    // tool call is stuck regardless of how long the overall task should take.
+    const stalledToolTimeoutMs = (supervisor.stalled_tool_timeout_minutes ?? 0) > 0
+        ? supervisor.stalled_tool_timeout_minutes * 60 * 1000
+        : idleTimeoutMs;
     // ── 1. Soft timeout warning ──
     s.wrapupWarningHandle = setTimeout(() => {
         s.wrapupWarningHandle = null;
@@ -144,10 +152,12 @@ export function startUnitSupervision(sctx) {
             const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
             if (!runtime)
                 return;
-            if (Date.now() - runtime.lastProgressAt < idleTimeoutMs)
-                return;
-            // Agent has tool calls currently executing — not idle, just waiting.
-            // But only suppress recovery if the tool started recently.
+            // In-flight tool handling runs on its own dedicated hung-tool budget,
+            // independent of the general idle gate below, so a genuinely stuck tool
+            // is caught in minutes instead of waiting out the (typically much longer)
+            // idle window (#2527, follow-up). A tool actively executing within budget
+            // is real progress, so refreshing lastProgressAt here also keeps the idle
+            // gate from firing during legitimate long-running tool calls.
             let stalledToolDetected = false;
             if (getInFlightToolCount() > 0) {
                 // User-interactive tools (ask_user_questions, secure_env_collect) block
@@ -161,21 +171,25 @@ export function startUnitSupervision(sctx) {
                 }
                 const oldestStart = getOldestInFlightToolStart();
                 const toolAgeMs = Date.now() - oldestStart;
-                if (toolAgeMs < idleTimeoutMs) {
+                if (toolAgeMs < stalledToolTimeoutMs) {
                     writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
                         lastProgressAt: Date.now(),
                         lastProgressKind: "tool-in-flight",
                     });
                     return;
                 }
-                // Tool has been in-flight longer than idle timeout — treat as hung.
-                // Clear the stale entries so subsequent ticks don't re-detect them,
-                // and set the flag so the filesystem-activity check below does not
-                // override the stall verdict (#2527).
+                // Tool has been in-flight longer than the hung-tool budget — treat as
+                // hung. Clear the stale entries so subsequent ticks don't re-detect
+                // them, and set the flag so the idle gate and filesystem-activity check
+                // below do not override the stall verdict (#2527).
                 stalledToolDetected = true;
                 clearInFlightTools();
-                ctx.ui.notify(`Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`, "warning");
+                ctx.ui.notify(`Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min (budget ${Math.round(stalledToolTimeoutMs / 60000)}min). Treating as hung — attempting idle recovery.`, "warning");
             }
+            // No hung tool — apply the general idle gate. A unit that has made
+            // meaningful progress within the idle window is not idle yet.
+            if (!stalledToolDetected && Date.now() - runtime.lastProgressAt < idleTimeoutMs)
+                return;
             // Check if the agent is producing work on disk.
             // Skip this when a stalled tool was just detected — filesystem changes
             // from earlier in the task should not override the stall verdict (#2527).

package/dist/resources/extensions/gsd/auto-unit-tool-scope.js CHANGED Viewed

@@ -1,57 +1,6 @@
 import { parseUnitId } from "./unit-id.js";
-import { RUN_UAT_WORKFLOW_TOOL_NAMES } from "./tool-presentation-plan.js";
-export const RUN_UAT_BROWSER_TOOL_NAMES = [
-    "browser_navigate",
-    "browser_click",
-    "browser_type",
-    "browser_fill_form",
-    "browser_click_ref",
-    "browser_fill_ref",
-    "browser_wait_for",
-    "browser_assert",
-    "browser_verify",
-    "browser_screenshot",
-    "browser_snapshot_refs",
-    "browser_find",
-    "browser_get_console_logs",
-    "browser_get_network_logs",
-    "browser_evaluate",
-    "browser_reload",
-    "browser_batch",
-    "browser_act",
-];
-export const AUTO_UNIT_SCOPED_TOOLS = {
-    "research-milestone": ["gsd_summary_save", "gsd_decision_save"],
-    "plan-milestone": ["gsd_plan_milestone", "gsd_decision_save", "gsd_requirement_update"],
-    "discuss-milestone": [
-        "gsd_summary_save",
-        "gsd_decision_save",
-        "gsd_requirement_save",
-        "gsd_requirement_update",
-        "gsd_plan_milestone",
-        "gsd_milestone_generate_id",
-    ],
-    "discuss-slice": ["gsd_summary_save", "gsd_decision_save"],
-    "validate-milestone": ["gsd_validate_milestone", "gsd_reassess_roadmap", "subagent"],
-    "complete-milestone": ["gsd_complete_milestone", "subagent"],
-    "research-slice": ["gsd_summary_save", "gsd_decision_save"],
-    "plan-slice": ["gsd_plan_slice", "gsd_plan_task", "gsd_decision_save"],
-    "refine-slice": ["gsd_plan_slice", "gsd_plan_task", "gsd_decision_save"],
-    "replan-slice": ["gsd_replan_slice", "gsd_plan_task", "gsd_decision_save"],
-    "complete-slice": ["gsd_slice_complete", "gsd_task_reopen", "gsd_replan_slice", "gsd_decision_save", "gsd_requirement_update", "subagent"],
-    "reassess-roadmap": ["gsd_reassess_roadmap"],
-    "execute-task": ["gsd_task_complete", "gsd_decision_save"],
-    "execute-task-simple": ["gsd_task_complete", "gsd_decision_save"],
-    "reactive-execute": ["gsd_task_complete", "gsd_decision_save"],
-    "run-uat": [...RUN_UAT_WORKFLOW_TOOL_NAMES, "subagent", ...RUN_UAT_BROWSER_TOOL_NAMES],
-    "gate-evaluate": ["gsd_save_gate_result"],
-    "rewrite-docs": ["gsd_summary_save", "gsd_decision_save"],
-    "workflow-preferences": ["gsd_summary_save"],
-    "discuss-project": ["gsd_summary_save", "gsd_decision_save", "gsd_requirement_save"],
-    "discuss-requirements": ["gsd_requirement_save", "gsd_summary_save"],
-    "research-decision": ["gsd_summary_save"],
-    "research-project": ["gsd_summary_save", "gsd_decision_save"],
-};
+import { AUTO_UNIT_SCOPED_TOOLS, getForbiddenGsdToolReason, } from "./unit-tool-contracts.js";
+export { AUTO_UNIT_SCOPED_TOOLS, RUN_UAT_BROWSER_TOOL_NAMES, } from "./unit-tool-contracts.js";
 const WORKFLOW_TOOL_ALIASES = {
     gsd_save_decision: "gsd_decision_save",
     gsd_update_requirement: "gsd_requirement_update",
@@ -88,6 +37,7 @@ const SCOPED_GSD_LIFECYCLE_TOOLS = new Set([
 ]
     .filter((tool) => tool.startsWith("gsd_"))
     .map(canonicalWorkflowToolName));
+export const GSD_PHASE_SCOPE_DISPLAY_REASON = "This GSD phase only allows its scoped workflow tools.";
 function stripMcpToolPrefix(toolName) {
     if (!toolName.startsWith("mcp__"))
         return toolName;
@@ -103,11 +53,18 @@ export function isWorkflowAliasTool(toolName) {
 }
 function hardBlockReason(unitType, what) { // Builds the single-line HARD BLOCK message for `unitType`; `what` is the specific violation text.
     return [
-        `HARD BLOCK: unit "${unitType}" is constrained by auto-unit tool scope — ${what}.`,
+        `HARD BLOCK: Tool Contract failure for unit "${unitType}" — ${what}.`,
         "This is a mechanical phase-boundary gate. You MUST NOT proceed, retry the same call,",
         "or route around this block; the orchestrator owns phase transitions.",
     ].join(" "); // the three sentences are joined with single spaces
 }
+function hardBlock(unitType, what) { // Builds a { block: true } result pairing the detailed hardBlockReason text with the fixed GSD_PHASE_SCOPE_DISPLAY_REASON.
+    return {
+        block: true,
+        reason: hardBlockReason(unitType, what),
+        displayReason: GSD_PHASE_SCOPE_DISPLAY_REASON,
+    };
+}
 function allowedGsdToolsForUnit(unitType) {
     return [...new Set((AUTO_UNIT_SCOPED_TOOLS[unitType] ?? [])
             .filter((tool) => tool.startsWith("gsd_"))
@@ -143,20 +100,14 @@ function shouldBlockTaskCompletionScope(unitType, unitId, toolName, input) {
         actualTask === expected.task) {
         return { block: false };
     }
-    return {
-        block: true,
-        reason: hardBlockReason(unitType, `gsd_task_complete may only complete the active task ${expected.milestone}/${expected.slice}/${expected.task}; requested ${actualMilestone}/${actualSlice}/${actualTask}`),
-    };
+    return hardBlock(unitType, `gsd_task_complete may only complete the active task ${expected.milestone}/${expected.slice}/${expected.task}; requested ${actualMilestone}/${actualSlice}/${actualTask}`);
 }
 export function shouldBlockAutoUnitToolCall(unitType, toolName, input, unitId) {
     const scopedTools = AUTO_UNIT_SCOPED_TOOLS[unitType];
     if (!scopedTools)
         return { block: false };
     if (isNativeWorkflowTool(toolName)) {
-        return {
-            block: true,
-            reason: hardBlockReason(unitType, "native Workflow is not permitted inside a dispatched GSD auto-mode unit"),
-        };
+        return hardBlock(unitType, "native Workflow is not permitted inside a dispatched GSD auto-mode unit");
     }
     const taskScope = shouldBlockTaskCompletionScope(unitType, unitId, toolName, input);
     if (taskScope.block)
@@ -167,8 +118,9 @@ export function shouldBlockAutoUnitToolCall(unitType, toolName, input, unitId) {
     const allowedTools = allowedGsdToolsForUnit(unitType);
     if (allowedTools.includes(canonicalTool))
         return { block: false };
-    return {
-        block: true,
-        reason: hardBlockReason(unitType, `GSD lifecycle tool "${canonicalTool}" is not permitted; allowed GSD tools: ${allowedTools.length > 0 ? allowedTools.join(", ") : "(none)"}`),
-    };
+    const forbiddenReason = getForbiddenGsdToolReason(unitType, canonicalTool);
+    if (forbiddenReason) {
+        return hardBlock(unitType, `GSD lifecycle tool "${canonicalTool}" is not permitted; ${forbiddenReason} Fix unit-tool-contracts.ts or the ${unitType} prompt.`);
+    }
+    return hardBlock(unitType, `GSD lifecycle tool "${canonicalTool}" is not permitted; allowed GSD tools: ${allowedTools.length > 0 ? allowedTools.join(", ") : "(none)"}`);
 }