@opengsd/gsd-pi 1.1.1-dev.75048e7 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/dist/resources/.managed-resources-content-hash +1 -1
  2. package/dist/resources/extensions/browser-tools/engine/managed-gsd-browser.js +18 -2
  3. package/dist/resources/extensions/browser-tools/engine/selection.js +1 -1
  4. package/dist/resources/extensions/browser-tools/extension-manifest.json +1 -1
  5. package/dist/resources/extensions/browser-tools/index.js +29 -2
  6. package/dist/resources/extensions/browser-tools/web-app-detect.js +52 -0
  7. package/dist/resources/extensions/gsd/auto/phases.js +45 -3
  8. package/dist/resources/extensions/gsd/auto/session.js +2 -0
  9. package/dist/resources/extensions/gsd/auto-dispatch.js +10 -2
  10. package/dist/resources/extensions/gsd/auto-model-selection.js +26 -0
  11. package/dist/resources/extensions/gsd/auto-timers.js +24 -10
  12. package/dist/resources/extensions/gsd/auto.js +26 -4
  13. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +29 -21
  14. package/dist/resources/extensions/gsd/bootstrap/system-context.js +1 -1
  15. package/dist/resources/extensions/gsd/commands/handlers/auto.js +10 -0
  16. package/dist/resources/extensions/gsd/commands-mcp-status.js +1 -1
  17. package/dist/resources/extensions/gsd/config-overlay.js +1 -0
  18. package/dist/resources/extensions/gsd/context-masker.js +129 -5
  19. package/dist/resources/extensions/gsd/guided-flow.js +4 -1
  20. package/dist/resources/extensions/gsd/planner-handoff.js +98 -0
  21. package/dist/resources/extensions/gsd/preferences-models.js +1 -0
  22. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
  23. package/dist/resources/extensions/gsd/prompts/run-uat.md +2 -2
  24. package/dist/resources/extensions/gsd/prompts/system.md +1 -1
  25. package/dist/resources/extensions/gsd/skill-manifest.js +12 -0
  26. package/dist/resources/extensions/gsd/tool-contract.js +1 -1
  27. package/dist/resources/extensions/gsd/tool-presentation-plan.js +19 -2
  28. package/dist/resources/extensions/gsd/tools/complete-slice.js +28 -1
  29. package/dist/resources/extensions/gsd/tools/workflow-tool-executors.js +32 -4
  30. package/dist/resources/extensions/gsd/unit-tool-contracts.js +38 -14
  31. package/dist/resources/extensions/gsd/workflow-mcp.js +2 -3
  32. package/dist/resources/extensions/gsd/worktree-manager.js +26 -0
  33. package/dist/resources/extensions/gsd/worktree-reentry.js +96 -0
  34. package/dist/resources/extensions/shared/gsd-browser-cli.js +6 -0
  35. package/dist/web/standalone/.next/BUILD_ID +1 -1
  36. package/dist/web/standalone/.next/app-path-routes-manifest.json +8 -8
  37. package/dist/web/standalone/.next/build-manifest.json +2 -2
  38. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  39. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  40. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  48. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  51. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  54. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  55. package/dist/web/standalone/.next/server/app/index.html +1 -1
  56. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  57. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  58. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  59. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  60. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  61. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  62. package/dist/web/standalone/.next/server/app-paths-manifest.json +8 -8
  63. package/dist/web/standalone/.next/server/chunks/8357.js +1 -1
  64. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  65. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  66. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  67. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  68. package/package.json +1 -1
  69. package/packages/cloud-mcp-gateway/package.json +2 -2
  70. package/packages/contracts/package.json +1 -1
  71. package/packages/daemon/package.json +4 -4
  72. package/packages/gsd-agent-core/package.json +5 -5
  73. package/packages/gsd-agent-modes/package.json +7 -7
  74. package/packages/mcp-server/package.json +3 -3
  75. package/packages/native/package.json +1 -1
  76. package/packages/pi-agent-core/package.json +1 -1
  77. package/packages/pi-ai/dist/models.generated.d.ts +158 -2
  78. package/packages/pi-ai/dist/models.generated.d.ts.map +1 -1
  79. package/packages/pi-ai/dist/models.generated.js +149 -9
  80. package/packages/pi-ai/dist/models.generated.js.map +1 -1
  81. package/packages/pi-ai/dist/providers/transform-messages.d.ts.map +1 -1
  82. package/packages/pi-ai/dist/providers/transform-messages.js +8 -1
  83. package/packages/pi-ai/dist/providers/transform-messages.js.map +1 -1
  84. package/packages/pi-ai/package.json +1 -1
  85. package/packages/pi-coding-agent/package.json +7 -7
  86. package/packages/pi-tui/package.json +1 -1
  87. package/packages/rpc-client/package.json +2 -2
  88. package/pkg/package.json +1 -1
  89. package/scripts/install/handoff.js +16 -3
  90. package/src/resources/extensions/browser-tools/engine/managed-gsd-browser.ts +21 -2
  91. package/src/resources/extensions/browser-tools/engine/selection.ts +1 -1
  92. package/src/resources/extensions/browser-tools/extension-manifest.json +1 -1
  93. package/src/resources/extensions/browser-tools/index.ts +36 -5
  94. package/src/resources/extensions/browser-tools/tests/browser-engine-selection.test.mjs +2 -2
  95. package/src/resources/extensions/browser-tools/tests/gsd-browser-launch-config.test.mjs +37 -0
  96. package/src/resources/extensions/browser-tools/tests/web-app-detect.test.mjs +68 -0
  97. package/src/resources/extensions/browser-tools/web-app-detect.ts +63 -0
  98. package/src/resources/extensions/gsd/auto/phases.ts +48 -6
  99. package/src/resources/extensions/gsd/auto/session.ts +2 -0
  100. package/src/resources/extensions/gsd/auto-dispatch.ts +34 -2
  101. package/src/resources/extensions/gsd/auto-model-selection.ts +26 -0
  102. package/src/resources/extensions/gsd/auto-timers.ts +25 -9
  103. package/src/resources/extensions/gsd/auto.ts +28 -4
  104. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +40 -21
  105. package/src/resources/extensions/gsd/bootstrap/system-context.ts +1 -1
  106. package/src/resources/extensions/gsd/commands/handlers/auto.ts +9 -0
  107. package/src/resources/extensions/gsd/commands-mcp-status.ts +1 -1
  108. package/src/resources/extensions/gsd/config-overlay.ts +1 -0
  109. package/src/resources/extensions/gsd/context-masker.ts +152 -5
  110. package/src/resources/extensions/gsd/guided-flow.ts +4 -1
  111. package/src/resources/extensions/gsd/planner-handoff.ts +149 -0
  112. package/src/resources/extensions/gsd/preferences-models.ts +1 -0
  113. package/src/resources/extensions/gsd/preferences-types.ts +8 -0
  114. package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
  115. package/src/resources/extensions/gsd/prompts/run-uat.md +2 -2
  116. package/src/resources/extensions/gsd/prompts/system.md +1 -1
  117. package/src/resources/extensions/gsd/skill-manifest.ts +12 -0
  118. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +99 -0
  119. package/src/resources/extensions/gsd/tests/auto-model-selection-tool-poisoning.test.ts +66 -4
  120. package/src/resources/extensions/gsd/tests/auto-supervisor.test.mjs +4 -0
  121. package/src/resources/extensions/gsd/tests/bundled-skill-triggers.test.ts +9 -0
  122. package/src/resources/extensions/gsd/tests/complete-slice-verification-gate.test.ts +118 -0
  123. package/src/resources/extensions/gsd/tests/context-masker.test.ts +56 -1
  124. package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts +1 -0
  125. package/src/resources/extensions/gsd/tests/dispatch-rule-coverage.test.ts +24 -0
  126. package/src/resources/extensions/gsd/tests/integration/run-uat.test.ts +1 -1
  127. package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +27 -0
  128. package/src/resources/extensions/gsd/tests/journal-integration.test.ts +1 -0
  129. package/src/resources/extensions/gsd/tests/mcp-project-config.test.ts +7 -1
  130. package/src/resources/extensions/gsd/tests/mcp-status.test.ts +1 -1
  131. package/src/resources/extensions/gsd/tests/planner-handoff.test.ts +100 -0
  132. package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +113 -1
  133. package/src/resources/extensions/gsd/tests/provider-switch-observer.test.ts +55 -0
  134. package/src/resources/extensions/gsd/tests/runtime-invariant-modules.test.ts +20 -0
  135. package/src/resources/extensions/gsd/tests/skill-manifest.test.ts +4 -3
  136. package/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +77 -10
  137. package/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +131 -2
  138. package/src/resources/extensions/gsd/tests/worktree-reentry.test.ts +102 -0
  139. package/src/resources/extensions/gsd/tool-contract.ts +1 -1
  140. package/src/resources/extensions/gsd/tool-presentation-plan.ts +21 -2
  141. package/src/resources/extensions/gsd/tools/complete-slice.ts +29 -1
  142. package/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +46 -4
  143. package/src/resources/extensions/gsd/unit-tool-contracts.ts +38 -14
  144. package/src/resources/extensions/gsd/workflow-mcp.ts +2 -3
  145. package/src/resources/extensions/gsd/worktree-manager.ts +32 -0
  146. package/src/resources/extensions/gsd/worktree-reentry.ts +103 -0
  147. package/src/resources/extensions/shared/gsd-browser-cli.ts +6 -0
  148. /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_buildManifest.js +0 -0
  149. /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_ssgManifest.js +0 -0
@@ -35,6 +35,8 @@ import { detectStuck } from "./detect-stuck.js";
35
35
  import { runUnit } from "./run-unit.js";
36
36
  import { debugLog } from "../debug-logger.js";
37
37
  import { resolveWorktreeProjectRoot, normalizeWorktreePathForCompare } from "../worktree-root.js";
38
+ import { buildManualValidationGuidance } from "../worktree-manager.js";
39
+ import { relSliceFile } from "../paths.js";
38
40
  import { classifyProject } from "../detection.js";
39
41
  import { MergeConflictError } from "../git-service.js";
40
42
  import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js";
@@ -83,6 +85,7 @@ import {
83
85
  supportsStructuredQuestions,
84
86
  } from "../workflow-mcp.js";
85
87
  import { prepareWorkflowMcpForProject } from "../workflow-mcp-auto-prep.js";
88
+ import { getToolBaselineSnapshot } from "../auto-model-selection.js";
86
89
  import type { DispatchAction } from "../auto-dispatch.js";
87
90
  import { resolveManifest } from "../unit-context-manifest.js";
88
91
  import { createWorktreeSafetyModule, type WorktreeSafetyResult } from "../worktree-safety.js";
@@ -397,6 +400,8 @@ async function validateSourceWriteWorktreeSafety(
397
400
 
398
401
  let consecutiveSessionTimeouts = 0;
399
402
  const MAX_SESSION_TIMEOUT_AUTO_RESUMES = 3;
403
+ /** Maximum zero-tool-call retries before pausing — context exhaustion is deterministic. */
404
+ const MAX_ZERO_TOOL_RETRIES = 1;
400
405
 
401
406
  export function resetSessionTimeoutState(): void {
402
407
  consecutiveSessionTimeouts = 0;
@@ -1446,7 +1451,13 @@ export async function runDispatch(
1446
1451
  const authMode = provider && typeof ctx.modelRegistry?.getProviderAuthMode === "function"
1447
1452
  ? ctx.modelRegistry.getProviderAuthMode(provider)
1448
1453
  : undefined;
1449
- const activeTools = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : [];
1454
+ // Use the baseline snapshot rather than the live active-tool set: a prior
1455
+ // unit's per-provider narrowing (hook overrides, Groq 128-tool cap, etc.)
1456
+ // can strip required MCP tools from the live set even though
1457
+ // selectAndApplyModel will restore them before the unit is dispatched.
1458
+ // Checking a stale-narrowed set causes false transport-preflight warnings
1459
+ // that repeat on every /gsd auto resume (#477 follow-up).
1460
+ const activeTools = getToolBaselineSnapshot(pi);
1450
1461
  // Deep planning intentionally keeps human checkpoints in plain chat. In
1451
1462
  // Claude Code/local MCP transports, structured question requests can be
1452
1463
  // cancelled outside the normal chat flow, which made approval gates easy to
@@ -1470,6 +1481,9 @@ export async function runDispatch(
1470
1481
  sessionContextWindow: ctx.model?.contextWindow,
1471
1482
  sessionProvider: ctx.model?.provider,
1472
1483
  modelRegistry: ctx.modelRegistry as MinimalModelRegistry | undefined,
1484
+ activeTools,
1485
+ sessionBaseUrl: ctx.model?.baseUrl,
1486
+ sessionAuthMode: authMode,
1473
1487
  });
1474
1488
  if (isUnhandledPhaseWarning(dispatchResult)) {
1475
1489
  deps.invalidateAllCaches();
@@ -1493,6 +1507,9 @@ export async function runDispatch(
1493
1507
  sessionContextWindow: ctx.model?.contextWindow,
1494
1508
  sessionProvider: ctx.model?.provider,
1495
1509
  modelRegistry: ctx.modelRegistry as MinimalModelRegistry | undefined,
1510
+ activeTools,
1511
+ sessionBaseUrl: ctx.model?.baseUrl,
1512
+ sessionAuthMode: authMode,
1496
1513
  });
1497
1514
  }
1498
1515
 
@@ -2711,14 +2728,27 @@ export async function runUnitPhase(
2711
2728
  unitId,
2712
2729
  });
2713
2730
  } else {
2731
+ const zeroToolKey = `${unitType}/${unitId}`;
2732
+ const attempt = (s.zeroToolRetryCount.get(zeroToolKey) ?? 0) + 1;
2714
2733
  debugLog("runUnitPhase", {
2715
2734
  phase: "zero-tool-calls",
2716
2735
  unitType,
2717
2736
  unitId,
2737
+ attempt,
2718
2738
  warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
2719
2739
  });
2740
+ if (attempt > MAX_ZERO_TOOL_RETRIES) {
2741
+ s.zeroToolRetryCount.delete(zeroToolKey);
2742
+ ctx.ui.notify(
2743
+ `${unitType} ${unitId} completed with 0 tool calls — context exhaustion, pausing auto-mode after ${MAX_ZERO_TOOL_RETRIES} retry.`,
2744
+ "error",
2745
+ );
2746
+ await deps.pauseAuto(ctx, pi);
2747
+ return { action: "break", reason: "zero-tool-calls-exhausted" };
2748
+ }
2749
+ s.zeroToolRetryCount.set(zeroToolKey, attempt);
2720
2750
  ctx.ui.notify(
2721
- `${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`,
2751
+ `${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry (attempt ${attempt}/${MAX_ZERO_TOOL_RETRIES})`,
2722
2752
  "warning",
2723
2753
  );
2724
2754
  return {
@@ -2748,6 +2778,7 @@ export async function runUnitPhase(
2748
2778
  if (artifactVerified) {
2749
2779
  s.unitDispatchCount.delete(dispatchKey);
2750
2780
  s.unitRecoveryCount.delete(`${unitType}/${unitId}`);
2781
+ s.zeroToolRetryCount.delete(dispatchKey);
2751
2782
  }
2752
2783
 
2753
2784
  // Write phase handoff anchor after successful research/planning completion
@@ -2927,10 +2958,21 @@ export async function runFinalize(
2927
2958
  }
2928
2959
 
2929
2960
  if (pauseAfterUatDispatch) {
2930
- ctx.ui.notify(
2931
- "UAT requires human execution. Auto-mode will pause after this unit writes the result file.",
2932
- "info",
2933
- );
2961
+ const pauseMid = iterData.mid;
2962
+ const pauseSliceId = pauseMid && iterData.unitId.startsWith(`${pauseMid}/`)
2963
+ ? iterData.unitId.slice(pauseMid.length + 1)
2964
+ : undefined;
2965
+ const guidance = pauseMid
2966
+ ? buildManualValidationGuidance(s.basePath, pauseMid, {
2967
+ uatPath: pauseSliceId
2968
+ ? relSliceFile(s.basePath, pauseMid, pauseSliceId, "UAT")
2969
+ : undefined,
2970
+ })
2971
+ : null;
2972
+ const pauseMessage = guidance
2973
+ ? `UAT requires human execution. Auto-mode will pause after this unit writes the result file.\n\n${guidance}`
2974
+ : "UAT requires human execution. Auto-mode will pause after this unit writes the result file.";
2975
+ ctx.ui.notify(pauseMessage, "info");
2934
2976
  await deps.pauseAuto(ctx, pi);
2935
2977
  debugLog("autoLoop", { phase: "exit", reason: "uat-pause" });
2936
2978
  clearFinalizingUnit();
@@ -176,6 +176,7 @@ export class AutoSession {
176
176
  readonly verificationRetryCount = new Map<string, number>();
177
177
  readonly verificationRetryFailureHashes = new Map<string, string>();
178
178
  readonly exhaustedVerificationUnits = new Set<string>();
179
+ readonly zeroToolRetryCount = new Map<string, number>();
179
180
  pausedSessionFile: string | null = null;
180
181
  pausedUnitType: string | null = null;
181
182
  pausedUnitId: string | null = null;
@@ -362,6 +363,7 @@ export class AutoSession {
362
363
  this.verificationRetryCount.clear();
363
364
  this.verificationRetryFailureHashes.clear();
364
365
  this.exhaustedVerificationUnits.clear();
366
+ this.zeroToolRetryCount.clear();
365
367
  this.pausedSessionFile = null;
366
368
  this.pausedUnitType = null;
367
369
  this.pausedUnitId = null;
@@ -17,7 +17,17 @@ import type { GSDPreferences } from "./preferences.js";
17
17
  import type { UatType } from "./files.js";
18
18
  import type { MinimalModelRegistry } from "./context-budget.js";
19
19
  import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
20
- import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, insertAssessment, setSliceSketchFlag, transaction, getAssessment } from "./gsd-db.js";
20
+ import {
21
+ isDbAvailable,
22
+ getMilestoneSlices,
23
+ getPendingGates,
24
+ markAllGatesOmitted,
25
+ getMilestone,
26
+ insertAssessment,
27
+ setSliceSketchFlag,
28
+ transaction,
29
+ getAssessment,
30
+ } from "./gsd-db.js";
21
31
  import { isClosedStatus } from "./status-guards.js";
22
32
  import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
23
33
 
@@ -76,6 +86,10 @@ import { isAutoActive } from "./auto.js";
76
86
  import { markDepthVerified } from "./bootstrap/write-gate.js";
77
87
  import { ensureWorkflowPreferencesCaptured } from "./planning-depth.js";
78
88
  import { MILESTONE_ID_RE } from "./milestone-ids.js";
89
+ import {
90
+ getWorkflowTransportSupportError,
91
+ getRequiredWorkflowToolsForAutoUnit,
92
+ } from "./workflow-mcp.js";
79
93
  import {
80
94
  PROJECT_RESEARCH_INFLIGHT_MARKER,
81
95
  } from "./project-research-policy.js";
@@ -136,6 +150,12 @@ export interface DispatchContext {
136
150
  modelRegistry?: MinimalModelRegistry;
137
151
  /** Session model provider, used for provider-specific effective context windows. */
138
152
  sessionProvider?: string;
153
+ /** Active tools in the current session, used for transport preflight checks. */
154
+ activeTools?: string[];
155
+ /** Session model base URL, used for transport preflight checks. */
156
+ sessionBaseUrl?: string;
157
+ /** Session model auth mode, used for transport preflight checks. */
158
+ sessionAuthMode?: "apiKey" | "oauth" | "externalCli" | "none";
139
159
  }
140
160
 
141
161
  function resolveExistingExpectedArtifact(
@@ -653,11 +673,23 @@ export const DISPATCH_RULES: DispatchRule[] = [
653
673
  },
654
674
  {
655
675
  name: "run-uat (post-completion)",
656
- match: async ({ state, mid, basePath, prefs }) => {
676
+ match: async ({ state, mid, basePath, prefs, sessionProvider, sessionAuthMode, activeTools, sessionBaseUrl }) => {
657
677
  const needsRunUat = await checkNeedsRunUat(basePath, mid, state, prefs);
658
678
  if (!needsRunUat) return null;
659
679
  const { sliceId, uatType } = needsRunUat;
660
680
 
681
+ // Transport preflight: verify required MCP tools are actually connected
682
+ // before consuming a retry attempt. Fixes tool-starved sessions burning
683
+ // all MAX_UAT_ATTEMPTS before stopping (#477).
684
+ const transportError = getWorkflowTransportSupportError(
685
+ sessionProvider,
686
+ getRequiredWorkflowToolsForAutoUnit("run-uat"),
687
+ { projectRoot: basePath, surface: "auto-mode", unitType: "run-uat", authMode: sessionAuthMode, baseUrl: sessionBaseUrl, activeTools },
688
+ );
689
+ if (transportError) {
690
+ return { action: "stop" as const, reason: transportError, level: "warning" as const };
691
+ }
692
+
661
693
  // Cap run-uat dispatch attempts to prevent infinite replay (#3624).
662
694
  // Check before incrementing so an exhausted counter cannot create a
663
695
  // no-progress skip loop that starves later dispatch rules.
@@ -90,6 +90,32 @@ export function clearToolBaseline(pi: ExtensionAPI | object): void {
90
90
  TOOL_BASELINE.delete(pi as unknown as object);
91
91
  }
92
92
 
93
+ /**
94
+ * Return the union of the pre-dispatch baseline tool set and the current live
95
+ * active tools, or just the live tools when no baseline has been recorded yet.
96
+ *
97
+ * Use this instead of `pi.getActiveTools()` anywhere you need the full tool
98
+ * surface for a preflight/routing check that runs BEFORE `selectAndApplyModel`
99
+ * restores the baseline — e.g. in `runDispatch` and `decideNextUnit`.
100
+ *
101
+ * The union is intentional:
102
+ * - Baseline covers tools that a prior unit's per-provider narrowing (hook
103
+ * overrides, Groq 128-tool cap, etc.) has removed from the live set.
104
+ * Those tools will be restored by `selectAndApplyModel` before dispatch, so
105
+ * dropping them from the preflight check would be a false negative.
106
+ * - Live set covers tools connected after the baseline was first captured
107
+ * (e.g. MCP servers attached mid-session or after a paused resume).
108
+ * Without the live merge, a stale baseline permanently hides newly
109
+ * connected MCP tools and prevents transport-preflight from clearing on
110
+ * resume (#477 follow-up).
111
+ */
112
+ export function getToolBaselineSnapshot(pi: ExtensionAPI): string[] {
113
+ const live = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : [];
114
+ const baseline = TOOL_BASELINE.get(pi as unknown as object);
115
+ if (baseline === undefined) return live;
116
+ return [...new Set([...baseline, ...live])];
117
+ }
118
+
93
119
  /**
94
120
  * Models eligible for the pre-dispatch policy gate. Prefer registry-available
95
121
  * models; when that list is empty (common after worktree resume before registry
@@ -147,6 +147,15 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
147
147
  const softTimeoutMs = supervisionTimeouts.softTimeoutMs;
148
148
  const idleTimeoutMs = supervisionTimeouts.idleTimeoutMs;
149
149
  const hardTimeoutMs = supervisionTimeouts.hardTimeoutMs;
150
+ // A single hung tool gets its own short budget, NOT the general idle window:
151
+ // a long-but-progressing session is not idle, but a tool stuck for minutes
152
+ // is. Falls back to the idle window only if misconfigured to zero. The
153
+ // hung-tool budget is intentionally not scaled by task estimate — a stuck
154
+ // tool call is stuck regardless of how long the overall task should take.
155
+ const stalledToolTimeoutMs =
156
+ (supervisor.stalled_tool_timeout_minutes ?? 0) > 0
157
+ ? supervisor.stalled_tool_timeout_minutes! * 60 * 1000
158
+ : idleTimeoutMs;
150
159
 
151
160
  // ── 1. Soft timeout warning ──
152
161
  s.wrapupWarningHandle = setTimeout(() => {
@@ -189,10 +198,13 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
189
198
  };
190
199
  const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
191
200
  if (!runtime) return;
192
- if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
193
201
 
194
- // Agent has tool calls currently executing not idle, just waiting.
195
- // But only suppress recovery if the tool started recently.
202
+ // In-flight tool handling runs on its own dedicated hung-tool budget,
203
+ // independent of the general idle gate below, so a genuinely stuck tool
204
+ // is caught in minutes instead of waiting out the (typically much longer)
205
+ // idle window (#2527, follow-up). A tool actively executing within budget
206
+ // is real progress, so refreshing lastProgressAt here also keeps the idle
207
+ // gate from firing during legitimate long-running tool calls.
196
208
  let stalledToolDetected = false;
197
209
  if (getInFlightToolCount() > 0) {
198
210
  // User-interactive tools (ask_user_questions, secure_env_collect) block
@@ -206,25 +218,29 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
206
218
  }
207
219
  const oldestStart = getOldestInFlightToolStart()!;
208
220
  const toolAgeMs = Date.now() - oldestStart;
209
- if (toolAgeMs < idleTimeoutMs) {
221
+ if (toolAgeMs < stalledToolTimeoutMs) {
210
222
  writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
211
223
  lastProgressAt: Date.now(),
212
224
  lastProgressKind: "tool-in-flight",
213
225
  });
214
226
  return;
215
227
  }
216
- // Tool has been in-flight longer than idle timeout — treat as hung.
217
- // Clear the stale entries so subsequent ticks don't re-detect them,
218
- // and set the flag so the filesystem-activity check below does not
219
- // override the stall verdict (#2527).
228
+ // Tool has been in-flight longer than the hung-tool budget — treat as
229
+ // hung. Clear the stale entries so subsequent ticks don't re-detect
230
+ // them, and set the flag so the idle gate and filesystem-activity check
231
+ // below do not override the stall verdict (#2527).
220
232
  stalledToolDetected = true;
221
233
  clearInFlightTools();
222
234
  ctx.ui.notify(
223
- `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`,
235
+ `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min (budget ${Math.round(stalledToolTimeoutMs / 60000)}min). Treating as hung — attempting idle recovery.`,
224
236
  "warning",
225
237
  );
226
238
  }
227
239
 
240
+ // No hung tool — apply the general idle gate. A unit that has made
241
+ // meaningful progress within the idle window is not idle yet.
242
+ if (!stalledToolDetected && Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
243
+
228
244
  // Check if the agent is producing work on disk.
229
245
  // Skip this when a stalled tool was just detected — filesystem changes
230
246
  // from earlier in the task should not override the stall verdict (#2527).
@@ -107,7 +107,7 @@ import {
107
107
  } from "./auto-tool-tracking.js";
108
108
  import { closeoutUnit } from "./auto-unit-closeout.js";
109
109
  import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
110
- import { selectAndApplyModel, resolveModelId, clearToolBaseline } from "./auto-model-selection.js";
110
+ import { selectAndApplyModel, resolveModelId, clearToolBaseline, getToolBaselineSnapshot } from "./auto-model-selection.js";
111
111
  import { resetRoutingHistory, recordOutcome } from "./routing-history.js";
112
112
  import {
113
113
  checkPostUnitHooks,
@@ -542,8 +542,26 @@ function handlePausedSessionResumeRecovery(
542
542
  ): { skippedReplay: boolean } {
543
543
  if (!state.pausedSessionFile) return { skippedReplay: false };
544
544
 
545
- const pausedRecoveryUnitType = state.currentUnit?.type ?? state.pausedUnitType ?? "unknown";
546
- const pausedRecoveryUnitId = state.currentUnit?.id ?? state.pausedUnitId ?? "unknown";
545
+ const pausedRecoveryUnitType = state.currentUnit?.type ?? state.pausedUnitType ?? null;
546
+ const pausedRecoveryUnitId = state.currentUnit?.id ?? state.pausedUnitId ?? null;
547
+
548
+ // When the paused-session metadata never captured the unit identity (the
549
+ // pause happened between units, or the worker died before currentUnit was
550
+ // set), we have nothing to verify against and nothing correct to target. A
551
+ // replay synthesized with an "unknown" unit re-injects an unbounded,
552
+ // mis-identified tool-call blob into the fresh resume context — exactly the
553
+ // thrash that turns one stuck unit into several. Disk state has already been
554
+ // rebuilt (rebuildState + doctor) before this runs, so skip the replay and
555
+ // let the normal dispatcher recompute the next unit from disk.
556
+ if (!pausedRecoveryUnitType || !pausedRecoveryUnitId) {
557
+ state.pausedSessionFile = null;
558
+ state.pausedUnitType = null;
559
+ state.pausedUnitId = null;
560
+ state.pendingCrashRecovery = null;
561
+ notify("Paused session had no recorded unit identity. Skipping tool-call replay and resuming from disk state.");
562
+ return { skippedReplay: true };
563
+ }
564
+
547
565
  const completedPausedUnit = verifyExpectedArtifact(
548
566
  pausedRecoveryUnitType,
549
567
  pausedRecoveryUnitId,
@@ -2154,7 +2172,10 @@ export function createWiredDispatchAdapter(
2154
2172
  sessionProvider && typeof ctx.modelRegistry?.getProviderAuthMode === "function"
2155
2173
  ? ctx.modelRegistry.getProviderAuthMode(sessionProvider)
2156
2174
  : undefined;
2157
- const activeTools = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : [];
2175
+ // Use baseline snapshot — same reason as phases.ts:runDispatch: the live
2176
+ // active set may be narrowed by the prior unit before selectAndApplyModel
2177
+ // restores it, causing false transport-preflight failures (#477 follow-up).
2178
+ const activeTools = getToolBaselineSnapshot(pi);
2158
2179
  // Mirrors runDispatch: deep-planning keeps approval gates in plain chat
2159
2180
  // because structured questions can be cancelled outside the chat turn on
2160
2181
  // some transports.
@@ -2201,6 +2222,9 @@ export function createWiredDispatchAdapter(
2201
2222
  sessionContextWindow,
2202
2223
  sessionProvider,
2203
2224
  modelRegistry,
2225
+ activeTools,
2226
+ sessionAuthMode: authMode,
2227
+ sessionBaseUrl: ctx.model?.baseUrl,
2204
2228
  });
2205
2229
 
2206
2230
  if (action.action === "stop") {
@@ -259,7 +259,19 @@ export function buildRunUatGsdToolSet(
259
259
  ...RUN_UAT_BROWSER_TOOL_NAMES,
260
260
  ],
261
261
  );
262
- return [...new Set(scoped)];
262
+ const resolved = [...new Set(scoped)];
263
+
264
+ const unresolved = RUN_UAT_WORKFLOW_TOOL_NAMES.filter(
265
+ (tool) => !resolved.some((name) => name === tool || (name.startsWith("mcp__") && name.endsWith(`__${tool}`))),
266
+ );
267
+ if (unresolved.length > 0) {
268
+ safetyLogWarning(
269
+ "bootstrap",
270
+ `buildRunUatGsdToolSet: required run-uat workflow tool(s) not found in active/registered surface: ${unresolved.join(", ")}. Session may lack gsd-workflow MCP connection.`,
271
+ );
272
+ }
273
+
274
+ return resolved;
263
275
  }
264
276
 
265
277
  export function buildMinimalGsdWorkflowToolSet(
@@ -577,6 +589,17 @@ export function registerHooks(
577
589
  if (isAutoActive() || preserveCloseoutSurface) {
578
590
  ctx.ui.setWidget("gsd-health", undefined);
579
591
  }
592
+ // Cold start after /quit relaunches with cwd at the project root. When
593
+ // auto-mode is neither active nor paused (its own resume path re-enters the
594
+ // worktree with a lease check — auto.ts:3032), proactively chdir back into
595
+ // the active milestone's worktree so subsequent work isn't stranded at the
596
+ // root. Best-effort and a no-op when already inside a worktree.
597
+ if (!isAutoActive() && !isAutoPaused() && !preserveCloseoutSurface) {
598
+ try {
599
+ const { reenterActiveWorktreeIfNeeded } = await import("../worktree-reentry.js");
600
+ await reenterActiveWorktreeIfNeeded(basePath);
601
+ } catch { /* non-fatal */ }
602
+ }
580
603
  });
581
604
 
582
605
  pi.on("session_switch", async (_event, ctx) => {
@@ -1293,17 +1316,25 @@ export function registerHooks(
1293
1316
  if (isAutoActive()) {
1294
1317
  try {
1295
1318
  const { loadEffectiveGSDPreferences } = await import("../preferences.js");
1319
+ const {
1320
+ createObservationMask,
1321
+ createResponsesInputObservationMask,
1322
+ truncateContextResultMessages,
1323
+ truncateResponsesInputResultItems,
1324
+ } = await import("../context-masker.js");
1296
1325
  const prefs = loadEffectiveGSDPreferences();
1297
1326
  const cmConfig = prefs?.preferences.context_management;
1298
1327
 
1299
1328
  // Observation masking: replace old tool results with placeholders
1300
1329
  if (cmConfig?.observation_masking !== false) {
1301
1330
  const keepTurns = cmConfig?.observation_mask_turns ?? 8;
1302
- const { createObservationMask } = await import("../context-masker.js");
1303
- const mask = createObservationMask(keepTurns);
1304
1331
  const messages = payload.messages;
1305
1332
  if (Array.isArray(messages)) {
1306
- payload.messages = mask(messages);
1333
+ payload.messages = createObservationMask(keepTurns)(messages);
1334
+ }
1335
+ const input = payload.input;
1336
+ if (Array.isArray(input)) {
1337
+ payload.input = createResponsesInputObservationMask(keepTurns)(input);
1307
1338
  }
1308
1339
  }
1309
1340
 
@@ -1313,23 +1344,11 @@ export function registerHooks(
1313
1344
  const maxChars = cmConfig?.tool_result_max_chars ?? 800;
1314
1345
  const msgs = payload.messages;
1315
1346
  if (Array.isArray(msgs)) {
1316
- payload.messages = msgs.map((msg: Record<string, unknown>) => {
1317
- // Match toolResult messages (role: "toolResult", content is array of content blocks)
1318
- if (msg?.role === "toolResult" && Array.isArray(msg.content)) {
1319
- const blocks = msg.content as Array<Record<string, unknown>>;
1320
- const totalLen = blocks.reduce((sum: number, b) => sum + (typeof b.text === "string" ? b.text.length : 0), 0);
1321
- if (totalLen > maxChars) {
1322
- const truncated = blocks.map(b => {
1323
- if (typeof b.text === "string" && b.text.length > maxChars) {
1324
- return { ...b, text: b.text.slice(0, maxChars) + "\n…[truncated]" };
1325
- }
1326
- return b;
1327
- });
1328
- return { ...msg, content: truncated };
1329
- }
1330
- }
1331
- return msg;
1332
- });
1347
+ payload.messages = truncateContextResultMessages(msgs as any, maxChars);
1348
+ }
1349
+ const input = payload.input;
1350
+ if (Array.isArray(input)) {
1351
+ payload.input = truncateResponsesInputResultItems(input as any, maxChars);
1333
1352
  }
1334
1353
  } catch { /* non-fatal */ }
1335
1354
  }
@@ -61,7 +61,7 @@ export const BUNDLED_SKILL_TRIGGERS: Array<{ trigger: string; skill: string }> =
61
61
  { trigger: "Core Web Vitals — fix LCP, CLS, INP; layout shifts; page experience optimization", skill: "core-web-vitals" },
62
62
  { trigger: "GitHub Actions CI/CD — write, run, and debug workflow files; live syntax and run monitoring", skill: "github-workflows" },
63
63
  { trigger: "Comprehensive web quality audit — performance, accessibility, SEO, and best-practices (Lighthouse-style)", skill: "web-quality-audit" },
64
- { trigger: "gsd-browser UAT default browser MCP/CLI for real UI verification, screenshots, assertions, console/network diagnostics", skill: "gsd-browser" },
64
+ { trigger: "gsd-browser opt-in and External MCP UAT screenshots, assertions, console/network diagnostics", skill: "gsd-browser" },
65
65
  { trigger: "Browser automation — open sites, fill forms, click, screenshot, scrape, or test web apps programmatically", skill: "agent-browser" },
66
66
  { trigger: "Review UI code for Web Interface Guidelines compliance — UX, design, and accessibility patterns", skill: "web-design-guidelines" },
67
67
  { trigger: "UI/UX patterns reference — animations, CSS, typography, prefetching, icons (file:line findings)", skill: "userinterface-wiki" },
@@ -209,6 +209,15 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo
209
209
  if (trimmed === "") {
210
210
  if (!(await guardRemoteSession(ctx, pi))) return true;
211
211
  const basePath = projectRoot();
212
+ // Cold start after /quit lands at the project root, not the worktree. If the
213
+ // active milestone has a live worktree, chdir back into it now so the agent
214
+ // doesn't have to search for it. Best-effort; resolves to a no-op otherwise.
215
+ try {
216
+ const { reenterActiveWorktreeIfNeeded } = await import("../../worktree-reentry.js");
217
+ await reenterActiveWorktreeIfNeeded(basePath, {
218
+ notify: (message) => ctx.ui.notify(message, "info"),
219
+ });
220
+ } catch { /* non-fatal */ }
212
221
  const { hasGsdBootstrapArtifacts } = await import("../../detection.js");
213
222
  const { gsdRoot } = await import("../../paths.js");
214
223
  if (!hasGsdBootstrapArtifacts(gsdRoot(basePath))) {
@@ -73,7 +73,7 @@ export function formatMcpInitResult(
73
73
  `Config: ${configPath}`,
74
74
  "",
75
75
  "MCP-capable clients can now load the GSD workflow and gsd-browser MCP servers from this folder.",
76
- "Pi Providers use the managed gsd-browser engine directly; this project config is for External MCP Clients.",
76
+ "Pi Providers use built-in browser tools directly; this project config is for External MCP Clients.",
77
77
  "Restart or reconnect any client that already has this project open.",
78
78
  ].join("\n");
79
79
  }
@@ -139,6 +139,7 @@ function collectConfigSections(): ConfigSection[] {
139
139
  if (sup.model) supRows.push({ label: "Model", value: sup.model });
140
140
  supRows.push({ label: "Soft timeout", value: `${sup.soft_timeout_minutes}m` });
141
141
  supRows.push({ label: "Idle timeout", value: `${sup.idle_timeout_minutes}m` });
142
+ supRows.push({ label: "Stalled tool timeout", value: `${sup.stalled_tool_timeout_minutes}m` });
142
143
  supRows.push({ label: "Hard timeout", value: `${sup.hard_timeout_minutes}m` });
143
144
  sections.push({ title: "Auto Supervisor", rows: supRows });
144
145
  }