gsd-pi 2.22.0 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/README.md +25 -1
  2. package/dist/cli.js +62 -4
  3. package/dist/headless.d.ts +21 -0
  4. package/dist/headless.js +346 -0
  5. package/dist/help-text.js +32 -0
  6. package/dist/mcp-server.d.ts +20 -3
  7. package/dist/mcp-server.js +21 -1
  8. package/dist/models-resolver.d.ts +32 -0
  9. package/dist/models-resolver.js +50 -0
  10. package/dist/resources/extensions/bg-shell/output-formatter.ts +36 -16
  11. package/dist/resources/extensions/bg-shell/process-manager.ts +6 -4
  12. package/dist/resources/extensions/bg-shell/types.ts +33 -1
  13. package/dist/resources/extensions/browser-tools/capture.ts +18 -16
  14. package/dist/resources/extensions/browser-tools/index.ts +20 -0
  15. package/dist/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +25 -0
  16. package/dist/resources/extensions/browser-tools/tools/action-cache.ts +216 -0
  17. package/dist/resources/extensions/browser-tools/tools/codegen.ts +274 -0
  18. package/dist/resources/extensions/browser-tools/tools/device.ts +183 -0
  19. package/dist/resources/extensions/browser-tools/tools/extract.ts +229 -0
  20. package/dist/resources/extensions/browser-tools/tools/injection-detect.ts +221 -0
  21. package/dist/resources/extensions/browser-tools/tools/network-mock.ts +244 -0
  22. package/dist/resources/extensions/browser-tools/tools/pdf.ts +92 -0
  23. package/dist/resources/extensions/browser-tools/tools/state-persistence.ts +202 -0
  24. package/dist/resources/extensions/browser-tools/tools/visual-diff.ts +209 -0
  25. package/dist/resources/extensions/browser-tools/tools/zoom.ts +104 -0
  26. package/dist/resources/extensions/gsd/auto-dashboard.ts +2 -0
  27. package/dist/resources/extensions/gsd/auto-recovery.ts +10 -0
  28. package/dist/resources/extensions/gsd/auto.ts +437 -11
  29. package/dist/resources/extensions/gsd/captures.ts +49 -0
  30. package/dist/resources/extensions/gsd/commands.ts +20 -3
  31. package/dist/resources/extensions/gsd/dashboard-overlay.ts +16 -2
  32. package/dist/resources/extensions/gsd/diff-context.ts +73 -80
  33. package/dist/resources/extensions/gsd/doctor.ts +20 -1
  34. package/dist/resources/extensions/gsd/forensics.ts +95 -52
  35. package/dist/resources/extensions/gsd/guided-flow.ts +10 -5
  36. package/dist/resources/extensions/gsd/mcp-server.ts +33 -12
  37. package/dist/resources/extensions/gsd/post-unit-hooks.ts +2 -1
  38. package/dist/resources/extensions/gsd/prompts/execute-task.md +5 -0
  39. package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +104 -1
  40. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -0
  41. package/dist/resources/extensions/gsd/prompts/system.md +2 -1
  42. package/dist/resources/extensions/gsd/prompts/validate-milestone.md +91 -0
  43. package/dist/resources/extensions/gsd/roadmap-slices.ts +41 -1
  44. package/dist/resources/extensions/gsd/session-forensics.ts +36 -2
  45. package/dist/resources/extensions/gsd/templates/milestone-validation.md +62 -0
  46. package/dist/resources/extensions/gsd/tests/auto-lock-creation.test.ts +186 -0
  47. package/dist/resources/extensions/gsd/tests/auto-recovery.test.ts +64 -0
  48. package/dist/resources/extensions/gsd/tests/auto-skip-loop.test.ts +123 -0
  49. package/dist/resources/extensions/gsd/tests/doctor.test.ts +58 -0
  50. package/dist/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts +17 -6
  51. package/dist/resources/extensions/gsd/tests/integration/headless-command.ts +534 -0
  52. package/dist/resources/extensions/gsd/tests/roadmap-slices.test.ts +43 -1
  53. package/dist/resources/extensions/gsd/tests/triage-dispatch.test.ts +120 -0
  54. package/dist/resources/extensions/gsd/tests/triage-resolution.test.ts +203 -2
  55. package/dist/resources/extensions/gsd/tests/visualizer-overlay.test.ts +8 -3
  56. package/dist/resources/extensions/gsd/triage-resolution.ts +83 -0
  57. package/dist/resources/extensions/gsd/visualizer-overlay.ts +8 -1
  58. package/dist/resources/extensions/gsd/workspace-index.ts +34 -6
  59. package/package.json +1 -1
  60. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.d.ts +10 -0
  61. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.d.ts.map +1 -0
  62. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.js +79 -0
  63. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.js.map +1 -0
  64. package/packages/pi-coding-agent/dist/core/tools/bash.d.ts +18 -0
  65. package/packages/pi-coding-agent/dist/core/tools/bash.d.ts.map +1 -1
  66. package/packages/pi-coding-agent/dist/core/tools/bash.js +77 -1
  67. package/packages/pi-coding-agent/dist/core/tools/bash.js.map +1 -1
  68. package/packages/pi-coding-agent/dist/core/tools/index.d.ts +1 -1
  69. package/packages/pi-coding-agent/dist/core/tools/index.d.ts.map +1 -1
  70. package/packages/pi-coding-agent/dist/core/tools/index.js +1 -1
  71. package/packages/pi-coding-agent/dist/core/tools/index.js.map +1 -1
  72. package/packages/pi-coding-agent/dist/index.d.ts +1 -1
  73. package/packages/pi-coding-agent/dist/index.d.ts.map +1 -1
  74. package/packages/pi-coding-agent/dist/index.js +1 -1
  75. package/packages/pi-coding-agent/dist/index.js.map +1 -1
  76. package/packages/pi-coding-agent/src/core/tools/bash-background.test.ts +91 -0
  77. package/packages/pi-coding-agent/src/core/tools/bash.ts +83 -1
  78. package/packages/pi-coding-agent/src/core/tools/index.ts +1 -0
  79. package/packages/pi-coding-agent/src/index.ts +1 -0
  80. package/src/resources/extensions/bg-shell/output-formatter.ts +36 -16
  81. package/src/resources/extensions/bg-shell/process-manager.ts +6 -4
  82. package/src/resources/extensions/bg-shell/types.ts +33 -1
  83. package/src/resources/extensions/browser-tools/capture.ts +18 -16
  84. package/src/resources/extensions/browser-tools/index.ts +20 -0
  85. package/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +25 -0
  86. package/src/resources/extensions/browser-tools/tools/action-cache.ts +216 -0
  87. package/src/resources/extensions/browser-tools/tools/codegen.ts +274 -0
  88. package/src/resources/extensions/browser-tools/tools/device.ts +183 -0
  89. package/src/resources/extensions/browser-tools/tools/extract.ts +229 -0
  90. package/src/resources/extensions/browser-tools/tools/injection-detect.ts +221 -0
  91. package/src/resources/extensions/browser-tools/tools/network-mock.ts +244 -0
  92. package/src/resources/extensions/browser-tools/tools/pdf.ts +92 -0
  93. package/src/resources/extensions/browser-tools/tools/state-persistence.ts +202 -0
  94. package/src/resources/extensions/browser-tools/tools/visual-diff.ts +209 -0
  95. package/src/resources/extensions/browser-tools/tools/zoom.ts +104 -0
  96. package/src/resources/extensions/gsd/auto-dashboard.ts +2 -0
  97. package/src/resources/extensions/gsd/auto-recovery.ts +10 -0
  98. package/src/resources/extensions/gsd/auto.ts +437 -11
  99. package/src/resources/extensions/gsd/captures.ts +49 -0
  100. package/src/resources/extensions/gsd/commands.ts +20 -3
  101. package/src/resources/extensions/gsd/dashboard-overlay.ts +16 -2
  102. package/src/resources/extensions/gsd/diff-context.ts +73 -80
  103. package/src/resources/extensions/gsd/doctor.ts +20 -1
  104. package/src/resources/extensions/gsd/forensics.ts +95 -52
  105. package/src/resources/extensions/gsd/guided-flow.ts +10 -5
  106. package/src/resources/extensions/gsd/mcp-server.ts +33 -12
  107. package/src/resources/extensions/gsd/post-unit-hooks.ts +2 -1
  108. package/src/resources/extensions/gsd/prompts/execute-task.md +5 -0
  109. package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +104 -1
  110. package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -0
  111. package/src/resources/extensions/gsd/prompts/system.md +2 -1
  112. package/src/resources/extensions/gsd/prompts/validate-milestone.md +91 -0
  113. package/src/resources/extensions/gsd/roadmap-slices.ts +41 -1
  114. package/src/resources/extensions/gsd/session-forensics.ts +36 -2
  115. package/src/resources/extensions/gsd/templates/milestone-validation.md +62 -0
  116. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +186 -0
  117. package/src/resources/extensions/gsd/tests/auto-recovery.test.ts +64 -0
  118. package/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts +123 -0
  119. package/src/resources/extensions/gsd/tests/doctor.test.ts +58 -0
  120. package/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts +17 -6
  121. package/src/resources/extensions/gsd/tests/integration/headless-command.ts +534 -0
  122. package/src/resources/extensions/gsd/tests/roadmap-slices.test.ts +43 -1
  123. package/src/resources/extensions/gsd/tests/triage-dispatch.test.ts +120 -0
  124. package/src/resources/extensions/gsd/tests/triage-resolution.test.ts +203 -2
  125. package/src/resources/extensions/gsd/tests/visualizer-overlay.test.ts +8 -3
  126. package/src/resources/extensions/gsd/triage-resolution.ts +83 -0
  127. package/src/resources/extensions/gsd/visualizer-overlay.ts +8 -1
  128. package/src/resources/extensions/gsd/workspace-index.ts +34 -6
@@ -125,6 +125,18 @@ import {
125
125
  reconcileMergeState,
126
126
  } from "./auto-recovery.js";
127
127
  import { resolveDispatch, resetRewriteCircuitBreaker } from "./auto-dispatch.js";
128
+ import {
129
+ buildResearchSlicePrompt,
130
+ buildResearchMilestonePrompt,
131
+ buildPlanSlicePrompt,
132
+ buildPlanMilestonePrompt,
133
+ buildExecuteTaskPrompt,
134
+ buildCompleteSlicePrompt,
135
+ buildCompleteMilestonePrompt,
136
+ buildReassessRoadmapPrompt,
137
+ buildRunUatPrompt,
138
+ buildReplanSlicePrompt,
139
+ } from "./auto-prompts.js";
128
140
  import {
129
141
  type AutoDashboardData,
130
142
  updateProgressWidget as _updateProgressWidget,
@@ -211,6 +223,11 @@ const MAX_LIFETIME_DISPATCHES = 6;
211
223
  /** Tracks recovery attempt count per unit for backoff and diagnostics. */
212
224
  const unitRecoveryCount = new Map<string, number>();
213
225
 
226
+ /** Track consecutive skips per unit — catches infinite skip loops where deriveState
227
+ * keeps returning the same already-completed unit. Reset on any real dispatch. */
228
+ const unitConsecutiveSkips = new Map<string, number>();
229
+ const MAX_CONSECUTIVE_SKIPS = 3;
230
+
214
231
  /** Persisted completed-unit keys — survives restarts. Loaded from .gsd/completed-units.json. */
215
232
  const completedKeySet = new Set<string>();
216
233
 
@@ -297,6 +314,9 @@ let currentUnit: { type: string; id: string; startedAt: number } | null = null;
297
314
  /** Track dynamic routing decision for the current unit (for metrics) */
298
315
  let currentUnitRouting: { tier: string; modelDowngraded: boolean } | null = null;
299
316
 
317
+ /** Queue of quick-task captures awaiting dispatch after triage resolution */
318
+ let pendingQuickTasks: import("./captures.js").CaptureEntry[] = [];
319
+
300
320
  /**
301
321
  * Model captured at auto-mode start. Used to prevent model bleed between
302
322
  * concurrent GSD instances sharing the same global settings.json (#650).
@@ -334,8 +354,12 @@ let lastBaselineCharCount: number | undefined;
334
354
  /** SIGTERM handler registered while auto-mode is active — cleared on stop/pause. */
335
355
  let _sigtermHandler: (() => void) | null = null;
336
356
 
337
- /** Tool calls currently being executed — prevents false idle detection during long-running tools. */
338
- const inFlightTools = new Set<string>();
357
+ /**
358
+ * Tool calls currently being executed — prevents false idle detection during long-running tools.
359
+ * Maps toolCallId → start timestamp (ms) so the idle watchdog can detect tools that have been
360
+ * running suspiciously long (e.g., a Bash command hung because `&` kept stdout open).
361
+ */
362
+ const inFlightTools = new Map<string, number>();
339
363
 
340
364
  type BudgetAlertLevel = 0 | 75 | 90 | 100;
341
365
 
@@ -414,11 +438,11 @@ export function isAutoPaused(): boolean {
414
438
 
415
439
  /**
416
440
  * Mark a tool execution as in-flight. Called from index.ts on tool_execution_start.
417
- * Prevents the idle watchdog from declaring the agent idle while tools are executing.
441
+ * Records start time so the idle watchdog can detect tools hung longer than the idle timeout.
418
442
  */
419
443
  export function markToolStart(toolCallId: string): void {
420
444
  if (!active) return;
421
- inFlightTools.add(toolCallId);
445
+ inFlightTools.set(toolCallId, Date.now());
422
446
  }
423
447
 
424
448
  /**
@@ -428,6 +452,16 @@ export function markToolEnd(toolCallId: string): void {
428
452
  inFlightTools.delete(toolCallId);
429
453
  }
430
454
 
455
/**
 * Age, in milliseconds, of the in-flight tool call with the earliest recorded
 * start timestamp. Yields 0 when no tool call is currently tracked.
 * Exported for testing.
 */
export function getOldestInFlightToolAgeMs(): number {
  if (inFlightTools.size === 0) {
    return 0;
  }

  // Scan the recorded start timestamps for the smallest (earliest) one.
  let earliestStart = Infinity;
  for (const startedAt of inFlightTools.values()) {
    if (startedAt < earliestStart) {
      earliestStart = startedAt;
    }
  }

  return Date.now() - earliestStart;
}
464
+
431
465
  /**
432
466
  * Return the base path to use for the auto.lock file.
433
467
  * Always uses the original project root (not the worktree) so that
@@ -621,6 +655,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
621
655
  stepMode = false;
622
656
  unitDispatchCount.clear();
623
657
  unitRecoveryCount.clear();
658
+ unitConsecutiveSkips.clear();
624
659
  inFlightTools.clear();
625
660
  lastBudgetAlertLevel = 0;
626
661
  unitLifetimeDispatches.clear();
@@ -629,6 +664,7 @@ export async function stopAuto(ctx?: ExtensionContext, pi?: ExtensionAPI): Promi
629
664
  currentMilestoneId = null;
630
665
  originalBasePath = "";
631
666
  completedUnits = [];
667
+ pendingQuickTasks = [];
632
668
  clearSliceProgressCache();
633
669
  clearActivityLogState();
634
670
  resetProactiveHealing();
@@ -710,6 +746,7 @@ export async function startAuto(
710
746
  basePath = base;
711
747
  unitDispatchCount.clear();
712
748
  unitLifetimeDispatches.clear();
749
+ unitConsecutiveSkips.clear();
713
750
  // Re-initialize metrics in case ledger was lost during pause
714
751
  if (!getLedger()) initMetrics(base);
715
752
  // Ensure milestone ID is set on git service for integration branch resolution
@@ -782,6 +819,9 @@ export async function startAuto(
782
819
  pausedSessionFile = null;
783
820
  }
784
821
 
822
+ // Write lock on resume so cross-process status detection works (#723).
823
+ writeLock(lockBase(), "resuming", currentMilestoneId ?? "unknown", completedUnits.length);
824
+
785
825
  await dispatchNextUnit(ctx, pi);
786
826
  return;
787
827
  }
@@ -988,6 +1028,7 @@ export async function startAuto(
988
1028
  basePath = base;
989
1029
  unitDispatchCount.clear();
990
1030
  unitRecoveryCount.clear();
1031
+ unitConsecutiveSkips.clear();
991
1032
  lastBudgetAlertLevel = 0;
992
1033
  unitLifetimeDispatches.clear();
993
1034
  completedKeySet.clear();
@@ -998,6 +1039,7 @@ export async function startAuto(
998
1039
  autoStartTime = Date.now();
999
1040
  resourceSyncedAtOnStart = readResourceSyncedAt();
1000
1041
  completedUnits = [];
1042
+ pendingQuickTasks = [];
1001
1043
  currentUnit = null;
1002
1044
  currentMilestoneId = state.activeMilestone?.id ?? null;
1003
1045
  originalModelId = ctx.model?.id ?? null;
@@ -1116,6 +1158,11 @@ export async function startAuto(
1116
1158
  : "Will loop until milestone complete.";
1117
1159
  ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");
1118
1160
 
1161
+ // Write initial lock file immediately so cross-process status detection
1162
+ // works even before the first unit is dispatched (#723).
1163
+ // The lock is updated with unit-specific info on each dispatch and cleared on stop.
1164
+ writeLock(lockBase(), "starting", currentMilestoneId ?? "unknown", 0);
1165
+
1119
1166
  // Secrets collection gate — collect pending secrets before first dispatch
1120
1167
  const mid = state.activeMilestone!.id;
1121
1168
  try {
@@ -1297,6 +1344,53 @@ export async function handleAgentEnd(
1297
1344
  }
1298
1345
  }
1299
1346
 
1347
+ // ── Post-triage: execute actionable resolutions (inject, replan, queue quick-tasks) ──
1348
+ // After a triage-captures unit completes, the LLM has classified captures and
1349
+ // updated CAPTURES.md. Now we execute those classifications: inject tasks into
1350
+ // the plan, write replan triggers, and queue quick-tasks for dispatch.
1351
+ if (currentUnit.type === "triage-captures") {
1352
+ try {
1353
+ const { executeTriageResolutions } = await import("./triage-resolution.js");
1354
+ const state = await deriveState(basePath);
1355
+ const mid = state.activeMilestone?.id;
1356
+ const sid = state.activeSlice?.id;
1357
+
1358
+ if (mid && sid) {
1359
+ const triageResult = executeTriageResolutions(basePath, mid, sid);
1360
+
1361
+ if (triageResult.injected > 0) {
1362
+ ctx.ui.notify(
1363
+ `Triage: injected ${triageResult.injected} task${triageResult.injected === 1 ? "" : "s"} into ${sid} plan.`,
1364
+ "info",
1365
+ );
1366
+ }
1367
+ if (triageResult.replanned > 0) {
1368
+ ctx.ui.notify(
1369
+ `Triage: replan trigger written for ${sid} — next dispatch will enter replanning.`,
1370
+ "info",
1371
+ );
1372
+ }
1373
+ if (triageResult.quickTasks.length > 0) {
1374
+ // Queue quick-tasks for dispatch. They'll be picked up by the
1375
+ // quick-task dispatch block below the triage check.
1376
+ for (const qt of triageResult.quickTasks) {
1377
+ pendingQuickTasks.push(qt);
1378
+ }
1379
+ ctx.ui.notify(
1380
+ `Triage: ${triageResult.quickTasks.length} quick-task${triageResult.quickTasks.length === 1 ? "" : "s"} queued for execution.`,
1381
+ "info",
1382
+ );
1383
+ }
1384
+ for (const action of triageResult.actions) {
1385
+ process.stderr.write(`gsd-triage: ${action}\n`);
1386
+ }
1387
+ }
1388
+ } catch (err) {
1389
+ // Non-fatal — triage resolution failure shouldn't block dispatch
1390
+ process.stderr.write(`gsd-triage: resolution execution failed: ${(err as Error).message}\n`);
1391
+ }
1392
+ }
1393
+
1300
1394
  // ── Path A fix: verify artifact and persist completion before re-entering dispatch ──
1301
1395
  // After doctor + rebuildState, check whether the just-completed unit actually
1302
1396
  // produced its expected artifact. If so, persist the completion key now so the
@@ -1521,7 +1615,7 @@ export async function handleAgentEnd(
1521
1615
  return;
1522
1616
  }
1523
1617
  const sessionFile = ctx.sessionManager.getSessionFile();
1524
- writeLock(basePath, triageUnitType, triageUnitId, completedUnits.length, sessionFile);
1618
+ writeLock(lockBase(), triageUnitType, triageUnitId, completedUnits.length, sessionFile);
1525
1619
 
1526
1620
  // Start unit timeout for triage (use same supervisor config as hooks)
1527
1621
  clearUnitTimeout();
@@ -1551,6 +1645,85 @@ export async function handleAgentEnd(
1551
1645
  }
1552
1646
  }
1553
1647
 
1648
+ // ── Quick-task dispatch: execute queued quick-tasks from triage resolution ──
1649
+ // Quick-tasks are self-contained one-off tasks that don't modify the plan.
1650
+ // They're queued during post-triage resolution and dispatched here one at a time.
1651
+ if (
1652
+ !stepMode &&
1653
+ pendingQuickTasks.length > 0 &&
1654
+ currentUnit &&
1655
+ currentUnit.type !== "quick-task"
1656
+ ) {
1657
+ try {
1658
+ const capture = pendingQuickTasks.shift()!;
1659
+ const { buildQuickTaskPrompt } = await import("./triage-resolution.js");
1660
+ const { markCaptureExecuted } = await import("./captures.js");
1661
+ const prompt = buildQuickTaskPrompt(capture);
1662
+
1663
+ ctx.ui.notify(
1664
+ `Executing quick-task: ${capture.id} — "${capture.text}"`,
1665
+ "info",
1666
+ );
1667
+
1668
+ // Close out previous unit metrics
1669
+ if (currentUnit) {
1670
+ const modelId = ctx.model?.id ?? "unknown";
1671
+ snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId);
1672
+ saveActivityLog(ctx, basePath, currentUnit.type, currentUnit.id);
1673
+ }
1674
+
1675
+ // Dispatch quick-task as a new unit
1676
+ const qtUnitType = "quick-task";
1677
+ const qtUnitId = `${currentMilestoneId}/${capture.id}`;
1678
+ const qtStartedAt = Date.now();
1679
+ currentUnit = { type: qtUnitType, id: qtUnitId, startedAt: qtStartedAt };
1680
+ writeUnitRuntimeRecord(basePath, qtUnitType, qtUnitId, qtStartedAt, {
1681
+ phase: "dispatched",
1682
+ wrapupWarningSent: false,
1683
+ timeoutAt: null,
1684
+ lastProgressAt: qtStartedAt,
1685
+ progressCount: 0,
1686
+ lastProgressKind: "dispatch",
1687
+ });
1688
+ const state = await deriveState(basePath);
1689
+ updateProgressWidget(ctx, qtUnitType, qtUnitId, state);
1690
+
1691
+ const result = await cmdCtx!.newSession();
1692
+ if (result.cancelled) {
1693
+ await stopAuto(ctx, pi);
1694
+ return;
1695
+ }
1696
+ const sessionFile = ctx.sessionManager.getSessionFile();
1697
+ writeLock(lockBase(), qtUnitType, qtUnitId, completedUnits.length, sessionFile);
1698
+
1699
+ // Mark capture as executed now that the unit is dispatched
1700
+ markCaptureExecuted(basePath, capture.id);
1701
+
1702
+ // Start unit timeout for quick-task
1703
+ clearUnitTimeout();
1704
+ const supervisor = resolveAutoSupervisorConfig();
1705
+ const qtTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
1706
+ unitTimeoutHandle = setTimeout(async () => {
1707
+ unitTimeoutHandle = null;
1708
+ if (!active) return;
1709
+ ctx.ui.notify(
1710
+ `Quick-task ${capture.id} exceeded timeout. Pausing auto-mode.`,
1711
+ "warning",
1712
+ );
1713
+ await pauseAuto(ctx, pi);
1714
+ }, qtTimeoutMs);
1715
+
1716
+ if (!active) return;
1717
+ pi.sendMessage(
1718
+ { customType: "gsd-auto", content: prompt, display: verbose },
1719
+ { triggerTurn: true },
1720
+ );
1721
+ return; // handleAgentEnd will fire again when quick-task session completes
1722
+ } catch {
1723
+ // Non-fatal — proceed to normal dispatch
1724
+ }
1725
+ }
1726
+
1554
1727
  // In step mode, pause and show a wizard instead of immediately dispatching
1555
1728
  if (stepMode) {
1556
1729
  await showStepWizard(ctx, pi);
@@ -1788,6 +1961,7 @@ async function dispatchNextUnit(
1788
1961
  // Reset stuck detection for new milestone
1789
1962
  unitDispatchCount.clear();
1790
1963
  unitRecoveryCount.clear();
1964
+ unitConsecutiveSkips.clear();
1791
1965
  unitLifetimeDispatches.clear();
1792
1966
  // Clear completed-units.json for the finished milestone
1793
1967
  try {
@@ -2155,6 +2329,26 @@ async function dispatchNextUnit(
2155
2329
  // Cross-validate: does the expected artifact actually exist?
2156
2330
  const artifactExists = verifyExpectedArtifact(unitType, unitId, basePath);
2157
2331
  if (artifactExists) {
2332
+ // Guard against infinite skip loops: if deriveState keeps returning the
2333
+ // same completed unit, consecutive skips will trip this breaker. Evict the
2334
+ // key so the next dispatch forces full reconciliation instead of looping.
2335
+ const skipCount = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
2336
+ unitConsecutiveSkips.set(idempotencyKey, skipCount);
2337
+ if (skipCount > MAX_CONSECUTIVE_SKIPS) {
2338
+ unitConsecutiveSkips.delete(idempotencyKey);
2339
+ completedKeySet.delete(idempotencyKey);
2340
+ removePersistedKey(basePath, idempotencyKey);
2341
+ invalidateStateCache();
2342
+ ctx.ui.notify(
2343
+ `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`,
2344
+ "warning",
2345
+ );
2346
+ _skipDepth++;
2347
+ await new Promise(r => setTimeout(r, 50));
2348
+ await dispatchNextUnit(ctx, pi);
2349
+ _skipDepth = Math.max(0, _skipDepth - 1);
2350
+ return;
2351
+ }
2158
2352
  ctx.ui.notify(
2159
2353
  `Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`,
2160
2354
  "info",
@@ -2184,6 +2378,24 @@ async function dispatchNextUnit(
2184
2378
  persistCompletedKey(basePath, idempotencyKey);
2185
2379
  completedKeySet.add(idempotencyKey);
2186
2380
  invalidateStateCache();
2381
+ // Same consecutive-skip guard as the idempotency path above.
2382
+ const skipCount2 = (unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
2383
+ unitConsecutiveSkips.set(idempotencyKey, skipCount2);
2384
+ if (skipCount2 > MAX_CONSECUTIVE_SKIPS) {
2385
+ unitConsecutiveSkips.delete(idempotencyKey);
2386
+ completedKeySet.delete(idempotencyKey);
2387
+ removePersistedKey(basePath, idempotencyKey);
2388
+ invalidateStateCache();
2389
+ ctx.ui.notify(
2390
+ `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount2} times without advancing. Evicting completion record and forcing reconciliation.`,
2391
+ "warning",
2392
+ );
2393
+ _skipDepth++;
2394
+ await new Promise(r => setTimeout(r, 50));
2395
+ await dispatchNextUnit(ctx, pi);
2396
+ _skipDepth = Math.max(0, _skipDepth - 1);
2397
+ return;
2398
+ }
2187
2399
  ctx.ui.notify(
2188
2400
  `Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`,
2189
2401
  "info",
@@ -2199,6 +2411,8 @@ async function dispatchNextUnit(
2199
2411
  // Pattern A→B→A→B would reset retryCount every time; this map catches it.
2200
2412
  const dispatchKey = `${unitType}/${unitId}`;
2201
2413
  const prevCount = unitDispatchCount.get(dispatchKey) ?? 0;
2414
+ // Real dispatch reached — clear the consecutive-skip counter for this unit.
2415
+ unitConsecutiveSkips.delete(dispatchKey);
2202
2416
 
2203
2417
  debugLog("dispatch-unit", {
2204
2418
  type: unitType,
@@ -2713,13 +2927,27 @@ async function dispatchNextUnit(
2713
2927
  if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
2714
2928
 
2715
2929
  // Agent has tool calls currently executing (await_job, long bash, etc.) —
2716
- // not idle, just waiting for tool completion.
2930
+ // not idle, just waiting for tool completion. But only suppress recovery
2931
+ // if the tool started recently. A tool in-flight for longer than the idle
2932
+ // timeout is likely stuck — e.g., `python -m http.server 8080 &` keeps the
2933
+ // shell's stdout/stderr open, causing the Bash tool to hang indefinitely.
2717
2934
  if (inFlightTools.size > 0) {
2718
- writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
2719
- lastProgressAt: Date.now(),
2720
- lastProgressKind: "tool-in-flight",
2721
- });
2722
- return;
2935
+ const oldestStart = Math.min(...inFlightTools.values());
2936
+ const toolAgeMs = Date.now() - oldestStart;
2937
+ if (toolAgeMs < idleTimeoutMs) {
2938
+ writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
2939
+ lastProgressAt: Date.now(),
2940
+ lastProgressKind: "tool-in-flight",
2941
+ });
2942
+ return;
2943
+ }
2944
+ // Oldest tool has been running >= idleTimeoutMs — treat as a stuck/hung
2945
+ // tool (e.g., background process holding stdout open). Fall through to
2946
+ // idle recovery without resetting the progress clock.
2947
+ ctx.ui.notify(
2948
+ `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`,
2949
+ "warning",
2950
+ );
2723
2951
  }
2724
2952
 
2725
2953
  // Before triggering recovery, check if the agent is actually producing
@@ -3144,6 +3372,14 @@ export {
3144
3372
  buildLoopRemediationSteps,
3145
3373
  } from "./auto-recovery.js";
3146
3374
 
3375
/**
 * Test-only: hands back the module's consecutive-skip counter map itself
 * (not a copy), so unit tests can inspect skip-loop state.
 * Not part of the public API.
 */
export function _getUnitConsecutiveSkips(): Map<string, number> {
  return unitConsecutiveSkips;
}

/**
 * Test-only: empties the consecutive-skip counter map.
 * Not part of the public API.
 */
export function _resetUnitConsecutiveSkips(): void {
  unitConsecutiveSkips.clear();
}

// Re-exported so tests can reference the skip-loop threshold.
export { MAX_CONSECUTIVE_SKIPS };
3382
+
3147
3383
  /**
3148
3384
  * Dispatch a hook unit directly, bypassing normal pre-dispatch hooks.
3149
3385
  * Used for manual hook triggers via /gsd run-hook.
@@ -3168,6 +3404,7 @@ export async function dispatchHookUnit(
3168
3404
  autoStartTime = Date.now();
3169
3405
  currentUnit = null;
3170
3406
  completedUnits = [];
3407
+ pendingQuickTasks = [];
3171
3408
  }
3172
3409
 
3173
3410
  const hookUnitType = `hook/${hookName}`;
@@ -3248,3 +3485,192 @@ export async function dispatchHookUnit(
3248
3485
 
3249
3486
  return true;
3250
3487
  }
3488
+
3489
+
3490
+ // ─── Direct Phase Dispatch ────────────────────────────────────────────────────
3491
+
3492
/**
 * Dispatch one workflow phase directly for the active milestone.
 *
 * Derives the current state from `base`, maps the requested `phase`
 * (case-insensitive) to a unit type, unit id and prompt, then opens a new
 * session and sends the prompt as a `gsd-dispatch` message that triggers a turn.
 *
 * Accepted phases: `research`, `research-milestone`, `research-slice`, `plan`,
 * `plan-milestone`, `plan-slice`, `execute`, `execute-task`, `complete`,
 * `complete-slice`, `complete-milestone`, `reassess`, `reassess-roadmap`,
 * `uat`, `run-uat`, `replan`, `replan-slice`.
 *
 * The bare `research` / `plan` aliases target the active slice unless the
 * derived state is in the "pre-planning" phase; the bare `complete` alias
 * targets the active slice only when the derived state is "summarizing".
 *
 * When a prerequisite is missing (milestone, slice, task, roadmap, UAT file),
 * the phase is unknown, or session creation is cancelled, a warning is shown
 * and nothing is dispatched.
 *
 * @param ctx   Command context; used for notifications and `newSession()`.
 * @param pi    Extension API; used to send the dispatch message.
 * @param phase Phase name or alias as typed by the user.
 * @param base  Base path handed to state derivation and the prompt builders.
 */
export async function dispatchDirectPhase(
  ctx: ExtensionCommandContext,
  pi: ExtensionAPI,
  phase: string,
  base: string,
): Promise<void> {
  const state = await deriveState(base);
  const mid = state.activeMilestone?.id;
  const midTitle = state.activeMilestone?.title ?? "";

  if (!mid) {
    ctx.ui.notify("Cannot dispatch: no active milestone.", "warning");
    return;
  }

  type DirectUnit = { unitType: string; unitId: string; prompt: string };

  const milestoneId: string = mid;
  const requested = phase.toLowerCase();
  const sliceId = state.activeSlice?.id;
  const sliceTitle = state.activeSlice?.title ?? "";
  const prePlanning = state.phase === "pre-planning";

  const warn = (message: string): void => {
    ctx.ui.notify(message, "warning");
  };

  // Resolves the requested phase to a concrete unit. Returns undefined when a
  // warning has already been shown and nothing should be dispatched.
  const resolveUnit = async (): Promise<DirectUnit | undefined> => {
    switch (requested) {
      case "research":
      case "research-milestone":
      case "research-slice": {
        const wantsSlice =
          requested === "research-slice" || (requested === "research" && !prePlanning);
        if (!wantsSlice) {
          return {
            unitType: "research-milestone",
            unitId: milestoneId,
            prompt: await buildResearchMilestonePrompt(milestoneId, midTitle, base),
          };
        }
        if (!sliceId) {
          warn("Cannot dispatch research-slice: no active slice.");
          return undefined;
        }
        return {
          unitType: "research-slice",
          unitId: `${milestoneId}/${sliceId}`,
          prompt: await buildResearchSlicePrompt(milestoneId, midTitle, sliceId, sliceTitle, base),
        };
      }

      case "plan":
      case "plan-milestone":
      case "plan-slice": {
        const wantsSlice =
          requested === "plan-slice" || (requested === "plan" && !prePlanning);
        if (!wantsSlice) {
          return {
            unitType: "plan-milestone",
            unitId: milestoneId,
            prompt: await buildPlanMilestonePrompt(milestoneId, midTitle, base),
          };
        }
        if (!sliceId) {
          warn("Cannot dispatch plan-slice: no active slice.");
          return undefined;
        }
        return {
          unitType: "plan-slice",
          unitId: `${milestoneId}/${sliceId}`,
          prompt: await buildPlanSlicePrompt(milestoneId, midTitle, sliceId, sliceTitle, base),
        };
      }

      case "execute":
      case "execute-task": {
        const taskId = state.activeTask?.id;
        const taskTitle = state.activeTask?.title ?? "";
        if (!sliceId) {
          warn("Cannot dispatch execute-task: no active slice.");
          return undefined;
        }
        if (!taskId) {
          warn("Cannot dispatch execute-task: no active task.");
          return undefined;
        }
        return {
          unitType: "execute-task",
          unitId: `${milestoneId}/${sliceId}/${taskId}`,
          prompt: await buildExecuteTaskPrompt(milestoneId, sliceId, sliceTitle, taskId, taskTitle, base),
        };
      }

      case "complete":
      case "complete-slice":
      case "complete-milestone": {
        const wantsSlice =
          requested === "complete-slice" ||
          (requested === "complete" && state.phase === "summarizing");
        if (!wantsSlice) {
          return {
            unitType: "complete-milestone",
            unitId: milestoneId,
            prompt: await buildCompleteMilestonePrompt(milestoneId, midTitle, base),
          };
        }
        if (!sliceId) {
          warn("Cannot dispatch complete-slice: no active slice.");
          return undefined;
        }
        return {
          unitType: "complete-slice",
          unitId: `${milestoneId}/${sliceId}`,
          prompt: await buildCompleteSlicePrompt(milestoneId, midTitle, sliceId, sliceTitle, base),
        };
      }

      case "reassess":
      case "reassess-roadmap": {
        const roadmapFile = resolveMilestoneFile(base, milestoneId, "ROADMAP");
        const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
        if (!roadmapContent) {
          warn("Cannot dispatch reassess-roadmap: no roadmap found.");
          return undefined;
        }
        const doneSlices = parseRoadmap(roadmapContent).slices.filter(s => s.done);
        if (doneSlices.length === 0) {
          warn("Cannot dispatch reassess-roadmap: no completed slices.");
          return undefined;
        }
        // Reassessment is keyed on the last slice the roadmap lists as done.
        const lastDoneId = doneSlices[doneSlices.length - 1].id;
        return {
          unitType: "reassess-roadmap",
          unitId: `${milestoneId}/${lastDoneId}`,
          prompt: await buildReassessRoadmapPrompt(milestoneId, midTitle, lastDoneId, base),
        };
      }

      case "uat":
      case "run-uat": {
        if (!sliceId) {
          warn("Cannot dispatch run-uat: no active slice.");
          return undefined;
        }
        const uatFile = resolveSliceFile(base, milestoneId, sliceId, "UAT");
        if (!uatFile) {
          warn("Cannot dispatch run-uat: no UAT file found.");
          return undefined;
        }
        const uatContent = await loadFile(uatFile);
        if (!uatContent) {
          warn("Cannot dispatch run-uat: UAT file is empty.");
          return undefined;
        }
        const uatPath = relSliceFile(base, milestoneId, sliceId, "UAT");
        return {
          unitType: "run-uat",
          unitId: `${milestoneId}/${sliceId}`,
          prompt: await buildRunUatPrompt(milestoneId, sliceId, uatPath, uatContent, base),
        };
      }

      case "replan":
      case "replan-slice": {
        if (!sliceId) {
          warn("Cannot dispatch replan-slice: no active slice.");
          return undefined;
        }
        return {
          unitType: "replan-slice",
          unitId: `${milestoneId}/${sliceId}`,
          prompt: await buildReplanSlicePrompt(milestoneId, midTitle, sliceId, sliceTitle, base),
        };
      }

      default:
        // Echo the phase exactly as typed, not the lower-cased form.
        warn(
          `Unknown phase "${phase}". Valid phases: research, plan, execute, complete, reassess, uat, replan.`,
        );
        return undefined;
    }
  };

  const unit = await resolveUnit();
  if (!unit) {
    return;
  }

  ctx.ui.notify(`Dispatching ${unit.unitType} for ${unit.unitId}...`, "info");

  const session = await ctx.newSession();
  if (session.cancelled) {
    ctx.ui.notify("Session creation cancelled.", "warning");
    return;
  }

  pi.sendMessage(
    { customType: "gsd-dispatch", content: unit.prompt, display: false },
    { triggerTurn: true },
  );
}
@@ -26,6 +26,7 @@ export interface CaptureEntry {
26
26
  resolution?: string;
27
27
  rationale?: string;
28
28
  resolvedAt?: string;
29
+ executed?: boolean;
29
30
  }
30
31
 
31
32
  export interface TriageResult {
@@ -211,6 +212,52 @@ export function markCaptureResolved(
211
212
  writeFileSync(filePath, updated, "utf-8");
212
213
  }
213
214
 
215
/**
 * Mark a resolved capture as executed — its resolution action was carried out.
 *
 * Locates the `### <captureId>` section in the captures file, drops any
 * previous `**Executed:**` line, and appends `**Executed:** <timestamp>`
 * (ISO 8601, from `Date.prototype.toISOString`) as the last line of that
 * section. The file is rewritten in place.
 *
 * Does nothing when the captures file does not exist or when no section
 * for `captureId` is found.
 *
 * @param basePath - Base path handed to `resolveCapturesPath` to locate the captures file.
 * @param captureId - Identifier that appears in the section heading (`### <captureId>`).
 */
export function markCaptureExecuted(basePath: string, captureId: string): void {
  const filePath = resolveCapturesPath(basePath);
  if (!existsSync(filePath)) return;

  const content = readFileSync(filePath, "utf-8");
  const executedAt = new Date().toISOString();

  // Group 1 spans from the capture's heading up to (but not including) the
  // next "### " marker or the end of the file. The "s" flag lets "." cross
  // newlines so the whole multi-line section is captured.
  const sectionRegex = new RegExp(
    `(### ${escapeRegex(captureId)}\\n(?:(?!### ).)*?)(?=### |$)`,
    "s",
  );
  const match = sectionRegex.exec(content);
  if (!match) return;

  let section = match[1];

  // Remove any existing Executed field (in case of re-execution)
  section = section.replace(/\*\*Executed:\*\*\s*.+\n?/g, "");

  // Append Executed timestamp
  section = section.trimEnd() + "\n" + `**Executed:** ${executedAt}` + "\n";

  // Use a replacer function rather than a replacement string: the section
  // holds free-form capture text, and sequences such as "$&", "$1" or "$$"
  // in a replacement string would be expanded as substitution patterns,
  // corrupting the file. A function's return value is inserted literally.
  const updated = content.replace(sectionRegex, () => section);
  writeFileSync(filePath, updated, "utf-8");
}
244
+
245
/**
 * Collect captures whose resolution still has to be carried out.
 *
 * A capture qualifies when its status is "resolved", it has not been
 * flagged as executed, and its classification is one of the actionable
 * kinds: "inject", "replan" or "quick-task".
 *
 * @param basePath - Base path forwarded to `loadAllCaptures`.
 * @returns The qualifying captures, in the order `loadAllCaptures` yields them.
 */
export function loadActionableCaptures(basePath: string): CaptureEntry[] {
  const pending: CaptureEntry[] = [];

  for (const capture of loadAllCaptures(basePath)) {
    // Only resolved captures that have not been executed yet are candidates.
    if (capture.status !== "resolved" || capture.executed) continue;

    switch (capture.classification) {
      case "inject":
      case "replan":
      case "quick-task":
        pending.push(capture);
        break;
      default:
        // Any other classification carries no action to execute.
        break;
    }
  }

  return pending;
}
260
+
214
261
  // ─── Parser ───────────────────────────────────────────────────────────────────
215
262
 
216
263
  /**
@@ -235,6 +282,7 @@ function parseCapturesContent(content: string): CaptureEntry[] {
235
282
  const resolution = extractBoldField(body, "Resolution");
236
283
  const rationale = extractBoldField(body, "Rationale");
237
284
  const resolvedAt = extractBoldField(body, "Resolved");
285
+ const executedAt = extractBoldField(body, "Executed");
238
286
 
239
287
  if (!text || !timestamp) continue;
240
288
 
@@ -251,6 +299,7 @@ function parseCapturesContent(content: string): CaptureEntry[] {
251
299
  ...(resolution ? { resolution } : {}),
252
300
  ...(rationale ? { rationale } : {}),
253
301
  ...(resolvedAt ? { resolvedAt } : {}),
302
+ ...(executedAt ? { executed: true } : {}),
254
303
  });
255
304
  }
256
305