gsd-pi 2.76.0-dev.97807402 → 2.76.0-dev.97f5583d9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/dist/resources/extensions/gsd/auto/phases.js +28 -1
  2. package/dist/resources/extensions/gsd/auto/session.js +12 -0
  3. package/dist/resources/extensions/gsd/auto-dispatch.js +16 -3
  4. package/dist/resources/extensions/gsd/auto-post-unit.js +24 -1
  5. package/dist/resources/extensions/gsd/auto-prompts.js +14 -0
  6. package/dist/resources/extensions/gsd/auto-worktree.js +21 -5
  7. package/dist/resources/extensions/gsd/auto.js +42 -10
  8. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +11 -1
  9. package/dist/resources/extensions/gsd/bootstrap/write-gate.js +22 -1
  10. package/dist/resources/extensions/gsd/clean-root-preflight.js +93 -0
  11. package/dist/resources/extensions/gsd/safety/evidence-collector.js +96 -0
  12. package/dist/resources/extensions/gsd/safety/file-change-validator.js +3 -1
  13. package/dist/resources/extensions/gsd/safety/safety-harness.js +1 -1
  14. package/dist/resources/extensions/gsd/uok/plan-v2.js +20 -3
  15. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  16. package/dist/web/standalone/.next/BUILD_ID +1 -1
  17. package/dist/web/standalone/.next/app-path-routes-manifest.json +9 -9
  18. package/dist/web/standalone/.next/build-manifest.json +2 -2
  19. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  20. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  21. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  22. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  23. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  24. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  25. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  26. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  27. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  28. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  29. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  30. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  31. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  35. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/index.html +1 -1
  37. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app-paths-manifest.json +9 -9
  44. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  45. package/dist/web/standalone/.next/server/middleware-manifest.json +5 -5
  46. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  47. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  48. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  49. package/package.json +1 -1
  50. package/packages/mcp-server/dist/server.d.ts +7 -0
  51. package/packages/mcp-server/dist/server.d.ts.map +1 -1
  52. package/packages/mcp-server/dist/server.js +23 -3
  53. package/packages/mcp-server/dist/server.js.map +1 -1
  54. package/packages/mcp-server/src/mcp-server.test.ts +30 -0
  55. package/packages/mcp-server/src/server.ts +43 -9
  56. package/packages/mcp-server/tsconfig.tsbuildinfo +1 -1
  57. package/packages/pi-ai/dist/providers/anthropic-auth.test.js +1 -1
  58. package/packages/pi-ai/dist/providers/anthropic-auth.test.js.map +1 -1
  59. package/packages/pi-ai/dist/providers/anthropic-shared.d.ts.map +1 -1
  60. package/packages/pi-ai/dist/providers/anthropic-shared.js +25 -4
  61. package/packages/pi-ai/dist/providers/anthropic-shared.js.map +1 -1
  62. package/packages/pi-ai/dist/providers/anthropic.d.ts.map +1 -1
  63. package/packages/pi-ai/dist/providers/anthropic.js +8 -3
  64. package/packages/pi-ai/dist/providers/anthropic.js.map +1 -1
  65. package/packages/pi-ai/dist/providers/minimax-tool-name.test.d.ts +2 -0
  66. package/packages/pi-ai/dist/providers/minimax-tool-name.test.d.ts.map +1 -0
  67. package/packages/pi-ai/dist/providers/minimax-tool-name.test.js +80 -0
  68. package/packages/pi-ai/dist/providers/minimax-tool-name.test.js.map +1 -0
  69. package/packages/pi-ai/src/providers/anthropic-auth.test.ts +1 -1
  70. package/packages/pi-ai/src/providers/anthropic-shared.ts +23 -4
  71. package/packages/pi-ai/src/providers/anthropic.ts +9 -3
  72. package/packages/pi-ai/src/providers/minimax-tool-name.test.ts +98 -0
  73. package/packages/pi-ai/tsconfig.tsbuildinfo +1 -1
  74. package/src/resources/extensions/gsd/auto/loop-deps.ts +13 -0
  75. package/src/resources/extensions/gsd/auto/phases.ts +52 -1
  76. package/src/resources/extensions/gsd/auto/session.ts +22 -0
  77. package/src/resources/extensions/gsd/auto-dispatch.ts +16 -3
  78. package/src/resources/extensions/gsd/auto-post-unit.ts +28 -1
  79. package/src/resources/extensions/gsd/auto-prompts.ts +28 -1
  80. package/src/resources/extensions/gsd/auto-worktree.ts +28 -11
  81. package/src/resources/extensions/gsd/auto.ts +46 -10
  82. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +11 -1
  83. package/src/resources/extensions/gsd/bootstrap/write-gate.ts +22 -1
  84. package/src/resources/extensions/gsd/clean-root-preflight.ts +111 -0
  85. package/src/resources/extensions/gsd/safety/evidence-collector.ts +119 -0
  86. package/src/resources/extensions/gsd/safety/file-change-validator.ts +3 -1
  87. package/src/resources/extensions/gsd/safety/safety-harness.ts +3 -0
  88. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +3 -1
  89. package/src/resources/extensions/gsd/tests/auto-paused-session-validation.test.ts +12 -0
  90. package/src/resources/extensions/gsd/tests/clean-root-preflight.test.ts +186 -0
  91. package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts +2 -0
  92. package/src/resources/extensions/gsd/tests/double-merge-guard.test.ts +1 -1
  93. package/src/resources/extensions/gsd/tests/journal-integration.test.ts +2 -0
  94. package/src/resources/extensions/gsd/tests/pre-exec-gate-loop.test.ts +272 -0
  95. package/src/resources/extensions/gsd/tests/safety-harness-false-positives.test.ts +205 -0
  96. package/src/resources/extensions/gsd/tests/uok-plan-v2-wiring.test.ts +23 -0
  97. package/src/resources/extensions/gsd/uok/plan-v2.ts +26 -3
  98. package/src/resources/extensions/gsd/workflow-logger.ts +2 -1
  99. /package/dist/web/standalone/.next/static/{pI48IF3dgfs0CBrYi2bh_ → lLdDRDspgYzfz0bJAmUSz}/_buildManifest.js +0 -0
  100. /package/dist/web/standalone/.next/static/{pI48IF3dgfs0CBrYi2bh_ → lLdDRDspgYzfz0bJAmUSz}/_ssgManifest.js +0 -0
@@ -22,6 +22,7 @@ import type { CmuxLogLevel } from "../../cmux/index.js";
22
22
  import type { JournalEntry } from "../journal.js";
23
23
  import type { MergeReconcileResult } from "../auto-recovery.js";
24
24
  import type { UokTurnObserver } from "../uok/contracts.js";
25
+ import type { PreflightResult } from "../clean-root-preflight.js";
25
26
 
26
27
  /**
27
28
  * Dependencies injected by the caller (auto.ts startAuto) so autoLoop
@@ -122,6 +123,18 @@ export interface LoopDeps {
122
123
  ) => string | null;
123
124
  reconcileMergeState: (basePath: string, ctx: ExtensionContext) => MergeReconcileResult;
124
125
 
126
+ // Clean-root preflight gate (#2909)
127
+ preflightCleanRoot: (
128
+ basePath: string,
129
+ milestoneId: string,
130
+ notify: (message: string, level: "info" | "warning" | "error") => void,
131
+ ) => PreflightResult;
132
+ postflightPopStash: (
133
+ basePath: string,
134
+ milestoneId: string,
135
+ notify: (message: string, level: "info" | "warning" | "error") => void,
136
+ ) => void;
137
+
125
138
  // Budget/context/secrets
126
139
  getLedger: () => unknown;
127
140
  getProjectTotals: (units: unknown) => { cost: number };
@@ -54,7 +54,8 @@ import type { MinimalModelRegistry } from "../context-budget.js";
54
54
  import { ensurePlanV2Graph } from "../uok/plan-v2.js";
55
55
  import { resolveUokFlags } from "../uok/flags.js";
56
56
  import { UokGateRunner } from "../uok/gate-runner.js";
57
- import { resetEvidence } from "../safety/evidence-collector.js";
57
+ import { resetEvidence, loadEvidenceFromDisk } from "../safety/evidence-collector.js";
58
+ import { parseUnitId } from "../unit-id.js";
58
59
  import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
59
60
  import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
60
61
  import {
@@ -545,6 +546,12 @@ export async function runPreDispatch(
545
546
  loopState.stuckRecoveryAttempts = 0;
546
547
 
547
548
  // Worktree lifecycle on milestone transition — merge current, enter next
549
+ // #2909: preflight — warn + stash dirty working tree before merge
550
+ const preflightTransition = deps.preflightCleanRoot(
551
+ s.originalBasePath || s.basePath,
552
+ s.currentMilestoneId!,
553
+ ctx.ui.notify.bind(ctx.ui),
554
+ );
548
555
  try {
549
556
  deps.resolver.mergeAndExit(s.currentMilestoneId!, ctx.ui);
550
557
  } catch (mergeErr) {
@@ -566,6 +573,14 @@ export async function runPreDispatch(
566
573
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
567
574
  return { action: "break", reason: "merge-failed" };
568
575
  }
576
+ // #2909: postflight — restore stashed changes after successful merge
577
+ if (preflightTransition.stashPushed) {
578
+ deps.postflightPopStash(
579
+ s.originalBasePath || s.basePath,
580
+ s.currentMilestoneId!,
581
+ ctx.ui.notify.bind(ctx.ui),
582
+ );
583
+ }
569
584
 
570
585
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
571
586
 
@@ -644,6 +659,12 @@ export async function runPreDispatch(
644
659
  if (incomplete.length === 0 && state.registry.length > 0) {
645
660
  // All milestones complete — merge milestone branch before stopping
646
661
  if (s.currentMilestoneId) {
662
+ // #2909: preflight — warn + stash dirty working tree before merge
663
+ const preflightAllComplete = deps.preflightCleanRoot(
664
+ s.originalBasePath || s.basePath,
665
+ s.currentMilestoneId,
666
+ ctx.ui.notify.bind(ctx.ui),
667
+ );
647
668
  try {
648
669
  deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
649
670
  // Prevent stopAuto from attempting the same merge (#2645)
@@ -665,6 +686,14 @@ export async function runPreDispatch(
665
686
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
666
687
  return { action: "break", reason: "merge-failed" };
667
688
  }
689
+ // #2909: postflight — restore stashed changes after successful merge
690
+ if (preflightAllComplete.stashPushed) {
691
+ deps.postflightPopStash(
692
+ s.originalBasePath || s.basePath,
693
+ s.currentMilestoneId,
694
+ ctx.ui.notify.bind(ctx.ui),
695
+ );
696
+ }
668
697
 
669
698
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
670
699
  }
@@ -758,6 +787,12 @@ export async function runPreDispatch(
758
787
  if (state.phase === "complete") {
759
788
  // Milestone merge on complete (before closeout so branch state is clean)
760
789
  if (s.currentMilestoneId) {
790
+ // #2909: preflight — warn + stash dirty working tree before merge
791
+ const preflightComplete = deps.preflightCleanRoot(
792
+ s.originalBasePath || s.basePath,
793
+ s.currentMilestoneId,
794
+ ctx.ui.notify.bind(ctx.ui),
795
+ );
761
796
  try {
762
797
  deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
763
798
  // Prevent stopAuto from attempting the same merge (#2645)
@@ -779,6 +814,14 @@ export async function runPreDispatch(
779
814
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
780
815
  return { action: "break", reason: "merge-failed" };
781
816
  }
817
+ // #2909: postflight — restore stashed changes after successful merge
818
+ if (preflightComplete.stashPushed) {
819
+ deps.postflightPopStash(
820
+ s.originalBasePath || s.basePath,
821
+ s.currentMilestoneId,
822
+ ctx.ui.notify.bind(ctx.ui),
823
+ );
824
+ }
782
825
 
783
826
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
784
827
  }
@@ -1385,6 +1428,14 @@ export async function runUnitPhase(
1385
1428
  );
1386
1429
  if (safetyConfig.enabled && safetyConfig.evidence_collection) {
1387
1430
  resetEvidence();
1431
+ // Restore persisted evidence so session-restart resumes don't produce
1432
+ // false-positive "no bash calls" warnings (Bug #4385).
1433
+ if (s.basePath && unitType === "execute-task") {
1434
+ const { milestone: eMid, slice: eSid, task: eTid } = parseUnitId(unitId);
1435
+ if (eMid && eSid && eTid) {
1436
+ loadEvidenceFromDisk(s.basePath, eMid, eSid, eTid);
1437
+ }
1438
+ }
1388
1439
  }
1389
1440
  // Only checkpoint code-executing units (not lifecycle/planning units)
1390
1441
  if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
@@ -64,6 +64,15 @@ export interface SidecarItem {
64
64
  captureId?: string;
65
65
  }
66
66
 
67
+ export interface PreExecFailure {
68
+ /** Milestone/slice that failed (e.g. "M001/S02"). */
69
+ unitId: string;
70
+ /** Verbatim blocking check strings from the failed gate run. */
71
+ blockingFindings: string[];
72
+ /** Condensed gate verdict excerpt for context (status + rationale). */
73
+ verdictExcerpt: string;
74
+ }
75
+
67
76
  // ─── Constants ───────────────────────────────────────────────────────────────
68
77
 
69
78
  export const MAX_UNIT_DISPATCHES = 3;
@@ -139,6 +148,18 @@ export class AutoSession {
139
148
  // ── Sidecar queue ─────────────────────────────────────────────────────
140
149
  sidecarQueue: SidecarItem[] = [];
141
150
 
151
+ // ── Pre-exec gate failure context (#4551) ───────────────────────────
152
+ /**
153
+ * Persisted when a pre-execution gate fails on a plan-slice or refine-slice
154
+ * unit. The planning → plan-slice dispatch rule reads this field and injects
155
+ * the failure details into the next re-dispatch prompt so the LLM can fix the
156
+ * specific issues instead of producing an identical plan.
157
+ *
158
+ * Cleared after it has been consumed (injected into the prompt) to avoid
159
+ * stale context bleeding into unrelated slices.
160
+ */
161
+ lastPreExecFailure: PreExecFailure | null = null;
162
+
142
163
  // ── Tool invocation errors (#2883) ──────────────────────────────────
143
164
  /** Set when a GSD tool execution ends with isError due to malformed/truncated
144
165
  * JSON arguments. Checked by postUnitPreVerification to break retry loops. */
@@ -267,6 +288,7 @@ export class AutoSession {
267
288
  this.sidecarQueue = [];
268
289
  this.rewriteAttemptCount = 0;
269
290
  this.consecutiveCompleteBootstraps = 0;
291
+ this.lastPreExecFailure = null;
270
292
  this.lastToolInvocationError = null;
271
293
  this.lastGitActionFailure = null;
272
294
  this.lastGitActionStatus = null;
@@ -568,15 +568,28 @@ export const DISPATCH_RULES: DispatchRule[] = [
568
568
  },
569
569
  {
570
570
  name: "planning → plan-slice",
571
- match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry }) => {
571
+ match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry, session }) => {
572
572
  if (state.phase !== "planning") return null;
573
573
  if (!state.activeSlice) return missingSliceStop(mid, state.phase);
574
574
  const sid = state.activeSlice!.id;
575
575
  const sTitle = state.activeSlice!.title;
576
+ // #4551: Consume any persisted pre-exec failure for this slice so the
577
+ // re-dispatched prompt includes the exact blocked references. Clear the
578
+ // field immediately after reading to prevent stale context leaking into
579
+ // a later, unrelated plan-slice run.
580
+ const unitId = `${mid}/${sid}`;
581
+ let priorPreExecFailure: { blockingFindings: string[]; verdictExcerpt: string } | undefined;
582
+ if (session?.lastPreExecFailure?.unitId === unitId) {
583
+ priorPreExecFailure = {
584
+ blockingFindings: session.lastPreExecFailure.blockingFindings,
585
+ verdictExcerpt: session.lastPreExecFailure.verdictExcerpt,
586
+ };
587
+ session.lastPreExecFailure = null;
588
+ }
576
589
  return {
577
590
  action: "dispatch",
578
591
  unitType: "plan-slice",
579
- unitId: `${mid}/${sid}`,
592
+ unitId,
580
593
  prompt: await buildPlanSlicePrompt(
581
594
  mid,
582
595
  midTitle,
@@ -584,7 +597,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
584
597
  sTitle,
585
598
  basePath,
586
599
  undefined,
587
- { sessionContextWindow, modelRegistry },
600
+ { sessionContextWindow, modelRegistry, priorPreExecFailure },
588
601
  ),
589
602
  };
590
603
  },
@@ -55,7 +55,7 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
55
55
  import { debugLog } from "./debug-logger.js";
56
56
  import { runSafely } from "./auto-utils.js";
57
57
  import type { AutoSession, SidecarItem } from "./auto/session.js";
58
- import { getEvidence } from "./safety/evidence-collector.js";
58
+ import { getEvidence, clearEvidenceFromDisk } from "./safety/evidence-collector.js";
59
59
  import { validateFileChanges } from "./safety/file-change-validator.js";
60
60
  // crossReferenceEvidence available for future use when verification_evidence is stored in DB
61
61
  // import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
@@ -711,6 +711,16 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
711
711
  debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
712
712
  }
713
713
  }
714
+
715
+ // Clear persisted evidence file now that post-unit processing is complete
716
+ // (Bug #4385 — prevents stale evidence from affecting retries of same unit ID).
717
+ if (safetyConfig.evidence_collection && s.currentUnit.type === "execute-task" && sMid && sSid && sTid) {
718
+ try {
719
+ clearEvidenceFromDisk(s.basePath, sMid, sSid, sTid);
720
+ } catch (e) {
721
+ debugLog("postUnit", { phase: "safety-evidence-clear", error: String(e) });
722
+ }
723
+ }
714
724
  }
715
725
  } catch (e) {
716
726
  debugLog("postUnit", { phase: "safety-harness", error: String(e) });
@@ -1133,6 +1143,15 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
1133
1143
  `Pre-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found\n${details}${suffix}${evidenceNote}`,
1134
1144
  "error",
1135
1145
  );
1146
+ // Persist failure context so the next plan-slice re-dispatch can inject
1147
+ // it into the prompt and break the infinite loop (#4551).
1148
+ s.lastPreExecFailure = {
1149
+ unitId: currentUnit.id,
1150
+ blockingFindings: blockingChecks.map(
1151
+ c => `[${c.category}] ${c.target}: ${c.message}`,
1152
+ ),
1153
+ verdictExcerpt: `status=${result.status}; ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} detected`,
1154
+ };
1136
1155
  preExecPauseNeeded = true;
1137
1156
  } else if (result.status === "warn") {
1138
1157
  ctx.ui.notify(
@@ -1141,6 +1160,14 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
1141
1160
  );
1142
1161
  // Strict mode: treat warnings as blocking
1143
1162
  if (prefs?.enhanced_verification_strict === true) {
1163
+ const warnChecks = result.checks.filter(c => !c.passed);
1164
+ s.lastPreExecFailure = {
1165
+ unitId: currentUnit.id,
1166
+ blockingFindings: warnChecks.map(
1167
+ c => `[${c.category}] ${c.target}: ${c.message}`,
1168
+ ),
1169
+ verdictExcerpt: `status=${result.status} (strict mode); ${warnChecks.length} warning${warnChecks.length === 1 ? "" : "s"} treated as blocking`,
1170
+ };
1144
1171
  preExecPauseNeeded = true;
1145
1172
  }
1146
1173
  }
@@ -1380,7 +1380,18 @@ async function renderSlicePrompt(options: {
1380
1380
 
1381
1381
  export async function buildPlanSlicePrompt(
1382
1382
  mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
1383
- options?: { softScopeHint?: string; sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry },
1383
+ options?: {
1384
+ softScopeHint?: string;
1385
+ sessionContextWindow?: number;
1386
+ modelRegistry?: MinimalModelRegistry;
1387
+ /** Failure context from a prior pre-exec gate run (#4551). When present, a
1388
+ * "Fix these specific issues" section is appended so the LLM addresses the
1389
+ * exact problems instead of producing an identical plan that fails again. */
1390
+ priorPreExecFailure?: {
1391
+ blockingFindings: string[];
1392
+ verdictExcerpt: string;
1393
+ };
1394
+ },
1384
1395
  ): Promise<string> {
1385
1396
  const prependBlocks: string[] = [];
1386
1397
  // ADR-011: when the refining-phase dispatch rule gracefully downgrades to
@@ -1393,6 +1404,22 @@ export async function buildPlanSlicePrompt(
1393
1404
  `This scope was captured during an earlier progressive-planning pass that was later disabled. Treat it as context only — you may plan beyond it if the work genuinely requires more scope. Do NOT treat this as a hard boundary.`,
1394
1405
  );
1395
1406
  }
1407
+ // #4551: inject pre-exec failure context so the re-dispatched plan-slice
1408
+ // addresses the exact blocked references rather than reproducing the same plan.
1409
+ if (options?.priorPreExecFailure) {
1410
+ const { blockingFindings, verdictExcerpt } = options.priorPreExecFailure;
1411
+ const findingsList = blockingFindings.length > 0
1412
+ ? blockingFindings.map(f => `- ${f}`).join("\n")
1413
+ : "- (no specific findings recorded)";
1414
+ prependBlocks.push(
1415
+ `## Fix these specific issues from the prior pre-exec check\n\n` +
1416
+ `The previous plan-slice attempt was blocked by pre-execution validation.\n` +
1417
+ `Gate verdict: ${verdictExcerpt}\n\n` +
1418
+ `Blocked references that must be resolved in this plan:\n${findingsList}\n\n` +
1419
+ `Revise the plan so that every reference listed above is satisfied before execution begins. ` +
1420
+ `Do not reproduce the same file paths, package names, or task ordering that caused these failures.`,
1421
+ );
1422
+ }
1396
1423
  return renderSlicePrompt({
1397
1424
  mid, sid, sTitle, base,
1398
1425
  level: level ?? resolveInlineLevel(),
@@ -1994,21 +1994,38 @@ export function mergeMilestoneToMain(
1994
1994
  // When a milestone only produced .gsd/ metadata (summaries, roadmaps) but no
1995
1995
  // real code, the user sees "milestone complete" but nothing changed in their
1996
1996
  // codebase. Surface this so the caller can warn the user.
1997
+ //
1998
+ // Bug #4385 fix: use `git diff-tree --root` instead of `git diff HEAD~1 HEAD`.
1999
+ // `HEAD~1` does not exist on initial commits and is unreliable on shallow clones
2000
+ // and merge commits. `diff-tree --root` handles all three cases correctly.
2001
+ // The empty-tree hash (4b825dc…) is the universal fallback for refs that don't exist.
2002
+ const GIT_EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1997
2003
  let codeFilesChanged = false;
1998
2004
  if (!nothingToCommit) {
1999
2005
  try {
2000
- const mergedFiles = nativeDiffNumstat(
2001
- originalBasePath_,
2002
- "HEAD~1",
2003
- "HEAD",
2004
- );
2005
- codeFilesChanged = mergedFiles.some(
2006
- (entry) => !entry.path.startsWith(".gsd/"),
2007
- );
2006
+ const diffTreeOutput = execFileSync(
2007
+ "git",
2008
+ ["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"],
2009
+ { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
2010
+ ).trim();
2011
+ const mergedFiles = diffTreeOutput ? diffTreeOutput.split("\n").filter(Boolean) : [];
2012
+ codeFilesChanged = mergedFiles.some((f) => !f.startsWith(".gsd/"));
2008
2013
  } catch (e) {
2009
- // If HEAD~1 doesn't exist (first commit), assume code was changed
2010
- logWarning("worktree", `diff numstat failed (assuming code changed): ${(e as Error).message}`);
2011
- codeFilesChanged = true;
2014
+ // diff-tree failed (e.g. unborn HEAD in a brand-new repo) — fall back to
2015
+ // comparing against the empty tree so initial-commit repos still report changes.
2016
+ try {
2017
+ const fallbackOutput = execFileSync(
2018
+ "git",
2019
+ ["diff", "--name-only", GIT_EMPTY_TREE, "HEAD"],
2020
+ { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
2021
+ ).trim();
2022
+ const fallbackFiles = fallbackOutput ? fallbackOutput.split("\n").filter(Boolean) : [];
2023
+ codeFilesChanged = fallbackFiles.some((f) => !f.startsWith(".gsd/"));
2024
+ } catch {
2025
+ // Truly unable to determine — assume code was changed to avoid silent data loss
2026
+ logWarning("worktree", `diff-tree and empty-tree fallback both failed (assuming code changed): ${(e as Error).message}`);
2027
+ codeFilesChanged = true;
2028
+ }
2012
2029
  }
2013
2030
  }
2014
2031
 
@@ -126,8 +126,9 @@ import {
126
126
  formatTokenCount,
127
127
  } from "./metrics.js";
128
128
  import { setLogBasePath, logWarning, logError } from "./workflow-logger.js";
129
+ import { preflightCleanRoot, postflightPopStash } from "./clean-root-preflight.js";
129
130
  import { homedir } from "node:os";
130
- import { join } from "node:path";
131
+ import { isAbsolute, join } from "node:path";
131
132
  import { pathToFileURL } from "node:url";
132
133
  import { readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs";
133
134
  import { atomicWriteSync } from "./atomic-write.js";
@@ -309,6 +310,21 @@ function restoreMilestoneLockEnv(): void {
309
310
  s.milestoneLockEnvCaptured = false;
310
311
  }
311
312
 
313
+ function normalizeSessionFilePath(raw: unknown): string | null {
314
+ if (typeof raw !== "string") return null;
315
+ const trimmed = raw.trim();
316
+ if (!trimmed) return null;
317
+ const firstLine = trimmed.split(/\r?\n/, 1)[0]?.trim() ?? "";
318
+ if (!firstLine) return null;
319
+
320
+ // Guard against accidental message concatenation by trimming to .jsonl.
321
+ const jsonlIndex = firstLine.toLowerCase().indexOf(".jsonl");
322
+ const candidate = jsonlIndex >= 0 ? firstLine.slice(0, jsonlIndex + ".jsonl".length) : firstLine;
323
+ if (!isAbsolute(candidate)) return null;
324
+ if (!candidate.toLowerCase().endsWith(".jsonl")) return null;
325
+ return candidate;
326
+ }
327
+
312
328
  export function startAutoDetached(
313
329
  ctx: ExtensionCommandContext,
314
330
  pi: ExtensionAPI,
@@ -1055,7 +1071,7 @@ export async function pauseAuto(
1055
1071
  // from provider-error pause and avoid hard-stopping (#2762).
1056
1072
  resolveAgentEndCancelled(_errorContext);
1057
1073
 
1058
- s.pausedSessionFile = ctx?.sessionManager?.getSessionFile() ?? null;
1074
+ s.pausedSessionFile = normalizeSessionFilePath(ctx?.sessionManager?.getSessionFile() ?? null);
1059
1075
 
1060
1076
  // Persist paused-session metadata so resume survives /exit (#1383).
1061
1077
  // The fresh-start bootstrap checks for this file and restores worktree context.
@@ -1287,6 +1303,10 @@ function buildLoopDeps(): LoopDeps {
1287
1303
 
1288
1304
  // Journal
1289
1305
  emitJournalEvent: (entry: JournalEntry) => _emitJournalEvent(s.basePath, entry),
1306
+
1307
+ // Clean-root preflight gate (#2909)
1308
+ preflightCleanRoot,
1309
+ postflightPopStash,
1290
1310
  } as unknown as LoopDeps;
1291
1311
  }
1292
1312
 
@@ -1359,7 +1379,11 @@ export async function startAuto(
1359
1379
  s.autoStartTime = meta.autoStartTime || Date.now();
1360
1380
  s.sessionMilestoneLock = meta.milestoneLock ?? null;
1361
1381
  s.paused = true;
1362
- try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
1382
+ try { unlinkSync(pausedPath); } catch (e) {
1383
+ if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
1384
+ logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1385
+ }
1386
+ }
1363
1387
  ctx.ui.notify(
1364
1388
  `Resuming paused custom workflow${meta.activeRunDir ? ` (${meta.activeRunDir})` : ""}.`,
1365
1389
  "info",
@@ -1378,7 +1402,9 @@ export async function startAuto(
1378
1402
  const summaryFile = resolveMilestoneFile(base, meta.milestoneId, "SUMMARY");
1379
1403
  if (!mDir || summaryFile) {
1380
1404
  try { unlinkSync(pausedPath); } catch (err) {
1381
- logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1405
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
1406
+ logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1407
+ }
1382
1408
  }
1383
1409
  ctx.ui.notify(
1384
1410
  `Paused milestone ${meta.milestoneId} is ${!mDir ? "missing" : "already complete"}. Starting fresh.`,
@@ -1388,20 +1414,28 @@ export async function startAuto(
1388
1414
  s.currentMilestoneId = meta.milestoneId;
1389
1415
  s.originalBasePath = meta.originalBasePath || base;
1390
1416
  s.stepMode = meta.stepMode ?? requestedStepMode;
1391
- s.pausedSessionFile = meta.sessionFile ?? null;
1417
+ s.pausedSessionFile = normalizeSessionFilePath(meta.sessionFile ?? null);
1392
1418
  s.pausedUnitType = meta.unitType ?? null;
1393
1419
  s.pausedUnitId = meta.unitId ?? null;
1394
1420
  s.autoStartTime = meta.autoStartTime || Date.now();
1395
1421
  s.sessionMilestoneLock = meta.milestoneLock ?? null;
1396
1422
  s.paused = true;
1397
- try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
1423
+ try { unlinkSync(pausedPath); } catch (e) {
1424
+ if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
1425
+ logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1426
+ }
1427
+ }
1398
1428
  ctx.ui.notify(
1399
1429
  `Resuming paused session for ${meta.milestoneId}${meta.worktreePath && existsSync(meta.worktreePath) ? ` (worktree)` : ""}.`,
1400
1430
  "info",
1401
1431
  );
1402
1432
  }
1403
1433
  } else if (existsSync(pausedPath)) {
1404
- try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
1434
+ try { unlinkSync(pausedPath); } catch (e) {
1435
+ if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
1436
+ logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1437
+ }
1438
+ }
1405
1439
  }
1406
1440
  }
1407
1441
  } catch (err) {
@@ -1460,7 +1494,9 @@ export async function startAuto(
1460
1494
  // Lock acquired — now safe to delete the pause file
1461
1495
  if (s.pausedSessionFile) {
1462
1496
  try { unlinkSync(s.pausedSessionFile); } catch (err) {
1463
- logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1497
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
1498
+ logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1499
+ }
1464
1500
  }
1465
1501
  s.pausedSessionFile = null;
1466
1502
  }
@@ -1771,12 +1807,12 @@ export async function dispatchHookUnit(
1771
1807
  }
1772
1808
  }
1773
1809
 
1774
- const sessionFile = ctx.sessionManager.getSessionFile();
1810
+ const sessionFile = normalizeSessionFilePath(ctx.sessionManager.getSessionFile());
1775
1811
  writeLock(
1776
1812
  lockBase(),
1777
1813
  hookUnitType,
1778
1814
  triggerUnitId,
1779
- sessionFile,
1815
+ sessionFile ?? undefined,
1780
1816
  );
1781
1817
 
1782
1818
  clearUnitTimeout();
@@ -23,7 +23,8 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
23
23
  import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
24
24
  import { saveActivityLog } from "../activity-log.js";
25
25
  import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
26
- import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
26
+ import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult, saveEvidenceToDisk } from "../safety/evidence-collector.js";
27
+ import { parseUnitId } from "../unit-id.js";
27
28
  import { classifyCommand } from "../safety/destructive-guard.js";
28
29
  import { logWarning as safetyLogWarning } from "../workflow-logger.js";
29
30
  import { installNotifyInterceptor } from "./notify-interceptor.js";
@@ -499,6 +500,15 @@ export function registerHooks(
499
500
  // Safety harness: record tool execution results for evidence cross-referencing
500
501
  if (isAutoActive()) {
501
502
  safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
503
+ // Persist evidence to disk after each tool result so it survives a session
504
+ // restart mid-unit (Bug #4385 — non-persisted evidence false positives).
505
+ const dash = getAutoDashboardData();
506
+ if (dash.basePath && dash.currentUnit?.type === "execute-task") {
507
+ const { milestone: pMid, slice: pSid, task: pTid } = parseUnitId(dash.currentUnit.id);
508
+ if (pMid && pSid && pTid) {
509
+ saveEvidenceToDisk(dash.basePath, pMid, pSid, pTid);
510
+ }
511
+ }
502
512
  }
503
513
  });
504
514
 
@@ -28,8 +28,29 @@ const QUEUE_SAFE_TOOLS = new Set([
28
28
  /**
29
29
  * Bash commands that are read-only / investigative — safe during queue mode.
30
30
  * Matches the leading command in a bash invocation.
31
+ *
32
+ * Extension policy: add commands here when they are read-only / diagnostic.
33
+ * Never add commands that mutate project state (write files, run builds that
34
+ * emit artifacts, install packages, etc.).
35
+ *
36
+ * Current read-only additions (Bug #4385):
37
+ * npm run <diagnostic> — read-only diagnostic scripts: test, lint, typecheck, etc.
38
+ * NOT: build, install, compile, generate, deploy (artifact-producing)
39
+ * npm ls/list/info — inspect installed packages (read-only)
40
+ * npm outdated/audit — security/update checks (read-only)
41
+ * npx <pkg> — run a package binary without installing globally
42
+ * tsx — TypeScript runner used for dry-run / inspection scripts
43
+ * node --print — evaluate and print an expression, no side effects
44
+ * python / python3 — script inspection, version checks
45
+ * pip / pip3 show — show installed package info (read-only)
46
+ * jq — read-only JSON query
47
+ * yq — read-only YAML query
48
+ * curl -s / curl --silent — fetch for inspection (no -o / no output redirect)
49
+ * openssl version — version / certificate inspection
50
+ * env / printenv — print environment variables
51
+ * true / false — shell no-ops / test exit codes
31
52
  */
32
- const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s)/;
53
+ const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\b|printenv\b|true\b|false\b)/;
33
54
 
34
55
  const verifiedDepthMilestones = new Set<string>();
35
56
  let activeQueuePhase = false;