gsd-pi 2.76.0-dev.97807402 → 2.76.0-dev.97f5583d9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/dist/resources/extensions/gsd/auto/phases.js +28 -1
  2. package/dist/resources/extensions/gsd/auto/session.js +12 -0
  3. package/dist/resources/extensions/gsd/auto-dispatch.js +16 -3
  4. package/dist/resources/extensions/gsd/auto-post-unit.js +24 -1
  5. package/dist/resources/extensions/gsd/auto-prompts.js +14 -0
  6. package/dist/resources/extensions/gsd/auto-worktree.js +21 -5
  7. package/dist/resources/extensions/gsd/auto.js +42 -10
  8. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +11 -1
  9. package/dist/resources/extensions/gsd/bootstrap/write-gate.js +22 -1
  10. package/dist/resources/extensions/gsd/clean-root-preflight.js +93 -0
  11. package/dist/resources/extensions/gsd/safety/evidence-collector.js +96 -0
  12. package/dist/resources/extensions/gsd/safety/file-change-validator.js +3 -1
  13. package/dist/resources/extensions/gsd/safety/safety-harness.js +1 -1
  14. package/dist/resources/extensions/gsd/uok/plan-v2.js +20 -3
  15. package/dist/tsconfig.extensions.tsbuildinfo +1 -1
  16. package/dist/web/standalone/.next/BUILD_ID +1 -1
  17. package/dist/web/standalone/.next/app-path-routes-manifest.json +9 -9
  18. package/dist/web/standalone/.next/build-manifest.json +2 -2
  19. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  20. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  21. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  22. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  23. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  24. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  25. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  26. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  27. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  28. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  29. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  30. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  31. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  35. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/index.html +1 -1
  37. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  39. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app-paths-manifest.json +9 -9
  44. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  45. package/dist/web/standalone/.next/server/middleware-manifest.json +5 -5
  46. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  47. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  48. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  49. package/package.json +1 -1
  50. package/packages/mcp-server/dist/server.d.ts +7 -0
  51. package/packages/mcp-server/dist/server.d.ts.map +1 -1
  52. package/packages/mcp-server/dist/server.js +23 -3
  53. package/packages/mcp-server/dist/server.js.map +1 -1
  54. package/packages/mcp-server/src/mcp-server.test.ts +30 -0
  55. package/packages/mcp-server/src/server.ts +43 -9
  56. package/packages/mcp-server/tsconfig.tsbuildinfo +1 -1
  57. package/packages/pi-ai/dist/providers/anthropic-auth.test.js +1 -1
  58. package/packages/pi-ai/dist/providers/anthropic-auth.test.js.map +1 -1
  59. package/packages/pi-ai/dist/providers/anthropic-shared.d.ts.map +1 -1
  60. package/packages/pi-ai/dist/providers/anthropic-shared.js +25 -4
  61. package/packages/pi-ai/dist/providers/anthropic-shared.js.map +1 -1
  62. package/packages/pi-ai/dist/providers/anthropic.d.ts.map +1 -1
  63. package/packages/pi-ai/dist/providers/anthropic.js +8 -3
  64. package/packages/pi-ai/dist/providers/anthropic.js.map +1 -1
  65. package/packages/pi-ai/dist/providers/minimax-tool-name.test.d.ts +2 -0
  66. package/packages/pi-ai/dist/providers/minimax-tool-name.test.d.ts.map +1 -0
  67. package/packages/pi-ai/dist/providers/minimax-tool-name.test.js +80 -0
  68. package/packages/pi-ai/dist/providers/minimax-tool-name.test.js.map +1 -0
  69. package/packages/pi-ai/src/providers/anthropic-auth.test.ts +1 -1
  70. package/packages/pi-ai/src/providers/anthropic-shared.ts +23 -4
  71. package/packages/pi-ai/src/providers/anthropic.ts +9 -3
  72. package/packages/pi-ai/src/providers/minimax-tool-name.test.ts +98 -0
  73. package/packages/pi-ai/tsconfig.tsbuildinfo +1 -1
  74. package/src/resources/extensions/gsd/auto/loop-deps.ts +13 -0
  75. package/src/resources/extensions/gsd/auto/phases.ts +52 -1
  76. package/src/resources/extensions/gsd/auto/session.ts +22 -0
  77. package/src/resources/extensions/gsd/auto-dispatch.ts +16 -3
  78. package/src/resources/extensions/gsd/auto-post-unit.ts +28 -1
  79. package/src/resources/extensions/gsd/auto-prompts.ts +28 -1
  80. package/src/resources/extensions/gsd/auto-worktree.ts +28 -11
  81. package/src/resources/extensions/gsd/auto.ts +46 -10
  82. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +11 -1
  83. package/src/resources/extensions/gsd/bootstrap/write-gate.ts +22 -1
  84. package/src/resources/extensions/gsd/clean-root-preflight.ts +111 -0
  85. package/src/resources/extensions/gsd/safety/evidence-collector.ts +119 -0
  86. package/src/resources/extensions/gsd/safety/file-change-validator.ts +3 -1
  87. package/src/resources/extensions/gsd/safety/safety-harness.ts +3 -0
  88. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +3 -1
  89. package/src/resources/extensions/gsd/tests/auto-paused-session-validation.test.ts +12 -0
  90. package/src/resources/extensions/gsd/tests/clean-root-preflight.test.ts +186 -0
  91. package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts +2 -0
  92. package/src/resources/extensions/gsd/tests/double-merge-guard.test.ts +1 -1
  93. package/src/resources/extensions/gsd/tests/journal-integration.test.ts +2 -0
  94. package/src/resources/extensions/gsd/tests/pre-exec-gate-loop.test.ts +272 -0
  95. package/src/resources/extensions/gsd/tests/safety-harness-false-positives.test.ts +205 -0
  96. package/src/resources/extensions/gsd/tests/uok-plan-v2-wiring.test.ts +23 -0
  97. package/src/resources/extensions/gsd/uok/plan-v2.ts +26 -3
  98. package/src/resources/extensions/gsd/workflow-logger.ts +2 -1
  99. /package/dist/web/standalone/.next/static/{pI48IF3dgfs0CBrYi2bh_ → lLdDRDspgYzfz0bJAmUSz}/_buildManifest.js +0 -0
  100. /package/dist/web/standalone/.next/static/{pI48IF3dgfs0CBrYi2bh_ → lLdDRDspgYzfz0bJAmUSz}/_ssgManifest.js +0 -0
@@ -30,7 +30,8 @@ import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
30
30
  import { ensurePlanV2Graph } from "../uok/plan-v2.js";
31
31
  import { resolveUokFlags } from "../uok/flags.js";
32
32
  import { UokGateRunner } from "../uok/gate-runner.js";
33
- import { resetEvidence } from "../safety/evidence-collector.js";
33
+ import { resetEvidence, loadEvidenceFromDisk } from "../safety/evidence-collector.js";
34
+ import { parseUnitId } from "../unit-id.js";
34
35
  import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
35
36
  import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
36
37
  import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, supportsStructuredQuestions, } from "../workflow-mcp.js";
@@ -389,6 +390,8 @@ export async function runPreDispatch(ic, loopState) {
389
390
  loopState.recentUnits.length = 0;
390
391
  loopState.stuckRecoveryAttempts = 0;
391
392
  // Worktree lifecycle on milestone transition — merge current, enter next
393
+ // #2909: preflight — warn + stash dirty working tree before merge
394
+ const preflightTransition = deps.preflightCleanRoot(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
392
395
  try {
393
396
  deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
394
397
  }
@@ -405,6 +408,10 @@ export async function runPreDispatch(ic, loopState) {
405
408
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
406
409
  return { action: "break", reason: "merge-failed" };
407
410
  }
411
+ // #2909: postflight — restore stashed changes after successful merge
412
+ if (preflightTransition.stashPushed) {
413
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
414
+ }
408
415
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
409
416
  deps.invalidateAllCaches();
410
417
  state = await deps.deriveState(s.basePath);
@@ -459,6 +466,8 @@ export async function runPreDispatch(ic, loopState) {
459
466
  if (incomplete.length === 0 && state.registry.length > 0) {
460
467
  // All milestones complete — merge milestone branch before stopping
461
468
  if (s.currentMilestoneId) {
469
+ // #2909: preflight — warn + stash dirty working tree before merge
470
+ const preflightAllComplete = deps.preflightCleanRoot(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
462
471
  try {
463
472
  deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
464
473
  // Prevent stopAuto from attempting the same merge (#2645)
@@ -475,6 +484,10 @@ export async function runPreDispatch(ic, loopState) {
475
484
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
476
485
  return { action: "break", reason: "merge-failed" };
477
486
  }
487
+ // #2909: postflight — restore stashed changes after successful merge
488
+ if (preflightAllComplete.stashPushed) {
489
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
490
+ }
478
491
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
479
492
  }
480
493
  deps.sendDesktopNotification("GSD", "All milestones complete!", "success", "milestone", basename(s.originalBasePath || s.basePath));
@@ -539,6 +552,8 @@ export async function runPreDispatch(ic, loopState) {
539
552
  if (state.phase === "complete") {
540
553
  // Milestone merge on complete (before closeout so branch state is clean)
541
554
  if (s.currentMilestoneId) {
555
+ // #2909: preflight — warn + stash dirty working tree before merge
556
+ const preflightComplete = deps.preflightCleanRoot(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
542
557
  try {
543
558
  deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
544
559
  // Prevent stopAuto from attempting the same merge (#2645)
@@ -555,6 +570,10 @@ export async function runPreDispatch(ic, loopState) {
555
570
  await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
556
571
  return { action: "break", reason: "merge-failed" };
557
572
  }
573
+ // #2909: postflight — restore stashed changes after successful merge
574
+ if (preflightComplete.stashPushed) {
575
+ deps.postflightPopStash(s.originalBasePath || s.basePath, s.currentMilestoneId, ctx.ui.notify.bind(ctx.ui));
576
+ }
558
577
  // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
559
578
  }
560
579
  deps.sendDesktopNotification("GSD", `Milestone ${mid} complete!`, "success", "milestone", basename(s.originalBasePath || s.basePath));
@@ -1028,6 +1047,14 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
1028
1047
  const safetyConfig = resolveSafetyHarnessConfig(prefs?.safety_harness);
1029
1048
  if (safetyConfig.enabled && safetyConfig.evidence_collection) {
1030
1049
  resetEvidence();
1050
+ // Restore persisted evidence so session-restart resumes don't produce
1051
+ // false-positive "no bash calls" warnings (Bug #4385).
1052
+ if (s.basePath && unitType === "execute-task") {
1053
+ const { milestone: eMid, slice: eSid, task: eTid } = parseUnitId(unitId);
1054
+ if (eMid && eSid && eTid) {
1055
+ loadEvidenceFromDisk(s.basePath, eMid, eSid, eTid);
1056
+ }
1057
+ }
1031
1058
  }
1032
1059
  // Only checkpoint code-executing units (not lifecycle/planning units)
1033
1060
  if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
@@ -79,6 +79,17 @@ export class AutoSession {
79
79
  lastStateRebuildAt = 0;
80
80
  // ── Sidecar queue ─────────────────────────────────────────────────────
81
81
  sidecarQueue = [];
82
+ // ── Pre-exec gate failure context (#4551) ───────────────────────────
83
+ /**
84
+ * Persisted when a pre-execution gate fails on a plan-slice or refine-slice
85
+ * unit. The planning → plan-slice dispatch rule reads this field and injects
86
+ * the failure details into the next re-dispatch prompt so the LLM can fix the
87
+ * specific issues instead of producing an identical plan.
88
+ *
89
+ * Cleared after it has been consumed (injected into the prompt) to avoid
90
+ * stale context bleeding into unrelated slices.
91
+ */
92
+ lastPreExecFailure = null;
82
93
  // ── Tool invocation errors (#2883) ──────────────────────────────────
83
94
  /** Set when a GSD tool execution ends with isError due to malformed/truncated
84
95
  * JSON arguments. Checked by postUnitPreVerification to break retry loops. */
@@ -199,6 +210,7 @@ export class AutoSession {
199
210
  this.sidecarQueue = [];
200
211
  this.rewriteAttemptCount = 0;
201
212
  this.consecutiveCompleteBootstraps = 0;
213
+ this.lastPreExecFailure = null;
202
214
  this.lastToolInvocationError = null;
203
215
  this.lastGitActionFailure = null;
204
216
  this.lastGitActionStatus = null;
@@ -451,18 +451,31 @@ export const DISPATCH_RULES = [
451
451
  },
452
452
  {
453
453
  name: "planning → plan-slice",
454
- match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry }) => {
454
+ match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry, session }) => {
455
455
  if (state.phase !== "planning")
456
456
  return null;
457
457
  if (!state.activeSlice)
458
458
  return missingSliceStop(mid, state.phase);
459
459
  const sid = state.activeSlice.id;
460
460
  const sTitle = state.activeSlice.title;
461
+ // #4551: Consume any persisted pre-exec failure for this slice so the
462
+ // re-dispatched prompt includes the exact blocked references. Clear the
463
+ // field immediately after reading to prevent stale context leaking into
464
+ // a later, unrelated plan-slice run.
465
+ const unitId = `${mid}/${sid}`;
466
+ let priorPreExecFailure;
467
+ if (session?.lastPreExecFailure?.unitId === unitId) {
468
+ priorPreExecFailure = {
469
+ blockingFindings: session.lastPreExecFailure.blockingFindings,
470
+ verdictExcerpt: session.lastPreExecFailure.verdictExcerpt,
471
+ };
472
+ session.lastPreExecFailure = null;
473
+ }
461
474
  return {
462
475
  action: "dispatch",
463
476
  unitType: "plan-slice",
464
- unitId: `${mid}/${sid}`,
465
- prompt: await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, basePath, undefined, { sessionContextWindow, modelRegistry }),
477
+ unitId,
478
+ prompt: await buildPlanSlicePrompt(mid, midTitle, sid, sTitle, basePath, undefined, { sessionContextWindow, modelRegistry, priorPreExecFailure }),
466
479
  };
467
480
  },
468
481
  },
@@ -30,7 +30,7 @@ import { checkPostUnitHooks, isRetryPending, consumeRetryTrigger, persistHookSta
30
30
  import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures } from "./captures.js";
31
31
  import { debugLog } from "./debug-logger.js";
32
32
  import { runSafely } from "./auto-utils.js";
33
- import { getEvidence } from "./safety/evidence-collector.js";
33
+ import { getEvidence, clearEvidenceFromDisk } from "./safety/evidence-collector.js";
34
34
  import { validateFileChanges } from "./safety/file-change-validator.js";
35
35
  // crossReferenceEvidence available for future use when verification_evidence is stored in DB
36
36
  // import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
@@ -588,6 +588,16 @@ export async function postUnitPreVerification(pctx, opts) {
588
588
  debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
589
589
  }
590
590
  }
591
+ // Clear persisted evidence file now that post-unit processing is complete
592
+ // (Bug #4385 — prevents stale evidence from affecting retries of same unit ID).
593
+ if (safetyConfig.evidence_collection && s.currentUnit.type === "execute-task" && sMid && sSid && sTid) {
594
+ try {
595
+ clearEvidenceFromDisk(s.basePath, sMid, sSid, sTid);
596
+ }
597
+ catch (e) {
598
+ debugLog("postUnit", { phase: "safety-evidence-clear", error: String(e) });
599
+ }
600
+ }
591
601
  }
592
602
  }
593
603
  catch (e) {
@@ -950,12 +960,25 @@ export async function postUnitPostVerification(pctx) {
950
960
  const suffix = blockingChecks.length > 3 ? `\n \u2022 ...and ${blockingChecks.length - 3} more` : "";
951
961
  const evidenceNote = `\nSee ${sid}-PRE-EXEC-VERIFY.json for full details.`;
952
962
  ctx.ui.notify(`Pre-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found\n${details}${suffix}${evidenceNote}`, "error");
963
+ // Persist failure context so the next plan-slice re-dispatch can inject
964
+ // it into the prompt and break the infinite loop (#4551).
965
+ s.lastPreExecFailure = {
966
+ unitId: currentUnit.id,
967
+ blockingFindings: blockingChecks.map(c => `[${c.category}] ${c.target}: ${c.message}`),
968
+ verdictExcerpt: `status=${result.status}; ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} detected`,
969
+ };
953
970
  preExecPauseNeeded = true;
954
971
  }
955
972
  else if (result.status === "warn") {
956
973
  ctx.ui.notify(`Pre-execution checks passed with warnings`, "warning");
957
974
  // Strict mode: treat warnings as blocking
958
975
  if (prefs?.enhanced_verification_strict === true) {
976
+ const warnChecks = result.checks.filter(c => !c.passed);
977
+ s.lastPreExecFailure = {
978
+ unitId: currentUnit.id,
979
+ blockingFindings: warnChecks.map(c => `[${c.category}] ${c.target}: ${c.message}`),
980
+ verdictExcerpt: `status=${result.status} (strict mode); ${warnChecks.length} warning${warnChecks.length === 1 ? "" : "s"} treated as blocking`,
981
+ };
959
982
  preExecPauseNeeded = true;
960
983
  }
961
984
  }
@@ -1223,6 +1223,20 @@ export async function buildPlanSlicePrompt(mid, _midTitle, sid, sTitle, base, le
1223
1223
  prependBlocks.push(`## Prior Sketch Scope (soft hint — non-binding)\n\n${options.softScopeHint.trim()}\n\n` +
1224
1224
  `This scope was captured during an earlier progressive-planning pass that was later disabled. Treat it as context only — you may plan beyond it if the work genuinely requires more scope. Do NOT treat this as a hard boundary.`);
1225
1225
  }
1226
+ // #4551: inject pre-exec failure context so the re-dispatched plan-slice
1227
+ // addresses the exact blocked references rather than reproducing the same plan.
1228
+ if (options?.priorPreExecFailure) {
1229
+ const { blockingFindings, verdictExcerpt } = options.priorPreExecFailure;
1230
+ const findingsList = blockingFindings.length > 0
1231
+ ? blockingFindings.map(f => `- ${f}`).join("\n")
1232
+ : "- (no specific findings recorded)";
1233
+ prependBlocks.push(`## Fix these specific issues from the prior pre-exec check\n\n` +
1234
+ `The previous plan-slice attempt was blocked by pre-execution validation.\n` +
1235
+ `Gate verdict: ${verdictExcerpt}\n\n` +
1236
+ `Blocked references that must be resolved in this plan:\n${findingsList}\n\n` +
1237
+ `Revise the plan so that every reference listed above is satisfied before execution begins. ` +
1238
+ `Do not reproduce the same file paths, package names, or task ordering that caused these failures.`);
1239
+ }
1226
1240
  return renderSlicePrompt({
1227
1241
  mid, sid, sTitle, base,
1228
1242
  level: level ?? resolveInlineLevel(),
@@ -1743,16 +1743,32 @@ export function mergeMilestoneToMain(originalBasePath_, milestoneId, roadmapCont
1743
1743
  // When a milestone only produced .gsd/ metadata (summaries, roadmaps) but no
1744
1744
  // real code, the user sees "milestone complete" but nothing changed in their
1745
1745
  // codebase. Surface this so the caller can warn the user.
1746
+ //
1747
+ // Bug #4385 fix: use `git diff-tree --root` instead of `git diff HEAD~1 HEAD`.
1748
+ // `HEAD~1` does not exist on initial commits and is unreliable on shallow clones
1749
+ // and merge commits. `diff-tree --root` handles all three cases correctly.
1750
+ // The empty-tree hash (4b825dc…) is the universal fallback for refs that don't exist.
1751
+ const GIT_EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
1746
1752
  let codeFilesChanged = false;
1747
1753
  if (!nothingToCommit) {
1748
1754
  try {
1749
- const mergedFiles = nativeDiffNumstat(originalBasePath_, "HEAD~1", "HEAD");
1750
- codeFilesChanged = mergedFiles.some((entry) => !entry.path.startsWith(".gsd/"));
1755
+ const diffTreeOutput = execFileSync("git", ["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"], { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim();
1756
+ const mergedFiles = diffTreeOutput ? diffTreeOutput.split("\n").filter(Boolean) : [];
1757
+ codeFilesChanged = mergedFiles.some((f) => !f.startsWith(".gsd/"));
1751
1758
  }
1752
1759
  catch (e) {
1753
- // If HEAD~1 doesn't exist (first commit), assume code was changed
1754
- logWarning("worktree", `diff numstat failed (assuming code changed): ${e.message}`);
1755
- codeFilesChanged = true;
1760
+ // diff-tree failed (e.g. unborn HEAD in a brand-new repo) fall back to
1761
+ // comparing against the empty tree so initial-commit repos still report changes.
1762
+ try {
1763
+ const fallbackOutput = execFileSync("git", ["diff", "--name-only", GIT_EMPTY_TREE, "HEAD"], { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" }).trim();
1764
+ const fallbackFiles = fallbackOutput ? fallbackOutput.split("\n").filter(Boolean) : [];
1765
+ codeFilesChanged = fallbackFiles.some((f) => !f.startsWith(".gsd/"));
1766
+ }
1767
+ catch {
1768
+ // Truly unable to determine — assume code was changed to avoid silent data loss
1769
+ logWarning("worktree", `diff-tree and empty-tree fallback both failed (assuming code changed): ${e.message}`);
1770
+ codeFilesChanged = true;
1771
+ }
1756
1772
  }
1757
1773
  }
1758
1774
  // 10. Auto-push if enabled
@@ -37,8 +37,9 @@ import { getRtkSessionSavings } from "../shared/rtk-session-stats.js";
37
37
  import { deactivateGSD } from "../shared/gsd-phase-state.js";
38
38
  import { initMetrics, resetMetrics, getLedger, getProjectTotals, formatCost, formatTokenCount, } from "./metrics.js";
39
39
  import { setLogBasePath, logWarning } from "./workflow-logger.js";
40
+ import { preflightCleanRoot, postflightPopStash } from "./clean-root-preflight.js";
40
41
  import { homedir } from "node:os";
41
- import { join } from "node:path";
42
+ import { isAbsolute, join } from "node:path";
42
43
  import { pathToFileURL } from "node:url";
43
44
  import { readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs";
44
45
  import { atomicWriteSync } from "./atomic-write.js";
@@ -136,6 +137,24 @@ function restoreMilestoneLockEnv() {
136
137
  s.hadMilestoneLockEnv = false;
137
138
  s.milestoneLockEnvCaptured = false;
138
139
  }
140
+ function normalizeSessionFilePath(raw) {
141
+ if (typeof raw !== "string")
142
+ return null;
143
+ const trimmed = raw.trim();
144
+ if (!trimmed)
145
+ return null;
146
+ const firstLine = trimmed.split(/\r?\n/, 1)[0]?.trim() ?? "";
147
+ if (!firstLine)
148
+ return null;
149
+ // Guard against accidental message concatenation by trimming to .jsonl.
150
+ const jsonlIndex = firstLine.toLowerCase().indexOf(".jsonl");
151
+ const candidate = jsonlIndex >= 0 ? firstLine.slice(0, jsonlIndex + ".jsonl".length) : firstLine;
152
+ if (!isAbsolute(candidate))
153
+ return null;
154
+ if (!candidate.toLowerCase().endsWith(".jsonl"))
155
+ return null;
156
+ return candidate;
157
+ }
139
158
  export function startAutoDetached(ctx, pi, base, verboseMode, options) {
140
159
  void startAuto(ctx, pi, base, verboseMode, options).catch((err) => {
141
160
  const message = getErrorMessage(err);
@@ -775,7 +794,7 @@ export async function pauseAuto(ctx, _pi, _errorContext) {
775
794
  // Pass errorContext so runUnitPhase can distinguish user-initiated pause
776
795
  // from provider-error pause and avoid hard-stopping (#2762).
777
796
  resolveAgentEndCancelled(_errorContext);
778
- s.pausedSessionFile = ctx?.sessionManager?.getSessionFile() ?? null;
797
+ s.pausedSessionFile = normalizeSessionFilePath(ctx?.sessionManager?.getSessionFile() ?? null);
779
798
  // Persist paused-session metadata so resume survives /exit (#1383).
780
799
  // The fresh-start bootstrap checks for this file and restores worktree context.
781
800
  try {
@@ -974,6 +993,9 @@ function buildLoopDeps() {
974
993
  },
975
994
  // Journal
976
995
  emitJournalEvent: (entry) => _emitJournalEvent(s.basePath, entry),
996
+ // Clean-root preflight gate (#2909)
997
+ preflightCleanRoot,
998
+ postflightPopStash,
977
999
  };
978
1000
  }
979
1001
  /**
@@ -1030,7 +1052,9 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1030
1052
  unlinkSync(pausedPath);
1031
1053
  }
1032
1054
  catch (e) {
1033
- logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1055
+ if (e.code !== "ENOENT") {
1056
+ logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1057
+ }
1034
1058
  }
1035
1059
  ctx.ui.notify(`Resuming paused custom workflow${meta.activeRunDir ? ` (${meta.activeRunDir})` : ""}.`, "info");
1036
1060
  }
@@ -1048,7 +1072,9 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1048
1072
  unlinkSync(pausedPath);
1049
1073
  }
1050
1074
  catch (err) {
1051
- logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1075
+ if (err.code !== "ENOENT") {
1076
+ logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1077
+ }
1052
1078
  }
1053
1079
  ctx.ui.notify(`Paused milestone ${meta.milestoneId} is ${!mDir ? "missing" : "already complete"}. Starting fresh.`, "info");
1054
1080
  }
@@ -1056,7 +1082,7 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1056
1082
  s.currentMilestoneId = meta.milestoneId;
1057
1083
  s.originalBasePath = meta.originalBasePath || base;
1058
1084
  s.stepMode = meta.stepMode ?? requestedStepMode;
1059
- s.pausedSessionFile = meta.sessionFile ?? null;
1085
+ s.pausedSessionFile = normalizeSessionFilePath(meta.sessionFile ?? null);
1060
1086
  s.pausedUnitType = meta.unitType ?? null;
1061
1087
  s.pausedUnitId = meta.unitId ?? null;
1062
1088
  s.autoStartTime = meta.autoStartTime || Date.now();
@@ -1066,7 +1092,9 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1066
1092
  unlinkSync(pausedPath);
1067
1093
  }
1068
1094
  catch (e) {
1069
- logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1095
+ if (e.code !== "ENOENT") {
1096
+ logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1097
+ }
1070
1098
  }
1071
1099
  ctx.ui.notify(`Resuming paused session for ${meta.milestoneId}${meta.worktreePath && existsSync(meta.worktreePath) ? ` (worktree)` : ""}.`, "info");
1072
1100
  }
@@ -1076,7 +1104,9 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1076
1104
  unlinkSync(pausedPath);
1077
1105
  }
1078
1106
  catch (e) {
1079
- logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1107
+ if (e.code !== "ENOENT") {
1108
+ logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
1109
+ }
1080
1110
  }
1081
1111
  }
1082
1112
  }
@@ -1132,7 +1162,9 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
1132
1162
  unlinkSync(s.pausedSessionFile);
1133
1163
  }
1134
1164
  catch (err) {
1135
- logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1165
+ if (err.code !== "ENOENT") {
1166
+ logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
1167
+ }
1136
1168
  }
1137
1169
  s.pausedSessionFile = null;
1138
1170
  }
@@ -1360,8 +1392,8 @@ export async function dispatchHookUnit(ctx, pi, hookName, triggerUnitType, trigg
1360
1392
  `Ensure the model is defined in models.json and has auth configured.`, "warning");
1361
1393
  }
1362
1394
  }
1363
- const sessionFile = ctx.sessionManager.getSessionFile();
1364
- writeLock(lockBase(), hookUnitType, triggerUnitId, sessionFile);
1395
+ const sessionFile = normalizeSessionFilePath(ctx.sessionManager.getSessionFile());
1396
+ writeLock(lockBase(), hookUnitType, triggerUnitId, sessionFile ?? undefined);
1365
1397
  clearUnitTimeout();
1366
1398
  const supervisor = resolveAutoSupervisorConfig();
1367
1399
  const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
@@ -17,7 +17,8 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
17
17
  import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
18
18
  import { saveActivityLog } from "../activity-log.js";
19
19
  import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
20
- import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
20
+ import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult, saveEvidenceToDisk } from "../safety/evidence-collector.js";
21
+ import { parseUnitId } from "../unit-id.js";
21
22
  import { classifyCommand } from "../safety/destructive-guard.js";
22
23
  import { logWarning as safetyLogWarning } from "../workflow-logger.js";
23
24
  import { installNotifyInterceptor } from "./notify-interceptor.js";
@@ -457,6 +458,15 @@ export function registerHooks(pi, ecosystemHandlers) {
457
458
  // Safety harness: record tool execution results for evidence cross-referencing
458
459
  if (isAutoActive()) {
459
460
  safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
461
+ // Persist evidence to disk after each tool result so it survives a session
462
+ // restart mid-unit (Bug #4385 — non-persisted evidence false positives).
463
+ const dash = getAutoDashboardData();
464
+ if (dash.basePath && dash.currentUnit?.type === "execute-task") {
465
+ const { milestone: pMid, slice: pSid, task: pTid } = parseUnitId(dash.currentUnit.id);
466
+ if (pMid && pSid && pTid) {
467
+ saveEvidenceToDisk(dash.basePath, pMid, pSid, pTid);
468
+ }
469
+ }
460
470
  }
461
471
  });
462
472
  pi.on("model_select", async (_event, ctx) => {
@@ -24,8 +24,29 @@ const QUEUE_SAFE_TOOLS = new Set([
24
24
  /**
25
25
  * Bash commands that are read-only / investigative — safe during queue mode.
26
26
  * Matches the leading command in a bash invocation.
27
+ *
28
+ * Extension policy: add commands here when they are read-only / diagnostic.
29
+ * Never add commands that mutate project state (write files, run builds that
30
+ * emit artifacts, install packages, etc.).
31
+ *
32
+ * Current read-only additions (Bug #4385):
33
+ * npm run <diagnostic> — read-only diagnostic scripts: test, lint, typecheck, etc.
34
+ * NOT: build, install, compile, generate, deploy (artifact-producing)
35
+ * npm ls/list/info — inspect installed packages (read-only)
36
+ * npm outdated/audit — security/update checks (read-only)
37
+ * npx <pkg> — run a package binary without installing globally
38
+ * tsx — TypeScript runner used for dry-run / inspection scripts
39
+ * node --print — evaluate and print an expression (the expression is arbitrary JS, so it is only read-only when the expression itself has no side effects)
40
+ * python / python3 — script inspection, version checks
41
+ * pip / pip3 show — show installed package info (read-only)
42
+ * jq — read-only JSON query
43
+ * yq — read-only YAML query
44
+ * curl -s / curl --silent — fetch for inspection (no -o / no output redirect)
45
+ * openssl version — version / certificate inspection
46
+ * env / printenv — print environment variables
47
+ * true / false — shell no-ops / test exit codes
27
48
  */
28
- const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s)/;
49
// Allow-list matched against the leading command of a bash invocation.
// `env` is accepted only when it runs no command (bare `env`, optionally
// followed by `|`, `;` or `&`): `env <cmd>` executes <cmd>, so a plain `env\b`
// would let e.g. `env rm -rf x` through a gate meant for read-only commands.
// NOTE(review): most simple alternatives (e.g. `tr`, `comm`) carry no trailing
// word boundary, so they also prefix-match longer command names such as
// `truncate` or `command` — confirm whether that is intended.
const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\s*(?:$|[|;&])|printenv\b|true\b|false\b)/;
29
50
  const verifiedDepthMilestones = new Set();
30
51
  let activeQueuePhase = false;
31
52
  /**
@@ -0,0 +1,93 @@
1
+ /**
2
+ * clean-root-preflight.ts — Preflight gate for dirty working trees before milestone merges.
3
+ *
4
+ * #2909: Adds a fast-path git status check before milestone completion merges.
5
+ * When the working tree is dirty the user is warned and changes are auto-stashed
6
+ * so the merge can proceed cleanly. After the merge completes, postflightPopStash
7
+ * restores the stashed changes.
8
+ *
9
+ * Design constraints (from Trek-e approval):
10
+ * - Warn the user before stashing (no silent surprises)
11
+ * - git stash push / git stash pop only — no custom stash management layer
12
+ * - Stash/pop errors are logged but MUST NOT block the merge
13
+ * - Fast-path status check — clean trees pay no extra cost
14
+ */
15
+ import { execFileSync } from "node:child_process";
16
+ import { GIT_NO_PROMPT_ENV } from "./git-constants.js";
17
+ import { logWarning } from "./workflow-logger.js";
18
+ import { nativeHasChanges } from "./native-git-bridge.js";
19
/**
 * Read the commit id that `refs/stash` currently points at.
 *
 * @param {string} basePath - Directory in which git is run.
 * @returns {string | null | undefined} The stash tip hash; `null` when git
 *   exits with status 1 (no stash entry exists); `undefined` when the lookup
 *   itself could not be performed.
 */
function readStashTip(basePath) {
    try {
        const out = execFileSync("git", ["rev-parse", "--quiet", "--verify", "refs/stash"], {
            cwd: basePath,
            stdio: ["ignore", "pipe", "pipe"],
            encoding: "utf-8",
            env: GIT_NO_PROMPT_ENV,
        });
        return out.trim() || null;
    }
    catch (err) {
        return err?.status === 1 ? null : undefined;
    }
}
/**
 * Check the working tree for dirty files before a milestone merge.
 *
 * Clean tree path: returns immediately with stashPushed=false.
 *
 * Dirty tree path:
 *   1. Emits a warning notification via the provided `notify` callback.
 *   2. Runs `git stash push --include-untracked -m "gsd-preflight-stash"`.
 *   3. Returns stashPushed=true only when a stash entry was actually created,
 *      so the caller knows to call postflightPopStash.
 *
 * Any stash error is logged but does NOT throw — the merge proceeds regardless.
 *
 * @param {string} basePath - Repository working directory.
 * @param {string} milestoneId - Milestone identifier, used in messages only.
 * @param {(message: string, level: string) => void} notify - User notification callback.
 * @returns {{ stashPushed: boolean, summary: string }} Outcome of the preflight.
 */
export function preflightCleanRoot(basePath, milestoneId, notify) {
    // Fast-path: clean tree — nothing to do
    let isDirty = false;
    try {
        isDirty = nativeHasChanges(basePath);
    }
    catch (err) {
        // If the status check itself fails, treat as clean and let the merge decide
        logWarning("preflight", `clean-root status check failed: ${err instanceof Error ? err.message : String(err)}`);
        return { stashPushed: false, summary: "" };
    }
    if (!isDirty) {
        return { stashPushed: false, summary: "" };
    }
    // Warn the user before stashing
    const warnMsg = `Working tree has uncommitted changes before milestone ${milestoneId} merge. Auto-stashing to allow clean merge (stash will be restored after merge).`;
    notify(warnMsg, "warning");
    // Remember the stash tip so a no-op push can be detected afterwards.
    const tipBefore = readStashTip(basePath);
    // Push the stash
    try {
        execFileSync("git", ["stash", "push", "--include-untracked", "-m", "gsd-preflight-stash"], {
            cwd: basePath,
            stdio: ["ignore", "pipe", "pipe"],
            encoding: "utf-8",
            env: GIT_NO_PROMPT_ENV,
        });
    }
    catch (err) {
        // Stash failure is non-fatal — log and let the merge attempt proceed
        const msg = `git stash push failed before merge of milestone ${milestoneId}: ${err instanceof Error ? err.message : String(err)}`;
        logWarning("preflight", msg);
        notify(`Auto-stash failed before milestone ${milestoneId} merge — proceeding anyway. ${msg}`, "warning");
        return { stashPushed: false, summary: `stash-push-failed: ${msg}` };
    }
    // `git stash push` exits 0 even when it saved nothing ("No local changes to
    // save"). Reporting stashPushed=true in that case would make the later
    // `git stash pop` restore an unrelated, pre-existing stash entry.
    // If the tip cannot be read afterwards, keep the original assumption that
    // the push succeeded so the caller still restores the changes.
    const tipAfter = readStashTip(basePath);
    const nothingSaved = tipAfter === null || (tipAfter !== undefined && tipAfter === tipBefore);
    if (nothingSaved) {
        logWarning("preflight", `git stash push saved nothing before merge of milestone ${milestoneId}; skipping post-merge stash pop.`);
        return { stashPushed: false, summary: "" };
    }
    return {
        stashPushed: true,
        summary: `Stashed uncommitted changes before merge (milestone ${milestoneId}).`,
    };
}
69
/**
 * Locate the most recent stash entry labelled "gsd-preflight-stash".
 *
 * The entry is addressed by its position in `git stash list` (newest first),
 * so the result does not depend on how git renders reflog selectors.
 *
 * @param {string} basePath - Directory in which git is run.
 * @returns {string | null | undefined} A `stash@{n}` reference; `null` when the
 *   list was read but holds no such entry; `undefined` when the list could not
 *   be read.
 */
function findPreflightStashRef(basePath) {
    let listing;
    try {
        listing = execFileSync("git", ["stash", "list", "--format=%gs"], {
            cwd: basePath,
            stdio: ["ignore", "pipe", "pipe"],
            encoding: "utf-8",
            env: GIT_NO_PROMPT_ENV,
        });
    }
    catch {
        return undefined;
    }
    const subjects = listing.split("\n");
    const index = subjects.findIndex((subject) => subject.trim().endsWith("gsd-preflight-stash"));
    return index === -1 ? null : `stash@{${index}}`;
}
/**
 * Restore stashed changes after a milestone merge completes.
 *
 * Only called when preflightCleanRoot returned stashPushed=true.
 * Pops the stash entry labelled "gsd-preflight-stash" rather than whatever is
 * on top of the stash, so an entry created in the meantime is never applied by
 * mistake. Falls back to a plain `git stash pop` when the stash list cannot be
 * read. Any pop error (e.g. conflict) is logged and notified but does NOT
 * throw — the merge already completed successfully.
 *
 * @param {string} basePath - Repository working directory.
 * @param {string} milestoneId - Milestone identifier, used in messages only.
 * @param {(message: string, level: string) => void} notify - User notification callback.
 * @returns {void}
 */
export function postflightPopStash(basePath, milestoneId, notify) {
    const stashRef = findPreflightStashRef(basePath);
    if (stashRef === null) {
        // The list was readable but our entry is gone: popping the top entry
        // would apply someone else's stash, so restore nothing.
        const missing = `No "gsd-preflight-stash" entry found after merge of milestone ${milestoneId}; nothing to restore.`;
        logWarning("preflight", missing);
        notify(missing, "warning");
        return;
    }
    const popArgs = stashRef === undefined ? ["stash", "pop"] : ["stash", "pop", stashRef];
    try {
        execFileSync("git", popArgs, {
            cwd: basePath,
            stdio: ["ignore", "pipe", "pipe"],
            encoding: "utf-8",
            env: GIT_NO_PROMPT_ENV,
        });
        notify(`Restored stashed changes after milestone ${milestoneId} merge.`, "info");
    }
    catch (err) {
        // Pop conflicts mean the merged code collides with the stashed changes.
        // Log a warning — the user needs to resolve manually, but the merge succeeded.
        const msg = `git stash pop failed after merge of milestone ${milestoneId}: ${err instanceof Error ? err.message : String(err)}. Run "git ${popArgs.join(" ")}" manually to restore your changes.`;
        logWarning("preflight", msg);
        notify(msg, "warning");
    }
}