npm - gsd-pi - Versions diffs - 2.76.0-dev.97807402 → 2.76.0-dev.97f5583d9 - Mend

gsd-pi 2.76.0-dev.97807402 → 2.76.0-dev.97f5583d9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

package/src/resources/extensions/gsd/auto/loop-deps.ts CHANGED Viewed

@@ -22,6 +22,7 @@ import type { CmuxLogLevel } from "../../cmux/index.js";
 import type { JournalEntry } from "../journal.js";
 import type { MergeReconcileResult } from "../auto-recovery.js";
 import type { UokTurnObserver } from "../uok/contracts.js";
+import type { PreflightResult } from "../clean-root-preflight.js";
 /**
  * Dependencies injected by the caller (auto.ts startAuto) so autoLoop
@@ -122,6 +123,18 @@ export interface LoopDeps {
   ) => string | null;
   reconcileMergeState: (basePath: string, ctx: ExtensionContext) => MergeReconcileResult;
+  // Clean-root preflight gate (#2909)
+  preflightCleanRoot: (
+    basePath: string,
+    milestoneId: string,
+    notify: (message: string, level: "info" | "warning" | "error") => void,
+  ) => PreflightResult;
+  postflightPopStash: (
+    basePath: string,
+    milestoneId: string,
+    notify: (message: string, level: "info" | "warning" | "error") => void,
+  ) => void;
   // Budget/context/secrets
   getLedger: () => unknown;
   getProjectTotals: (units: unknown) => { cost: number };

package/src/resources/extensions/gsd/auto/phases.ts CHANGED Viewed

@@ -54,7 +54,8 @@ import type { MinimalModelRegistry } from "../context-budget.js";
 import { ensurePlanV2Graph } from "../uok/plan-v2.js";
 import { resolveUokFlags } from "../uok/flags.js";
 import { UokGateRunner } from "../uok/gate-runner.js";
-import { resetEvidence } from "../safety/evidence-collector.js";
+import { resetEvidence, loadEvidenceFromDisk } from "../safety/evidence-collector.js";
+import { parseUnitId } from "../unit-id.js";
 import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
 import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
 import {
@@ -545,6 +546,12 @@ export async function runPreDispatch(
     loopState.stuckRecoveryAttempts = 0;
     // Worktree lifecycle on milestone transition — merge current, enter next
+    // #2909: preflight — warn + stash dirty working tree before merge
+    const preflightTransition = deps.preflightCleanRoot(
+      s.originalBasePath || s.basePath,
+      s.currentMilestoneId!,
+      ctx.ui.notify.bind(ctx.ui),
+    );
     try {
       deps.resolver.mergeAndExit(s.currentMilestoneId!, ctx.ui);
     } catch (mergeErr) {
@@ -566,6 +573,14 @@ export async function runPreDispatch(
       await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
       return { action: "break", reason: "merge-failed" };
     }
+    // #2909: postflight — restore stashed changes after successful merge
+    if (preflightTransition.stashPushed) {
+      deps.postflightPopStash(
+        s.originalBasePath || s.basePath,
+        s.currentMilestoneId!,
+        ctx.ui.notify.bind(ctx.ui),
+      );
+    }
     // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
@@ -644,6 +659,12 @@ export async function runPreDispatch(
     if (incomplete.length === 0 && state.registry.length > 0) {
       // All milestones complete — merge milestone branch before stopping
       if (s.currentMilestoneId) {
+        // #2909: preflight — warn + stash dirty working tree before merge
+        const preflightAllComplete = deps.preflightCleanRoot(
+          s.originalBasePath || s.basePath,
+          s.currentMilestoneId,
+          ctx.ui.notify.bind(ctx.ui),
+        );
         try {
           deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
           // Prevent stopAuto from attempting the same merge (#2645)
@@ -665,6 +686,14 @@ export async function runPreDispatch(
           await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
           return { action: "break", reason: "merge-failed" };
         }
+        // #2909: postflight — restore stashed changes after successful merge
+        if (preflightAllComplete.stashPushed) {
+          deps.postflightPopStash(
+            s.originalBasePath || s.basePath,
+            s.currentMilestoneId,
+            ctx.ui.notify.bind(ctx.ui),
+          );
+        }
         // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
       }
@@ -758,6 +787,12 @@ export async function runPreDispatch(
   if (state.phase === "complete") {
     // Milestone merge on complete (before closeout so branch state is clean)
     if (s.currentMilestoneId) {
+      // #2909: preflight — warn + stash dirty working tree before merge
+      const preflightComplete = deps.preflightCleanRoot(
+        s.originalBasePath || s.basePath,
+        s.currentMilestoneId,
+        ctx.ui.notify.bind(ctx.ui),
+      );
       try {
         deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
         // Prevent stopAuto from attempting the same merge (#2645)
@@ -779,6 +814,14 @@ export async function runPreDispatch(
         await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
         return { action: "break", reason: "merge-failed" };
       }
+      // #2909: postflight — restore stashed changes after successful merge
+      if (preflightComplete.stashPushed) {
+        deps.postflightPopStash(
+          s.originalBasePath || s.basePath,
+          s.currentMilestoneId,
+          ctx.ui.notify.bind(ctx.ui),
+        );
+      }
       // PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
     }
@@ -1385,6 +1428,14 @@ export async function runUnitPhase(
   );
   if (safetyConfig.enabled && safetyConfig.evidence_collection) {
     resetEvidence();
+    // Restore persisted evidence so session-restart resumes don't produce
+    // false-positive "no bash calls" warnings (Bug #4385).
+    if (s.basePath && unitType === "execute-task") {
+      const { milestone: eMid, slice: eSid, task: eTid } = parseUnitId(unitId);
+      if (eMid && eSid && eTid) {
+        loadEvidenceFromDisk(s.basePath, eMid, eSid, eTid);
+      }
+    }
   }
   // Only checkpoint code-executing units (not lifecycle/planning units)
   if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {

package/src/resources/extensions/gsd/auto/session.ts CHANGED Viewed

@@ -64,6 +64,15 @@ export interface SidecarItem {
   captureId?: string;
 }
+export interface PreExecFailure {
+  /** Milestone/slice that failed (e.g. "M001/S02"). */
+  unitId: string;
+  /** Verbatim blocking check strings from the failed gate run. */
+  blockingFindings: string[];
+  /** Condensed gate verdict excerpt for context (status + rationale). */
+  verdictExcerpt: string;
+}
 // ─── Constants ───────────────────────────────────────────────────────────────
 export const MAX_UNIT_DISPATCHES = 3;
@@ -139,6 +148,18 @@ export class AutoSession {
   // ── Sidecar queue ─────────────────────────────────────────────────────
   sidecarQueue: SidecarItem[] = [];
+  // ── Pre-exec gate failure context (#4551) ───────────────────────────
+  /**
+   * Persisted when a pre-execution gate fails on a plan-slice or refine-slice
+   * unit. The planning → plan-slice dispatch rule reads this field and injects
+   * the failure details into the next re-dispatch prompt so the LLM can fix the
+   * specific issues instead of producing an identical plan.
+   *
+   * Cleared after it has been consumed (injected into the prompt) to avoid
+   * stale context bleeding into unrelated slices.
+   */
+  lastPreExecFailure: PreExecFailure | null = null;
   // ── Tool invocation errors (#2883) ──────────────────────────────────
   /** Set when a GSD tool execution ends with isError due to malformed/truncated
    *  JSON arguments. Checked by postUnitPreVerification to break retry loops. */
@@ -267,6 +288,7 @@ export class AutoSession {
     this.sidecarQueue = [];
     this.rewriteAttemptCount = 0;
     this.consecutiveCompleteBootstraps = 0;
+    this.lastPreExecFailure = null;
     this.lastToolInvocationError = null;
     this.lastGitActionFailure = null;
     this.lastGitActionStatus = null;

package/src/resources/extensions/gsd/auto-dispatch.ts CHANGED Viewed

@@ -568,15 +568,28 @@ export const DISPATCH_RULES: DispatchRule[] = [
   },
   {
     name: "planning → plan-slice",
-    match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry }) => {
+    match: async ({ state, mid, midTitle, basePath, sessionContextWindow, modelRegistry, session }) => {
       if (state.phase !== "planning") return null;
       if (!state.activeSlice) return missingSliceStop(mid, state.phase);
       const sid = state.activeSlice!.id;
       const sTitle = state.activeSlice!.title;
+      // #4551: Consume any persisted pre-exec failure for this slice so the
+      // re-dispatched prompt includes the exact blocked references. Clear the
+      // field immediately after reading to prevent stale context leaking into
+      // a later, unrelated plan-slice run.
+      const unitId = `${mid}/${sid}`;
+      let priorPreExecFailure: { blockingFindings: string[]; verdictExcerpt: string } | undefined;
+      if (session?.lastPreExecFailure?.unitId === unitId) {
+        priorPreExecFailure = {
+          blockingFindings: session.lastPreExecFailure.blockingFindings,
+          verdictExcerpt: session.lastPreExecFailure.verdictExcerpt,
+        };
+        session.lastPreExecFailure = null;
+      }
       return {
         action: "dispatch",
         unitType: "plan-slice",
-        unitId: `${mid}/${sid}`,
+        unitId,
         prompt: await buildPlanSlicePrompt(
           mid,
           midTitle,
@@ -584,7 +597,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
           sTitle,
           basePath,
           undefined,
-          { sessionContextWindow, modelRegistry },
+          { sessionContextWindow, modelRegistry, priorPreExecFailure },
         ),
       };
     },

package/src/resources/extensions/gsd/auto-post-unit.ts CHANGED Viewed

@@ -55,7 +55,7 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
 import { debugLog } from "./debug-logger.js";
 import { runSafely } from "./auto-utils.js";
 import type { AutoSession, SidecarItem } from "./auto/session.js";
-import { getEvidence } from "./safety/evidence-collector.js";
+import { getEvidence, clearEvidenceFromDisk } from "./safety/evidence-collector.js";
 import { validateFileChanges } from "./safety/file-change-validator.js";
 // crossReferenceEvidence available for future use when verification_evidence is stored in DB
 // import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
@@ -711,6 +711,16 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
             debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
           }
         }
+        // Clear persisted evidence file now that post-unit processing is complete
+        // (Bug #4385 — prevents stale evidence from affecting retries of same unit ID).
+        if (safetyConfig.evidence_collection && s.currentUnit.type === "execute-task" && sMid && sSid && sTid) {
+          try {
+            clearEvidenceFromDisk(s.basePath, sMid, sSid, sTid);
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-evidence-clear", error: String(e) });
+          }
+        }
       }
     } catch (e) {
       debugLog("postUnit", { phase: "safety-harness", error: String(e) });
@@ -1133,6 +1143,15 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
             `Pre-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found\n${details}${suffix}${evidenceNote}`,
             "error",
           );
+          // Persist failure context so the next plan-slice re-dispatch can inject
+          // it into the prompt and break the infinite loop (#4551).
+          s.lastPreExecFailure = {
+            unitId: currentUnit.id,
+            blockingFindings: blockingChecks.map(
+              c => `[${c.category}] ${c.target}: ${c.message}`,
+            ),
+            verdictExcerpt: `status=${result.status}; ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} detected`,
+          };
           preExecPauseNeeded = true;
         } else if (result.status === "warn") {
           ctx.ui.notify(
@@ -1141,6 +1160,14 @@ export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"
           );
           // Strict mode: treat warnings as blocking
           if (prefs?.enhanced_verification_strict === true) {
+            const warnChecks = result.checks.filter(c => !c.passed);
+            s.lastPreExecFailure = {
+              unitId: currentUnit.id,
+              blockingFindings: warnChecks.map(
+                c => `[${c.category}] ${c.target}: ${c.message}`,
+              ),
+              verdictExcerpt: `status=${result.status} (strict mode); ${warnChecks.length} warning${warnChecks.length === 1 ? "" : "s"} treated as blocking`,
+            };
             preExecPauseNeeded = true;
           }
         }

package/src/resources/extensions/gsd/auto-prompts.ts CHANGED Viewed

@@ -1380,7 +1380,18 @@ async function renderSlicePrompt(options: {
 export async function buildPlanSlicePrompt(
   mid: string, _midTitle: string, sid: string, sTitle: string, base: string, level?: InlineLevel,
-  options?: { softScopeHint?: string; sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry },
+  options?: {
+    softScopeHint?: string;
+    sessionContextWindow?: number;
+    modelRegistry?: MinimalModelRegistry;
+    /** Failure context from a prior pre-exec gate run (#4551). When present, a
+     *  "Fix these specific issues" section is appended so the LLM addresses the
+     *  exact problems instead of producing an identical plan that fails again. */
+    priorPreExecFailure?: {
+      blockingFindings: string[];
+      verdictExcerpt: string;
+    };
+  },
 ): Promise<string> {
   const prependBlocks: string[] = [];
   // ADR-011: when the refining-phase dispatch rule gracefully downgrades to
@@ -1393,6 +1404,22 @@ export async function buildPlanSlicePrompt(
       `This scope was captured during an earlier progressive-planning pass that was later disabled. Treat it as context only — you may plan beyond it if the work genuinely requires more scope. Do NOT treat this as a hard boundary.`,
     );
   }
+  // #4551: inject pre-exec failure context so the re-dispatched plan-slice
+  // addresses the exact blocked references rather than reproducing the same plan.
+  if (options?.priorPreExecFailure) {
+    const { blockingFindings, verdictExcerpt } = options.priorPreExecFailure;
+    const findingsList = blockingFindings.length > 0
+      ? blockingFindings.map(f => `- ${f}`).join("\n")
+      : "- (no specific findings recorded)";
+    prependBlocks.push(
+      `## Fix these specific issues from the prior pre-exec check\n\n` +
+      `The previous plan-slice attempt was blocked by pre-execution validation.\n` +
+      `Gate verdict: ${verdictExcerpt}\n\n` +
+      `Blocked references that must be resolved in this plan:\n${findingsList}\n\n` +
+      `Revise the plan so that every reference listed above is satisfied before execution begins. ` +
+      `Do not reproduce the same file paths, package names, or task ordering that caused these failures.`,
+    );
+  }
   return renderSlicePrompt({
     mid, sid, sTitle, base,
     level: level ?? resolveInlineLevel(),

package/src/resources/extensions/gsd/auto-worktree.ts CHANGED Viewed

@@ -1994,21 +1994,38 @@ export function mergeMilestoneToMain(
   // When a milestone only produced .gsd/ metadata (summaries, roadmaps) but no
   // real code, the user sees "milestone complete" but nothing changed in their
   // codebase. Surface this so the caller can warn the user.
+  //
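+  // Bug #4385 fix: use `git diff-tree --root` instead of `git diff HEAD~1 HEAD`.
+  // `HEAD~1` does not exist on initial commits and is unreliable on shallow clones;
+  // `diff-tree --root` diffs a parentless commit against the empty tree instead of failing.
+  // NOTE(review): without `-m`/`-c`/`--cc`, `git diff-tree` prints nothing for a merge
+  // commit, so a true merge HEAD would be reported as "no code files changed" — confirm
+  // that HEAD is always a single-parent (e.g. squash) commit at this point.
+  // The empty-tree hash (4b825dc…) is the base of the fallback diff below; it is the
+  // SHA-1 empty tree — TODO confirm behavior for SHA-256 repositories.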
+  const GIT_EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
   let codeFilesChanged = false;
   if (!nothingToCommit) {
     try {
-      const mergedFiles = nativeDiffNumstat(
-        originalBasePath_,
-        "HEAD~1",
-        "HEAD",
-      );
-      codeFilesChanged = mergedFiles.some(
-        (entry) => !entry.path.startsWith(".gsd/"),
-      );
+      const diffTreeOutput = execFileSync(
+        "git",
+        ["diff-tree", "--root", "--no-commit-id", "-r", "--name-only", "HEAD"],
+        { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
+      ).trim();
+      const mergedFiles = diffTreeOutput ? diffTreeOutput.split("\n").filter(Boolean) : [];
+      codeFilesChanged = mergedFiles.some((f) => !f.startsWith(".gsd/"));
     } catch (e) {
-      // If HEAD~1 doesn't exist (first commit), assume code was changed
-      logWarning("worktree", `diff numstat failed (assuming code changed): ${(e as Error).message}`);
-      codeFilesChanged = true;
+      // diff-tree failed — retry by diffing HEAD against the empty tree. This lists
+      // every path in HEAD (not only this commit's changes) and still requires HEAD
+      // to resolve, so it cannot succeed on an unborn HEAD.
+      try {
+        const fallbackOutput = execFileSync(
+          "git",
+          ["diff", "--name-only", GIT_EMPTY_TREE, "HEAD"],
+          { cwd: originalBasePath_, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
+        ).trim();
+        const fallbackFiles = fallbackOutput ? fallbackOutput.split("\n").filter(Boolean) : [];
+        codeFilesChanged = fallbackFiles.some((f) => !f.startsWith(".gsd/"));
+      } catch {
+        // Truly unable to determine — assume code was changed to avoid silent data loss
+        logWarning("worktree", `diff-tree and empty-tree fallback both failed (assuming code changed): ${(e as Error).message}`);
+        codeFilesChanged = true;
+      }
     }
   }

package/src/resources/extensions/gsd/auto.ts CHANGED Viewed

@@ -126,8 +126,9 @@ import {
   formatTokenCount,
 } from "./metrics.js";
 import { setLogBasePath, logWarning, logError } from "./workflow-logger.js";
+import { preflightCleanRoot, postflightPopStash } from "./clean-root-preflight.js";
 import { homedir } from "node:os";
-import { join } from "node:path";
+import { isAbsolute, join } from "node:path";
 import { pathToFileURL } from "node:url";
 import { readFileSync, existsSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs";
 import { atomicWriteSync } from "./atomic-write.js";
@@ -309,6 +310,21 @@ function restoreMilestoneLockEnv(): void {
   s.milestoneLockEnvCaptured = false;
 }
+function normalizeSessionFilePath(raw: unknown): string | null {
+  if (typeof raw !== "string") return null;
+  const trimmed = raw.trim();
+  if (!trimmed) return null;
+  const firstLine = trimmed.split(/\r?\n/, 1)[0]?.trim() ?? "";
+  if (!firstLine) return null;
+  // Guard against accidental message concatenation: keep only the text up to and
+  // including the first ".jsonl" (case-insensitive) on the first line.
+  const jsonlIndex = firstLine.toLowerCase().indexOf(".jsonl");
+  const candidate = jsonlIndex >= 0 ? firstLine.slice(0, jsonlIndex + ".jsonl".length) : firstLine;
+  if (!isAbsolute(candidate)) return null;
+  if (!candidate.toLowerCase().endsWith(".jsonl")) return null;
+  return candidate;
+}
 export function startAutoDetached(
   ctx: ExtensionCommandContext,
   pi: ExtensionAPI,
@@ -1055,7 +1071,7 @@ export async function pauseAuto(
   // from provider-error pause and avoid hard-stopping (#2762).
   resolveAgentEndCancelled(_errorContext);
-  s.pausedSessionFile = ctx?.sessionManager?.getSessionFile() ?? null;
+  s.pausedSessionFile = normalizeSessionFilePath(ctx?.sessionManager?.getSessionFile() ?? null);
   // Persist paused-session metadata so resume survives /exit (#1383).
   // The fresh-start bootstrap checks for this file and restores worktree context.
@@ -1287,6 +1303,10 @@ function buildLoopDeps(): LoopDeps {
     // Journal
     emitJournalEvent: (entry: JournalEntry) => _emitJournalEvent(s.basePath, entry),
+    // Clean-root preflight gate (#2909)
+    preflightCleanRoot,
+    postflightPopStash,
   } as unknown as LoopDeps;
 }
@@ -1359,7 +1379,11 @@ export async function startAuto(
         s.autoStartTime = meta.autoStartTime || Date.now();
         s.sessionMilestoneLock = meta.milestoneLock ?? null;
         s.paused = true;
-        try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
+        try { unlinkSync(pausedPath); } catch (e) {
+          if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
+            logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
+          }
+        }
         ctx.ui.notify(
           `Resuming paused custom workflow${meta.activeRunDir ? ` (${meta.activeRunDir})` : ""}.`,
           "info",
@@ -1378,7 +1402,9 @@ export async function startAuto(
           const summaryFile = resolveMilestoneFile(base, meta.milestoneId, "SUMMARY");
           if (!mDir || summaryFile) {
             try { unlinkSync(pausedPath); } catch (err) {
-              logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
+              if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
+                logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
+              }
             }
             ctx.ui.notify(
               `Paused milestone ${meta.milestoneId} is ${!mDir ? "missing" : "already complete"}. Starting fresh.`,
@@ -1388,20 +1414,28 @@ export async function startAuto(
             s.currentMilestoneId = meta.milestoneId;
             s.originalBasePath = meta.originalBasePath || base;
             s.stepMode = meta.stepMode ?? requestedStepMode;
-            s.pausedSessionFile = meta.sessionFile ?? null;
+            s.pausedSessionFile = normalizeSessionFilePath(meta.sessionFile ?? null);
             s.pausedUnitType = meta.unitType ?? null;
             s.pausedUnitId = meta.unitId ?? null;
             s.autoStartTime = meta.autoStartTime || Date.now();
             s.sessionMilestoneLock = meta.milestoneLock ?? null;
             s.paused = true;
-            try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
+            try { unlinkSync(pausedPath); } catch (e) {
+              if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
+                logWarning("session", `pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
+              }
+            }
             ctx.ui.notify(
               `Resuming paused session for ${meta.milestoneId}${meta.worktreePath && existsSync(meta.worktreePath) ? ` (worktree)` : ""}.`,
               "info",
             );
           }
         } else if (existsSync(pausedPath)) {
-          try { unlinkSync(pausedPath); } catch (e) { logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" }); }
+          try { unlinkSync(pausedPath); } catch (e) {
+            if ((e as NodeJS.ErrnoException).code !== "ENOENT") {
+              logWarning("session", `stale pause file cleanup failed: ${e instanceof Error ? e.message : String(e)}`, { file: "auto.ts" });
+            }
+          }
         }
       }
     } catch (err) {
@@ -1460,7 +1494,9 @@ export async function startAuto(
     // Lock acquired — now safe to delete the pause file
     if (s.pausedSessionFile) {
       try { unlinkSync(s.pausedSessionFile); } catch (err) {
-        logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
+        if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
+          logWarning("session", `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, { file: "auto.ts" });
+        }
       }
       s.pausedSessionFile = null;
     }
@@ -1771,12 +1807,12 @@ export async function dispatchHookUnit(
     }
   }
-  const sessionFile = ctx.sessionManager.getSessionFile();
+  const sessionFile = normalizeSessionFilePath(ctx.sessionManager.getSessionFile());
   writeLock(
     lockBase(),
     hookUnitType,
     triggerUnitId,
-    sessionFile,
+    sessionFile ?? undefined,
   );
   clearUnitTimeout();

package/src/resources/extensions/gsd/bootstrap/register-hooks.ts CHANGED Viewed

@@ -23,7 +23,8 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
 import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
 import { saveActivityLog } from "../activity-log.js";
 import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
-import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
+import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult, saveEvidenceToDisk } from "../safety/evidence-collector.js";
+import { parseUnitId } from "../unit-id.js";
 import { classifyCommand } from "../safety/destructive-guard.js";
 import { logWarning as safetyLogWarning } from "../workflow-logger.js";
 import { installNotifyInterceptor } from "./notify-interceptor.js";
@@ -499,6 +500,15 @@ export function registerHooks(
     // Safety harness: record tool execution results for evidence cross-referencing
     if (isAutoActive()) {
       safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
+      // Persist evidence to disk after each tool result so it survives a session
+      // restart mid-unit (Bug #4385 — non-persisted evidence false positives).
+      const dash = getAutoDashboardData();
+      if (dash.basePath && dash.currentUnit?.type === "execute-task") {
+        const { milestone: pMid, slice: pSid, task: pTid } = parseUnitId(dash.currentUnit.id);
+        if (pMid && pSid && pTid) {
+          saveEvidenceToDisk(dash.basePath, pMid, pSid, pTid);
+        }
+      }
     }
   });

package/src/resources/extensions/gsd/bootstrap/write-gate.ts CHANGED Viewed

@@ -28,8 +28,29 @@ const QUEUE_SAFE_TOOLS = new Set([
 /**
  * Bash commands that are read-only / investigative — safe during queue mode.
  * Matches the leading command in a bash invocation.
+ *
+ * Extension policy: add commands here when they are read-only / diagnostic.
+ * Never add commands that mutate project state (write files, run builds that
+ * emit artifacts, install packages, etc.).
+ *
+ * Current read-only additions (Bug #4385):
+ *   npm run <diagnostic> — read-only diagnostic scripts: test, lint, typecheck, etc.
+ *                         NOT: build, install, compile, generate, deploy (artifact-producing)
+ *   npm ls/list/info    — inspect installed packages (read-only)
+ *   npm outdated/audit  — security/update checks (read-only)
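+ *   npx <pkg>           — run a package binary without installing globally
+ *                         NOTE(review): executes arbitrary package code; not inherently read-only
+ *   tsx                 — TypeScript runner used for dry-run / inspection scripts
+ *                         NOTE(review): runs arbitrary scripts; read-only only by convention
+ *   node --print / --version / -v — evaluate and print an expression, or print the version
+ *                         NOTE(review): --print evaluates arbitrary JS, which can have side effects
+ *   python / python3    — only `-c '<code>'`, `--version`, `-V`, `-m pip show|list`, `-m site`
+ *                         NOTE(review): `-c` runs arbitrary code; treat as inspection by convention
+ *   pip / pip3          — show / list / freeze / check / index versions (read-only)
+ *   jq                  — read-only JSON query
+ *   yq                  — read-only YAML query
+ *   curl -s / curl --silent — fetch for inspection (no -o / no output redirect)
+ *   openssl version / x509 / s_client — version / certificate inspection
+ *   env / printenv      — print environment variables
+ *                         NOTE(review): `env <cmd> …` also matches this pattern — confirm that is intended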
+ *   true / false        — shell no-ops / test exit codes
  */
-const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s)/;
+const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.gsd|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\b|printenv\b|true\b|false\b)/;
 const verifiedDepthMilestones = new Set<string>();
 let activeQueuePhase = false;