npm - gsd-pi - Versions diffs - 2.63.0 → 2.64.0 - Mend

gsd-pi 2.63.0 → 2.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (353)

package/dist/resources/extensions/gsd/auto/phases.js CHANGED Viewed

@@ -13,13 +13,20 @@ import { runUnit } from "./run-unit.js";
 import { debugLog } from "../debug-logger.js";
 import { PROJECT_FILES } from "../detection.js";
 import { MergeConflictError } from "../git-service.js";
-import { join, basename } from "node:path";
-import { existsSync, cpSync } from "node:fs";
+import { join, basename, dirname, parse as parsePath } from "node:path";
+import { existsSync, cpSync, readdirSync } from "node:fs";
 import { logWarning, logError } from "../workflow-logger.js";
 import { gsdRoot } from "../paths.js";
 import { atomicWriteSync } from "../atomic-write.js";
 import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
 import { writeUnitRuntimeRecord } from "../unit-runtime.js";
+import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
+import { getEligibleSlices } from "../slice-parallel-eligibility.js";
+import { startSliceParallel } from "../slice-parallel-orchestrator.js";
+import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
+import { resetEvidence } from "../safety/evidence-collector.js";
+import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
+import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
 // ─── generateMilestoneReport ──────────────────────────────────────────────────
 /**
  * Resolve the base path for milestone reports.
@@ -142,6 +149,50 @@ export async function runPreDispatch(ic, loopState) {
         mid,
         statePhase: state.phase,
     });
+    // ── Slice-level parallelism gate (#2340) ─────────────────────────────
+    // When slice_parallel is enabled, check if multiple slices are eligible
+    // for parallel execution. If so, dispatch them in parallel and stop the
+    // sequential loop. Workers are spawned via slice-parallel-orchestrator.ts.
+    if (prefs?.slice_parallel?.enabled &&
+        mid &&
+        !process.env.GSD_PARALLEL_WORKER &&
+        isDbAvailable()) {
+        try {
+            const dbSlices = getMilestoneSlices(mid);
+            if (dbSlices.length > 0) {
+                const doneIds = new Set(dbSlices.filter(sl => sl.status === "complete" || sl.status === "done").map(sl => sl.id));
+                const sliceInputs = dbSlices.map(sl => ({
+                    id: sl.id,
+                    done: doneIds.has(sl.id),
+                    depends: sl.depends ?? [],
+                }));
+                const eligible = getEligibleSlices(sliceInputs, doneIds);
+                if (eligible.length > 1) {
+                    debugLog("autoLoop", {
+                        phase: "slice-parallel-dispatch",
+                        iteration: ic.iteration,
+                        mid,
+                        eligibleSlices: eligible.map(e => e.id),
+                    });
+                    ctx.ui.notify(`Slice-parallel: dispatching ${eligible.length} eligible slices for ${mid}.`, "info");
+                    const result = await startSliceParallel(s.basePath, mid, eligible, { maxWorkers: prefs.slice_parallel.max_workers ?? 2 });
+                    if (result.started.length > 0) {
+                        ctx.ui.notify(`Slice-parallel: started ${result.started.length} worker(s): ${result.started.join(", ")}.`, "info");
+                        await deps.stopAuto(ctx, pi, `Slice-parallel dispatched for ${mid}`);
+                        return { action: "break", reason: "slice-parallel-dispatched" };
+                    }
+                    // Fall through to sequential if no workers started
+                }
+            }
+        }
+        catch (err) {
+            debugLog("autoLoop", {
+                phase: "slice-parallel-check-error",
+                error: err instanceof Error ? err.message : String(err),
+            });
+            // Non-fatal — fall through to sequential dispatch
+        }
+    }
     // ── Milestone transition ────────────────────────────────────────────
     if (mid && s.currentMilestoneId && mid !== s.currentMilestoneId) {
         deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "milestone-transition", data: { from: s.currentMilestoneId, to: mid } });
@@ -669,11 +720,40 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
         }
         const hasProjectFile = PROJECT_FILES.some((f) => deps.existsSync(join(s.basePath, f)));
         const hasSrcDir = deps.existsSync(join(s.basePath, "src"));
-        if (!hasProjectFile && !hasSrcDir) {
+        // Xcode bundles have project-specific names (*.xcodeproj, *.xcworkspace)
+        // that cannot be matched by exact filename — scan the directory by suffix.
+        let hasXcodeBundle = false;
+        try {
+            const entries = deps.existsSync(s.basePath) ? readdirSync(s.basePath) : [];
+            hasXcodeBundle = entries.some((e) => e.endsWith(".xcodeproj") || e.endsWith(".xcworkspace"));
+        }
+        catch (err) {
+            debugLog("runUnitPhase", { phase: "xcode-bundle-scan-failed", basePath: s.basePath, error: String(err) });
+        }
+        // Monorepo support (#2347): if no project files in the worktree directory,
+        // walk parent directories up to the filesystem root. In monorepos,
+        // package.json / Cargo.toml etc. live in a parent directory.
+        let hasProjectFileInParent = false;
+        if (!hasProjectFile && !hasSrcDir && !hasXcodeBundle) {
+            let checkDir = dirname(s.basePath);
+            const { root } = parsePath(checkDir);
+            while (checkDir !== root) {
+                // Stop at git repository boundary — ancestors above the repo root
+                // (e.g. ~ or /usr/local) may contain unrelated project files.
+                if (deps.existsSync(join(checkDir, ".git")))
+                    break;
+                if (PROJECT_FILES.some((f) => deps.existsSync(join(checkDir, f)))) {
+                    hasProjectFileInParent = true;
+                    break;
+                }
+                checkDir = dirname(checkDir);
+            }
+        }
+        if (!hasProjectFile && !hasSrcDir && !hasXcodeBundle && !hasProjectFileInParent) {
             // Greenfield projects won't have project files yet — the first task creates them.
             // Log a warning but allow execution to proceed. The .git check above is sufficient
             // to ensure we're in a valid working directory.
-            debugLog("runUnitPhase", { phase: "worktree-health-warn-greenfield", basePath: s.basePath, hasProjectFile, hasSrcDir });
+            debugLog("runUnitPhase", { phase: "worktree-health-warn-greenfield", basePath: s.basePath, hasProjectFile, hasSrcDir, hasXcodeBundle });
             ctx.ui.notify(`Warning: ${s.basePath} has no recognized project files — proceeding as greenfield project`, "warning");
         }
     }
@@ -683,6 +763,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
         s.currentUnit.id === unitId);
     const previousTier = s.currentUnitRouting?.tier;
     s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
+    s.lastToolInvocationError = null; // #2883: clear stale error from previous unit
     const unitStartSeq = ic.nextSeq();
     deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: unitStartSeq, eventType: "unit-start", data: { unitType, unitId } });
     deps.captureAvailableSkills();
@@ -695,18 +776,22 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
         lastProgressKind: "dispatch",
         recoveryAttempts: 0, // Reset so re-dispatched units get full recovery budget (#2322)
     });
-    // Select and apply model (with tier escalation on retry — normal units only)
-    const modelResult = await deps.selectAndApplyModel(ctx, pi, unitType, unitId, s.basePath, prefs, s.verbose, s.autoModeStartModel, sidecarItem ? undefined : { isRetry, previousTier });
-    s.currentUnitRouting =
-        modelResult.routing;
-    s.currentUnitModel =
-        modelResult.appliedModel;
-    // Status bar + progress widget
+    // Status bar (widget + preconditions deferred until after model selection — see #2899)
     ctx.ui.setStatus("gsd-auto", "auto");
     if (mid)
         deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
-    deps.updateProgressWidget(ctx, unitType, unitId, state);
-    deps.ensurePreconditions(unitType, unitId, s.basePath, state);
+    // ── Safety harness: reset evidence + create checkpoint ──
+    const safetyConfig = resolveSafetyHarnessConfig(prefs?.safety_harness);
+    if (safetyConfig.enabled && safetyConfig.evidence_collection) {
+        resetEvidence();
+    }
+    // Only checkpoint code-executing units (not lifecycle/planning units)
+    if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
+        s.checkpointSha = createCheckpoint(s.basePath, unitId);
+        if (s.checkpointSha) {
+            debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
+        }
+    }
     // Prompt injection
     let finalPrompt = prompt;
     if (s.pendingVerificationRetry) {
@@ -764,6 +849,12 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
         const msg = reorderErr instanceof Error ? reorderErr.message : String(reorderErr);
         logWarning("engine", "Prompt reorder failed", { error: msg });
     }
+    // Select and apply model (with tier escalation on retry — normal units only)
+    const modelResult = await deps.selectAndApplyModel(ctx, pi, unitType, unitId, s.basePath, prefs, s.verbose, s.autoModeStartModel, sidecarItem ? undefined : { isRetry, previousTier });
+    s.currentUnitRouting =
+        modelResult.routing;
+    s.currentUnitModel =
+        modelResult.appliedModel;
     // Apply sidecar/pre-dispatch hook model override (takes priority over standard model selection)
     const hookModelOverride = sidecarItem?.model ?? iterData.hookModelOverride;
     if (hookModelOverride) {
@@ -784,6 +875,15 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
                 `Ensure the model is defined in models.json and has auth configured.`, "warning");
         }
     }
+    // Store the final dispatched model ID so the dashboard can read it (#2899).
+    // This accounts for hook model overrides applied after selectAndApplyModel.
+    s.currentDispatchedModelId = s.currentUnitModel
+        ? `${s.currentUnitModel.provider ?? ""}/${s.currentUnitModel.id ?? ""}`
+        : null;
+    // Progress widget + preconditions — deferred to after model selection so the
+    // widget's first render tick shows the correct model (#2899).
+    deps.updateProgressWidget(ctx, unitType, unitId, state);
+    deps.ensurePreconditions(unitType, unitId, s.basePath, state);
     // Start unit supervision
     deps.clearUnitTimeout();
     deps.startUnitSupervision({
@@ -860,11 +960,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
     if (s.currentUnit) {
         await deps.closeoutUnit(ctx, s.basePath, unitType, unitId, s.currentUnit.startedAt, deps.buildSnapshotOpts(unitType, unitId));
     }
-    // ── Zero tool-call guard (#1833) ──────────────────────────────────
-    // An execute-task agent that completes with 0 tool calls made no
-    // real changes — its summary is hallucinated. Treat as failed so
-    // the task is retried instead of silently marked complete.
-    if (unitType === "execute-task") {
+    // ── Zero tool-call guard (#1833, #2653) ──────────────────────────
+    // Any unit that completes with 0 tool calls made no real progress —
+    // likely context exhaustion where all tool calls errored out. Treat
+    // as failed so the unit is retried in a fresh context instead of
+    // silently passing through to artifact verification (which loops
+    // forever when the unit never produced its artifact).
+    {
         const currentLedger = deps.getLedger();
         if (currentLedger?.units) {
             const lastUnit = [...currentLedger.units].reverse().find((u) => u.type === unitType && u.id === unitId && u.startedAt === s.currentUnit?.startedAt);
@@ -873,11 +975,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
                     phase: "zero-tool-calls",
                     unitType,
                     unitId,
-                    warning: "Task completed with 0 tool calls — likely hallucinated, marking as failed",
+                    warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
                 });
-                ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — hallucinated summary, will retry`, "warning");
+                ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`, "warning");
                 // Fall through to next iteration where dispatch will re-derive
-                // and re-dispatch this task.
+                // and re-dispatch this unit.
                 return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
             }
         }
@@ -912,6 +1014,25 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
         }
     }
     deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });
+    // ── Safety harness: checkpoint cleanup or rollback ──
+    if (s.checkpointSha) {
+        if (unitResult.status === "error" && safetyConfig.auto_rollback) {
+            const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
+            if (rolled) {
+                ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
+                debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
+            }
+        }
+        else if (unitResult.status === "error") {
+            ctx.ui.notify(`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`, "warning");
+        }
+        else {
+            // Success — clean up checkpoint ref
+            cleanupCheckpoint(s.basePath, unitId);
+            debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
+        }
+        s.checkpointSha = null;
+    }
     return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
 }
 // ─── runFinalize ──────────────────────────────────────────────────────────────
@@ -993,7 +1114,21 @@ export async function runFinalize(ic, iterData, sidecarItem) {
         }
     }
     // Post-verification processing (DB dual-write, hooks, triage, quick-tasks)
-    const postResult = await deps.postUnitPostVerification(postUnitCtx);
+    // Timeout guard: if postUnitPostVerification hangs (e.g., module import
+    // deadlock, SQLite transaction hang), force-continue after timeout so the
+    // auto-loop is not permanently frozen (#2344).
+    const postResultGuard = await withTimeout(deps.postUnitPostVerification(postUnitCtx), FINALIZE_POST_TIMEOUT_MS, "postUnitPostVerification");
+    if (postResultGuard.timedOut) {
+        debugLog("autoLoop", {
+            phase: "post-verification-timeout",
+            iteration: ic.iteration,
+            unitType: iterData.unitType,
+            unitId: iterData.unitId,
+        });
+        ctx.ui.notify(`postUnitPostVerification timed out after ${FINALIZE_POST_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} — continuing to next iteration`, "warning");
+        return { action: "next", data: undefined };
+    }
+    const postResult = postResultGuard.value;
     if (postResult === "stopped") {
         debugLog("autoLoop", {
             phase: "exit",

package/dist/resources/extensions/gsd/auto/session.js CHANGED Viewed

@@ -50,6 +50,8 @@ export class AutoSession {
     // ── Model state ──────────────────────────────────────────────────────────
     autoModeStartModel = null;
     currentUnitModel = null;
+    /** Fully-qualified model ID (provider/id) set after selectAndApplyModel + hook overrides (#2899). */
+    currentDispatchedModelId = null;
     originalModelId = null;
     originalModelProvider = null;
     lastBudgetAlertLevel = 0;
@@ -62,6 +64,10 @@ export class AutoSession {
     lastStateRebuildAt = 0;
     // ── Sidecar queue ─────────────────────────────────────────────────────
     sidecarQueue = [];
+    // ── Tool invocation errors (#2883) ──────────────────────────────────
+    /** Set when a GSD tool execution ends with isError due to malformed/truncated
+     *  JSON arguments. Checked by postUnitPreVerification to break retry loops. */
+    lastToolInvocationError = null;
     // ── Isolation degradation ────────────────────────────────────────────
     /** Set to true when worktree creation fails; prevents merge of nonexistent branch. */
     isolationDegraded = false;
@@ -76,6 +82,9 @@ export class AutoSession {
     lastPromptCharCount;
     lastBaselineCharCount;
     pendingQuickTasks = [];
+    // ── Safety harness ───────────────────────────────────────────────────────
+    /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
+    checkpointSha = null;
     // ── Signal handler ───────────────────────────────────────────────────────
     sigtermHandler = null;
     // ── Loop promise state ──────────────────────────────────────────────────
@@ -132,6 +141,7 @@ export class AutoSession {
         // Model
         this.autoModeStartModel = null;
         this.currentUnitModel = null;
+        this.currentDispatchedModelId = null;
         this.originalModelId = null;
         this.originalModelProvider = null;
         this.lastBudgetAlertLevel = 0;
@@ -149,8 +159,10 @@ export class AutoSession {
         this.pendingQuickTasks = [];
         this.sidecarQueue = [];
         this.rewriteAttemptCount = 0;
+        this.lastToolInvocationError = null;
         this.isolationDegraded = false;
         this.milestoneMergedInPhases = false;
+        this.checkpointSha = null;
         // Signal handler
         this.sigtermHandler = null;
         // Loop promise state lives in auto-loop.ts module scope

package/dist/resources/extensions/gsd/auto-dashboard.js CHANGED Viewed

@@ -516,9 +516,15 @@ export function updateProgressWidget(ctx, unitType, unitId, state, accessors, ti
                 const cxWindow = cxUsage?.contextWindow ?? cmdCtx?.model?.contextWindow ?? 0;
                 const cxPctVal = cxUsage?.percent ?? 0;
                 const cxPct = cxUsage?.percent !== null ? cxPctVal.toFixed(1) : "?";
-                // Model display — shown in context section, not stats
-                const modelId = cmdCtx?.model?.id ?? "";
-                const modelProvider = cmdCtx?.model?.provider ?? "";
+                // Model display — prefer dispatched model ID (set after selectAndApplyModel
+                // + hook overrides) over cmdCtx?.model which can be stale (#2899).
+                const dispatchedModelId = accessors.getCurrentDispatchedModelId();
+                const modelId = dispatchedModelId
+                    ? dispatchedModelId.split("/").slice(1).join("/") || dispatchedModelId
+                    : (cmdCtx?.model?.id ?? "");
+                const modelProvider = dispatchedModelId
+                    ? dispatchedModelId.split("/")[0] || ""
+                    : (cmdCtx?.model?.provider ?? "");
                 const tierIcon = resolveServiceTierIcon(effectiveServiceTier, modelId);
                 const modelDisplay = (modelProvider && modelId
                     ? `${modelProvider}/${modelId}`

package/dist/resources/extensions/gsd/auto-model-selection.js CHANGED Viewed

@@ -15,6 +15,9 @@ export function resolvePreferredModelConfig(unitType, autoModeStartModel) {
     const routingConfig = resolveDynamicRoutingConfig();
     if (!routingConfig.enabled || !routingConfig.tier_models)
         return undefined;
+    // Don't synthesize a routing config for flat-rate providers (#3453).
+    if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
+        return undefined;
     const ceilingModel = routingConfig.tier_models.heavy
         ?? (autoModeStartModel ? `${autoModeStartModel.provider}/${autoModeStartModel.id}` : undefined);
     if (!ceilingModel)
@@ -41,6 +44,25 @@ export async function selectAndApplyModel(ctx, pi, unitType, unitId, basePath, p
         const routingConfig = resolveDynamicRoutingConfig();
         let effectiveModelConfig = modelConfig;
         let routingTierLabel = "";
+        // Disable routing for flat-rate providers like GitHub Copilot (#3453).
+        // All models cost the same per request, so downgrading to a cheaper
+        // model provides no cost benefit — it only degrades quality.
+        // Fail-closed: if primary model can't be resolved, fall back to
+        // provider-level signals rather than allowing unwanted downgrades.
+        if (routingConfig.enabled) {
+            const primaryModel = resolveModelId(modelConfig.primary, availableModels, ctx.model?.provider);
+            if (primaryModel) {
+                if (isFlatRateProvider(primaryModel.provider)) {
+                    routingConfig.enabled = false;
+                }
+            }
+            else if ((autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
+                || (ctx.model?.provider && isFlatRateProvider(ctx.model.provider))) {
+                // Primary model unresolvable but provider signals indicate flat-rate —
+                // disable routing to prevent quality degradation.
+                routingConfig.enabled = false;
+            }
+        }
         if (routingConfig.enabled) {
             let budgetPct;
             if (routingConfig.budget_pressure !== false) {
@@ -244,3 +266,13 @@ export function resolveModelId(modelId, availableModels, currentProvider) {
     // Fall back to first non-extension candidate, or any candidate
     return candidates.find(m => !EXTENSION_PROVIDERS.has(m.provider)) ?? candidates[0];
 }
+/**
+ * Lower-cased provider names (aliases included) that charge the same per
+ * request regardless of model. Dynamic routing provides no cost benefit for
+ * these providers — it only degrades quality (#3453).
+ */
+const FLAT_RATE_PROVIDERS = new Set(["github-copilot", "copilot"]);
+/**
+ * Report whether a provider is flat-rate.
+ *
+ * Uses case-insensitive matching with alias support to prevent fail-open on
+ * provider naming variations (e.g. "copilot" vs "github-copilot").
+ *
+ * @param {unknown} provider - Provider name. Anything that is not a string
+ *   (e.g. a model record with no `provider` field) is reported as not
+ *   flat-rate instead of throwing a TypeError from `toLowerCase()`.
+ * @returns {boolean} `true` when the lower-cased name is in FLAT_RATE_PROVIDERS.
+ */
+export function isFlatRateProvider(provider) {
+    // Callers pass `model.provider` straight through; guard here so a missing
+    // provider cannot crash model selection.
+    if (typeof provider !== "string")
+        return false;
+    return FLAT_RATE_PROVIDERS.has(provider.toLowerCase());
+}

package/dist/resources/extensions/gsd/auto-post-unit.js CHANGED Viewed

@@ -19,7 +19,7 @@ import { invalidateAllCaches } from "./cache.js";
 import { parseUnitId } from "./unit-id.js";
 import { closeoutUnit } from "./auto-unit-closeout.js";
 import { autoCommitCurrentBranch, } from "./worktree.js";
-import { verifyExpectedArtifact, resolveExpectedArtifactPath, diagnoseExpectedArtifact, } from "./auto-recovery.js";
+import { verifyExpectedArtifact, resolveExpectedArtifactPath, writeBlockerPlaceholder, diagnoseExpectedArtifact, } from "./auto-recovery.js";
 import { regenerateIfMissing } from "./workflow-projections.js";
 import { syncStateToProjectRoot } from "./auto-worktree.js";
 import { isDbAvailable, getTask, getSlice, getMilestone, updateTaskStatus, _getAdapter } from "./gsd-db.js";
@@ -29,6 +29,15 @@ import { checkPostUnitHooks, isRetryPending, consumeRetryTrigger, persistHookSta
 import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures } from "./captures.js";
 import { debugLog } from "./debug-logger.js";
 import { runSafely } from "./auto-utils.js";
+import { getEvidence } from "./safety/evidence-collector.js";
+import { validateFileChanges } from "./safety/file-change-validator.js";
+// crossReferenceEvidence available for future use when verification_evidence is stored in DB
+// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
+import { validateContent } from "./safety/content-validator.js";
+import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
+import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
+/** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
+const MAX_VERIFICATION_RETRIES = 3;
 /** Enqueue a sidecar item (hook, triage, or quick-task) for the main loop to
  *  drain via runUnit. Logs the enqueue event and notifies the UI. */
 function enqueueSidecar(s, ctx, entry, debugExtra, notification) {
@@ -339,6 +348,78 @@ export async function postUnitPreVerification(pctx, opts) {
         catch (e) {
             debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
         }
+        // ── Safety harness: post-unit validation ──
+        try {
+            const { loadEffectiveGSDPreferences } = await import("./preferences.js");
+            const prefs = loadEffectiveGSDPreferences()?.preferences;
+            const safetyConfig = resolveSafetyHarnessConfig(prefs?.safety_harness);
+            if (safetyConfig.enabled) {
+                const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
+                // File change validation (execute-task only, after auto-commit)
+                if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
+                    try {
+                        const taskRow = getTask(sMid, sSid, sTid);
+                        if (taskRow) {
+                            const expectedOutput = taskRow.expected_output ?? [];
+                            const plannedFiles = taskRow.files ?? [];
+                            const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
+                            if (audit && audit.violations.length > 0) {
+                                const warnings = audit.violations.filter(v => v.severity === "warning");
+                                for (const v of warnings) {
+                                    logWarning("safety", `file-change: ${v.file} — ${v.reason}`);
+                                }
+                                if (warnings.length > 0) {
+                                    ctx.ui.notify(`Safety: ${warnings.length} unexpected file change(s) outside task plan`, "warning");
+                                }
+                            }
+                        }
+                    }
+                    catch (e) {
+                        debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
+                    }
+                }
+                // Evidence cross-reference (execute-task only)
+                // Verification evidence is passed via the complete-task tool call and
+                // stored in the SUMMARY.md on disk — not available as structured data
+                // in the DB. The evidence collector tracks actual bash tool calls, so
+                // we can still detect units that claimed success but ran no commands.
+                if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
+                    try {
+                        const actual = getEvidence();
+                        const bashCalls = actual.filter(e => e.kind === "bash");
+                        // If the task is marked complete but zero bash commands were run,
+                        // it's suspicious — the LLM may have fabricated results.
+                        if (sMid && sSid && sTid && isDbAvailable()) {
+                            const taskRow = getTask(sMid, sSid, sTid);
+                            if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
+                                logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
+                                ctx.ui.notify(`Safety: task ${sTid} has verification commands but no bash calls were recorded`, "warning");
+                            }
+                        }
+                    }
+                    catch (e) {
+                        debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
+                    }
+                }
+                // Content validation (plan-slice, plan-milestone)
+                if (safetyConfig.content_validation) {
+                    try {
+                        const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
+                        const contentViolations = validateContent(s.currentUnit.type, artifactPath);
+                        for (const v of contentViolations) {
+                            logWarning("safety", `content: ${v.reason}`);
+                            ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+                        }
+                    }
+                    catch (e) {
+                        debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
+                    }
+                }
+            }
+        }
+        catch (e) {
+            debugLog("postUnit", { phase: "safety-harness", error: String(e) });
+        }
         // Artifact verification
         let triggerArtifactVerified = false;
         if (!s.currentUnit.type.startsWith("hook/")) {
@@ -374,6 +455,8 @@ export async function postUnitPreVerification(pctx, opts) {
             // When artifact verification fails for a unit type that has a known expected
             // artifact, return "retry" so the caller re-dispatches with failure context
             // instead of blindly re-dispatching the same unit (#1571).
+            // After MAX_VERIFICATION_RETRIES, escalate to writeBlockerPlaceholder so the
+            // pipeline can advance instead of looping forever (#2653).
             //
             // HOWEVER, if the DB is unavailable (db_unavailable), the artifact was never
             // written because the completion tool failed at the infra level. Retrying
@@ -387,20 +470,51 @@ export async function postUnitPreVerification(pctx, opts) {
                 ctx.ui.notify(`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — DB unavailable, skipping retry.${dbSkipDiag ? ` Expected: ${dbSkipDiag}` : ""}`, "error");
             }
             else if (!triggerArtifactVerified) {
+                // #2883: If the artifact is missing because the tool invocation itself
+                // failed (malformed/truncated JSON arguments), retrying will produce the
+                // same failure. Pause auto-mode instead of entering a stuck retry loop.
+                if (s.lastToolInvocationError) {
+                    const errMsg = `Tool invocation failed for ${s.currentUnit.type}: ${s.lastToolInvocationError}. Structured argument generation failed — pausing auto-mode.`;
+                    debugLog("postUnit", { phase: "tool-invocation-error-pause", unitType: s.currentUnit.type, unitId: s.currentUnit.id, error: s.lastToolInvocationError });
+                    ctx.ui.notify(errMsg, "error");
+                    s.lastToolInvocationError = null;
+                    await pauseAuto(ctx, pi);
+                    return "dispatched";
+                }
                 const hasExpectedArtifact = resolveExpectedArtifactPath(s.currentUnit.type, s.currentUnit.id, s.basePath) !== null;
                 if (hasExpectedArtifact) {
                     const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`;
                     const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1;
                     s.verificationRetryCount.set(retryKey, attempt);
-                    const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
-                    s.pendingVerificationRetry = {
-                        unitId: s.currentUnit.id,
-                        failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
-                        attempt,
-                    };
-                    debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
-                    ctx.ui.notify(`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`, "warning");
-                    return "retry";
+                    if (attempt > MAX_VERIFICATION_RETRIES) {
+                        // Retries exhausted — write a blocker placeholder so the pipeline
+                        // can advance past this stuck unit (#2653).
+                        debugLog("postUnit", {
+                            phase: "artifact-verify-escalate",
+                            unitType: s.currentUnit.type,
+                            unitId: s.currentUnit.id,
+                            attempt,
+                            maxRetries: MAX_VERIFICATION_RETRIES,
+                        });
+                        const reason = `Artifact verification failed after ${MAX_VERIFICATION_RETRIES} retries for ${s.currentUnit.type} "${s.currentUnit.id}".`;
+                        writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason);
+                        ctx.ui.notify(`${s.currentUnit.type} ${s.currentUnit.id} — verification retries exhausted (${MAX_VERIFICATION_RETRIES}), wrote blocker placeholder to advance pipeline`, "warning");
+                        // Reset retry count and fall through to "continue" so the loop
+                        // re-derives state with the placeholder in place.
+                        s.verificationRetryCount.delete(retryKey);
+                        s.pendingVerificationRetry = null;
+                        // Do NOT return "retry" — fall through to "continue" below.
+                    }
+                    else {
+                        s.pendingVerificationRetry = {
+                            unitId: s.currentUnit.id,
+                            failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`,
+                            attempt,
+                        };
+                        debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
+                        ctx.ui.notify(`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`, "warning");
+                        return "retry";
+                    }
                 }
             }
         }

package/dist/resources/extensions/gsd/auto-prompts.js CHANGED Viewed

@@ -880,11 +880,16 @@ export async function buildResearchSlicePrompt(mid, _midTitle, sid, sTitle, base
     const contextRel = relMilestoneFile(base, mid, "CONTEXT");
     const milestoneResearchPath = resolveMilestoneFile(base, mid, "RESEARCH");
     const milestoneResearchRel = relMilestoneFile(base, mid, "RESEARCH");
+    const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT");
+    const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT");
     const inlined = [];
     inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
     const contextInline = await inlineFileOptional(contextPath, contextRel, "Milestone Context");
     if (contextInline)
         inlined.push(contextInline);
+    const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
+    if (sliceCtxInline)
+        inlined.push(sliceCtxInline);
     const researchInline = await inlineFileOptional(milestoneResearchPath, milestoneResearchRel, "Milestone Research");
     if (researchInline)
         inlined.push(researchInline);
@@ -931,12 +936,17 @@ export async function buildPlanSlicePrompt(mid, _midTitle, sid, sTitle, base, le
     const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
     const researchPath = resolveSliceFile(base, mid, sid, "RESEARCH");
     const researchRel = relSliceFile(base, mid, sid, "RESEARCH");
+    const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT");
+    const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT");
     const inlined = [];
     // Inject phase handoff anchor from research phase (if available)
     const researchSliceAnchor = readPhaseAnchor(base, mid, "research-slice");
     if (researchSliceAnchor)
         inlined.push(formatAnchorForPrompt(researchSliceAnchor));
     inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
+    const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
+    if (sliceCtxInline)
+        inlined.push(sliceCtxInline);
     const researchInline = await inlineFileOptional(researchPath, researchRel, "Slice Research");
     if (researchInline)
         inlined.push(researchInline);
@@ -1097,8 +1107,13 @@ export async function buildCompleteSlicePrompt(mid, _midTitle, sid, sTitle, base
     const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
     const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
     const slicePlanRel = relSliceFile(base, mid, sid, "PLAN");
+    const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT");
+    const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT");
     const inlined = [];
     inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
+    const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
+    if (sliceCtxInline)
+        inlined.push(sliceCtxInline);
     inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Slice Plan"));
     if (inlineLevel !== "minimal") {
         const requirementsInline = await inlineRequirementsFromDb(base, sid, inlineLevel);
@@ -1351,8 +1366,13 @@ export async function buildReplanSlicePrompt(mid, midTitle, sid, sTitle, base) {
     const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
     const slicePlanPath = resolveSliceFile(base, mid, sid, "PLAN");
     const slicePlanRel = relSliceFile(base, mid, sid, "PLAN");
+    const sliceContextPath = resolveSliceFile(base, mid, sid, "CONTEXT");
+    const sliceContextRel = relSliceFile(base, mid, sid, "CONTEXT");
     const inlined = [];
     inlined.push(await inlineFile(roadmapPath, roadmapRel, "Milestone Roadmap"));
+    const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
+    if (sliceCtxInline)
+        inlined.push(sliceCtxInline);
     inlined.push(await inlineFile(slicePlanPath, slicePlanRel, "Current Slice Plan"));
     // Find the blocker task summary — the completed task with blocker_discovered: true
     let blockerTaskId = "";
@@ -1454,8 +1474,13 @@ export async function buildReassessRoadmapPrompt(mid, midTitle, completedSliceId
     const roadmapRel = relMilestoneFile(base, mid, "ROADMAP");
     const summaryPath = resolveSliceFile(base, mid, completedSliceId, "SUMMARY");
     const summaryRel = relSliceFile(base, mid, completedSliceId, "SUMMARY");
+    const sliceContextPath = resolveSliceFile(base, mid, completedSliceId, "CONTEXT");
+    const sliceContextRel = relSliceFile(base, mid, completedSliceId, "CONTEXT");
     const inlined = [];
     inlined.push(await inlineFile(roadmapPath, roadmapRel, "Current Roadmap"));
+    const sliceCtxInline = await inlineFileOptional(sliceContextPath, sliceContextRel, "Slice Context (from discussion)");
+    if (sliceCtxInline)
+        inlined.push(sliceCtxInline);
     inlined.push(await inlineFile(summaryPath, summaryRel, `${completedSliceId} Summary`));
     if (inlineLevel !== "minimal") {
         const projectInline = await inlineProjectFromDb(base);