npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.a5a2de8 → 1.1.1-dev.b2556262 - Mend

@opengsd/gsd-pi 1.1.1-dev.a5a2de8 → 1.1.1-dev.b2556262

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (325) hide show

package/dist/resources/extensions/gsd/auto/session.js CHANGED Viewed

@@ -17,6 +17,7 @@
  * auto-session-encapsulation.test.ts enforce that auto.ts has no module-level
  * `let` or `var` declarations.
  */
+import { SourceObservationStore, supportsSourceObservationsForUnit } from "../source-observations.js";
 import { resolveWorktreeProjectRoot } from "../worktree-root.js";
 import { normalizeRealPath } from "../paths.js";
 // ─── Constants ───────────────────────────────────────────────────────────────
@@ -76,6 +77,7 @@ export class AutoSession {
     currentTurnId = null;
     currentUnitRouting = null;
     currentMilestoneId = null;
+    sourceObservations = new SourceObservationStore();
     // ── Model state ──────────────────────────────────────────────────────────
     autoModeStartModel = null;
     /** Explicit /gsd model pin captured at bootstrap (session-scoped policy override). */
@@ -94,6 +96,7 @@ export class AutoSession {
     verificationRetryCount = new Map();
     verificationRetryFailureHashes = new Map();
     exhaustedVerificationUnits = new Set();
+    zeroToolRetryCount = new Map();
     pausedSessionFile = null;
     pausedUnitType = null;
     pausedUnitId = null;
@@ -195,6 +198,23 @@ export class AutoSession {
         this.unitDispatchCount.clear();
         this.unitLifetimeDispatches.clear();
     }
+    setCurrentUnit(unit) { // Record the active unit and bring the source-observation store in line with it.
+        this.currentUnit = unit; // NOTE(review): unit.type is dereferenced next, so a null unit throws here; clearCurrentUnit() looks like the intended way to unset — confirm
+        if (!supportsSourceObservationsForUnit(unit.type)) { // unit types without observation support get an emptied store
+            this.sourceObservations.clear();
+            return;
+        }
+        this.sourceObservations.beginUnit({
+            unitType: unit.type,
+            unitId: unit.id,
+            startedAt: unit.startedAt,
+            basePath: unit.workspaceRoot ?? this.basePath, // prefer the unit's own workspace root; otherwise use the session base path
+        });
+    }
+    clearCurrentUnit() { // Unset the active unit and empty the source-observation store in one step.
+        this.currentUnit = null;
+        this.sourceObservations.clear();
+    }
     get lockBasePath() { // Computed on each access from basePath and originalBasePath via resolveWorktreeProjectRoot.
         return resolveWorktreeProjectRoot(this.basePath, this.originalBasePath);
     }
@@ -245,7 +265,7 @@ export class AutoSession {
         this.unitLifetimeDispatches.clear();
         this.unitRecoveryCount.clear();
         // Unit
-        this.currentUnit = null;
+        this.clearCurrentUnit();
         this.currentTraceId = null;
         this.currentTurnId = null;
         this.currentUnitRouting = null;
@@ -266,6 +286,7 @@ export class AutoSession {
         this.verificationRetryCount.clear();
         this.verificationRetryFailureHashes.clear();
         this.exhaustedVerificationUnits.clear();
+        this.zeroToolRetryCount.clear();
         this.pausedSessionFile = null;
         this.pausedUnitType = null;
         this.pausedUnitId = null;

package/dist/resources/extensions/gsd/auto/workflow-kernel.js CHANGED Viewed

@@ -71,6 +71,7 @@ const COMPLETE_AND_BREAK_REASONS = [
     "verification-pause",
     "finalize-pre-timeout",
     "finalize-post-timeout",
+    "milestone-complete",
 ];
 function isCompleteAndBreakReason(reason) {
     return COMPLETE_AND_BREAK_REASONS.includes(reason);

package/dist/resources/extensions/gsd/auto-dispatch.js CHANGED Viewed

@@ -1,7 +1,8 @@
 // Project/App: gsd-pi
 // File Purpose: Declarative auto-mode dispatch rules and dispatch resolver.
 import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
-import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, insertAssessment, setSliceSketchFlag, transaction, getAssessment } from "./gsd-db.js";
+import { getUatBrowserToolSupportError } from "./uat-policy.js";
+import { isDbAvailable, getMilestoneSlices, getPendingGatesForTurn, markPendingGatesOmittedForTurn, getMilestone, insertArtifact, insertAssessment, setSliceSketchFlag, transaction, getAssessment, } from "./gsd-db.js";
 import { isClosedStatus } from "./status-guards.js";
 import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
 import { gsdRoot, resolveGsdPathContract, resolveMilestoneFile, resolveMilestonePath, resolveSliceFile, resolveSlicePath, resolveTaskFile, relTaskFile, relSliceFile, buildMilestoneFileName, buildSliceFileName, buildTaskFileName, gsdProjectionRoot, } from "./paths.js";
@@ -12,7 +13,7 @@ import { logWarning, logError } from "./workflow-logger.js";
 import { dirname, join } from "node:path";
 import { hasImplementationArtifacts } from "./milestone-implementation-evidence.js";
 import { buildDiscussMilestonePrompt, buildDiscussProjectPrompt, buildDiscussRequirementsPrompt, buildResearchDecisionPrompt, buildResearchProjectPrompt, buildResearchMilestonePrompt, buildPlanMilestonePrompt, buildResearchSlicePrompt, buildPlanSlicePrompt, buildRefineSlicePrompt, buildExecuteTaskPrompt, buildCompleteSlicePrompt, buildCompleteMilestonePrompt, buildValidateMilestonePrompt, buildReplanSlicePrompt, buildRunUatPrompt, buildReassessRoadmapPrompt, buildRewriteDocsPrompt, buildReactiveExecutePrompt, buildGateEvaluatePrompt, buildParallelResearchSlicesPrompt, checkNeedsReassessment, checkNeedsRunUat, } from "./auto-prompts.js";
-import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
+import { resolveModelWithFallbacksForUnit, resolveThinkingLevelForUnit } from "./preferences-models.js";
 import { resolveUokFlags } from "./uok/flags.js";
 import { selectReactiveDispatchBatch } from "./uok/execution-graph.js";
 import { getMilestonePipelineVariant } from "./milestone-scope-classifier.js";
@@ -21,6 +22,7 @@ import { isAutoActive } from "./auto.js";
 import { markDepthVerified } from "./bootstrap/write-gate.js";
 import { ensureWorkflowPreferencesCaptured } from "./planning-depth.js";
 import { MILESTONE_ID_RE } from "./milestone-ids.js";
+import { getWorkflowTransportSupportError, getRequiredWorkflowToolsForAutoUnit, } from "./workflow-mcp.js";
 import { PROJECT_RESEARCH_INFLIGHT_MARKER, } from "./project-research-policy.js";
 import { isWorkflowPrefsCaptured, resolveDeepProjectSetupState, } from "./deep-project-setup-policy.js";
 import { annotateBackgroundable } from "./delegation-policy.js";
@@ -249,6 +251,43 @@ export function findMissingSummaries(basePath, mid) {
     })
         .map(s => s.id);
 }
+function stringField(row, key) { // Read row[key] and return it only when it is a string; every other case yields null.
+    const value = row?.[key]; // optional chaining: a null/undefined row reads as undefined instead of throwing
+    return typeof value === "string" ? value : null;
+}
+function stripGsdPrefix(path) { // Drop one leading ".gsd/" from path; paths without that prefix are returned unchanged.
+    return path.startsWith(".gsd/") ? path.slice(".gsd/".length) : path;
+}
+function persistSliceAssessmentBackfill(assessmentRelPath, mid, sliceId, content) { // Insert the slice ASSESSMENT artifact and, when missing, an assessment row, both inside one transaction() callback.
+    const artifactPath = stripGsdPrefix(assessmentRelPath); // the artifact row uses the path without its ".gsd/" prefix
+    const existingAssessment = getAssessment(assessmentRelPath) ?? // look the assessment up under the given path first,
+        getAssessment(artifactPath); // then under the prefix-stripped form
+    const scope = stringField(existingAssessment, "scope") ?? "run-uat"; // reuse a stored scope; default to "run-uat"
+    const status = stringField(existingAssessment, "status") ?? // a stored status wins,
+        extractVerdict(content)?.toLowerCase() ?? // else the lower-cased verdict parsed from content,
+        "unknown"; // else this literal
+    transaction(() => {
+        insertArtifact({
+            path: artifactPath,
+            artifact_type: "ASSESSMENT",
+            milestone_id: mid,
+            slice_id: sliceId,
+            task_id: null,
+            full_content: content,
+        });
+        if (!getAssessment(assessmentRelPath)) { // checked under the un-stripped path only, so a row found via artifactPath above does not suppress this insert
+            insertAssessment({
+                path: assessmentRelPath,
+                milestoneId: mid,
+                sliceId,
+                taskId: null,
+                status,
+                scope,
+                fullContent: content,
+            });
+        }
+    });
+}
 function backfillMissingAssessmentsFromSummaries(basePath, mid) {
     const completedSliceIds = new Set();
     if (isDbAvailable()) {
@@ -280,11 +319,12 @@ function backfillMissingAssessmentsFromSummaries(basePath, mid) {
         const slicePath = resolveSlicePath(basePath, mid, sliceId);
         const assessmentPath = resolveSliceFile(basePath, mid, sliceId, "ASSESSMENT")
             ?? (slicePath ? join(slicePath, buildSliceFileName(sliceId, "ASSESSMENT")) : null);
-        if (!assessmentPath || existsSync(assessmentPath))
+        if (!assessmentPath)
             continue;
-        mkdirSync(dirname(assessmentPath), { recursive: true });
+        const assessmentRelPath = relSliceFile(basePath, mid, sliceId, "ASSESSMENT");
         const now = new Date().toISOString();
-        const content = [
+        const didCreateAssessment = !existsSync(assessmentPath);
+        const content = didCreateAssessment ? [
             "---",
             `sliceId: ${sliceId}`,
             "verdict: PASS",
@@ -296,8 +336,19 @@ function backfillMissingAssessmentsFromSummaries(basePath, mid) {
             "Auto-created during milestone validation because this completed slice had a SUMMARY but no ASSESSMENT artifact.",
             "No additional reassessment changes were detected in this backfill step.",
             "",
-        ].join("\n");
-        writeFileSync(assessmentPath, content, "utf-8");
+        ].join("\n") : readFileSync(assessmentPath, "utf-8");
+        if (isDbAvailable()) {
+            try {
+                persistSliceAssessmentBackfill(assessmentRelPath, mid, sliceId, content);
+            }
+            catch (err) {
+                logWarning("dispatch", `failed to backfill assessment DB rows for ${mid}/${sliceId}: ${err.message}`);
+            }
+        }
+        if (didCreateAssessment) {
+            mkdirSync(dirname(assessmentPath), { recursive: true });
+            writeFileSync(assessmentPath, content, "utf-8");
+        }
     }
 }
 // ─── Rewrite Circuit Breaker ──────────────────────────────────────────────
@@ -467,11 +518,27 @@ export const DISPATCH_RULES = [
     },
     {
         name: "run-uat (post-completion)",
-        match: async ({ state, mid, basePath, prefs }) => {
+        match: async ({ state, mid, basePath, prefs, sessionProvider, sessionAuthMode, activeTools, sessionBaseUrl }) => {
             const needsRunUat = await checkNeedsRunUat(basePath, mid, state, prefs);
             if (!needsRunUat)
                 return null;
             const { sliceId, uatType } = needsRunUat;
+            // Transport preflight: verify required MCP tools are actually connected
+            // before consuming a retry attempt. Fixes tool-starved sessions burning
+            // all MAX_UAT_ATTEMPTS before stopping (#477).
+            const transportError = getWorkflowTransportSupportError(sessionProvider, getRequiredWorkflowToolsForAutoUnit("run-uat"), { projectRoot: basePath, surface: "auto-mode", unitType: "run-uat", authMode: sessionAuthMode, baseUrl: sessionBaseUrl, activeTools });
+            if (transportError) {
+                return { action: "stop", reason: transportError, level: "warning" };
+            }
+            const browserToolError = getUatBrowserToolSupportError({
+                uatType,
+                activeTools,
+                milestoneId: mid,
+                sliceId,
+            });
+            if (browserToolError) {
+                return { action: "stop", reason: browserToolError, level: "warning" };
+            }
             // Cap run-uat dispatch attempts to prevent infinite replay (#3624).
             // Check before incrementing so an exhausted counter cannot create a
             // no-progress skip loop that starves later dispatch rules.
@@ -877,7 +944,7 @@ export const DISPATCH_RULES = [
                 action: "dispatch",
                 unitType: "research-slice",
                 unitId: `${mid}/parallel-research`,
-                prompt: await buildParallelResearchSlicesPrompt(mid, midTitle, researchReadySlices, basePath, resolveModelWithFallbacksForUnit("subagent")?.primary),
+                prompt: await buildParallelResearchSlicesPrompt(mid, midTitle, researchReadySlices, basePath, resolveModelWithFallbacksForUnit("subagent")?.primary, resolveThinkingLevelForUnit("subagent")),
             };
         },
     },
@@ -1029,17 +1096,17 @@ export const DISPATCH_RULES = [
             // Gate evaluation is opt-in via preferences
             const gateConfig = prefs?.gate_evaluation;
             if (!gateConfig?.enabled) {
-                markAllGatesOmitted(mid, sid);
+                markPendingGatesOmittedForTurn(mid, sid, "gate-evaluate");
                 return { action: "skip" };
             }
-            const pending = getPendingGates(mid, sid, "slice");
+            const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
             if (pending.length === 0)
                 return { action: "skip" };
             return {
                 action: "dispatch",
                 unitType: "gate-evaluate",
                 unitId: `${mid}/${sid}/gates+${pending.map(g => g.gate_id).join(",")}`,
-                prompt: await buildGateEvaluatePrompt(mid, midTitle, sid, sTitle, basePath, resolveModelWithFallbacksForUnit("subagent")?.primary),
+                prompt: await buildGateEvaluatePrompt(mid, midTitle, sid, sTitle, basePath, resolveModelWithFallbacksForUnit("subagent")?.primary, resolveThinkingLevelForUnit("subagent")),
             };
         },
     },
@@ -1082,6 +1149,7 @@ export const DISPATCH_RULES = [
                 return null;
             const maxParallel = reactiveConfig?.max_parallel ?? 2;
             const subagentModel = reactiveConfig?.subagent_model ?? resolveModelWithFallbacksForUnit("subagent")?.primary;
+            const subagentThinking = resolveThinkingLevelForUnit("subagent");
             // Default-on safety threshold: only activate reactive dispatch when at
             // least N tasks are ready. Users who explicitly enabled reactive_execution
             // keep the legacy threshold of 2 (matches the prior "any parallelism is
@@ -1139,7 +1207,7 @@ export const DISPATCH_RULES = [
                     action: "dispatch",
                     unitType: "reactive-execute",
                     unitId: `${mid}/${sid}/reactive+${batchSuffix}`,
-                    prompt: await buildReactiveExecutePrompt(mid, midTitle, sid, sTitle, selected, basePath, subagentModel, { sessionContextWindow, modelRegistry, sessionProvider }),
+                    prompt: await buildReactiveExecutePrompt(mid, midTitle, sid, sTitle, selected, basePath, subagentModel, { sessionContextWindow, modelRegistry, sessionProvider, subagentThinking }),
                 };
             }
             catch (err) {

package/dist/resources/extensions/gsd/auto-model-selection.js CHANGED Viewed

@@ -3,7 +3,8 @@
  * Handles complexity-based routing, model resolution across providers,
  * and fallback chains.
  */
-import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js";
+import { clampThinkingLevel } from "@gsd/pi-ai";
+import { resolveModelWithFallbacksForUnit, resolveThinkingLevelForUnit, resolveDynamicRoutingConfig } from "./preferences.js";
 import { classifyUnitComplexity, extractTaskMetadata, tierLabel } from "./complexity-classifier.js";
 import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides, adjustToolSet } from "./model-router.js";
 import { getLedger, getProjectTotals } from "./metrics.js";
@@ -63,6 +64,32 @@ const TOOL_BASELINE = new WeakMap();
 export function clearToolBaseline(pi) { // Remove whatever entry TOOL_BASELINE holds for this `pi` key.
     TOOL_BASELINE.delete(pi);
 }
+/**
+ * Return the union of the pre-dispatch baseline tool set and the current live
+ * active tools, or just the live tools when no baseline has been recorded yet.
+ *
+ * Use this instead of `pi.getActiveTools()` anywhere you need the full tool
+ * surface for a preflight/routing check that runs BEFORE `selectAndApplyModel`
+ * restores the baseline — e.g. in `runDispatch` and `decideNextUnit`.
+ *
+ * The union is intentional:
+ *   - Baseline covers tools that a prior unit's per-provider narrowing (hook
+ *     overrides, Groq 128-tool cap, etc.) has removed from the live set.
+ *     Those tools will be restored by `selectAndApplyModel` before dispatch, so
+ *     dropping them from the preflight check would be a false negative.
+ *   - Live set covers tools connected after the baseline was first captured
+ *     (e.g. MCP servers attached mid-session or after a paused resume).
+ *     Without the live merge, a stale baseline permanently hides newly
+ *     connected MCP tools and prevents transport-preflight from clearing on
+ *     resume (#477 follow-up).
+ *
+ * @param pi Host API object used as the TOOL_BASELINE key. `getActiveTools()`
+ *   is only called when it is a function; otherwise the live set is `[]`.
+ * @returns The live list itself when no baseline is stored for `pi`; otherwise
+ *   a new de-duplicated array with baseline entries first, then live-only ones.
+ */
+export function getToolBaselineSnapshot(pi) {
+    const live = typeof pi.getActiveTools === "function" ? pi.getActiveTools() : [];
+    const baseline = TOOL_BASELINE.get(pi);
+    if (baseline === undefined)
+        return live;
+    return [...new Set([...baseline, ...live])];
+}
 /**
  * Models eligible for the pre-dispatch policy gate. Prefer registry-available
  * models; when that list is empty (common after worktree resume before registry
@@ -202,10 +229,88 @@ function restoreToolBaseline(pi) {
         pi.setActiveTools([...baseline]);
     }
 }
-function reapplyThinkingLevel(pi, level) {
+/**
+ * Apply the desired reasoning effort for the just-selected model, clamping to
+ * what the model actually supports (ADR-026). An unsupported level is never
+ * sent to the provider — it is clamped via `clampThinkingLevel` and the
+ * mismatch is surfaced once per (model, requested-level). Returns the level
+ * actually applied so callers can record it.
+ *
+ * @param pi Host API; `setThinkingLevel` is called on it with the chosen level.
+ * @param desired Requested level. A falsy value is returned as-is and nothing
+ *   is applied.
+ * @param model Selected model. The clamp path runs only when this is a
+ *   non-null object that has a `reasoning` property and `desired` is a string.
+ * @param ctx Dispatch context; `ctx.ui.notify` receives the clamp warning.
+ * @returns The value passed to `pi.setThinkingLevel`, or `desired` unchanged
+ *   when it is falsy.
+ */
+export function applyThinkingLevelForModel(pi, desired, model, ctx) {
+    if (!desired)
+        return desired;
+    // Capability-clamp only when we have a bare string level AND the model
+    // advertises reasoning capability (`reasoning` is always present on real
+    // registry models). Richer host snapshot shapes (e.g. `{ effort: "high" }`)
+    // and partial model objects are applied verbatim — we never coerce an unknown
+    // shape into a string or guess capability we can't see.
+    if (typeof desired === "string" && model != null && typeof model === "object" && "reasoning" in model) {
+        const clamped = clampThinkingLevel(model, desired);
+        pi.setThinkingLevel(clamped);
+        if (clamped !== desired) {
+            const key = `${model.provider}/${model.id}:${desired}`; // warn-once key: provider/id plus the requested level
+            if (!_warnedThinkingClamp.has(key)) {
+                _warnedThinkingClamp.add(key);
+                ctx.ui.notify(`Thinking level '${desired}' not supported by ${model.provider}/${model.id}; using '${clamped}'.`, "warning");
+            }
+        }
+        return clamped;
+    }
+    pi.setThinkingLevel(desired);
+    return desired;
+}
+/** Warn-once guard for capability clamps, keyed by `provider/id:requested`. */
+const _warnedThinkingClamp = new Set();
+/** Warn-once guard for the execute-task floor punch-through advisory. */
+let _warnedExecuteTaskFloorBypass = false;
+/**
+ * Ascending severity order for reasoning levels (matches @gsd/pi-agent-core
+ * `ThinkingLevel`). Used only for floor comparisons below.
+ */
+const THINKING_LEVEL_ORDER = [
+    "off",
+    "minimal",
+    "low",
+    "medium",
+    "high",
+    "xhigh",
+];
+/**
+ * Minimum reasoning level for code-writing units.
+ *
+ * `execute-task` is the only unit that edits source. With a low/minimal
+ * thinking level a model does not plan its edits and compensates by re-reading
+ * the same files dozens of times per task (measured: index.html read ~49× in a
+ * single task on a minimal-thinking model) and shelling out to `nl`/`sed` to
+ * re-locate code after every edit invalidates its line numbers. Flooring the
+ * level for this unit type removes that read/bash thrash. Planning, research,
+ * and lifecycle units are unaffected.
+ */
+const EXECUTE_TASK_MIN_THINKING_LEVEL = "medium";
+function thinkingLevelRank(level) { // Index of `level` within THINKING_LEVEL_ORDER, used for floor comparisons.
+    const idx = THINKING_LEVEL_ORDER.indexOf(level);
+    return idx === -1 ? 0 : idx; // unrecognized values rank 0, the same rank as "off"
+}
+/**
+ * Raise (never lower) the thinking level for code-writing units to a sane
+ * floor. Returns the input unchanged for non-`execute-task` units, when no
+ * level was captured, or when the captured level already meets the floor.
+ *
+ * @param unitType Unit type string; only "execute-task" is subject to the floor.
+ * @param level Captured thinking level. Falsy values and values not listed in
+ *   THINKING_LEVEL_ORDER are returned untouched.
+ * @returns `level` itself, or EXECUTE_TASK_MIN_THINKING_LEVEL when `level` is
+ *   a recognized level ranked below that floor.
+ */
+export function floorThinkingLevelForUnit(unitType, level) {
+    if (unitType !== "execute-task")
+        return level;
     if (!level)
-        return;
-    pi.setThinkingLevel(level);
+        return level;
+    // Only act on the recognized string levels. Any other shape (e.g. a richer
+    // host snapshot object) is passed through untouched so we never coerce an
+    // unknown representation into a bare string the host can't apply.
+    if (!THINKING_LEVEL_ORDER.includes(level)) {
+        return level;
+    }
+    if (thinkingLevelRank(level) >= thinkingLevelRank(EXECUTE_TASK_MIN_THINKING_LEVEL)) {
+        return level;
+    }
+    return EXECUTE_TASK_MIN_THINKING_LEVEL;
 }
 export function resolvePreferredModelConfig(unitType, autoModeStartModel, isAutoMode = true) {
     const explicitConfig = resolveModelWithFallbacksForUnit(unitType);
@@ -256,6 +361,32 @@ sessionModelOverride,
 /** Thinking level captured at auto-mode start and re-applied after model swaps. */
 autoModeStartThinkingLevel) {
     const uokFlags = resolveUokFlags(prefs);
+    // Resolve reasoning effort for this dispatch (ADR-026). An explicit per-phase
+    // thinking config (inline `models.<phase>.thinking` or the separate `thinking`
+    // block) expresses hard user intent: it bypasses the execute-task floor and is
+    // honored verbatim, then capability-clamped per model at apply time below.
+    // With no explicit level, fall back to the auto-start session level and raise
+    // the code-writing floor — preserving prior behavior exactly. Recomputed per
+    // dispatch so neither the floor nor a phase override leaks to other units.
+    const explicitThinkingLevel = resolveThinkingLevelForUnit(unitType);
+    const desiredThinkingLevel = explicitThinkingLevel
+        ?? floorThinkingLevelForUnit(unitType, autoModeStartThinkingLevel);
+    if (explicitThinkingLevel) {
+        if (unitType === "execute-task" &&
+            thinkingLevelRank(explicitThinkingLevel) < thinkingLevelRank(EXECUTE_TASK_MIN_THINKING_LEVEL) &&
+            !_warnedExecuteTaskFloorBypass) {
+            _warnedExecuteTaskFloorBypass = true;
+            ctx.ui.notify(`Explicit execution thinking '${explicitThinkingLevel}' is below the measured execute-task floor ` +
+                `(${EXECUTE_TASK_MIN_THINKING_LEVEL}); honoring it as configured. Low reasoning on code edits can ` +
+                `cause repeated file re-reads.`, "warning");
+        }
+    }
+    else if (verbose &&
+        desiredThinkingLevel &&
+        desiredThinkingLevel !== autoModeStartThinkingLevel) {
+        ctx.ui.notify(`Thinking level raised to ${desiredThinkingLevel} for ${unitType} (was ${autoModeStartThinkingLevel ?? "unset"})`, "info");
+    }
+    let appliedThinkingLevel = null;
     const effectiveSessionModelOverride = sessionModelOverride === undefined
         ? getSessionModelOverride(ctx.sessionManager.getSessionId())
         : (sessionModelOverride ?? undefined);
@@ -533,7 +664,7 @@ autoModeStartThinkingLevel) {
             const ok = await pi.setModel(model, { persist: false });
             if (ok) {
                 appliedModel = model;
-                reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+                appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, model, ctx);
                 // ADR-005: Adjust active tool set for the selected model's provider capabilities.
                 // Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook.
                 const activeToolNames = pi.getActiveTools();
@@ -591,7 +722,7 @@ autoModeStartThinkingLevel) {
                 if (!ok)
                     continue;
                 appliedModel = model;
-                reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+                appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, model, ctx);
                 attemptedPolicyEligible = true;
                 if (verbose) {
                     ctx.ui.notify(`Model policy: configured model unavailable; using ${model.provider}/${model.id}`, "info");
@@ -621,18 +752,32 @@ autoModeStartThinkingLevel) {
                         const fallbackOk = await pi.setModel(byId, { persist: false });
                         if (fallbackOk) {
                             appliedModel = byId;
-                            reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+                            appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, byId, ctx);
                         }
                     }
                 }
                 else {
                     appliedModel = startModel;
-                    reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+                    appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, startModel, ctx);
                 }
             }
         }
     }
-    return { routing, appliedModel };
+    // If no model branch applied a thinking level (e.g. interactive guided-flow
+    // with a `thinking:` block but no per-phase model and no start model), still
+    // honor an explicitly configured phase thinking level against the current
+    // session model. Only the explicit path runs here — the floored session
+    // default is intentionally left untouched so no-config interactive runs keep
+    // the user's /model thinking level. (ADR-026)
+    if (appliedThinkingLevel == null && explicitThinkingLevel && ctx.model) {
+        // Prefer the full registry model (carries reasoning capability so the level
+        // can be clamped); fall back to ctx.model. Always route through
+        // applyThinkingLevelForModel so the clamp runs whenever capability metadata
+        // exists — never a raw verbatim setThinkingLevel that bypasses it (ADR-026).
+        const current = resolveModelId(`${ctx.model.provider}/${ctx.model.id}`, ctx.modelRegistry?.getAvailable?.() ?? [], ctx.model.provider) ?? ctx.model;
+        appliedThinkingLevel = applyThinkingLevelForModel(pi, explicitThinkingLevel, current, ctx);
+    }
+    return { routing, appliedModel, appliedThinkingLevel };
 }
 /**
  * Resolve a model ID string to a model object from the available models list.

package/dist/resources/extensions/gsd/auto-post-unit.js CHANGED Viewed

@@ -1280,6 +1280,7 @@ export async function postUnitPreVerification(pctx, opts) {
                 logError("engine", "triage resolution failed", { error: err.message });
             }
         }
+        let blockingContentViolation = null;
         // ── Safety harness: post-unit validation ──
         try {
             const { loadEffectiveGSDPreferences } = await import("./preferences.js");
@@ -1399,8 +1400,15 @@ export async function postUnitPreVerification(pctx, opts) {
                         const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
                         const contentViolations = validateContent(s.currentUnit.type, artifactPath);
                         for (const v of contentViolations) {
-                            logWarning("safety", `content: ${v.reason}`);
-                            ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+                            if (v.severity === "error") {
+                                blockingContentViolation ??= v.reason;
+                                logError("safety", `content: ${v.reason}`);
+                                ctx.ui.notify(`Content validation: ${v.reason}`, "error");
+                            }
+                            else {
+                                logWarning("safety", `content: ${v.reason}`);
+                                ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+                            }
                         }
                     }
                     catch (e) {
@@ -1573,6 +1581,15 @@ export async function postUnitPreVerification(pctx, opts) {
                     return "continue";
                 }
             }
+            if (blockingContentViolation && triggerArtifactVerified) {
+                triggerArtifactVerified = false;
+                debugLog("postUnit", {
+                    phase: "content-validation-blocked-artifact",
+                    unitType: s.currentUnit.type,
+                    unitId: s.currentUnit.id,
+                    reason: blockingContentViolation,
+                });
+            }
             // When artifact verification fails for a unit type that has a known expected
             // artifact, ask the caller to retry so it re-dispatches with failure context
             // instead of blindly re-dispatching the same unit (#1571).

package/dist/resources/extensions/gsd/auto-prompts.js CHANGED Viewed

@@ -8,7 +8,7 @@
  * utility.
  */
 import { loadFile, parseContinue, parseSummary, loadActiveOverrides, formatOverridesSection } from "./files.js";
-import { hasVerdict, getUatType, extractVerdict } from "./verdict-parser.js";
+import { hasVerdict, extractVerdict } from "./verdict-parser.js";
 import { loadPrompt, inlineTemplate } from "./prompt-loader.js";
 import { resolveMilestoneFile, resolveSliceFile, resolveSlicePath, resolveTasksDir, resolveTaskFiles, resolveTaskFile, relMilestoneFile, relSliceFile, relSlicePath, relMilestonePath, resolveGsdRootFile, relGsdRootFile, resolveRuntimeFile, } from "./paths.js";
 import { resolveInlineLevel, loadEffectiveGSDPreferences } from "./preferences.js";
@@ -27,11 +27,11 @@ import { logWarning } from "./workflow-logger.js";
 import { inlineGraphSubgraph } from "./graph-context.js";
 import { buildExtractionStepsBlock } from "./commands-extract-learnings.js";
 import { classifyProject } from "./detection.js";
-import { hasBrowserRequiredText } from "./browser-evidence.js";
 import { debugLog } from "./debug-logger.js";
 import { buildSkillActivationBlock, buildSkillDiscoveryVars } from "./skill-activation.js";
 import { findMilestoneIds } from "./milestone-ids.js";
-import { buildRunUatResultPresentation, RUN_UAT_TOOL_PRESENTATION_PLAN_ID } from "./tool-presentation-plan.js";
+import { buildRunUatPresentationForType, RUN_UAT_TOOL_PRESENTATION_PLAN_ID } from "./tool-presentation-plan.js";
+import { resolveEffectiveUatType, shouldDispatchUatForContent } from "./uat-policy.js";
 export { buildSkillActivationBlock, buildSkillDiscoveryVars };
 // ─── Preamble Cap ─────────────────────────────────────────────────────────────
 /**
@@ -228,17 +228,6 @@ function prependContextModeToBlock(unitType, base, block, renderMode = "standalo
         return contextMode;
     return `${contextMode}\n\n${block}`;
 }
-function resolveEffectiveUatType(content) {
-    const uatType = getUatType(content);
-    if (uatType === "artifact-driven" && hasBrowserRequiredText(content)) {
-        return "browser-executable";
-    }
-    return uatType;
-}
-function shouldDispatchUatForContent(content, prefs) {
-    const uatType = resolveEffectiveUatType(content);
-    return !!prefs?.uat_dispatch || uatType !== "artifact-driven" || hasBrowserRequiredText(content);
-}
 // ─── Executor Constraints ─────────────────────────────────────────────────────
 /**
  * Format executor context constraints for injection into the plan-slice prompt.
@@ -2940,7 +2929,7 @@ export async function buildRunUatPrompt(mid, sliceId, uatPath, uatContent, base)
     emitPromptContextTelemetry("run-uat", contextTelemetry, inlinedContext);
     const uatResultPath = join(base, relSliceFile(base, mid, sliceId, "ASSESSMENT"));
     const uatType = resolveEffectiveUatType(uatContent);
-    const canonicalPresentation = JSON.stringify(buildRunUatResultPresentation(), null, 2);
+    const canonicalPresentation = JSON.stringify(buildRunUatPresentationForType(uatType), null, 2);
     return loadPrompt("run-uat", {
         workingDirectory: base,
         milestoneId: mid,
@@ -3077,7 +3066,23 @@ export async function buildReassessRoadmapPrompt(mid, midTitle, completedSliceId
     });
 }
 // ─── Reactive Execute Prompt ──────────────────────────────────────────────
-export async function buildReactiveExecutePrompt(mid, midTitle, sid, sTitle, readyTaskIds, base, subagentModel, opts) {
+/**
+ * Build the `with model: "…" and thinking: "…"` suffix injected into a prompt
+ * that instructs the coordinator how to dispatch a `subagent` call. Either or
+ * both may be absent (ADR-026 / #508).
+ *
+ * A non-empty result begins with a single space followed by `with`, and the
+ * clauses are joined by ` and ` with the model clause first. Both values are
+ * interpolated verbatim between double quotes; no escaping is applied here.
+ *
+ * @param model - Value for the `model: "…"` clause; the clause is skipped
+ *   when this is falsy.
+ * @param thinking - Value for the `thinking: "…"` clause; the clause is
+ *   skipped when this is falsy.
+ * @returns The suffix string, or `""` when both arguments are falsy.
+ */
+function subagentCallSuffix(model, thinking) {
+    const parts = [];
+    if (model)
+        parts.push(`model: "${model}"`);
+    if (thinking)
+        parts.push(`thinking: "${thinking}"`);
+    return parts.length > 0 ? ` with ${parts.join(" and ")}` : "";
+}
+export async function buildReactiveExecutePrompt(mid, midTitle, sid, sTitle, readyTaskIds, base, subagentModel,
+// Reasoning effort travels inside opts here (not as a positional param) so
+// existing positional `opts` callers don't shift (#508).
+opts) {
     const { loadSliceTaskIO, deriveTaskGraph, graphMetrics } = await import("./reactive-graph.js");
     // Build graph for context
     const taskIO = await loadSliceTaskIO(base, mid, sid);
@@ -3151,7 +3156,7 @@ export async function buildReactiveExecutePrompt(mid, midTitle, sid, sTitle, rea
             "",
             `When done, say: "Task ${tid} complete."`,
         ].join("\n");
-        const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+        const modelSuffix = subagentCallSuffix(subagentModel, opts?.subagentThinking);
         subagentSections.push([
             `### ${tid}: ${tTitle}`,
             "",
@@ -3217,10 +3222,10 @@ function renderGatesToCloseBlock(gates, opts) {
     }
     return lines.join("\n").trimEnd();
 }
-export async function buildParallelResearchSlicesPrompt(mid, midTitle, slices, basePath, subagentModel) {
+export async function buildParallelResearchSlicesPrompt(mid, midTitle, slices, basePath, subagentModel, subagentThinking) {
     // Build individual research-slice prompts for each slice
     const subagentSections = [];
-    const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+    const modelSuffix = subagentCallSuffix(subagentModel, subagentThinking);
     for (const slice of slices) {
         const slicePrompt = await buildResearchSlicePrompt(mid, midTitle, slice.id, slice.title, basePath, { contextModeRenderMode: "nested" });
         subagentSections.push([
@@ -3242,7 +3247,7 @@ export async function buildParallelResearchSlicesPrompt(mid, midTitle, slices, b
         subagentPrompts: subagentSections.join("\n\n---\n\n"),
     });
 }
-export async function buildGateEvaluatePrompt(mid, midTitle, sid, sTitle, base, subagentModel) {
+export async function buildGateEvaluatePrompt(mid, midTitle, sid, sTitle, base, subagentModel, subagentThinking) {
     // Pull only the gates this turn actually owns (Q3/Q4). Filter via the
     // registry so that scope:"slice" gates owned by other turns (Q8) can't
     // leak into this prompt and can't block dispatch via silent skip.
@@ -3291,7 +3296,7 @@ export async function buildGateEvaluatePrompt(mid, midTitle, sid, sTitle, base,
             "- `rationale`: one-sentence justification",
             "- `findings`: detailed markdown findings (or empty if omitted)",
         ].join("\n");
-        const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+        const modelSuffix = subagentCallSuffix(subagentModel, subagentThinking);
         subagentSections.push([
             `### ${def.id}: ${def.question}`,
             "",

package/dist/resources/extensions/gsd/auto-recovery.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { appendEvent } from "./workflow-events.js";
 import { atomicWriteSync } from "./atomic-write.js";
 import { clearParseCache } from "./files.js";
 import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
-import { isDbAvailable, getTask, getSlice, getSliceTasks, getPendingGates, updateTaskStatus, updateSliceStatus, insertSlice, getMilestone, getMilestoneSlices, getLatestAssessmentByScope, updateMilestoneStatus, refreshOpenDatabaseFromDisk, transaction } from "./gsd-db.js";
+import { isDbAvailable, getTask, getSlice, getSliceTasks, getPendingGatesForTurn, updateTaskStatus, updateSliceStatus, insertSlice, getMilestone, getMilestoneSlices, getLatestAssessmentByScope, updateMilestoneStatus, refreshOpenDatabaseFromDisk, transaction, } from "./gsd-db.js";
 import { isValidationTerminal } from "./state.js";
 import { getErrorMessage } from "./error-utils.js";
 import { logWarning, logError } from "./workflow-logger.js";
@@ -329,7 +329,9 @@ export function verifyExpectedArtifact(unitType, unitId, base) {
         if (gateIds.length === 0)
             return true;
         try {
-            const pending = getPendingGates(mid, sid, "slice");
+            if (!isDbAvailable())
+                return false;
+            const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
             const pendingIds = new Set(pending.map((g) => g.gate_id));
             // All dispatched gates must no longer be pending
             for (const gid of gateIds) {