npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.9f86580 → 1.1.1-dev.b2556262 - Mend

@opengsd/gsd-pi 1.1.1-dev.9f86580 → 1.1.1-dev.b2556262

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (261)

package/src/resources/extensions/gsd/auto-dispatch.ts CHANGED

@@ -14,15 +14,16 @@
 import type { GSDState } from "./types.js";
 import type { GSDPreferences } from "./preferences.js";
-import type { UatType } from "./files.js";
 import type { MinimalModelRegistry } from "./context-budget.js";
 import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
+import { getUatBrowserToolSupportError, type UatType } from "./uat-policy.js";
 import {
   isDbAvailable,
   getMilestoneSlices,
-  getPendingGates,
-  markAllGatesOmitted,
+  getPendingGatesForTurn,
+  markPendingGatesOmittedForTurn,
   getMilestone,
+  insertArtifact,
   insertAssessment,
   setSliceSketchFlag,
   transaction,
@@ -77,7 +78,7 @@ import {
   checkNeedsReassessment,
   checkNeedsRunUat,
 } from "./auto-prompts.js";
-import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
+import { resolveModelWithFallbacksForUnit, resolveThinkingLevelForUnit } from "./preferences-models.js";
 import { resolveUokFlags } from "./uok/flags.js";
 import { selectReactiveDispatchBatch } from "./uok/execution-graph.js";
 import { getMilestonePipelineVariant } from "./milestone-scope-classifier.js";
@@ -439,6 +440,53 @@ export function findMissingSummaries(basePath: string, mid: string): string[] {
     .map(s => s.id);
 }
+function stringField(row: Record<string, unknown> | null, key: string): string | null {
+  const value = row?.[key];
+  return typeof value === "string" ? value : null;
+}
+function stripGsdPrefix(path: string): string {
+  return path.startsWith(".gsd/") ? path.slice(".gsd/".length) : path;
+}
+function persistSliceAssessmentBackfill(
+  assessmentRelPath: string,
+  mid: string,
+  sliceId: string,
+  content: string,
+): void {
+  const artifactPath = stripGsdPrefix(assessmentRelPath);
+  const existingAssessment =
+    getAssessment(assessmentRelPath) ??
+    getAssessment(artifactPath);
+  const scope = stringField(existingAssessment, "scope") ?? "run-uat";
+  const status = stringField(existingAssessment, "status") ??
+    extractVerdict(content)?.toLowerCase() ??
+    "unknown";
+  transaction(() => {
+    insertArtifact({
+      path: artifactPath,
+      artifact_type: "ASSESSMENT",
+      milestone_id: mid,
+      slice_id: sliceId,
+      task_id: null,
+      full_content: content,
+    });
+    if (!getAssessment(assessmentRelPath)) {
+      insertAssessment({
+        path: assessmentRelPath,
+        milestoneId: mid,
+        sliceId,
+        taskId: null,
+        status,
+        scope,
+        fullContent: content,
+      });
+    }
+  });
+}
 function backfillMissingAssessmentsFromSummaries(basePath: string, mid: string): void {
   const completedSliceIds = new Set<string>();
   if (isDbAvailable()) {
@@ -467,11 +515,12 @@ function backfillMissingAssessmentsFromSummaries(basePath: string, mid: string):
     const slicePath = resolveSlicePath(basePath, mid, sliceId);
     const assessmentPath = resolveSliceFile(basePath, mid, sliceId, "ASSESSMENT")
       ?? (slicePath ? join(slicePath, buildSliceFileName(sliceId, "ASSESSMENT")) : null);
-    if (!assessmentPath || existsSync(assessmentPath)) continue;
+    if (!assessmentPath) continue;
-    mkdirSync(dirname(assessmentPath), { recursive: true });
+    const assessmentRelPath = relSliceFile(basePath, mid, sliceId, "ASSESSMENT");
     const now = new Date().toISOString();
-    const content = [
+    const didCreateAssessment = !existsSync(assessmentPath);
+    const content = didCreateAssessment ? [
       "---",
       `sliceId: ${sliceId}`,
       "verdict: PASS",
@@ -483,8 +532,20 @@ function backfillMissingAssessmentsFromSummaries(basePath: string, mid: string):
       "Auto-created during milestone validation because this completed slice had a SUMMARY but no ASSESSMENT artifact.",
       "No additional reassessment changes were detected in this backfill step.",
       "",
-    ].join("\n");
-    writeFileSync(assessmentPath, content, "utf-8");
+    ].join("\n") : readFileSync(assessmentPath, "utf-8");
+    if (isDbAvailable()) {
+      try {
+        persistSliceAssessmentBackfill(assessmentRelPath, mid, sliceId, content);
+      } catch (err) {
+        logWarning("dispatch", `failed to backfill assessment DB rows for ${mid}/${sliceId}: ${(err as Error).message}`);
+      }
+    }
+    if (didCreateAssessment) {
+      mkdirSync(dirname(assessmentPath), { recursive: true });
+      writeFileSync(assessmentPath, content, "utf-8");
+    }
   }
 }
@@ -689,6 +750,15 @@ export const DISPATCH_RULES: DispatchRule[] = [
       if (transportError) {
         return { action: "stop" as const, reason: transportError, level: "warning" as const };
       }
+      const browserToolError = getUatBrowserToolSupportError({
+        uatType,
+        activeTools,
+        milestoneId: mid,
+        sliceId,
+      });
+      if (browserToolError) {
+        return { action: "stop" as const, reason: browserToolError, level: "warning" as const };
+      }
       // Cap run-uat dispatch attempts to prevent infinite replay (#3624).
       // Check before incrementing so an exhausted counter cannot create a
@@ -1095,6 +1165,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
           researchReadySlices,
           basePath,
           resolveModelWithFallbacksForUnit("subagent")?.primary,
+          resolveThinkingLevelForUnit("subagent"),
         ),
       };
     },
@@ -1264,11 +1335,11 @@ export const DISPATCH_RULES: DispatchRule[] = [
       // Gate evaluation is opt-in via preferences
       const gateConfig = prefs?.gate_evaluation;
       if (!gateConfig?.enabled) {
-        markAllGatesOmitted(mid, sid);
+        markPendingGatesOmittedForTurn(mid, sid, "gate-evaluate");
         return { action: "skip" };
       }
-      const pending = getPendingGates(mid, sid, "slice");
+      const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
       if (pending.length === 0) return { action: "skip" };
       return {
@@ -1282,6 +1353,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
           sTitle,
           basePath,
           resolveModelWithFallbacksForUnit("subagent")?.primary,
+          resolveThinkingLevelForUnit("subagent"),
         ),
       };
     },
@@ -1327,6 +1399,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
       if (resolveSliceFile(basePath, mid, sid, "REACTIVE-BLOCKER")) return null;
       const maxParallel = reactiveConfig?.max_parallel ?? 2;
       const subagentModel = reactiveConfig?.subagent_model ?? resolveModelWithFallbacksForUnit("subagent")?.primary;
+      const subagentThinking = resolveThinkingLevelForUnit("subagent");
       // Default-on safety threshold: only activate reactive dispatch when at
       // least N tasks are ready. Users who explicitly enabled reactive_execution
       // keep the legacy threshold of 2 (matches the prior "any parallelism is
@@ -1413,7 +1486,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
             selected,
             basePath,
             subagentModel,
-            { sessionContextWindow, modelRegistry, sessionProvider },
+            { sessionContextWindow, modelRegistry, sessionProvider, subagentThinking },
           ),
         };
       } catch (err) {

package/src/resources/extensions/gsd/auto-model-selection.ts CHANGED

@@ -4,11 +4,11 @@
  * and fallback chains.
  */
-import type { Api, Model } from "@gsd/pi-ai";
-import { getProviderCapabilities } from "@gsd/pi-ai";
+import type { Api, Model, ModelThinkingLevel } from "@gsd/pi-ai";
+import { getProviderCapabilities, clampThinkingLevel } from "@gsd/pi-ai";
 import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
 import type { GSDPreferences } from "./preferences.js";
-import { resolveModelWithFallbacksForUnit, resolveDynamicRoutingConfig } from "./preferences.js";
+import { resolveModelWithFallbacksForUnit, resolveThinkingLevelForUnit, resolveDynamicRoutingConfig } from "./preferences.js";
 import type { ComplexityTier } from "./complexity-classifier.js";
 import { classifyUnitComplexity, extractTaskMetadata, tierLabel } from "./complexity-classifier.js";
 import { resolveModelForComplexity, escalateTier, getEligibleModels, loadCapabilityOverrides, adjustToolSet, filterToolsForProvider } from "./model-router.js";
@@ -57,6 +57,12 @@ export interface ModelSelectionResult {
   routing: { tier: string; modelDowngraded: boolean } | null;
   /** Concrete model applied before dispatch so it can be restored after a fresh session. */
   appliedModel: Model<Api> | null;
+  /**
+   * Reasoning effort applied for this dispatch after per-phase resolution,
+   * floor, and capability clamping (ADR-026). Null when no level was applied
+   * (e.g. no start level captured). Surfaced for metrics/telemetry.
+   */
+  appliedThinkingLevel?: ReturnType<ExtensionAPI["getThinkingLevel"]> | null;
 }
 export interface PreferredModelConfig {
@@ -278,12 +284,103 @@ function restoreToolBaseline(pi: ExtensionAPI): void {
   }
 }
-function reapplyThinkingLevel(
+/**
+ * Apply the desired reasoning effort for the just-selected model, clamping to
+ * what the model actually supports (ADR-026). An unsupported level is never
+ * sent to the provider — it is clamped via `clampThinkingLevel` and the
+ * mismatch is surfaced once per (model, requested-level). Returns the level
+ * actually applied so callers can record it.
+ */
+export function applyThinkingLevelForModel(
   pi: ExtensionAPI,
+  desired: ReturnType<ExtensionAPI["getThinkingLevel"]> | null | undefined,
+  model: Model<Api>,
+  ctx: ExtensionContext,
+): ReturnType<ExtensionAPI["getThinkingLevel"]> | null | undefined {
+  if (!desired) return desired;
+  // Capability-clamp only when we have a bare string level AND the model
+  // advertises reasoning capability (`reasoning` is always present on real
+  // registry models). Richer host snapshot shapes (e.g. `{ effort: "high" }`)
+  // and partial model objects are applied verbatim — we never coerce an unknown
+  // shape into a string or guess capability we can't see.
+  if (typeof desired === "string" && model != null && typeof model === "object" && "reasoning" in model) {
+    const clamped = clampThinkingLevel(model, desired as ModelThinkingLevel) as ReturnType<ExtensionAPI["getThinkingLevel"]>;
+    pi.setThinkingLevel(clamped);
+    if (clamped !== desired) {
+      const key = `${model.provider}/${model.id}:${desired}`;
+      if (!_warnedThinkingClamp.has(key)) {
+        _warnedThinkingClamp.add(key);
+        ctx.ui.notify(
+          `Thinking level '${desired}' not supported by ${model.provider}/${model.id}; using '${clamped}'.`,
+          "warning",
+        );
+      }
+    }
+    return clamped;
+  }
+  pi.setThinkingLevel(desired);
+  return desired;
+}
+/** Warn-once guard for capability clamps, keyed by `provider/id:requested`. */
+const _warnedThinkingClamp = new Set<string>();
+/** Warn-once guard for the execute-task floor punch-through advisory. */
+let _warnedExecuteTaskFloorBypass = false;
+type EffectiveThinkingLevel = ReturnType<ExtensionAPI["getThinkingLevel"]>;
+/**
+ * Ascending severity order for reasoning levels (matches @gsd/pi-agent-core
+ * `ThinkingLevel`). Used only for floor comparisons below.
+ */
+const THINKING_LEVEL_ORDER: readonly EffectiveThinkingLevel[] = [
+  "off",
+  "minimal",
+  "low",
+  "medium",
+  "high",
+  "xhigh",
+] as EffectiveThinkingLevel[];
+/**
+ * Minimum reasoning level for code-writing units.
+ *
+ * `execute-task` is the only unit that edits source. With a low/minimal
+ * thinking level a model does not plan its edits and compensates by re-reading
+ * the same files dozens of times per task (measured: index.html read ~49× in a
+ * single task on a minimal-thinking model) and shelling out to `nl`/`sed` to
+ * re-locate code after every edit invalidates its line numbers. Flooring the
+ * level for this unit type removes that read/bash thrash. Planning, research,
+ * and lifecycle units are unaffected.
+ */
+const EXECUTE_TASK_MIN_THINKING_LEVEL: EffectiveThinkingLevel = "medium";
+function thinkingLevelRank(level: EffectiveThinkingLevel): number {
+  const idx = THINKING_LEVEL_ORDER.indexOf(level);
+  return idx === -1 ? 0 : idx;
+}
+/**
+ * Raise (never lower) the thinking level for code-writing units to a sane
+ * floor. Returns the input unchanged for non-`execute-task` units, when no
+ * level was captured, or when the captured level already meets the floor.
+ */
+export function floorThinkingLevelForUnit(
+  unitType: string,
   level: ReturnType<ExtensionAPI["getThinkingLevel"]> | null | undefined,
-): void {
-  if (!level) return;
-  pi.setThinkingLevel(level);
+): ReturnType<ExtensionAPI["getThinkingLevel"]> | null | undefined {
+  if (unitType !== "execute-task") return level;
+  if (!level) return level;
+  // Only act on the recognized string levels. Any other shape (e.g. a richer
+  // host snapshot object) is passed through untouched so we never coerce an
+  // unknown representation into a bare string the host can't apply.
+  if (!THINKING_LEVEL_ORDER.includes(level as EffectiveThinkingLevel)) {
+    return level;
+  }
+  if (thinkingLevelRank(level as EffectiveThinkingLevel) >= thinkingLevelRank(EXECUTE_TASK_MIN_THINKING_LEVEL)) {
+    return level;
+  }
+  return EXECUTE_TASK_MIN_THINKING_LEVEL;
 }
 export function resolvePreferredModelConfig(
@@ -354,6 +451,42 @@ export async function selectAndApplyModel(
   autoModeStartThinkingLevel?: ReturnType<ExtensionAPI["getThinkingLevel"]> | null,
 ): Promise<ModelSelectionResult> {
   const uokFlags = resolveUokFlags(prefs);
+  // Resolve reasoning effort for this dispatch (ADR-026). An explicit per-phase
+  // thinking config (inline `models.<phase>.thinking` or the separate `thinking`
+  // block) expresses hard user intent: it bypasses the execute-task floor and is
+  // honored verbatim, then capability-clamped per model at apply time below.
+  // With no explicit level, fall back to the auto-start session level and raise
+  // the code-writing floor — preserving prior behavior exactly. Recomputed per
+  // dispatch so neither the floor nor a phase override leaks to other units.
+  const explicitThinkingLevel =
+    resolveThinkingLevelForUnit(unitType) as ReturnType<ExtensionAPI["getThinkingLevel"]> | undefined;
+  const desiredThinkingLevel = explicitThinkingLevel
+    ?? floorThinkingLevelForUnit(unitType, autoModeStartThinkingLevel);
+  if (explicitThinkingLevel) {
+    if (
+      unitType === "execute-task" &&
+      thinkingLevelRank(explicitThinkingLevel) < thinkingLevelRank(EXECUTE_TASK_MIN_THINKING_LEVEL) &&
+      !_warnedExecuteTaskFloorBypass
+    ) {
+      _warnedExecuteTaskFloorBypass = true;
+      ctx.ui.notify(
+        `Explicit execution thinking '${explicitThinkingLevel}' is below the measured execute-task floor ` +
+        `(${EXECUTE_TASK_MIN_THINKING_LEVEL}); honoring it as configured. Low reasoning on code edits can ` +
+        `cause repeated file re-reads.`,
+        "warning",
+      );
+    }
+  } else if (
+    verbose &&
+    desiredThinkingLevel &&
+    desiredThinkingLevel !== autoModeStartThinkingLevel
+  ) {
+    ctx.ui.notify(
+      `Thinking level raised to ${desiredThinkingLevel} for ${unitType} (was ${autoModeStartThinkingLevel ?? "unset"})`,
+      "info",
+    );
+  }
+  let appliedThinkingLevel: ReturnType<ExtensionAPI["getThinkingLevel"]> | null | undefined = null;
   const effectiveSessionModelOverride = sessionModelOverride === undefined
     ? getSessionModelOverride(ctx.sessionManager.getSessionId())
     : (sessionModelOverride ?? undefined);
@@ -699,7 +832,7 @@ export async function selectAndApplyModel(
       const ok = await pi.setModel(model, { persist: false });
       if (ok) {
         appliedModel = model;
-        reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+        appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, model, ctx);
         // ADR-005: Adjust active tool set for the selected model's provider capabilities.
         // Hard-filter incompatible tools, then let extensions override via adjust_tool_set hook.
@@ -759,7 +892,7 @@ export async function selectAndApplyModel(
         const ok = await pi.setModel(model, { persist: false });
         if (!ok) continue;
         appliedModel = model;
-        reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+        appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, model, ctx);
         attemptedPolicyEligible = true;
         if (verbose) {
           ctx.ui.notify(
@@ -805,18 +938,37 @@ export async function selectAndApplyModel(
             const fallbackOk = await pi.setModel(byId, { persist: false });
             if (fallbackOk) {
               appliedModel = byId;
-              reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+              appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, byId, ctx);
             }
           }
         } else {
           appliedModel = startModel;
-          reapplyThinkingLevel(pi, autoModeStartThinkingLevel);
+          appliedThinkingLevel = applyThinkingLevelForModel(pi, desiredThinkingLevel, startModel, ctx);
         }
       }
     }
   }
-  return { routing, appliedModel };
+  // If no model branch applied a thinking level (e.g. interactive guided-flow
+  // with a `thinking:` block but no per-phase model and no start model), still
+  // honor an explicitly configured phase thinking level against the current
+  // session model. Only the explicit path runs here — the floored session
+  // default is intentionally left untouched so no-config interactive runs keep
+  // the user's /model thinking level. (ADR-026)
+  if (appliedThinkingLevel == null && explicitThinkingLevel && ctx.model) {
+    // Prefer the full registry model (carries reasoning capability so the level
+    // can be clamped); fall back to ctx.model. Always route through
+    // applyThinkingLevelForModel so the clamp runs whenever capability metadata
+    // exists — never a raw verbatim setThinkingLevel that bypasses it (ADR-026).
+    const current = resolveModelId(
+      `${ctx.model.provider}/${ctx.model.id}`,
+      ctx.modelRegistry?.getAvailable?.() ?? [],
+      ctx.model.provider,
+    ) ?? (ctx.model as Model<Api>);
+    appliedThinkingLevel = applyThinkingLevelForModel(pi, explicitThinkingLevel, current, ctx);
+  }
+  return { routing, appliedModel, appliedThinkingLevel };
 }
 /**

package/src/resources/extensions/gsd/auto-post-unit.ts CHANGED

@@ -1521,6 +1521,8 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
       }
     }
+    let blockingContentViolation: string | null = null;
     // ── Safety harness: post-unit validation ──
     try {
       const { loadEffectiveGSDPreferences } = await import("./preferences.js");
@@ -1668,8 +1670,14 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
             const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
             const contentViolations = validateContent(s.currentUnit.type, artifactPath);
             for (const v of contentViolations) {
-              logWarning("safety", `content: ${v.reason}`);
-              ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+              if (v.severity === "error") {
+                blockingContentViolation ??= v.reason;
+                logError("safety", `content: ${v.reason}`);
+                ctx.ui.notify(`Content validation: ${v.reason}`, "error");
+              } else {
+                logWarning("safety", `content: ${v.reason}`);
+                ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+              }
             }
           } catch (e) {
             debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
@@ -1868,6 +1876,16 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
         }
       }
+      if (blockingContentViolation && triggerArtifactVerified) {
+        triggerArtifactVerified = false;
+        debugLog("postUnit", {
+          phase: "content-validation-blocked-artifact",
+          unitType: s.currentUnit.type,
+          unitId: s.currentUnit.id,
+          reason: blockingContentViolation,
+        });
+      }
       // When artifact verification fails for a unit type that has a known expected
       // artifact, ask the caller to retry so it re-dispatches with failure context
       // instead of blindly re-dispatching the same unit (#1571).

package/src/resources/extensions/gsd/auto-prompts.ts CHANGED

@@ -10,8 +10,8 @@
  */
 import { loadFile, parseContinue, parseSummary, loadActiveOverrides, formatOverridesSection, parseTaskPlanFile } from "./files.js";
-import type { Override, UatType } from "./files.js";
-import { hasVerdict, getUatType, extractVerdict } from "./verdict-parser.js";
+import type { Override } from "./files.js";
+import { hasVerdict, extractVerdict } from "./verdict-parser.js";
 import { loadPrompt, inlineTemplate } from "./prompt-loader.js";
 import {
   resolveMilestoneFile, resolveSliceFile, resolveSlicePath,
@@ -42,11 +42,11 @@ import { logWarning } from "./workflow-logger.js";
 import { inlineGraphSubgraph } from "./graph-context.js";
 import { buildExtractionStepsBlock } from "./commands-extract-learnings.js";
 import { classifyProject, type ProjectClassification } from "./detection.js";
-import { hasBrowserRequiredText } from "./browser-evidence.js";
 import { debugLog } from "./debug-logger.js";
 import { buildSkillActivationBlock, buildSkillDiscoveryVars } from "./skill-activation.js";
 import { findMilestoneIds } from "./milestone-ids.js";
 import { buildRunUatPresentationForType, RUN_UAT_TOOL_PRESENTATION_PLAN_ID } from "./tool-presentation-plan.js";
+import { resolveEffectiveUatType, shouldDispatchUatForContent, type UatType } from "./uat-policy.js";
 export { buildSkillActivationBlock, buildSkillDiscoveryVars };
@@ -286,19 +286,6 @@ function prependContextModeToBlock(
   return `${contextMode}\n\n${block}`;
 }
-function resolveEffectiveUatType(content: string): UatType {
-  const uatType = getUatType(content);
-  if (uatType === "artifact-driven" && hasBrowserRequiredText(content)) {
-    return "browser-executable";
-  }
-  return uatType;
-}
-function shouldDispatchUatForContent(content: string, prefs: GSDPreferences | undefined): boolean {
-  const uatType = resolveEffectiveUatType(content);
-  return !!prefs?.uat_dispatch || uatType !== "artifact-driven" || hasBrowserRequiredText(content);
-}
 // ─── Executor Constraints ─────────────────────────────────────────────────────
 /**
@@ -3543,11 +3530,25 @@ export async function buildReassessRoadmapPrompt(
 // ─── Reactive Execute Prompt ──────────────────────────────────────────────
+/**
+ * Build the `with model: "…" and thinking: "…"` suffix injected into a prompt
+ * that instructs the coordinator how to dispatch a `subagent` call. Either or
+ * both may be absent (ADR-026 / #508).
+ */
+function subagentCallSuffix(model?: string, thinking?: string): string {
+  const parts: string[] = [];
+  if (model) parts.push(`model: "${model}"`);
+  if (thinking) parts.push(`thinking: "${thinking}"`);
+  return parts.length > 0 ? ` with ${parts.join(" and ")}` : "";
+}
 export async function buildReactiveExecutePrompt(
   mid: string, midTitle: string, sid: string, sTitle: string,
   readyTaskIds: string[], base: string,
   subagentModel?: string,
-  opts?: { sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry; sessionProvider?: string },
+  // Reasoning effort travels inside opts here (not as a positional param) so
+  // existing positional `opts` callers don't shift (#508).
+  opts?: { sessionContextWindow?: number; modelRegistry?: MinimalModelRegistry; sessionProvider?: string; subagentThinking?: string },
 ): Promise<string> {
   const { loadSliceTaskIO, deriveTaskGraph, graphMetrics } = await import("./reactive-graph.js");
@@ -3640,7 +3641,7 @@ export async function buildReactiveExecutePrompt(
       `When done, say: "Task ${tid} complete."`,
     ].join("\n");
-    const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+    const modelSuffix = subagentCallSuffix(subagentModel, opts?.subagentThinking);
     subagentSections.push([
       `### ${tid}: ${tTitle}`,
       "",
@@ -3724,10 +3725,11 @@ export async function buildParallelResearchSlicesPrompt(
   slices: Array<{ id: string; title: string }>,
   basePath: string,
   subagentModel?: string,
+  subagentThinking?: string,
 ): Promise<string> {
   // Build individual research-slice prompts for each slice
   const subagentSections: string[] = [];
-  const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+  const modelSuffix = subagentCallSuffix(subagentModel, subagentThinking);
   for (const slice of slices) {
     const slicePrompt = await buildResearchSlicePrompt(mid, midTitle, slice.id, slice.title, basePath, { contextModeRenderMode: "nested" });
     subagentSections.push([
@@ -3755,6 +3757,7 @@ export async function buildGateEvaluatePrompt(
   mid: string, midTitle: string, sid: string, sTitle: string,
   base: string,
   subagentModel?: string,
+  subagentThinking?: string,
 ): Promise<string> {
   // Pull only the gates this turn actually owns (Q3/Q4). Filter via the
   // registry so that scope:"slice" gates owned by other turns (Q8) can't
@@ -3811,7 +3814,7 @@ export async function buildGateEvaluatePrompt(
       "- `findings`: detailed markdown findings (or empty if omitted)",
     ].join("\n");
-    const modelSuffix = subagentModel ? ` with model: "${subagentModel}"` : "";
+    const modelSuffix = subagentCallSuffix(subagentModel, subagentThinking);
     subagentSections.push([
       `### ${def.id}: ${def.question}`,
       "",

package/src/resources/extensions/gsd/auto-recovery.ts CHANGED

@@ -15,7 +15,25 @@ import { appendEvent } from "./workflow-events.js";
 import { atomicWriteSync } from "./atomic-write.js";
 import { clearParseCache } from "./files.js";
 import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js";
-import { isDbAvailable, getTask, getSlice, getSliceTasks, getPendingGates, updateTaskStatus, updateSliceStatus, insertSlice, getMilestone, getMilestoneSlices, getLatestAssessmentByScope, updateMilestoneStatus, refreshOpenDatabaseFromDisk, getCompletedMilestoneTaskFileHints, getMilestoneCommitAttributionShas, recordMilestoneCommitAttribution, transaction } from "./gsd-db.js";
+import {
+  isDbAvailable,
+  getTask,
+  getSlice,
+  getSliceTasks,
+  getPendingGatesForTurn,
+  updateTaskStatus,
+  updateSliceStatus,
+  insertSlice,
+  getMilestone,
+  getMilestoneSlices,
+  getLatestAssessmentByScope,
+  updateMilestoneStatus,
+  refreshOpenDatabaseFromDisk,
+  getCompletedMilestoneTaskFileHints,
+  getMilestoneCommitAttributionShas,
+  recordMilestoneCommitAttribution,
+  transaction,
+} from "./gsd-db.js";
 import { isValidationTerminal } from "./state.js";
 import { getErrorMessage } from "./error-utils.js";
 import { logWarning, logError } from "./workflow-logger.js";
@@ -390,8 +408,9 @@ export function verifyExpectedArtifact(
     if (gateIds.length === 0) return true;
     try {
-      const pending = getPendingGates(mid, sid, "slice");
-      const pendingIds = new Set(pending.map((g: any) => g.gate_id));
+      if (!isDbAvailable()) return false;
+      const pending = getPendingGatesForTurn(mid, sid, "gate-evaluate");
+      const pendingIds = new Set<string>(pending.map((g) => g.gate_id));
       // All dispatched gates must no longer be pending
       for (const gid of gateIds) {
         if (pendingIds.has(gid)) return false;

package/src/resources/extensions/gsd/auto-runtime-state.ts CHANGED

@@ -1,6 +1,7 @@
 // GSD auto-mode runtime state
 import { AutoSession } from "./auto/session.js";
 import type { CurrentUnit } from "./auto/session.js";
+import type { SourceObservationStore } from "./source-observations.js";
 import {
   isDeterministicPolicyError,
   isQueuedUserMessageSkip,
@@ -65,3 +66,7 @@ export function clearToolInvocationError(): void {
   if (!autoSession.active) return;
   autoSession.lastToolInvocationError = null;
 }
+export function getSourceObservationStore(): SourceObservationStore {
+  return autoSession.sourceObservations;
+}

package/src/resources/extensions/gsd/auto-start.ts CHANGED

@@ -1557,7 +1557,7 @@ export async function bootstrapAutoSession(
     s.autoStartTime = Date.now();
     s.resourceVersionOnStart = readResourceVersion();
     s.pendingQuickTasks = [];
-    s.currentUnit = null;
+    s.clearCurrentUnit();
     s.currentMilestoneId ??=
       strandedRecoveryAction?.milestoneId ??
       (deepProjectStagePending ? null : state.activeMilestone?.id ?? null);