npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.3ea310e → 1.1.1-dev.74e8dd1 - Mend

@opengsd/gsd-pi 1.1.1-dev.3ea310e → 1.1.1-dev.74e8dd1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177) hide show

package/src/resources/extensions/gsd/bootstrap/db-tools.ts CHANGED Viewed

@@ -413,6 +413,92 @@ export function registerDbTools(pi: ExtensionAPI): void {
   pi.registerTool(summarySaveTool);
   registerAlias(pi, summarySaveTool, "gsd_save_summary", "gsd_summary_save");
+  // ─── gsd_uat_result_save ─────────────────────────────────────────────────
+  const uatResultSaveExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {
+    const { executeUatResultSave } = await loadWorkflowExecutors();
+    return executeUatResultSave(params, resolveWorkflowToolBasePath(_ctx, params));
+  };
+  const uatEvidenceRef = Type.Object({
+    kind: StringEnum(["gsd_uat_exec", "gsd_exec", "screenshot", "log", "url", "browser"], { description: "Evidence kind" }),
+    ref: Type.String({ description: "Evidence ID, approved .gsd path, or URL" }),
+    note: Type.Optional(Type.String({ description: "Short evidence note" })),
+  });
+  const uatCheck = Type.Object({
+    id: Type.String({ description: "Stable check ID from the UAT spec" }),
+    description: Type.String({ description: "Check description" }),
+    mode: StringEnum(["artifact", "runtime", "browser", "human-follow-up"], { description: "Evidence mode" }),
+    result: StringEnum(["PASS", "FAIL", "NEEDS-HUMAN"], { description: "Check result" }),
+    evidence: Type.Optional(Type.Array(uatEvidenceRef, { description: "Objective evidence references" })),
+    notes: Type.Optional(Type.String({ description: "Observed result, failure notes, or human instruction" })),
+    nonAutomatable: Type.Optional(Type.Boolean({ description: "True when the check is explicitly non-automatable" })),
+  });
+  const toolPresentationBlock = Type.Object({
+    surface: StringEnum(["provider-tools", "claude-code-sdk", "mcp", "hybrid"], { description: "Tool presentation surface" }),
+    model: Type.Optional(Type.Object({
+      provider: Type.Optional(Type.String()),
+      api: Type.Optional(Type.String()),
+      id: Type.Optional(Type.String()),
+    })),
+    presentedTools: Type.Array(Type.String(), { description: "Tool names actually presented to the model" }),
+    blockedTools: Type.Array(Type.Object({
+      name: Type.String(),
+      reason: Type.String(),
+    }), { description: "Tool names blocked from the model with reasons" }),
+    aliases: Type.Optional(Type.Array(Type.Object({
+      requested: Type.String(),
+      canonical: Type.String(),
+    }))),
+    fallbackToolsUsed: Type.Optional(Type.Array(Type.String())),
+    toolPresentationPlanId: Type.Optional(Type.String()),
+    notes: Type.Optional(Type.String()),
+  });
+  const uatResultSaveTool = {
+    name: "gsd_uat_result_save",
+    label: "Save UAT Result",
+    description:
+      "Save a structured UAT result for a slice. Validates evidence, writes the ASSESSMENT artifact, " +
+      "records attempt history, and saves the aggregate UAT gate result.",
+    promptSnippet: "Save structured UAT checks, evidence, verdict, and tool-presentation proof",
+    promptGuidelines: [
+      "Call gsd_uat_result_save once after all UAT checks have been executed.",
+      "Every PASS or FAIL check must cite objective evidence, preferably a gsd_uat_exec evidence ID.",
+      "Include the presented and blocked tool set in presentation so tool timing is auditable.",
+      "Do not use raw gsd_summary_save as a substitute for UAT results.",
+    ],
+    parameters: Type.Object({
+      milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }),
+      sliceId: Type.String({ description: "Slice ID (e.g. S01)" }),
+      uatType: StringEnum(["artifact-driven", "browser-executable", "runtime-executable", "live-runtime", "mixed", "human-experience"], { description: "Declared UAT mode" }),
+      verdict: StringEnum(["PASS", "FAIL", "PARTIAL"], { description: "Overall UAT verdict" }),
+      checks: Type.Array(uatCheck, { description: "Structured check results" }),
+      presentation: toolPresentationBlock,
+      notes: Type.Optional(Type.String({ description: "Overall verdict rationale" })),
+      attempt: Type.Optional(Type.String({ description: "Attempt number or auto" })),
+      previousAttemptId: Type.Optional(Type.String({ description: "Prior attempt ID, when retrying" })),
+    }),
+    execute: uatResultSaveExecute,
+    renderCall(args: any, theme: any) {
+      let text = theme.fg("toolTitle", theme.bold("uat_result_save "));
+      text += theme.fg("accent", `${args.milestoneId ?? "?"}/${args.sliceId ?? "?"}`);
+      if (args.verdict) text += theme.fg("dim", ` → ${args.verdict}`);
+      return new Text(text, 0, 0);
+    },
+    renderResult(result: any, _options: any, theme: any) {
+      const d = readDetails(result);
+      if (result.isError || d?.error) {
+        return new Text(theme.fg("error", formatToolErrorText(result, d)), 0, 0);
+      }
+      return new Text(theme.fg("success", `UAT ${d?.sliceId ?? ""}: ${d?.verdict ?? "saved"}`), 0, 0);
+    },
+  };
+  pi.registerTool(uatResultSaveTool);
   // ─── gsd_milestone_generate_id (formerly gsd_generate_milestone_id) ────
   const milestoneGenerateIdExecute = async (_toolCallId: string, _params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {

package/src/resources/extensions/gsd/bootstrap/exec-tools.ts CHANGED Viewed

@@ -25,6 +25,57 @@ async function loadContextModePreferences(baseDir: string) {
 }
 export function registerExecTools(pi: ExtensionAPI): void {
+  pi.registerTool({
+    name: "gsd_uat_exec",
+    label: "UAT Exec",
+    description:
+      "Run a UAT-scoped bash/node/python check with milestone/slice/check metadata. " +
+      "Uses the same capped .gsd/exec evidence store as gsd_exec, but rejects commands that mutate dependencies, git state, credentials, or destructive files.",
+    promptSnippet: "Run one UAT check and save typed evidence under .gsd/exec",
+    promptGuidelines: [
+      "Use gsd_uat_exec for each automated UAT check.",
+      "Every PASS/FAIL check saved by gsd_uat_result_save must reference objective evidence from this tool or another approved GSD evidence path.",
+      "Do not install packages, mutate git state, edit source files, or dump credentials during UAT.",
+    ],
+    parameters: Type.Object({
+      milestoneId: Type.String({ description: "Milestone ID (e.g. M001)" }),
+      sliceId: Type.String({ description: "Slice ID (e.g. S01)" }),
+      checkId: Type.String({ description: "Stable check ID from the UAT spec (e.g. UAT-01)" }),
+      intent: Type.String({
+        description:
+          "UAT command intent. Use one canonical value: uat-artifact-check, uat-runtime-check, " +
+          "uat-browser-check, uat-service-start, or uat-log-inspection. Short aliases such as artifact, " +
+          "runtime, browser, service-start, and log-inspection are accepted.",
+      }),
+      runtime: Type.Optional(
+        Type.String({
+          description:
+            "Optional interpreter. Defaults to bash. Supported: bash, node, python; sh/shell, js/nodejs, and py/python3 aliases are accepted.",
+        }),
+      ),
+      script: Type.Optional(Type.String({ description: "Script body. Keep output small (log the finding, not the data)." })),
+      command: Type.Optional(Type.String({ description: "Alias for script; defaults to bash when runtime is omitted." })),
+      cmd: Type.Optional(Type.String({ description: "Short alias for script." })),
+      code: Type.Optional(Type.String({ description: "Alias for script, useful for node/python snippets." })),
+      expected: Type.Optional(Type.String({ description: "Expected outcome for this UAT check." })),
+      timeout_ms: Type.Optional(
+        Type.Number({
+          description: "Per-invocation timeout (ms). Capped at 600000. Default from preferences.",
+          minimum: 1_000,
+          maximum: 600_000,
+        }),
+      ),
+    }),
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      const { executeUatExec } = await import("../tools/exec-tool.js");
+      const baseDir = resolveCtxCwd(_ctx);
+      return executeUatExec(params as Parameters<typeof executeUatExec>[0], {
+        baseDir,
+        preferences: await loadContextModePreferences(baseDir),
+      });
+    },
+  });
   pi.registerTool({
     name: "gsd_exec",
     label: "Exec (Sandboxed)",

package/src/resources/extensions/gsd/bootstrap/register-hooks.ts CHANGED Viewed

@@ -36,7 +36,9 @@ import { resolveSkillManifest } from "../skill-manifest.js";
 import { applyUnitSkillVisibility, unitHasSkillManifest } from "../skill-scope.js";
 import { getGuidedUnitContext } from "../guided-unit-context.js";
 import { registerPlanMilestoneSchemaRecovery } from "./plan-milestone-schema-recovery.js";
-import { AUTO_UNIT_SCOPED_TOOLS, isWorkflowAliasTool } from "../auto-unit-tool-scope.js";
+import { AUTO_UNIT_SCOPED_TOOLS, RUN_UAT_BROWSER_TOOL_NAMES, isWorkflowAliasTool } from "../auto-unit-tool-scope.js";
+import { filterToolsForProvider } from "../model-router.js";
+import { RUN_UAT_WORKFLOW_TOOL_NAMES } from "../tool-presentation-plan.js";
 let approvalQuestionAbortInFlight = false;
@@ -123,6 +125,7 @@ export const MINIMAL_GSD_TOOL_NAMES = [
   "gsd_resume",
   "gsd_milestone_status",
   "gsd_checkpoint_db",
+  "gsd_plan_milestone",
   "memory_query",
   "capture_thought",
 ] as const;
@@ -226,6 +229,9 @@ export function buildMinimalAutoGsdToolSet(
   unitType: string | undefined,
   registeredToolNames: readonly string[] = activeToolNames,
 ): string[] {
+  if (unitType === "run-uat") {
+    return buildRunUatGsdToolSet(activeToolNames, registeredToolNames);
+  }
   const unitTools = unitType ? AUTO_UNIT_SCOPED_TOOLS[unitType] ?? [] : [];
   const autoBaseTools = new Set<string>(MINIMAL_AUTO_BASE_TOOL_NAMES);
   const availableBaseTools = registeredToolNames.filter((name) => autoBaseTools.has(name));
@@ -240,6 +246,17 @@ export function buildMinimalAutoGsdToolSet(
   return withPreservedShimTools([...new Set([...preserved, ...scoped])]);
 }
+export function buildRunUatGsdToolSet(
+  activeToolNames: readonly string[],
+  registeredToolNames: readonly string[] = activeToolNames,
+): string[] {
+  const scoped = resolveScopedToolNames(
+    [...activeToolNames, ...registeredToolNames],
+    [...RUN_UAT_WORKFLOW_TOOL_NAMES, "subagent", ...RUN_UAT_BROWSER_TOOL_NAMES],
+  );
+  return [...new Set(scoped)];
+}
 export function buildMinimalGsdWorkflowToolSet(
   activeToolNames: readonly string[],
   registeredToolNames: readonly string[] = activeToolNames,
@@ -1022,9 +1039,8 @@ export function registerHooks(
     if (result.block) return result;
   });
-  // ── Safety harness: evidence collection + destructive command warnings ──
+  // ── Safety harness: evidence collection + destructive command blocking ──
   pi.on("tool_call", async (event, ctx) => {
-    if (!isAutoActive()) return;
     markToolStart(event.toolCallId, event.toolName);
     safetyRecordToolCall(event.toolCallId, event.toolName, event.input as Record<string, unknown>);
@@ -1041,17 +1057,28 @@ export function registerHooks(
       }
     }
-    // Destructive command classification (warn only, never block)
+    // Destructive command classification + hard gate in all modes.
     if (isToolCallEventType("bash", event)) {
       const classification = classifyCommand(event.input.command);
       if (classification.destructive) {
+        const reason = [
+          "HARD BLOCK: destructive Bash command requires explicit human confirmation.",
+          `Detected: ${classification.labels.join(", ")}`,
+          "Run this via ask_user_questions, wait for the user's response,",
+          "then issue the command only when confirmed in the current turn.",
+        ].join(" ");
         safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
           command: String(event.input.command).slice(0, 200),
         });
-        ctx.ui.notify(
-          `Destructive command detected: ${classification.labels.join(", ")}`,
-          "warning",
-        );
+        if (ctx) {
+          await maybePauseAutoForApprovalGate(
+            ctx,
+            pi,
+            isAutoActive(),
+            "Depth confirmation is waiting for your answer — pausing auto-mode.",
+          );
+        }
+        return { block: true, reason };
       }
     }
   });
@@ -1320,19 +1347,27 @@ export function registerHooks(
     const fullToolsRequested = isFullGsdToolSurfaceRequested();
     const dropAliases = !fullToolsRequested;
     const dropBrowser = !fullToolsRequested && !isBrowserToolSurfaceRequested();
-    const providerCompatible = compatible.filter(
-      (name) => !(dropAliases && isWorkflowAliasTool(name)) && !(dropBrowser && isBrowserTool(name)),
+    const aliasFilteredCompatible = compatible.filter(
+      (name) => !(dropAliases && isWorkflowAliasTool(name)),
+    );
+    const providerCompatible = aliasFilteredCompatible.filter(
+      (name) => !(dropBrowser && isBrowserTool(name)),
     );
     const surfaceReduced = providerCompatible.length !== compatible.length;
     if (fullToolsRequested) {
       return surfaceReduced ? { toolNames: providerCompatible } : undefined;
     }
     const registeredToolNames = resolveRegisteredToolNames(pi, event.activeToolNames);
+    const compatibleRegisteredToolNames = filterToolsForProvider(
+      registeredToolNames,
+      event.selectedModelApi,
+      event.selectedModelProvider,
+    ).compatible.filter((name) => !(dropAliases && isWorkflowAliasTool(name)));
     const guidedUnit = getGuidedUnitContext();
     const requestScoped = buildRequestScopedGsdToolSet(
-      providerCompatible,
+      guidedUnit?.unitType === "run-uat" ? aliasFilteredCompatible : providerCompatible,
       event.requestCustomMessages,
-      registeredToolNames,
+      guidedUnit?.unitType === "run-uat" ? compatibleRegisteredToolNames : registeredToolNames,
       guidedUnit?.unitType,
     );
     if (requestScoped) {
@@ -1342,9 +1377,11 @@ export function registerHooks(
     if (dash.active && dash.currentUnit) {
       return {
         toolNames: buildMinimalAutoGsdToolSet(
-          providerCompatible,
+          dash.currentUnit.type === "run-uat" ? aliasFilteredCompatible : providerCompatible,
           dash.currentUnit.type,
-          resolveRegisteredToolNames(pi, event.activeToolNames),
+          dash.currentUnit.type === "run-uat"
+            ? compatibleRegisteredToolNames
+            : resolveRegisteredToolNames(pi, event.activeToolNames),
         ),
       };
     }

package/src/resources/extensions/gsd/bootstrap/write-gate.ts CHANGED Viewed

@@ -679,6 +679,7 @@ const PLANNING_SUBAGENT_TOOLS = new Set(["subagent", "task"]);
  * manifests still declare per-unit subsets via ToolsPolicy.allowedSubagents.
  */
 const PLANNING_DISPATCH_AGENT_REGISTRY = {
+  mnemo: { readOnlySpecialist: true },
   scout: { readOnlySpecialist: true },
   planner: { readOnlySpecialist: true },
   reviewer: { readOnlySpecialist: true },
@@ -692,7 +693,7 @@ export const ALLOWED_PLANNING_DISPATCH_AGENTS = new Set<string>(
     .map(([agentId]) => agentId),
 );
-let warnedMissingPlanningDispatchAgentClasses = false;
+let warnedMissingControlledDispatchAgentClasses = false;
 function isReadOnlySpecialist(agentId: string): boolean {
   const metadata = PLANNING_DISPATCH_AGENT_REGISTRY[agentId as keyof typeof PLANNING_DISPATCH_AGENT_REGISTRY];
@@ -703,11 +704,20 @@ function allowedPlanningDispatchAgentsList(): string {
   return [...ALLOWED_PLANNING_DISPATCH_AGENTS].join(", ");
 }
-function warnMissingPlanningDispatchAgentClasses(unitType: string, mode: string, toolName: string): void {
-  if (warnedMissingPlanningDispatchAgentClasses) return;
-  warnedMissingPlanningDispatchAgentClasses = true;
+function allowsControlledSubagentDispatch(
+  policy: ToolsPolicy,
+): policy is ToolsPolicy & { readonly allowedSubagents: readonly string[] } {
+  return (
+    (policy.mode === "planning-dispatch" || policy.mode === "verification") &&
+    Array.isArray((policy as { readonly allowedSubagents?: unknown }).allowedSubagents)
+  );
+}
+function warnMissingControlledDispatchAgentClasses(unitType: string, mode: string, toolName: string): void {
+  if (warnedMissingControlledDispatchAgentClasses) return;
+  warnedMissingControlledDispatchAgentClasses = true;
   // TODO(#5060): Remove this migration shim once all subagent/task callers are verified to forward agent identities.
-  const message = `[write-gate] planning-dispatch: shouldBlockPlanningUnit called for tool "${toolName}" ` +
+  const message = `[write-gate] controlled-dispatch: shouldBlockPlanningUnit called for tool "${toolName}" ` +
     `on unit "${unitType}" without agentClasses - stale caller; blocking dispatch.`;
   console.warn(message);
   logWarning("intercept", message, {
@@ -777,8 +787,9 @@ function blockReason(unitType: string, mode: string, what: string): string {
  *   - "docs"       → like "planning" but also allows writes to paths
  *                    matching `allowedPathGlobs` relative to basePath.
  *   - "verification"
- *                  → allows Bash for project verification commands, but keeps
- *                    writes restricted to .gsd/ and blocks subagent dispatch.
+ *                  → allows Bash for project verification commands, keeps
+ *                    writes restricted to .gsd/, and permits subagent dispatch
+ *                    only when the manifest declares allowedSubagents.
  *
  * `pathOrCommand` is the file path for write/edit-shaped tools and the
  * shell command for bash. Other tools ignore this argument.
@@ -825,7 +836,7 @@ export function shouldBlockPlanningUnit(
   if (tool.startsWith("gsd_")) return { block: false };
   if (PLANNING_SUBAGENT_TOOLS.has(tool)) {
-    if (policy.mode === "planning-dispatch") {
+    if (allowsControlledSubagentDispatch(policy)) {
       const requested = (agentClasses ?? []).map(a => a.trim()).filter(Boolean);
       const dispatchContract = compileSubagentPermissionContract(policy);
       const allowedSubagents = dispatchContract.allowedSubagents;
@@ -834,7 +845,7 @@ export function shouldBlockPlanningUnit(
       // agent identities yet. Block and warn so stale callers surface in telemetry
       // instead of silently bypassing the gate.
       if (agentClasses === undefined) {
-        warnMissingPlanningDispatchAgentClasses(unitType, policy.mode, tool);
+        warnMissingControlledDispatchAgentClasses(unitType, policy.mode, tool);
         return {
           block: true,
           reason: blockReason(
@@ -857,7 +868,7 @@ export function shouldBlockPlanningUnit(
           reason: blockReason(
             unitType,
             policy.mode,
-            `subagent dispatch of "${globallyDisallowed}" not permitted; only read-only specialists (${allowedPlanningDispatchAgentsList()}) may be dispatched from planning-dispatch units`,
+            `subagent dispatch of "${globallyDisallowed}" not permitted; only read-only specialists (${allowedPlanningDispatchAgentsList()}) may be dispatched from ${policy.mode} units`,
           ),
         };
       }

package/src/resources/extensions/gsd/commands/handlers/core.ts CHANGED Viewed

@@ -75,7 +75,7 @@ export function showHelp(ctx: ExtensionCommandContext, args = ""): void {
     "  /gsd new-milestone  Create milestone from headless context (used by gsd headless)",
     "  /gsd new-project    Bootstrap a new project (use --deep for staged project-level discovery)",
     "  /gsd quick          Execute a quick task without full planning overhead",
-    "  /gsd dispatch       Dispatch a specific phase directly  [research|plan|execute|complete|uat|replan]",
+    "  /gsd dispatch       Dispatch a specific phase directly  [research|plan|execute|complete|validate|reassess|uat|replan]",
     "  /gsd verdict <v>    Override milestone validation verdict  [pass|needs-attention|needs-remediation] [--milestone Mxxx] [--rationale \"...\"]",
     "  /gsd parallel       Parallel milestone orchestration  [start|status|stop|pause|resume|merge|watch]",
     "  /gsd workflow       Custom workflow lifecycle  [new|run|list|validate|pause|resume]",

package/src/resources/extensions/gsd/commands-prefs-wizard.ts CHANGED Viewed

@@ -23,6 +23,9 @@ import {
 import { loadFile, saveFile, splitFrontmatter, parseFrontmatterMap } from "./files.js";
 import { runClaudeImportFlow } from "./claude-import.js";
+const DEFAULT_WIDGET_MODE = "small";
+const WIDGET_MODE_OPTIONS = [DEFAULT_WIDGET_MODE, "full", "min", "off"] as const;
 /** Extract body content after frontmatter closing delimiter, or null if none. */
 function extractBodyAfterFrontmatter(content: string): string | null {
   const closingIdx = content.indexOf("\n---", content.indexOf("---"));
@@ -1558,7 +1561,7 @@ async function configureAdvanced(ctx: ExtensionCommandContext, prefs: Record<str
     prefs.min_request_interval_ms = minRequestInterval;
   }
-  const widget = await promptEnum(ctx, "Auto-mode widget display", prefs.widget_mode, ["full", "small", "min", "off"], "full");
+  const widget = await promptEnum(ctx, "Auto-mode widget display", prefs.widget_mode, WIDGET_MODE_OPTIONS, DEFAULT_WIDGET_MODE);
   if (widget !== undefined) prefs.widget_mode = widget;
   const experimental = (prefs.experimental as Record<string, unknown> | undefined) ?? {};

package/src/resources/extensions/gsd/commands-verdict.ts CHANGED Viewed

@@ -238,7 +238,7 @@ export async function handleVerdict(
   if (effectiveVerdict === "needs-remediation") {
     ctx.ui.notify(
-      "Follow up with gsd_reassess_roadmap to add remediation slices, then re-run /gsd auto.",
+      "Follow up with /gsd dispatch reassess to add remediation slices, then re-run /gsd auto.",
       "info",
     );
   }

package/src/resources/extensions/gsd/config-overlay.ts CHANGED Viewed

@@ -23,6 +23,8 @@ import {
   resolveAutoSupervisorConfig,
 } from "./preferences.js";
+const DEFAULT_WIDGET_MODE = "small";
 // ─── Data Collection ──────────────────────────────────────────────────────
 interface ConfigSection {
@@ -160,7 +162,7 @@ function collectConfigSections(): ConfigSection[] {
   if (prefs?.service_tier) toggleRows.push({ label: "service_tier", value: prefs.service_tier });
   if (prefs?.search_provider && prefs.search_provider !== "auto") toggleRows.push({ label: "search_provider", value: prefs.search_provider });
   if (prefs?.context_selection) toggleRows.push({ label: "context_selection", value: prefs.context_selection });
-  if (prefs?.widget_mode && prefs.widget_mode !== "full") toggleRows.push({ label: "widget_mode", value: prefs.widget_mode });
+  if (prefs?.widget_mode && prefs.widget_mode !== DEFAULT_WIDGET_MODE) toggleRows.push({ label: "widget_mode", value: prefs.widget_mode });
   if (prefs?.experimental?.rtk) toggleRows.push({ label: "experimental.rtk", value: "on" });
   if (toggleRows.length > 0) sections.push({ title: "Toggles", rows: toggleRows });

package/src/resources/extensions/gsd/error-classifier.ts CHANGED Viewed

@@ -47,9 +47,10 @@ export function resetRetryState(state: RetryState): void {
 const PERMANENT_RE = /auth|unauthorized|forbidden|invalid.*key|invalid.*api|billing|quota exceeded|account/i;
 // Include provider-specific quota-window phrasing like:
 // - "You've hit your limit"
+// - "You've reached your limit"
 // - "usage limit" / "quota reached"
 // - "out of extra usage"
-const RATE_LIMIT_RE = /rate.?limit|too many requests|429|hit your limit|usage limit|out of extra usage|quota (?:reached|hit)|limit.*resets?/i;
+const RATE_LIMIT_RE = /rate.?limit|too many requests|429|(?:hit|reached) your (?:\w+ )?limit|(?:usage|session|weekly|daily|monthly|quota) limit|out of extra usage|quota (?:reached|hit)|limit.*resets?/i;
 // OpenRouter affordability-style quota errors should be treated as transient
 // so core retry logic can lower maxTokens and continue in-session.
 const AFFORDABILITY_RE = /requires more credits|can only afford|insufficient credits|not enough credits|fewer max_tokens/i;

package/src/resources/extensions/gsd/exec-sandbox.ts CHANGED Viewed

@@ -20,6 +20,8 @@ export interface ExecSandboxRequest {
   script: string;
   /** Optional purpose/label recorded in meta.json. */
   purpose?: string;
+  /** Optional structured metadata recorded in meta.json. */
+  metadata?: Record<string, unknown>;
   /** Per-invocation timeout in ms. Clamped to `clamp_timeout_ms`. */
   timeout_ms?: number;
 }
@@ -315,6 +317,7 @@ function writeMeta(
     id: result.id,
     runtime: result.runtime,
     purpose: request.purpose ?? null,
+    ...(request.metadata ? { metadata: request.metadata } : {}),
     script_chars: request.script.length,
     started_at: now.toISOString(),
     finished_at: new Date(now.getTime() + result.duration_ms).toISOString(),
@@ -328,6 +331,7 @@ function writeMeta(
     stderr_truncated: result.stderr_truncated,
     stdout_path: result.stdout_path,
     stderr_path: result.stderr_path,
+    ...(request.metadata ? { metadata: request.metadata } : {}),
   };
   writeFileSync(path, `${JSON.stringify(meta, null, 2)}\n`);
 }

package/src/resources/extensions/gsd/preferences-types.ts CHANGED Viewed

@@ -423,7 +423,7 @@ export interface GSDPreferences {
   search_provider?: "brave" | "tavily" | "ollama" | "native" | "auto";
   /** Context selection mode for file inlining. "full" inlines entire files, "smart" uses semantic chunking. Default derived from token profile. */
   context_selection?: ContextSelectionMode;
-  /** Default widget display mode for auto-mode dashboard. "full" | "small" | "min" | "off". Default: "full". */
+  /** Default widget display mode for auto-mode dashboard. "full" | "small" | "min" | "off". Default: "small". */
   widget_mode?: "full" | "small" | "min" | "off";
   /** Reactive (graph-derived parallel) task execution within slices. Disabled by default. */
   reactive_execution?: ReactiveExecutionConfig;

package/src/resources/extensions/gsd/prompts/run-uat.md CHANGED Viewed

@@ -37,7 +37,13 @@ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply a
 Choose the lightest tool that proves the check honestly:
-- Run shell commands with `bash`
+- Run automated checks with `gsd_uat_exec`
+  - Use `uat-artifact-check` as `intent` for static file, grep, structure, or artifact checks.
+  - Use `uat-runtime-check` as `intent` for executing tests, scripts, or runtime assertions.
+  - Use `uat-browser-check` as `intent` for browser interaction or screenshot-backed UI checks.
+  - Use `uat-service-start` as `intent` only when starting or connecting to an app/service.
+  - Use `uat-log-inspection` as `intent` for checking logs or captured output files.
+  - The result-table evidence mode is separate; do not use `artifact`, `runtime`, or `human-follow-up` as `intent`.
 - Run `grep` / `rg` checks against files
 - Run `node` / other script invocations
 - Read files and verify their contents
@@ -48,7 +54,7 @@ Choose the lightest tool that proves the check honestly:
 For each check, record:
 - The check description (from the UAT file)
 - The evidence mode used: `artifact`, `runtime`, or `human-follow-up`
-- The command or action taken
+- The command or action taken, including the `gsd_uat_exec` evidence ID for automated checks
 - The actual result observed
 - `PASS`, `FAIL`, or `NEEDS-HUMAN`
@@ -57,7 +63,7 @@ After running all checks, compute the **overall verdict**:
 - `FAIL` — one or more automatable checks failed
 - `PARTIAL` — one or more automatable checks were skipped or returned inconclusive results (not the same as `NEEDS-HUMAN` — use PARTIAL only when the agent itself could not determine pass/fail for a check it was supposed to automate)
-Call `gsd_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content` — the tool computes the file path and persists to both DB and disk. The content should follow this format:
+Call `gsd_summary_save` with `milestone_id: "{{milestoneId}}"`, `slice_id: "{{sliceId}}"`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content`. The tool computes the assessment path, persists to DB/disk, and saves the aggregate UAT gate. The content should follow this logical shape:
 ```markdown
 ---
@@ -86,6 +92,6 @@ date: <ISO 8601 timestamp>
 ---
-**You MUST call `gsd_summary_save` with the UAT result content before finishing.**
+**You MUST call `gsd_summary_save` with `artifact_type: "ASSESSMENT"` and the UAT result content before finishing. Do not write the assessment file directly.**
 When done, say: "UAT {{sliceId}} complete."

package/src/resources/extensions/gsd/prompts/system.md CHANGED Viewed

@@ -32,7 +32,7 @@ GSD ships with bundled skills. Installed skills are listed in `<available_skills
 - Never print, echo, log, or restate secrets or credentials. Report only key names and applied/skipped status.
 - Never ask the user to edit `.env` files or set secrets manually. Use `secure_env_collect`.
 - In enduring files, write current state only unless the file is explicitly historical.
-- **Never take outward-facing actions on GitHub or external services without explicit user confirmation.** This includes creating/closing issues, merging/approving/commenting on PRs, pushing remote branches, publishing packages, or any state change outside local filesystem. Read-only listing/viewing/diffing is fine. Present intent and get a clear "yes" first. **Non-bypassable:** no response, ambiguity, or `ask_user_questions` failure means re-ask; never rationalize past the block. Missing "yes" means "no."
+- **Never take outward-facing actions on GitHub or external services without explicit user confirmation.** This includes creating/closing issues, merging/approving/commenting on PRs, pushing remote branches, publishing packages, terragrunt/aws/kubectl mutations, or any state change outside local filesystem. Read-only listing/viewing/diffing is fine. Present intent and get a clear "yes" first. **Non-bypassable:** no response, ambiguity, or `ask_user_questions` failure means re-ask; never rationalize past the block. Missing "yes" means "no."
 If a `GSD Skill Preferences` block appears below, treat it as durable guidance for skills to use, prefer, or avoid unless it conflicts with artifact rules, verification, or higher-priority instructions.
@@ -160,4 +160,6 @@ Fix root causes, not symptoms. If applying temporary mitigation, label it and pr
 - When debugging, stay curious. Problems are puzzles. Say what's interesting about the failure before reaching for fixes.
 - After completing a task, give a brief summary and 2-4 numbered next-step options; last option is always "Other". Omit the list for strict output formats.
+  If any next step is destructive/outward-facing, present it via `ask_user_questions` and wait for the user's answer before execution. Do not execute a next-step item from a prior plain-text numbered list without fresh confirmation.
 Good narration states a decision or finding: "Three handlers follow a middleware pattern - using that instead of a custom wrapper." Bad narration just announces the next call ("Reading the file now.") or emits compressed planner notes ("Need create plan artifact maybe read existing plans.").

package/src/resources/extensions/gsd/safety/destructive-guard.ts CHANGED Viewed

@@ -24,6 +24,9 @@ const DESTRUCTIVE_PATTERNS: readonly DestructivePattern[] = [
   { pattern: /\btruncate\s+table\b/i, label: "SQL truncate" },
   { pattern: /\bchmod\s+777\b/, label: "world-writable permissions" },
   { pattern: /\bcurl\s.*\|\s*(bash|sh|zsh)\b/, label: "pipe to shell" },
+  { pattern: /\bterra(form|grunt)\s+(apply|destroy)/i, label: "IaC apply/destroy" },
+  { pattern: /\baws\s+\w+\s+(delete|create|put|remove|terminate)\b/i, label: "AWS mutation" },
+  { pattern: /\bkubectl\s+(delete|apply)\b/i, label: "kubectl mutation" },
 ];
 // ─── Public API ─────────────────────────────────────────────────────────────

package/src/resources/extensions/gsd/skill-activation.ts CHANGED Viewed

@@ -50,6 +50,16 @@ function tokenizeSkillContext(...parts: Array<string | null | undefined>): Set<s
   return tokens;
 }
+function tokenizeUnitType(unitType: string | undefined): Set<string> {
+  const tokens = new Set<string>();
+  const value = unitType?.trim().toLowerCase();
+  if (!value) return tokens;
+  tokens.add(value);
+  tokens.add(value.replace(/[-_]+/g, " "));
+  tokens.add(value.replace(/[-_\s]+/g, ""));
+  return tokens;
+}
 function skillMatchesContext(skill: Skill, contextTokens: Set<string>): boolean {
   const haystacks = [
     skill.name.toLowerCase(),
@@ -79,17 +89,25 @@ function ruleMatchesContext(when: string, contextTokens: Set<string>): boolean {
   );
 }
+function ruleMatchesUnitType(when: string, unitType: string | undefined): boolean {
+  if (!unitType) return false;
+  const whenTokens = tokenizeSkillContext(when);
+  const unitTokens = tokenizeUnitType(unitType);
+  return [...unitTokens].some(token => whenTokens.has(token));
+}
 function resolveSkillRuleMatches(
   prefs: GSDPreferences | undefined,
   contextTokens: Set<string>,
   base: string,
+  unitType?: string,
 ): { include: string[]; avoid: string[] } {
   if (!prefs?.skill_rules?.length) return { include: [], avoid: [] };
   const include: string[] = [];
   const avoid: string[] = [];
   for (const rule of prefs.skill_rules) {
-    if (!ruleMatchesContext(rule.when, contextTokens)) continue;
+    if (!ruleMatchesContext(rule.when, contextTokens) && !ruleMatchesUnitType(rule.when, unitType)) continue;
     include.push(...resolvePreferenceSkillNames([...(rule.use ?? []), ...(rule.prefer ?? [])], base));
     avoid.push(...resolvePreferenceSkillNames(rule.avoid ?? [], base));
   }
@@ -196,7 +214,7 @@ export function buildSkillActivationBlock(params: {
     matched.add(name);
   }
-  const ruleMatches = resolveSkillRuleMatches(prefs, contextTokens, params.base);
+  const ruleMatches = resolveSkillRuleMatches(prefs, contextTokens, params.base, params.unitType);
   for (const name of ruleMatches.include) matched.add(name);
   for (const name of ruleMatches.avoid) avoided.add(name);