npm - gsd-pi - Versions diffs - 2.63.0-dev.351157b → 2.63.0-dev.786f0ff - Mend

gsd-pi 2.63.0-dev.351157b → 2.63.0-dev.786f0ff

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99)

package/src/resources/extensions/gsd/auto/detect-stuck.ts CHANGED Viewed

@@ -6,6 +6,13 @@
 import type { WindowEntry } from "./types.js";
+/**
+ * Pattern matching ENOENT errors with a file path.
+ * Matches: "ENOENT: no such file or directory, access '/path/to/file'"
+ * and similar Node.js filesystem error messages.
+ *
+ * Capture group 1 is the text between the first pair of single quotes that
+ * follows "ENOENT"; if a message quotes more than one path, only the first
+ * one is captured. The pattern is unanchored, so "ENOENT" may appear anywhere
+ * in the string, and it has no `g`/`y` flag, so `exec()` carries no
+ * `lastIndex` state from one call to the next.
+ */
+const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/;
 /**
  * Analyze a sliding window of recent unit dispatches for stuck patterns.
  * Returns a signal with reason if stuck, null otherwise.
@@ -13,6 +20,8 @@ import type { WindowEntry } from "./types.js";
  * Rule 1: Same error string twice in a row → stuck immediately.
  * Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
  * Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
+ * Rule 4: Same ENOENT path in any 2 entries within the window → stuck (#3575).
+ *         Missing files don't self-heal between retries — retrying wastes budget.
  */
 export function detectStuck(
   window: readonly WindowEntry[],
@@ -56,5 +65,23 @@ export function detectStuck(
     }
   }
+  // Rule 4: Same ENOENT path seen twice in window (#3575)
+  // Missing files don't appear between retries — stop immediately.
+  const enoentPaths = new Map<string, number>();
+  for (const entry of window) {
+    if (!entry.error) continue;
+    const match = ENOENT_PATH_RE.exec(entry.error);
+    if (!match) continue;
+    const filePath = match[1];
+    const count = (enoentPaths.get(filePath) ?? 0) + 1;
+    if (count >= 2) {
+      return {
+        stuck: true,
+        reason: `Missing file referenced twice: ${filePath} (ENOENT)`,
+      };
+    }
+    enoentPaths.set(filePath, count);
+  }
   return null;
 }

package/src/resources/extensions/gsd/auto/phases.ts CHANGED Viewed

@@ -37,6 +37,9 @@ import { withTimeout, FINALIZE_POST_TIMEOUT_MS } from "./finalize-timeout.js";
 import { getEligibleSlices } from "../slice-parallel-eligibility.js";
 import { startSliceParallel } from "../slice-parallel-orchestrator.js";
 import { isDbAvailable, getMilestoneSlices } from "../gsd-db.js";
+import { resetEvidence } from "../safety/evidence-collector.js";
+import { createCheckpoint, cleanupCheckpoint, rollbackToCheckpoint } from "../safety/git-checkpoint.js";
+import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
 // ─── generateMilestoneReport ──────────────────────────────────────────────────
@@ -1079,6 +1082,21 @@ export async function runUnitPhase(
   if (mid)
     deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
+  // ── Safety harness: reset evidence + create checkpoint ──
+  const safetyConfig = resolveSafetyHarnessConfig(
+    prefs?.safety_harness as Record<string, unknown> | undefined,
+  );
+  if (safetyConfig.enabled && safetyConfig.evidence_collection) {
+    resetEvidence();
+  }
+  // Only checkpoint code-executing units (not lifecycle/planning units)
+  if (safetyConfig.enabled && safetyConfig.checkpoints && unitType === "execute-task") {
+    s.checkpointSha = createCheckpoint(s.basePath, unitId);
+    if (s.checkpointSha) {
+      debugLog("runUnitPhase", { phase: "checkpoint-created", unitId, sha: s.checkpointSha.slice(0, 8) });
+    }
+  }
   // Prompt injection
   let finalPrompt = prompt;
@@ -1376,6 +1394,27 @@ export async function runUnitPhase(
   deps.emitJournalEvent({ ts: new Date().toISOString(), flowId: ic.flowId, seq: ic.nextSeq(), eventType: "unit-end", data: { unitType, unitId, status: unitResult.status, artifactVerified, ...(unitResult.errorContext ? { errorContext: unitResult.errorContext } : {}) }, causedBy: { flowId: ic.flowId, seq: unitStartSeq } });
+  // ── Safety harness: checkpoint cleanup or rollback ──
+  if (s.checkpointSha) {
+    if (unitResult.status === "error" && safetyConfig.auto_rollback) {
+      const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
+      if (rolled) {
+        ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
+        debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
+      }
+    } else if (unitResult.status === "error") {
+      ctx.ui.notify(
+        `Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
+        "warning",
+      );
+    } else {
+      // Success — clean up checkpoint ref
+      cleanupCheckpoint(s.basePath, unitId);
+      debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
+    }
+    s.checkpointSha = null;
+  }
   return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
 }

package/src/resources/extensions/gsd/auto/session.ts CHANGED Viewed

@@ -145,6 +145,10 @@ export class AutoSession {
   lastBaselineCharCount: number | undefined;
   pendingQuickTasks: CaptureEntry[] = [];
+  // ── Safety harness ───────────────────────────────────────────────────────
+  /** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
+  checkpointSha: string | null = null;
   // ── Signal handler ───────────────────────────────────────────────────────
   sigtermHandler: (() => void) | null = null;
@@ -223,6 +227,7 @@ export class AutoSession {
     this.lastToolInvocationError = null;
     this.isolationDegraded = false;
     this.milestoneMergedInPhases = false;
+    this.checkpointSha = null;
     // Signal handler
     this.sigtermHandler = null;

package/src/resources/extensions/gsd/auto-model-selection.ts CHANGED Viewed

@@ -31,6 +31,9 @@ export function resolvePreferredModelConfig(
   const routingConfig = resolveDynamicRoutingConfig();
   if (!routingConfig.enabled || !routingConfig.tier_models) return undefined;
+  // Don't synthesize a routing config for flat-rate providers (#3453).
+  if (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider)) return undefined;
   const ceilingModel = routingConfig.tier_models.heavy
     ?? (autoModeStartModel ? `${autoModeStartModel.provider}/${autoModeStartModel.id}` : undefined);
   if (!ceilingModel) return undefined;
@@ -71,6 +74,27 @@ export async function selectAndApplyModel(
     let effectiveModelConfig = modelConfig;
     let routingTierLabel = "";
+    // Disable routing for flat-rate providers like GitHub Copilot (#3453).
+    // All models cost the same per request, so downgrading to a cheaper
+    // model provides no cost benefit — it only degrades quality.
+    // Fail-closed: if primary model can't be resolved, fall back to
+    // provider-level signals rather than allowing unwanted downgrades.
+    if (routingConfig.enabled) {
+      const primaryModel = resolveModelId(modelConfig.primary, availableModels, ctx.model?.provider);
+      if (primaryModel) {
+        if (isFlatRateProvider(primaryModel.provider)) {
+          routingConfig.enabled = false;
+        }
+      } else if (
+        (autoModeStartModel && isFlatRateProvider(autoModeStartModel.provider))
+        || (ctx.model?.provider && isFlatRateProvider(ctx.model.provider))
+      ) {
+        // Primary model unresolvable but provider signals indicate flat-rate —
+        // disable routing to prevent quality degradation.
+        routingConfig.enabled = false;
+      }
+    }
     if (routingConfig.enabled) {
       let budgetPct: number | undefined;
       if (routingConfig.budget_pressure !== false) {
@@ -320,3 +344,15 @@ export function resolveModelId<T extends { id: string; provider: string }>(
   // Fall back to first non-extension candidate, or any candidate
   return candidates.find(m => !EXTENSION_PROVIDERS.has(m.provider)) ?? candidates[0];
 }
+/**
+ * Flat-rate providers charge the same per request regardless of model.
+ * Dynamic routing provides no cost benefit — it only degrades quality (#3453).
+ * Uses case-insensitive matching with alias support to prevent fail-open on
+ * provider naming variations (e.g. "copilot" vs "github-copilot").
+ *
+ * The set stores lowercase names only; `isFlatRateProvider` lowercases its
+ * argument before the lookup, so any entry added here must be lowercase too
+ * or it can never match. Matching is exact after lowercasing — there is no
+ * trimming, prefix, or substring matching.
+ */
+const FLAT_RATE_PROVIDERS = new Set(["github-copilot", "copilot"]);
+export function isFlatRateProvider(provider: string): boolean {
+  return FLAT_RATE_PROVIDERS.has(provider.toLowerCase()); // true only when the lowercased name is a set member
+}

package/src/resources/extensions/gsd/auto-post-unit.ts CHANGED Viewed

@@ -52,6 +52,13 @@ import { hasPendingCaptures, loadPendingCaptures, revertExecutorResolvedCaptures
 import { debugLog } from "./debug-logger.js";
 import { runSafely } from "./auto-utils.js";
 import type { AutoSession, SidecarItem } from "./auto/session.js";
+import { getEvidence } from "./safety/evidence-collector.js";
+import { validateFileChanges } from "./safety/file-change-validator.js";
+// crossReferenceEvidence available for future use when verification_evidence is stored in DB
+// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
+import { validateContent } from "./safety/content-validator.js";
+import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
+import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
 /** Maximum verification retry attempts before escalating to blocker placeholder (#2653). */
 const MAX_VERIFICATION_RETRIES = 3;
@@ -437,6 +444,87 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
       debugLog("postUnit", { phase: "rogue-detection", error: String(e) });
     }
+    // ── Safety harness: post-unit validation ──
+    try {
+      const { loadEffectiveGSDPreferences } = await import("./preferences.js");
+      const prefs = loadEffectiveGSDPreferences()?.preferences;
+      const safetyConfig = resolveSafetyHarnessConfig(
+        prefs?.safety_harness as Record<string, unknown> | undefined,
+      );
+      if (safetyConfig.enabled) {
+        const { milestone: sMid, slice: sSid, task: sTid } = parseUnitId(s.currentUnit.id);
+        // File change validation (execute-task only, after auto-commit)
+        if (safetyConfig.file_change_validation && s.currentUnit.type === "execute-task" && sMid && sSid && sTid && isDbAvailable()) {
+          try {
+            const taskRow = getTask(sMid, sSid, sTid);
+            if (taskRow) {
+              const expectedOutput = taskRow.expected_output ?? [];
+              const plannedFiles = taskRow.files ?? [];
+              const audit = validateFileChanges(s.basePath, expectedOutput, plannedFiles);
+              if (audit && audit.violations.length > 0) {
+                const warnings = audit.violations.filter(v => v.severity === "warning");
+                for (const v of warnings) {
+                  logWarning("safety", `file-change: ${v.file} — ${v.reason}`);
+                }
+                if (warnings.length > 0) {
+                  ctx.ui.notify(
+                    `Safety: ${warnings.length} unexpected file change(s) outside task plan`,
+                    "warning",
+                  );
+                }
+              }
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-file-change", error: String(e) });
+          }
+        }
+        // Evidence cross-reference (execute-task only)
+        // Verification evidence is passed via the complete-task tool call and
+        // stored in the SUMMARY.md on disk — not available as structured data
+        // in the DB. The evidence collector tracks actual bash tool calls, so
+        // we can still detect units that claimed success but ran no commands.
+        if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") {
+          try {
+            const actual = getEvidence();
+            const bashCalls = actual.filter(e => e.kind === "bash");
+            // If the task is marked complete but zero bash commands were run,
+            // it's suspicious — the LLM may have fabricated results.
+            if (sMid && sSid && sTid && isDbAvailable()) {
+              const taskRow = getTask(sMid, sSid, sTid);
+              if (taskRow?.status === "complete" && taskRow.verify && bashCalls.length === 0) {
+                logWarning("safety", "task marked complete with verification commands but no bash calls were executed");
+                ctx.ui.notify(
+                  `Safety: task ${sTid} has verification commands but no bash calls were recorded`,
+                  "warning",
+                );
+              }
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-evidence-xref", error: String(e) });
+          }
+        }
+        // Content validation (plan-slice, plan-milestone)
+        if (safetyConfig.content_validation) {
+          try {
+            const artifactPath = resolveArtifactForContent(s.currentUnit.type, s.currentUnit.id, s.basePath);
+            const contentViolations = validateContent(s.currentUnit.type, artifactPath);
+            for (const v of contentViolations) {
+              logWarning("safety", `content: ${v.reason}`);
+              ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
+            }
+          } catch (e) {
+            debugLog("postUnit", { phase: "safety-content-validation", error: String(e) });
+          }
+        }
+      }
+    } catch (e) {
+      debugLog("postUnit", { phase: "safety-harness", error: String(e) });
+    }
     // Artifact verification
     let triggerArtifactVerified = false;
     if (!s.currentUnit.type.startsWith("hook/")) {

package/src/resources/extensions/gsd/auto-timers.ts CHANGED Viewed

@@ -106,8 +106,9 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
     }
   }
   const estimateMinutes = taskEstimate ? parseEstimateMinutes(taskEstimate) : null;
+  const MAX_TIMEOUT_SCALE = 6; // Cap at 6x (60min task). Prevents 2h+ tasks from creating 120min+ timeout windows.
   const timeoutScale = estimateMinutes && estimateMinutes > 0
-    ? Math.max(1, estimateMinutes / 10)  // 10min task = 1x, 30min = 3x, 2h = 12x
+    ? Math.min(MAX_TIMEOUT_SCALE, Math.max(1, estimateMinutes / 10))
     : 1;
   const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000 * timeoutScale;

package/src/resources/extensions/gsd/bootstrap/db-tools.ts CHANGED Viewed

@@ -704,8 +704,14 @@ export function registerDbTools(pi: ExtensionAPI): void {
       };
     }
     try {
+      // Coerce string items to objects for verificationEvidence (#3541).
+      const coerced = { ...params };
+      coerced.verificationEvidence = (params.verificationEvidence ?? []).map((v: any) =>
+        typeof v === "string" ? { command: v, exitCode: -1, verdict: "unknown (coerced from string)", durationMs: 0 } : v,
+      );
       const { handleCompleteTask } = await import("../tools/complete-task.js");
-      const result = await handleCompleteTask(params, process.cwd());
+      const result = await handleCompleteTask(coerced, process.cwd());
       if ("error" in result) {
         return {
           content: [{ type: "text" as const, text: `Error completing task: ${result.error}` }],
@@ -761,12 +767,15 @@ export function registerDbTools(pi: ExtensionAPI): void {
       keyDecisions: Type.Optional(Type.Array(Type.String(), { description: "List of key decisions made during this task" })),
       blockerDiscovered: Type.Optional(Type.Boolean({ description: "Whether a plan-invalidating blocker was discovered" })),
       verificationEvidence: Type.Optional(Type.Array(
-        Type.Object({
-          command: Type.String({ description: "Verification command that was run" }),
-          exitCode: Type.Number({ description: "Exit code of the command" }),
-          verdict: Type.String({ description: "Pass/fail verdict (e.g. '✅ pass', '❌ fail')" }),
-          durationMs: Type.Number({ description: "Duration of the command in milliseconds" }),
-        }),
+        Type.Union([
+          Type.Object({
+            command: Type.String({ description: "Verification command that was run" }),
+            exitCode: Type.Number({ description: "Exit code of the command" }),
+            verdict: Type.String({ description: "Pass/fail verdict (e.g. '✅ pass', '❌ fail')" }),
+            durationMs: Type.Number({ description: "Duration of the command in milliseconds" }),
+          }),
+          Type.String({ description: "Fallback: verification summary string" }),
+        ]),
         { description: "Array of verification evidence entries" },
       )),
     }),
@@ -787,8 +796,42 @@ export function registerDbTools(pi: ExtensionAPI): void {
       };
     }
     try {
+      // Coerce string items to objects for fields where LLMs sometimes pass
+      // plain strings instead of the expected { key, value } shape (#3541).
+      // Parses "key — value" or "key - value" format when possible.
+      const splitPair = (s: string): [string, string] => {
+        const m = s.match(/^(.+?)\s*(?:—|-)\s+(.+)$/);
+        return m ? [m[1].trim(), m[2].trim()] : [s.trim(), ""];
+      };
+      const coerced = { ...params };
+      coerced.filesModified = (params.filesModified ?? []).map((f: any) => {
+        if (typeof f !== "string") return f;
+        const [path, description] = splitPair(f);
+        return { path, description };
+      });
+      coerced.requires = (params.requires ?? []).map((r: any) => {
+        if (typeof r !== "string") return r;
+        const [slice, provides] = splitPair(r);
+        return { slice, provides };
+      });
+      coerced.requirementsAdvanced = (params.requirementsAdvanced ?? []).map((r: any) => {
+        if (typeof r !== "string") return r;
+        const [id, how] = splitPair(r);
+        return { id, how };
+      });
+      coerced.requirementsValidated = (params.requirementsValidated ?? []).map((r: any) => {
+        if (typeof r !== "string") return r;
+        const [id, proof] = splitPair(r);
+        return { id, proof };
+      });
+      coerced.requirementsInvalidated = (params.requirementsInvalidated ?? []).map((r: any) => {
+        if (typeof r !== "string") return r;
+        const [id, what] = splitPair(r);
+        return { id, what };
+      });
       const { handleCompleteSlice } = await import("../tools/complete-slice.js");
-      const result = await handleCompleteSlice(params, process.cwd());
+      const result = await handleCompleteSlice(coerced, process.cwd());
       if ("error" in result) {
         return {
           content: [{ type: "text" as const, text: `Error completing slice: ${result.error}` }],
@@ -850,38 +893,53 @@ export function registerDbTools(pi: ExtensionAPI): void {
       drillDownPaths: Type.Optional(Type.Array(Type.String(), { description: "Paths to task summaries for drill-down" })),
       affects: Type.Optional(Type.Array(Type.String(), { description: "Downstream slices affected" })),
       requirementsAdvanced: Type.Optional(Type.Array(
-        Type.Object({
-          id: Type.String({ description: "Requirement ID" }),
-          how: Type.String({ description: "How it was advanced" }),
-        }),
+        Type.Union([
+          Type.Object({
+            id: Type.String({ description: "Requirement ID" }),
+            how: Type.String({ description: "How it was advanced" }),
+          }),
+          Type.String({ description: "Fallback: 'ID — how' string" }),
+        ]),
         { description: "Requirements advanced by this slice" },
       )),
       requirementsValidated: Type.Optional(Type.Array(
-        Type.Object({
-          id: Type.String({ description: "Requirement ID" }),
-          proof: Type.String({ description: "What proof validates it" }),
-        }),
+        Type.Union([
+          Type.Object({
+            id: Type.String({ description: "Requirement ID" }),
+            proof: Type.String({ description: "What proof validates it" }),
+          }),
+          Type.String({ description: "Fallback: 'ID — proof' string" }),
+        ]),
         { description: "Requirements validated by this slice" },
       )),
       requirementsInvalidated: Type.Optional(Type.Array(
-        Type.Object({
-          id: Type.String({ description: "Requirement ID" }),
-          what: Type.String({ description: "What changed" }),
-        }),
+        Type.Union([
+          Type.Object({
+            id: Type.String({ description: "Requirement ID" }),
+            what: Type.String({ description: "What changed" }),
+          }),
+          Type.String({ description: "Fallback: 'ID — what' string" }),
+        ]),
         { description: "Requirements invalidated or re-scoped" },
       )),
       filesModified: Type.Optional(Type.Array(
-        Type.Object({
-          path: Type.String({ description: "File path" }),
-          description: Type.String({ description: "What changed" }),
-        }),
+        Type.Union([
+          Type.Object({
+            path: Type.String({ description: "File path" }),
+            description: Type.String({ description: "What changed" }),
+          }),
+          Type.String({ description: "Fallback: file path string" }),
+        ]),
         { description: "Files modified with descriptions" },
       )),
       requires: Type.Optional(Type.Array(
-        Type.Object({
-          slice: Type.String({ description: "Dependency slice ID" }),
-          provides: Type.String({ description: "What was consumed from it" }),
-        }),
+        Type.Union([
+          Type.Object({
+            slice: Type.String({ description: "Dependency slice ID" }),
+            provides: Type.String({ description: "What was consumed from it" }),
+          }),
+          Type.String({ description: "Fallback: slice ID string" }),
+        ]),
         { description: "Upstream slice dependencies consumed" },
       )),
     }),

package/src/resources/extensions/gsd/bootstrap/register-hooks.ts CHANGED Viewed

@@ -18,6 +18,9 @@ import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"
 import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
 import { saveActivityLog } from "../activity-log.js";
 import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
+import { recordToolCall as safetyRecordToolCall, recordToolResult as safetyRecordToolResult } from "../safety/evidence-collector.js";
+import { classifyCommand } from "../safety/destructive-guard.js";
+import { logWarning as safetyLogWarning } from "../workflow-logger.js";
 // Skip the welcome screen on the very first session_start — cli.ts already
 // printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@@ -203,6 +206,26 @@ export function registerHooks(pi: ExtensionAPI): void {
     if (result.block) return result;
   });
+  // ── Safety harness: evidence collection + destructive command warnings ──
+  pi.on("tool_call", async (event, ctx) => {
+    if (!isAutoActive()) return;
+    safetyRecordToolCall(event.toolName, event.input as Record<string, unknown>);
+    // Destructive command classification (warn only, never block)
+    if (isToolCallEventType("bash", event)) {
+      const classification = classifyCommand(event.input.command);
+      if (classification.destructive) {
+        safetyLogWarning("safety", `destructive command: ${classification.labels.join(", ")}`, {
+          command: String(event.input.command).slice(0, 200),
+        });
+        ctx.ui.notify(
+          `Destructive command detected: ${classification.labels.join(", ")}`,
+          "warning",
+        );
+      }
+    }
+  });
   pi.on("tool_result", async (event) => {
     if (event.toolName !== "ask_user_questions") return;
     const milestoneId = getDiscussionMilestoneId();
@@ -268,6 +291,10 @@ export function registerHooks(pi: ExtensionAPI): void {
         : (typeof event.result?.content?.[0]?.text === "string" ? event.result.content[0].text : String(event.result));
       recordToolInvocationError(event.toolName, errorText);
     }
+    // Safety harness: record tool execution results for evidence cross-referencing
+    if (isAutoActive()) {
+      safetyRecordToolResult(event.toolCallId, event.toolName, event.result, event.isError);
+    }
   });
   pi.on("model_select", async (_event, ctx) => {

package/src/resources/extensions/gsd/bootstrap/system-context.ts CHANGED Viewed

@@ -6,9 +6,10 @@ import type { ExtensionContext } from "@gsd/pi-coding-agent";
 import { logWarning } from "../workflow-logger.js";
 import { debugTime } from "../debug-logger.js";
-import { loadPrompt } from "../prompt-loader.js";
+import { loadPrompt, getTemplatesDir } from "../prompt-loader.js";
 import { readForensicsMarker } from "../forensics.js";
 import { resolveAllSkillReferences, renderPreferencesForSystemPrompt, loadEffectiveGSDPreferences } from "../preferences.js";
+import { resolveSkillReference } from "../preferences-skills.js";
 import { resolveGsdRootFile, resolveSliceFile, resolveSlicePath, resolveTaskFile, resolveTaskFiles, resolveTasksDir, relSliceFile, relSlicePath, relTaskFile } from "../paths.js";
 import { hasSkillSnapshot, detectNewSkills, formatSkillsXml } from "../skill-discovery.js";
 import { getActiveAutoWorktreeContext } from "../auto-worktree.js";
@@ -20,6 +21,31 @@ import { markCmuxPromptShown, shouldPromptToEnableCmux } from "../../cmux/index.
 const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd");
+/**
+ * Bundled skill triggers — resolved dynamically at runtime instead of
+ * hardcoding absolute paths in the system prompt template. Only skills
+ * that actually exist on disk are included in the table. (#3575)
+ *
+ * `buildBundledSkillsTable` renders this list as a two-column markdown table
+ * ("Trigger" / "Skill to load"). Each entry's `skill` name is passed to
+ * `resolveSkillReference` together with `process.cwd()`; entries whose
+ * resolution reports `method === "unresolved"` are dropped, and every other
+ * entry becomes one row showing the trigger text and the resolved path in
+ * backticks. When no entry resolves, an italic one-line notice is returned
+ * instead of a table with a header and no rows.
+ */
+const BUNDLED_SKILL_TRIGGERS: Array<{ trigger: string; skill: string }> = [
+  { trigger: "Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling", skill: "frontend-design" },
+  { trigger: "macOS or iOS apps - SwiftUI, Xcode, App Store", skill: "swiftui" },
+  { trigger: "Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail", skill: "debug-like-expert" },
+];
+function buildBundledSkillsTable(): string {
+  const cwd = process.cwd();
+  const rows: string[] = [];
+  for (const { trigger, skill } of BUNDLED_SKILL_TRIGGERS) {
+    const resolution = resolveSkillReference(skill, cwd);
+    if (resolution.method === "unresolved") continue; // skill not installed — omit from prompt
+    rows.push(`| ${trigger} | \`${resolution.resolvedPath}\` |`); // NOTE(review): values are interpolated unescaped; a `|` in a path would break the row — presumably never occurs, confirm
+  }
+  if (rows.length === 0) { // nothing resolved: return a notice rather than a header-only table
+    return "*No bundled skills found. Install skills to `~/.agents/skills/` or `~/.claude/skills/`.*";
+  }
+  return `| Trigger | Skill to load |\n|---|---|\n${rows.join("\n")}`;
+}
 function warnDeprecatedAgentInstructions(): void {
   const paths = [
     join(gsdHome, "agent-instructions.md"),
@@ -43,7 +69,10 @@ export async function buildBeforeAgentStartResult(
   if (!existsSync(join(process.cwd(), ".gsd"))) return undefined;
   const stopContextTimer = debugTime("context-inject");
-  const systemContent = loadPrompt("system");
+  const systemContent = loadPrompt("system", {
+    bundledSkillsTable: buildBundledSkillsTable(),
+    templatesDir: getTemplatesDir(),
+  });
   const loadedPreferences = loadEffectiveGSDPreferences();
   if (shouldPromptToEnableCmux(loadedPreferences?.preferences)) {
     markCmuxPromptShown();

package/src/resources/extensions/gsd/preferences-types.ts CHANGED Viewed

@@ -105,6 +105,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
   "experimental",
   "codebase",
   "slice_parallel",
+  "safety_harness",
 ]);
 /** Canonical list of all dispatch unit types. */
@@ -291,6 +292,18 @@ export interface GSDPreferences {
   codebase?: CodebaseMapPreferences;
   /** Slice-level parallelism within a milestone. Disabled by default. */
   slice_parallel?: { enabled?: boolean; max_workers?: number };
+  /** LLM safety harness configuration. Monitors, validates, and constrains LLM behavior during auto-mode. Enabled by default with warn-and-continue policy. */
+  safety_harness?: {
+    enabled?: boolean;
+    evidence_collection?: boolean;
+    file_change_validation?: boolean;
+    evidence_cross_reference?: boolean;
+    destructive_command_warnings?: boolean;
+    content_validation?: boolean;
+    checkpoints?: boolean;
+    auto_rollback?: boolean;
+    timeout_scale_cap?: number;
+  };
 }
 export interface LoadedGSDPreferences {

package/src/resources/extensions/gsd/prompt-loader.ts CHANGED Viewed

@@ -51,6 +51,14 @@ const __extensionDir = resolveExtensionDir();
 const promptsDir = join(__extensionDir, "prompts");
 const templatesDir = join(__extensionDir, "templates");
+/**
+ * Return the resolved templates directory path for use in prompts.
+ * Avoids hardcoding `~/.gsd/agent/extensions/gsd/templates/` in templates. (#3575)
+ *
+ * @returns The module-level `templatesDir` constant — the `templates` folder
+ *   joined onto the resolved extension directory. It is a module-level const,
+ *   so every call returns the same string.
+ */
+export function getTemplatesDir(): string {
+  return templatesDir;
+}
 // Cache all templates eagerly at module load — a running session uses the
 // template versions that were on disk at startup, immune to later overwrites.
 const templateCache = new Map<string, string>();

package/src/resources/extensions/gsd/prompts/system.md CHANGED Viewed

@@ -24,13 +24,9 @@ Leave the project in a state where the next agent can immediately understand wha
 ## Skills
-GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches.
+GSD ships with bundled skills. Load the relevant skill file with the `read` tool before starting work when the task matches. Use bare skill names — GSD resolves them to the correct path automatically.
-| Trigger | Skill to load |
-|---|---|
-| Frontend UI - web components, pages, landing pages, dashboards, React/HTML/CSS, styling | `~/.gsd/agent/skills/frontend-design/SKILL.md` |
-| macOS or iOS apps - SwiftUI, Xcode, App Store | `~/.gsd/agent/skills/swiftui/SKILL.md` |
-| Debugging - complex bugs, failing tests, root-cause investigation after standard approaches fail | `~/.gsd/agent/skills/debug-like-expert/SKILL.md` |
+{{bundledSkillsTable}}
 ## Hard Rules
@@ -119,7 +115,7 @@ In all modes, slices commit sequentially on the active branch; there are no per-
 ### Artifact Templates
 Templates showing the expected format for each artifact type are in:
-`~/.gsd/agent/extensions/gsd/templates/`
+`{{templatesDir}}`
 **Always read the relevant template before writing an artifact** to match the expected structure exactly. The parsers that read these files depend on specific formatting: