npm - gsd-pi - Versions diffs - 2.59.0-dev.023bd39 → 2.59.0-dev.d77b3dd - Mend

gsd-pi 2.59.0-dev.023bd39 → 2.59.0-dev.d77b3dd

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/src/resources/extensions/gsd/captures.ts CHANGED Viewed

@@ -15,7 +15,7 @@ import { gsdRoot } from "./paths.js";
 // ─── Types ────────────────────────────────────────────────────────────────────
-export type Classification = "quick-task" | "inject" | "defer" | "replan" | "note";
+export type Classification = "quick-task" | "inject" | "defer" | "replan" | "note" | "stop" | "backtrack";
 export interface CaptureEntry {
   id: string;
@@ -42,7 +42,7 @@ export interface TriageResult {
 const CAPTURES_FILENAME = "CAPTURES.md";
 const VALID_CLASSIFICATIONS: readonly string[] = [
-  "quick-task", "inject", "defer", "replan", "note",
+  "quick-task", "inject", "defer", "replan", "note", "stop", "backtrack",
 ];
 // ─── Path Resolution ──────────────────────────────────────────────────────────
@@ -285,6 +285,75 @@ export function loadActionableCaptures(basePath: string, currentMilestoneId?: st
   );
 }
+/**
+ * Collect captures that should pause auto-mode before the next dispatch.
+ *
+ * A capture qualifies when its status is "resolved", it has not been
+ * executed, and its classification is either "stop" or "backtrack" — both
+ * kinds are returned here, not only plain stop directives.
+ * The pre-dispatch guard pipeline (runGuards) checks this list.
+ *
+ * @param basePath Base path handed to loadAllCaptures.
+ * @returns Matching captures, in the order loadAllCaptures yields them.
+ */
+export function loadStopCaptures(basePath: string): CaptureEntry[] {
+  const halting: CaptureEntry[] = [];
+  for (const entry of loadAllCaptures(basePath)) {
+    if (entry.status !== "resolved" || entry.executed) continue;
+    if (entry.classification === "stop" || entry.classification === "backtrack") {
+      halting.push(entry);
+    }
+  }
+  return halting;
+}
+/**
+ * Collect unexecuted backtrack captures only — directives telling auto-mode
+ * to abandon the current milestone and return to a previous one.
+ *
+ * @param basePath Base path handed to loadAllCaptures.
+ * @returns Captures that are resolved, not executed, and classified "backtrack".
+ */
+export function loadBacktrackCaptures(basePath: string): CaptureEntry[] {
+  const isPendingBacktrack = (entry: CaptureEntry): boolean => {
+    if (entry.status !== "resolved") return false;
+    if (entry.executed) return false;
+    return entry.classification === "backtrack";
+  };
+  return loadAllCaptures(basePath).filter(isPendingBacktrack);
+}
+/**
+ * Revert captures that were silenced by non-triage agents.
+ *
+ * When an execute-task or other non-triage agent writes `**Status:** resolved`
+ * to CAPTURES.md, it bypasses the triage pipeline entirely. This function
+ * detects such captures (resolved but missing the Classification field that
+ * triage always writes) and reverts them to pending so the triage sidecar
+ * picks them up properly.
+ *
+ * The file is rewritten only when at least one section actually changed.
+ *
+ * @param basePath Base path used to resolve the captures file.
+ * @returns The number of capture sections whose status line was rewritten.
+ */
+export function revertExecutorResolvedCaptures(basePath: string): number {
+  const filePath = resolveCapturesPath(basePath);
+  if (!existsSync(filePath)) return 0;
+  let content = readFileSync(filePath, "utf-8");
+  let reverted = 0;
+  for (const capture of loadAllCaptures(basePath)) {
+    // A properly triaged capture has both resolved status AND a classification.
+    // An executor-silenced capture has resolved status but NO classification.
+    if (capture.status !== "resolved" || capture.classification) continue;
+    const sectionRegex = new RegExp(
+      `(### ${escapeRegex(capture.id)}\\n(?:(?!### ).)*?)(?=### |$)`,
+      "s",
+    );
+    const match = sectionRegex.exec(content);
+    if (!match) continue;
+    const originalSection = match[1];
+    const updatedSection = originalSection.replace(
+      /\*\*Status:\*\*\s*resolved/i,
+      "**Status:** pending",
+    );
+    // Nothing to write back if the status line did not have the expected shape.
+    if (updatedSection === originalSection) continue;
+    // Splice by index instead of String.replace(regex, text): capture text is
+    // user-authored, and sequences such as "$&" or "$1" in a replacement string
+    // would be expanded as replacement patterns and corrupt the section.
+    content =
+      content.slice(0, match.index) +
+      updatedSection +
+      content.slice(match.index + originalSection.length);
+    reverted++;
+  }
+  if (reverted > 0) {
+    writeFileSync(filePath, content, "utf-8");
+  }
+  return reverted;
+}
 /**
  * Retroactively stamp a capture with a milestone ID.
  *

package/src/resources/extensions/gsd/complexity-classifier.ts CHANGED Viewed

@@ -212,7 +212,7 @@ function analyzePlanComplexity(
 /**
  * Extract task metadata from the task plan file on disk.
  */
-function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata {
+export function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata {
   const meta: TaskMetadata = {};
   const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
   if (!mid || !sid || !tid) return meta;

package/src/resources/extensions/gsd/context-masker.ts ADDED Viewed

@@ -0,0 +1,74 @@
+/**
+ * Observation masking for GSD auto-mode sessions.
+ *
+ * Replaces tool result content older than N turns with a placeholder.
+ * Reduces context bloat between compactions with zero LLM overhead.
+ * Preserves message ordering, roles, and all assistant/user messages.
+ *
+ * Operates on the pi-ai Message[] format (post-convertToLlm, pre-provider):
+ *   - toolResult messages: { role: "toolResult", content: TextContent[] }
+ *   - bash results are already converted to: { role: "user", content: [{type:"text",text:"..."}] }
+ *     and start with "Ran `" from bashExecutionToText.
+ */
+interface MaskableMessage {
+  role: string; // compared against "user" and "toolResult" by the helpers in this file
+  content: unknown; // may be a string or an array of content blocks; replaced wholesale when masked
+  type?: string;
+  [key: string]: unknown; // other fields are carried over unchanged by the object spread when masking
+}
+const MASK_PLACEHOLDER = "[result masked — within summarized history]"; // text substituted for a masked result
+const MASK_CONTENT_BLOCK = [{ type: "text" as const, text: MASK_PLACEHOLDER }]; // one shared array; every masked message references this same instance
+/**
+ * Locate the index where the most recent `keepRecentTurns` user turns begin.
+ *
+ * Walks the messages from newest to oldest counting genuine user turns:
+ * messages with role "user" that are not bash-result messages. Tool results
+ * have role "toolResult" and are never counted.
+ *
+ * @returns The index of the `keepRecentTurns`-th most recent genuine user
+ *   message, or 0 when fewer such turns exist.
+ */
+function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number): number {
+  let genuineTurns = 0;
+  let idx = messages.length;
+  while (idx-- > 0) {
+    const candidate = messages[idx];
+    // Bash-result user messages (converted from bashExecution) are not real user turns.
+    const isGenuineUserTurn = candidate.role === "user" && !isBashResultUserMessage(candidate);
+    if (!isGenuineUserTurn) continue;
+    genuineTurns += 1;
+    if (genuineTurns >= keepRecentTurns) return idx;
+  }
+  return 0;
+}
+/**
+ * Detect user messages that originated from bashExecution.
+ * After convertToLlm, these are {role: "user", content: [{type:"text", text:"Ran `cmd`\n..."}]}.
+ * The bashExecutionToText format always starts with "Ran `".
+ *
+ * Always returns a real boolean: an empty content array or a falsy first
+ * block yields `false` rather than leaking `undefined`/`null` to callers.
+ *
+ * @param m Message to inspect.
+ * @returns true when the first content block is an object whose `text`
+ *   string starts with "Ran `".
+ */
+function isBashResultUserMessage(m: MaskableMessage): boolean {
+  if (m.role !== "user" || !Array.isArray(m.content)) return false;
+  const first: unknown = m.content[0];
+  if (typeof first !== "object" || first === null || !("text" in first)) return false;
+  const text = (first as { text: unknown }).text;
+  return typeof text === "string" && text.startsWith("Ran `");
+}
+/**
+ * Decide whether a message's content may be replaced by the mask placeholder.
+ * Two kinds qualify: tool results (role "toolResult" in pi-ai format) and
+ * bash-result user messages (converted from bashExecution by convertToLlm).
+ */
+function isMaskableMessage(m: MaskableMessage): boolean {
+  const isToolResult = m.role === "toolResult";
+  return isToolResult || Boolean(isBashResultUserMessage(m));
+}
+/**
+ * Build a function that masks old tool output in a message list.
+ *
+ * The returned function leaves every message at or after the turn boundary
+ * untouched. Before the boundary, maskable messages are shallow-copied with
+ * their content replaced by the shared placeholder block; all other messages
+ * are passed through as-is. When the boundary is 0 the input array itself is
+ * returned.
+ *
+ * @param keepRecentTurns Number of recent user turns to keep verbatim (default 8).
+ * @returns A transform from a message array to a (possibly new) message array.
+ */
+export function createObservationMask(keepRecentTurns: number = 8) {
+  return (messages: MaskableMessage[]): MaskableMessage[] => {
+    const cutoff = findTurnBoundary(messages, keepRecentTurns);
+    if (cutoff === 0) return messages;
+    return messages.map((message, position) =>
+      // Content may be string or array of content blocks — always replace with array
+      position < cutoff && isMaskableMessage(message)
+        ? { ...message, content: MASK_CONTENT_BLOCK }
+        : message,
+    );
+  };
+}

package/src/resources/extensions/gsd/docs/preferences-reference.md CHANGED Viewed

@@ -189,6 +189,13 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
   - `budget_pressure`: boolean — downgrade model tier when budget is under pressure. Default: `true`.
   - `cross_provider`: boolean — allow routing across different providers. Default: `true`.
   - `hooks`: boolean — enable routing hooks. Default: `true`.
+  - `capability_routing`: boolean — enable capability-profile scoring for model selection within a tier. Requires `enabled: true`. Default: `false`.
+- `context_management`: configures context hygiene for auto-mode sessions. Keys:
+  - `observation_masking`: boolean — mask old tool results to reduce context bloat. Default: `true`.
+  - `observation_mask_turns`: number — keep this many recent turns verbatim (1-50). Default: `8`.
+  - `compaction_threshold_percent`: number — trigger compaction once context usage reaches this fraction of the context window, expressed as a decimal (0.5-0.95, i.e. 50%-95%). Lower values fire compaction earlier, reducing drift. Default: `0.70`.
+  - `tool_result_max_chars`: number — max chars per tool result in GSD sessions (200-10000). Default: `800`.
 - `auto_visualize`: boolean — show a visualizer hint after each milestone completion in auto-mode. Default: `false`.

package/src/resources/extensions/gsd/gsd-db.ts CHANGED Viewed

@@ -1661,11 +1661,11 @@ export function getActiveSliceFromDb(milestoneId: string): SliceRow | null {
   const row = currentDb.prepare(
     `SELECT s.* FROM slices s
      WHERE s.milestone_id = :mid
-       AND s.status NOT IN ('complete', 'done')
+       AND s.status NOT IN ('complete', 'done', 'skipped')
        AND NOT EXISTS (
          SELECT 1 FROM json_each(s.depends) AS dep
          WHERE dep.value NOT IN (
-           SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done')
+           SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done', 'skipped')
          )
        )
      ORDER BY s.sequence, s.id

package/src/resources/extensions/gsd/model-router.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type { ResolvedModelConfig } from "./preferences.js";
 export interface DynamicRoutingConfig {
   enabled?: boolean;
+  capability_routing?: boolean;    // default: false — enable capability profile scoring
   tier_models?: {
     light?: string;
     standard?: string;
@@ -32,6 +33,12 @@ export interface RoutingDecision {
   wasDowngraded: boolean;
   /** Human-readable reason for this decision */
   reason: string;
+  /** How the model was selected. */
+  selectionMethod?: "tier-only" | "capability-scored";
+  /** Capability scores per model (when capability-scored). */
+  capabilityScores?: Record<string, number>;
+  /** Task requirement vector (when capability-scored). */
+  taskRequirements?: Partial<Record<string, number>>;
 }
 // ─── Known Model Tiers ───────────────────────────────────────────────────────
@@ -114,6 +121,91 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
   "deepseek-chat": 0.00014,
 };
+// ─── Capability Profiles (ADR-004 Phase 2) ──────────────────────────────────
+// 7-dimension profiles, 0–100 normalized. Models without a profile
+// score 50 uniformly — capability scoring is a no-op for them.
+export interface ModelCapabilities { // one score per dimension; profiles in this file use a 0–100 scale
+  coding: number;
+  debugging: number;
+  research: number;
+  reasoning: number;
+  speed: number;
+  longContext: number;
+  instruction: number;
+}
+export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = { // keyed by bare model id (no "provider/" prefix); getModelProfile also falls back to substring matching against these keys
+  "claude-opus-4-6":     { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
+  "claude-sonnet-4-6":   { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
+  "claude-haiku-4-5":    { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
+  "gpt-4o":              { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
+  "gpt-4o-mini":         { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
+  "gemini-2.5-pro":      { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
+  "gemini-2.0-flash":    { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
+  "deepseek-chat":       { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
+  "o3":                  { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
+};
+const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = { // per-unit-type weights for each capability dimension; unit types not listed here fall back to { reasoning: 0.5 } in computeTaskRequirements
+  "execute-task":       { coding: 0.9, instruction: 0.7, speed: 0.3 },
+  "research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "research-slice":     { research: 0.9, longContext: 0.7, reasoning: 0.5 },
+  "plan-milestone":     { reasoning: 0.9, coding: 0.5 },
+  "plan-slice":         { reasoning: 0.9, coding: 0.5 },
+  "replan-slice":       { reasoning: 0.9, debugging: 0.6, coding: 0.5 },
+  "reassess-roadmap":   { reasoning: 0.9, research: 0.5 },
+  "complete-slice":     { instruction: 0.8, speed: 0.7 },
+  "run-uat":            { instruction: 0.7, speed: 0.8 },
+  "discuss-milestone":  { reasoning: 0.6, instruction: 0.7 },
+  "complete-milestone": { instruction: 0.8, reasoning: 0.5 },
+};
+/**
+ * Build the weighted capability requirements for a unit of work.
+ *
+ * Starts from the BASE_REQUIREMENTS entry for `unitType` (or `{ reasoning: 0.5 }`
+ * when the unit type has no entry). For "execute-task" units with metadata,
+ * the first matching rule below overlays its weights on that baseline:
+ *   1. a tag equal to doc/docs/readme/comment/config/typo/rename (case-insensitive)
+ *   2. a "concurrency" or "compatibility" complexity keyword
+ *   3. a "migration" or "architecture" complexity keyword
+ *   4. six or more files, or 500 or more estimated lines
+ *
+ * @param unitType Unit type key, e.g. "execute-task" or "plan-slice".
+ * @param metadata Optional task hints; only consulted for "execute-task".
+ * @returns A fresh object mapping capability dimensions to weights.
+ */
+export function computeTaskRequirements(
+  unitType: string,
+  metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
+): Partial<Record<keyof ModelCapabilities, number>> {
+  const baseline: Partial<Record<keyof ModelCapabilities, number>> = {
+    ...(BASE_REQUIREMENTS[unitType] ?? { reasoning: 0.5 }),
+  };
+  if (unitType !== "execute-task" || !metadata) return baseline;
+  const tags = metadata.tags ?? [];
+  const keywords = metadata.complexityKeywords ?? [];
+  const mentions = (a: string, b: string): boolean => keywords.some(k => k === a || k === b);
+  let overlay: Partial<Record<keyof ModelCapabilities, number>> | undefined;
+  if (tags.some(tag => /^(docs?|readme|comment|config|typo|rename)$/i.test(tag))) {
+    overlay = { instruction: 0.9, coding: 0.3, speed: 0.7 };
+  } else if (mentions("concurrency", "compatibility")) {
+    overlay = { debugging: 0.9, reasoning: 0.8 };
+  } else if (mentions("migration", "architecture")) {
+    overlay = { reasoning: 0.9, coding: 0.8 };
+  } else if ((metadata.fileCount ?? 0) >= 6 || (metadata.estimatedLines ?? 0) >= 500) {
+    overlay = { coding: 0.9, reasoning: 0.7 };
+  }
+  return overlay ? { ...baseline, ...overlay } : baseline;
+}
+/**
+ * Rate a capability profile against a weighted requirement vector.
+ *
+ * The result is the weighted average of the profile's scores over the
+ * dimensions named in `requirements`; a dimension missing from the profile
+ * counts as 50.
+ *
+ * @param capabilities Per-dimension scores for one model.
+ * @param requirements Weights per dimension; dimensions not listed are ignored.
+ * @returns The weighted average, or 50 when the weights do not sum to a
+ *   positive number (for example, when `requirements` is empty).
+ */
+export function scoreModel(
+  capabilities: ModelCapabilities,
+  requirements: Partial<Record<keyof ModelCapabilities, number>>,
+): number {
+  let numerator = 0;
+  let denominator = 0;
+  for (const dim of Object.keys(requirements) as (keyof ModelCapabilities)[]) {
+    const weight = requirements[dim] as number;
+    const level = capabilities[dim] ?? 50;
+    numerator += weight * level;
+    denominator += weight;
+  }
+  if (denominator > 0) return numerator / denominator;
+  return 50;
+}
 // ─── Public API ──────────────────────────────────────────────────────────────
 /**
@@ -132,6 +224,8 @@ export function resolveModelForComplexity(
   phaseConfig: ResolvedModelConfig | undefined,
   routingConfig: DynamicRoutingConfig,
   availableModelIds: string[],
+  unitType?: string,
+  metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
 ): RoutingDecision {
   // If no phase config or routing disabled, pass through
   if (!phaseConfig || !routingConfig.enabled) {
@@ -175,25 +269,40 @@ export function resolveModelForComplexity(
   }
   // Find the best model for the requested tier
-  const targetModelId = findModelForTier(
-    requestedTier,
-    routingConfig,
-    availableModelIds,
-    routingConfig.cross_provider !== false,
-  );
+  const useCapabilityScoring = routingConfig.capability_routing && unitType;
+  let targetModelId: string | null;
+  let capabilityScores: Record<string, number> | undefined;
+  let taskRequirements: Partial<Record<string, number>> | undefined;
+  let selectionMethod: "tier-only" | "capability-scored" = "tier-only";
+  if (useCapabilityScoring) {
+    const result = findModelForTierWithCapability(
+      requestedTier, routingConfig, availableModelIds,
+      routingConfig.cross_provider !== false, unitType, metadata,
+    );
+    targetModelId = result.modelId;
+    capabilityScores = Object.keys(result.scores).length > 0 ? result.scores : undefined;
+    taskRequirements = Object.keys(result.requirements).length > 0 ? result.requirements : undefined;
+    selectionMethod = capabilityScores ? "capability-scored" : "tier-only";
+  } else {
+    targetModelId = findModelForTier(
+      requestedTier, routingConfig, availableModelIds,
+      routingConfig.cross_provider !== false,
+    );
+  }
   if (!targetModelId) {
-    // No suitable model found — use configured primary
     return {
       modelId: configuredPrimary,
       fallbacks: phaseConfig.fallbacks,
       tier: requestedTier,
       wasDowngraded: false,
       reason: `no ${requestedTier}-tier model available`,
+      selectionMethod,
     };
   }
-  // Build fallback chain: [downgraded_model, ...configured_fallbacks, configured_primary]
   const fallbacks = [
     ...phaseConfig.fallbacks.filter(f => f !== targetModelId),
     configuredPrimary,
@@ -205,6 +314,9 @@ export function resolveModelForComplexity(
     tier: requestedTier,
     wasDowngraded: true,
     reason: classification.reason,
+    selectionMethod,
+    capabilityScores,
+    taskRequirements,
   };
 }
@@ -226,6 +338,7 @@ export function escalateTier(currentTier: ComplexityTier): ComplexityTier | null
 export function defaultRoutingConfig(): DynamicRoutingConfig {
   return {
     enabled: true,
+    capability_routing: false,
     escalate_on_failure: true,
     budget_pressure: true,
     cross_provider: true,
@@ -298,6 +411,56 @@ function findModelForTier(
   return candidates[0] ?? null;
 }
+/**
+ * Pick a model for `tier`, using capability scores to choose among candidates.
+ *
+ * Selection order:
+ *   1. If `config.tier_models[tier]` names a model that is available (matched
+ *      by full id or by the id with any "provider/" prefix removed), return
+ *      it with empty scores and requirements.
+ *   2. Otherwise score every available model of the tier against the task
+ *      requirements. Models scoring within 2 points of the best score are
+ *      treated as tied; among those, the cheapest wins when `crossProvider`
+ *      is true, then the id that sorts first by localeCompare.
+ *
+ * The winner is found by a direct scan rather than by sorting with a
+ * "within 2 points is a tie" comparator: that comparator is not transitive,
+ * so the sorted order (and the chosen model) could depend on input order.
+ *
+ * @param tier Complexity tier to select a model for.
+ * @param config Routing configuration; only `tier_models` is read here.
+ * @param availableModelIds Ids of the models that may be chosen.
+ * @param crossProvider Whether cost is used as a tie-breaker.
+ * @param unitType Unit type used to compute the task requirements.
+ * @param metadata Optional task hints forwarded to computeTaskRequirements.
+ * @returns The chosen model id (null when the tier has no candidates), the
+ *   score of every candidate, and the requirement vector that was used.
+ */
+function findModelForTierWithCapability(
+  tier: ComplexityTier,
+  config: DynamicRoutingConfig,
+  availableModelIds: string[],
+  crossProvider: boolean,
+  unitType: string,
+  metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
+): { modelId: string | null; scores: Record<string, number>; requirements: Partial<Record<string, number>> } {
+  // Strip an optional "provider/" prefix.
+  const bare = (id: string): string => (id.includes("/") ? id.split("/").pop()! : id);
+  const explicitModel = config.tier_models?.[tier];
+  if (explicitModel) {
+    const bareExplicit = bare(explicitModel);
+    const match = availableModelIds.find(id => bare(id) === bareExplicit || id === explicitModel);
+    if (match) return { modelId: match, scores: {}, requirements: {} };
+  }
+  const requirements = computeTaskRequirements(unitType, metadata);
+  const candidates = availableModelIds.filter(id => getModelTier(id) === tier);
+  if (candidates.length === 0) return { modelId: null, scores: {}, requirements };
+  const scores: Record<string, number> = {};
+  let bestScore = -Infinity;
+  for (const id of candidates) {
+    const score = scoreModel(getModelProfile(bare(id)), requirements);
+    scores[id] = score;
+    if (score > bestScore) bestScore = score;
+  }
+  // Everything within 2 points of the top score competes on cost/name only.
+  const nearBest = candidates.filter(id => bestScore - scores[id] <= 2);
+  // Defensive: if no score is comparable (e.g. NaN), fall back to all candidates.
+  const contenders = nearBest.length > 0 ? nearBest : candidates;
+  let chosen = contenders[0];
+  for (const id of contenders.slice(1)) {
+    let order = 0;
+    if (crossProvider) order = getModelCost(id) - getModelCost(chosen);
+    if (order === 0) order = id.localeCompare(chosen);
+    if (order < 0) chosen = id;
+  }
+  return { modelId: chosen, scores, requirements };
+}
+/**
+ * Look up the capability profile for a model id without provider prefix.
+ *
+ * Resolution order:
+ *   1. an exact key in MODEL_CAPABILITY_PROFILES;
+ *   2. otherwise, among known ids that contain `bareId` or are contained in
+ *      it, the one whose length is closest to `bareId` — so a dated id such
+ *      as "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than to
+ *      the shorter "gpt-4o" that happens to be declared first;
+ *   3. otherwise a neutral profile scoring 50 in every dimension.
+ *
+ * An empty id is never substring-matched (every string contains ""), so it
+ * gets the neutral profile.
+ *
+ * @param bareId Model id with any "provider/" prefix already removed.
+ * @returns The matching profile, or the neutral all-50 profile.
+ */
+function getModelProfile(bareId: string): ModelCapabilities {
+  const neutral: ModelCapabilities = {
+    coding: 50, debugging: 50, research: 50, reasoning: 50, speed: 50, longContext: 50, instruction: 50,
+  };
+  if (bareId.length === 0) return neutral;
+  // Own-property check so ids like "constructor" do not hit Object.prototype.
+  if (Object.prototype.hasOwnProperty.call(MODEL_CAPABILITY_PROFILES, bareId)) {
+    return MODEL_CAPABILITY_PROFILES[bareId];
+  }
+  let closest: ModelCapabilities | undefined;
+  let closestGap = Infinity;
+  for (const [knownId, profile] of Object.entries(MODEL_CAPABILITY_PROFILES)) {
+    if (!bareId.includes(knownId) && !knownId.includes(bareId)) continue;
+    const gap = Math.abs(knownId.length - bareId.length);
+    // Strict "<" keeps the first declared profile when two matches are equally close.
+    if (gap < closestGap) {
+      closest = profile;
+      closestGap = gap;
+    }
+  }
+  return closest ?? neutral;
+}
 function getModelCost(modelId: string): number {
   const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;

package/src/resources/extensions/gsd/phase-anchor.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Phase handoff anchors — compact structured summaries written between
+ * GSD auto-mode phases so downstream agents inherit decisions, blockers,
+ * and intent without re-inferring from scratch.
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { gsdRoot } from "./paths.js";
+export interface PhaseAnchor {
+  phase: string; // used as the anchor's file name (`<phase>.json`) and in the prompt heading
+  milestoneId: string; // stored in the JSON; the on-disk location comes from the separate milestoneId argument
+  generatedAt: string; // presumably a timestamp string — confirm format with the writer
+  intent: string; // rendered as the "**Intent:**" line
+  decisions: string[]; // rendered as bullets under "**Decisions:**" when non-empty
+  blockers: string[]; // rendered as bullets under "**Blockers:**" when non-empty
+  nextSteps: string[]; // rendered as bullets under "**Next steps:**" when non-empty
+}
+/** Directory holding a milestone's anchors: `<gsdRoot>/milestones/<milestoneId>/anchors`. */
+function anchorsDir(basePath: string, milestoneId: string): string {
+  const root = gsdRoot(basePath);
+  return join(root, "milestones", milestoneId, "anchors");
+}
+/** Full path of one phase's anchor file: `<anchorsDir>/<phase>.json`. */
+function anchorPath(basePath: string, milestoneId: string, phase: string): string {
+  const fileName = `${phase}.json`;
+  return join(anchorsDir(basePath, milestoneId), fileName);
+}
+/**
+ * Persist a phase anchor as pretty-printed JSON (2-space indent).
+ *
+ * Creates the milestone's anchors directory (recursively) when it does not
+ * exist, then writes `<anchor.phase>.json` inside it.
+ *
+ * @param basePath Base path passed to gsdRoot.
+ * @param milestoneId Milestone whose anchors directory receives the file.
+ * @param anchor Anchor to serialize; `anchor.phase` determines the file name.
+ */
+export function writePhaseAnchor(basePath: string, milestoneId: string, anchor: PhaseAnchor): void {
+  const targetDir = anchorsDir(basePath, milestoneId);
+  const dirMissing = !existsSync(targetDir);
+  if (dirMissing) mkdirSync(targetDir, { recursive: true });
+  const targetFile = anchorPath(basePath, milestoneId, anchor.phase);
+  const payload = JSON.stringify(anchor, null, 2);
+  writeFileSync(targetFile, payload, "utf-8");
+}
+/**
+ * Check that parsed JSON has the fields formatAnchorForPrompt dereferences:
+ * string `phase` and `intent`, and string arrays `decisions`, `blockers`,
+ * `nextSteps`.
+ */
+function isPhaseAnchor(value: unknown): value is PhaseAnchor {
+  if (typeof value !== "object" || value === null) return false;
+  const candidate = value as Record<string, unknown>;
+  const isStringList = (v: unknown): boolean =>
+    Array.isArray(v) && v.every(item => typeof item === "string");
+  return (
+    typeof candidate.phase === "string" &&
+    typeof candidate.intent === "string" &&
+    isStringList(candidate.decisions) &&
+    isStringList(candidate.blockers) &&
+    isStringList(candidate.nextSteps)
+  );
+}
+/**
+ * Load a previously written phase anchor.
+ *
+ * @param basePath Base path passed to gsdRoot.
+ * @param milestoneId Milestone whose anchors directory is searched.
+ * @param phase Phase name; the file read is `<phase>.json`.
+ * @returns The anchor, or null when the file is missing, unreadable, not
+ *   valid JSON, or lacks the fields needed to format it. Validating here
+ *   keeps a hand-edited or truncated file from throwing later when its
+ *   lists are iterated.
+ */
+export function readPhaseAnchor(basePath: string, milestoneId: string, phase: string): PhaseAnchor | null {
+  const path = anchorPath(basePath, milestoneId, phase);
+  if (!existsSync(path)) return null;
+  try {
+    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
+    return isPhaseAnchor(parsed) ? parsed : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Render an anchor as a markdown block for inclusion in a prompt.
+ *
+ * Output: a "## Handoff from <phase>" heading, the intent line, then one
+ * bulleted section each for decisions, blockers and next steps (a section is
+ * omitted when its list is empty), closed by a "---" rule. Lines are joined
+ * with "\n".
+ *
+ * @param anchor Anchor to render.
+ * @returns The markdown text.
+ */
+export function formatAnchorForPrompt(anchor: PhaseAnchor): string {
+  const out: string[] = [`## Handoff from ${anchor.phase}`, "", `**Intent:** ${anchor.intent}`];
+  const appendSection = (heading: string, items: string[]): void => {
+    if (!(items.length > 0)) return;
+    out.push("", heading);
+    for (const item of items) out.push(`- ${item}`);
+  };
+  appendSection("**Decisions:**", anchor.decisions);
+  appendSection("**Blockers:**", anchor.blockers);
+  appendSection("**Next steps:**", anchor.nextSteps);
+  out.push("", "---");
+  return out.join("\n");
+}

package/src/resources/extensions/gsd/preferences-types.ts CHANGED Viewed

@@ -21,6 +21,13 @@ import type {
   GateEvaluationConfig,
 } from "./types.js";
 import type { DynamicRoutingConfig } from "./model-router.js";
+export interface ContextManagementConfig {
+  observation_masking?: boolean;          // default: true
+  observation_mask_turns?: number;        // default: 8, range: 1-50
+  compaction_threshold_percent?: number;  // default: 0.70, range: 0.5-0.95 — a fraction of the context window, not a 0-100 percentage
+  tool_result_max_chars?: number;         // default: 800, range: 200-10000
+}
 import type { GitHubSyncConfig } from "../github-sync/types.js";
 // ─── Workflow Modes ──────────────────────────────────────────────────────────
@@ -94,6 +101,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
   "forensics_dedup",
   "show_token_cost",
   "stale_commit_threshold_minutes",
+  "context_management",
   "experimental",
 ]);
@@ -227,6 +235,7 @@ export interface GSDPreferences {
   post_unit_hooks?: PostUnitHookConfig[];
   pre_dispatch_hooks?: PreDispatchHookConfig[];
   dynamic_routing?: DynamicRoutingConfig;
+  context_management?: ContextManagementConfig;
   token_profile?: TokenProfile;
   phases?: PhaseSkipPreferences;
   auto_visualize?: boolean;

package/src/resources/extensions/gsd/preferences-validation.ts CHANGED Viewed

@@ -428,6 +428,10 @@ export function validatePreferences(preferences: GSDPreferences): {
         if (typeof dr.hooks === "boolean") validDr.hooks = dr.hooks;
         else errors.push("dynamic_routing.hooks must be a boolean");
       }
+      if (dr.capability_routing !== undefined) {
+        if (typeof dr.capability_routing === "boolean") validDr.capability_routing = dr.capability_routing;
+        else errors.push("dynamic_routing.capability_routing must be a boolean");
+      }
       if (dr.tier_models !== undefined) {
         if (typeof dr.tier_models === "object" && dr.tier_models !== null) {
           const tm = dr.tier_models as Record<string, unknown>;
@@ -452,6 +456,40 @@ export function validatePreferences(preferences: GSDPreferences): {
     }
   }
+  // ─── Context Management ──────────────────────────────────────────────
+  if (preferences.context_management !== undefined) {
+    if (typeof preferences.context_management === "object" && preferences.context_management !== null) {
+      const cm = preferences.context_management as unknown as Record<string, unknown>;
+      const validCm: Record<string, unknown> = {};
+      if (cm.observation_masking !== undefined) {
+        if (typeof cm.observation_masking === "boolean") validCm.observation_masking = cm.observation_masking;
+        else errors.push("context_management.observation_masking must be a boolean");
+      }
+      if (cm.observation_mask_turns !== undefined) {
+        const turns = cm.observation_mask_turns;
+        if (typeof turns === "number" && turns >= 1 && turns <= 50) validCm.observation_mask_turns = turns;
+        else errors.push("context_management.observation_mask_turns must be a number between 1 and 50");
+      }
+      if (cm.compaction_threshold_percent !== undefined) {
+        const pct = cm.compaction_threshold_percent;
+        if (typeof pct === "number" && pct >= 0.5 && pct <= 0.95) validCm.compaction_threshold_percent = pct;
+        else errors.push("context_management.compaction_threshold_percent must be a number between 0.5 and 0.95");
+      }
+      if (cm.tool_result_max_chars !== undefined) {
+        const chars = cm.tool_result_max_chars;
+        if (typeof chars === "number" && chars >= 200 && chars <= 10000) validCm.tool_result_max_chars = chars;
+        else errors.push("context_management.tool_result_max_chars must be a number between 200 and 10000");
+      }
+      if (Object.keys(validCm).length > 0) {
+        validated.context_management = validCm as any;
+      }
+    } else {
+      errors.push("context_management must be an object");
+    }
+  }
   // ─── Parallel Config ────────────────────────────────────────────────────
   if (preferences.parallel && typeof preferences.parallel === "object") {
     const p = preferences.parallel as unknown as Record<string, unknown>;

package/src/resources/extensions/gsd/prompts/execute-task.md CHANGED Viewed

@@ -12,6 +12,8 @@ A researcher explored the codebase and a planner decomposed the work — you are
 {{runtimeContext}}
+{{phaseAnchorSection}}
 {{resumeSection}}
 {{carryForwardSection}}

package/src/resources/extensions/gsd/prompts/rethink.md CHANGED Viewed

@@ -45,6 +45,13 @@ reason: "<reason>"
 ### Unpark a milestone
 Remove the `{ID}-PARKED.md` file from the milestone directory to reactivate it.
+### Skip a slice
+Mark a slice as skipped so auto-mode advances past it without executing. Use the `gsd_skip_slice` tool:
+```
+gsd_skip_slice({ milestone_id: "M003", slice_id: "S02", reason: "Descoped — feature moved to M005" })
+```
+Skipped slices are treated as closed by the state machine (like "complete" but distinct). Use when a slice is no longer needed or has been superseded. The slice data is preserved for reference.
 ### Discard a milestone
 **Permanently** delete a milestone directory and prune it from QUEUE-ORDER.json. **Always confirm with the user before discarding.** Warn explicitly if the milestone has completed work.

package/src/resources/extensions/gsd/prompts/triage-captures.md CHANGED Viewed

@@ -20,6 +20,8 @@ The user captured thoughts during execution using `/gsd capture`. Your job is to
 For each capture, classify it as one of:
+- **stop**: User directive to halt auto-mode. Use when the user says "stop", "halt", "abort", "don't continue", "pause", or otherwise wants execution to cease. Auto-mode will pause once the current unit completes, before the next unit is dispatched; it does not interrupt a unit that is already running. Examples: "stop running", "halt execution", "don't continue".
+- **backtrack**: User directive to abandon the current milestone and return to a previous one. The user believes earlier milestones missed critical features or need rework. Include the target milestone ID (e.g., M003) in the Resolution field. Auto-mode will pause and write a regression marker. Examples: "restart from M003", "go back to milestone 3", "M004 and M005 failed, restart from M003".
 - **quick-task**: Small, self-contained, no downstream impact. Can be done in minutes without modifying the plan. Examples: fix a typo, add a missing import, tweak a config value.
 - **inject**: Belongs in the current slice but wasn't planned. Needs a new task added to the slice plan. Examples: add error handling to a module being built, add a missing test case for current work.
 - **defer**: Belongs in a future slice or milestone. Not urgent for current work. Examples: performance optimization, feature that depends on unbuilt infrastructure, nice-to-have enhancement.
@@ -28,10 +30,12 @@ For each capture, classify it as one of:
 ## Decision Guidelines
+- **ALWAYS classify as stop** when the user explicitly says "stop", "halt", "abort", or "don't continue". Never shoe-horn a stop directive into "replan" or "note".
+- **ALWAYS classify as backtrack** when the user references returning to a previous milestone, restarting from an earlier point, or abandoning current milestone work. Include the target milestone ID in the Resolution field (e.g., "Backtrack to M003").
 - Prefer **quick-task** when the work is clearly small and self-contained.
 - Prefer **inject** over **replan** when only a new task is needed, not rewriting existing ones.
 - Prefer **defer** over **inject** when the work doesn't belong in the current slice's scope.
-- Use **replan** only when remaining incomplete tasks need to change — not just for adding work.
+- Use **replan** only when remaining incomplete tasks in the *current slice* need to change — not for cross-milestone issues.
 - Use **note** for observations that don't require action.
 - When unsure between quick-task and inject, consider: will this take more than 10 minutes? If yes, inject.
@@ -46,6 +50,7 @@ For each capture, classify it as one of:
    - If applicable, which files would be affected
    For captures classified as **note** or **defer**, auto-confirm without asking — these are low-impact.
+   For captures classified as **stop** or **backtrack**, auto-confirm without asking — these are urgent user directives that must be honored immediately.
    For captures classified as **quick-task**, **inject**, or **replan**, ask the user to confirm or choose a different classification.
 3. **Update** `.gsd/CAPTURES.md` — for each capture, update its section with the confirmed classification: