npm - gsd-pi - Versions diffs - 2.48.0-dev.ced2eca → 2.49.0-dev.9e177e9 - Mend

gsd-pi 2.48.0-dev.ced2eca → 2.49.0-dev.9e177e9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

package/src/resources/extensions/gsd/git-service.ts CHANGED Viewed

@@ -18,8 +18,8 @@ import { loadEffectiveGSDPreferences } from "./preferences.js";
 import {
   detectWorktreeName,
-  SLICE_BRANCH_RE,
 } from "./worktree.js";
+import { SLICE_BRANCH_RE, QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js";
 import {
   nativeGetCurrentBranch,
   nativeDetectMainBranch,
@@ -243,8 +243,8 @@ export function readIntegrationBranch(basePath: string, milestoneId: string): st
  *
  * The file is committed immediately so the metadata is persisted in git.
  */
-/** Regex matching GSD quick-task branches: gsd/quick/<num>-<slug> */
-export const QUICK_BRANCH_RE = /^gsd\/quick\//;
+/** Re-export for backward compatibility — canonical definitions in branch-patterns.ts */
+export { QUICK_BRANCH_RE, WORKFLOW_BRANCH_RE } from "./branch-patterns.js";
 export function writeIntegrationBranch(
   basePath: string,
@@ -257,6 +257,10 @@ export function writeIntegrationBranch(
   // to their origin branch on completion. Recording one as the integration
   // target causes milestone merges to land on the wrong branch (#1293).
   if (QUICK_BRANCH_RE.test(branch)) return;
+  // Don't record workflow-template branches (hotfix, bugfix, spike, etc.) —
+  // same root cause as quick-task branches (#2498). All templates create
+  // gsd/<templateId>/<slug> branches that are ephemeral.
+  if (WORKFLOW_BRANCH_RE.test(branch)) return;
   // Validate
   if (!VALID_BRANCH_NAME.test(branch)) return;
   // Skip if already recorded with the same branch (idempotent across restarts).
@@ -441,11 +445,6 @@ export class GitServiceImpl {
     this._milestoneId = milestoneId;
   }
-  /** Convenience wrapper: run git in this repo's basePath. */
-  private git(args: string[], options: { allowFailure?: boolean; input?: string } = {}): string {
-    return runGit(this.basePath, args, options);
-  }
   /**
    * Smart staging: `git add -A` excluding GSD runtime paths via pathspec.
    * Falls back to plain `git add -A` if the exclusion pathspec fails.
@@ -604,11 +603,6 @@ export class GitServiceImpl {
     return nativeGetCurrentBranch(this.basePath);
   }
-  /** True if currently on a GSD slice branch. */
-  // ─── Branch Lifecycle ──────────────────────────────────────────────────
-  // ─── S05 Features ─────────────────────────────────────────────────────
   /**
    * Create a snapshot ref for the given label (typically a slice branch name).
    * Gated on prefs.snapshots === true. Ref path: refs/gsd/snapshots/<label>/<timestamp>
@@ -669,8 +663,6 @@ export class GitServiceImpl {
     }
   }
-  // ─── Merge ─────────────────────────────────────────────────────────────
 }
 // ─── Draft PR Creation ─────────────────────────────────────────────────────

package/src/resources/extensions/gsd/gsd-db.ts CHANGED Viewed

@@ -8,7 +8,7 @@
 import { createRequire } from "node:module";
 import { existsSync, copyFileSync, mkdirSync } from "node:fs";
 import { dirname } from "node:path";
-import type { Decision, Requirement } from "./types.js";
+import type { Decision, Requirement, GateRow, GateId, GateScope, GateStatus, GateVerdict } from "./types.js";
 import { GSDError, GSD_STALE_STATE } from "./errors.js";
 const _require = createRequire(import.meta.url);
@@ -149,7 +149,7 @@ function openRawDb(path: string): unknown {
   return new Database(path);
 }
-const SCHEMA_VERSION = 11;
+const SCHEMA_VERSION = 12;
 function initSchema(db: DbAdapter, fileBacked: boolean): void {
   if (fileBacked) db.exec("PRAGMA journal_mode=WAL");
@@ -355,6 +355,23 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
       )
     `);
+    db.exec(`
+      CREATE TABLE IF NOT EXISTS quality_gates (
+        milestone_id TEXT NOT NULL,
+        slice_id TEXT NOT NULL,
+        gate_id TEXT NOT NULL,
+        scope TEXT NOT NULL DEFAULT 'slice',
+        task_id TEXT NOT NULL DEFAULT '',
+        status TEXT NOT NULL DEFAULT 'pending',
+        verdict TEXT NOT NULL DEFAULT '',
+        rationale TEXT NOT NULL DEFAULT '',
+        findings TEXT NOT NULL DEFAULT '',
+        evaluated_at TEXT DEFAULT NULL,
+        PRIMARY KEY (milestone_id, slice_id, gate_id, task_id),
+        FOREIGN KEY (milestone_id, slice_id) REFERENCES slices(milestone_id, id)
+      )
+    `);
     db.exec("CREATE INDEX IF NOT EXISTS idx_memories_active ON memories(superseded_by)");
     db.exec("CREATE INDEX IF NOT EXISTS idx_replan_history_milestone ON replan_history(milestone_id, created_at)");
@@ -637,6 +654,29 @@ function migrateSchema(db: DbAdapter): void {
       });
     }
+    if (currentVersion < 12) {
+      // v12: add the quality_gates table.
+      // This DDL is kept identical to the CREATE TABLE in initSchema() so that
+      // freshly created and migrated databases share one table shape.
+      // task_id is NOT NULL DEFAULT '' and is used directly in the primary key:
+      // SQLite does not accept expressions such as COALESCE(task_id, '') in a
+      // PRIMARY KEY constraint, and the gate helpers match rows with
+      // `task_id = :tid`, binding '' when no task id is supplied.
+      db.exec(`
+        CREATE TABLE IF NOT EXISTS quality_gates (
+          milestone_id TEXT NOT NULL,
+          slice_id TEXT NOT NULL,
+          gate_id TEXT NOT NULL,
+          scope TEXT NOT NULL DEFAULT 'slice',
+          task_id TEXT NOT NULL DEFAULT '',
+          status TEXT NOT NULL DEFAULT 'pending',
+          verdict TEXT NOT NULL DEFAULT '',
+          rationale TEXT NOT NULL DEFAULT '',
+          findings TEXT NOT NULL DEFAULT '',
+          evaluated_at TEXT DEFAULT NULL,
+          PRIMARY KEY (milestone_id, slice_id, gate_id, task_id),
+          FOREIGN KEY (milestone_id, slice_id) REFERENCES slices(milestone_id, id)
+        )
+      `);
+      // Record the applied migration so it is not run again.
+      db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
+        ":version": 12,
+        ":applied_at": new Date().toISOString(),
+      });
+    }
     db.exec("COMMIT");
   } catch (err) {
     db.exec("ROLLBACK");
@@ -1722,3 +1762,111 @@ export function getAssessment(path: string): Record<string, unknown> | null {
   ).get({ ":path": path });
   return row ?? null;
 }
+// ─── Quality Gates ───────────────────────────────────────────────────────
+/**
+ * Convert a raw `quality_gates` result row into a typed GateRow.
+ *
+ * A null/undefined `task_id` becomes "", falsy `verdict`, `rationale` and
+ * `findings` become "", and a null/undefined `evaluated_at` becomes null.
+ * The remaining columns are cast to their target types without any runtime
+ * validation.
+ */
+function rowToGate(row: Record<string, unknown>): GateRow {
+  return {
+    milestone_id: row["milestone_id"] as string,
+    slice_id: row["slice_id"] as string,
+    gate_id: row["gate_id"] as GateId,
+    scope: row["scope"] as GateScope,
+    task_id: (row["task_id"] as string) ?? "",
+    status: row["status"] as GateStatus,
+    verdict: (row["verdict"] as GateVerdict) || "",
+    rationale: (row["rationale"] as string) || "",
+    findings: (row["findings"] as string) || "",
+    evaluated_at: (row["evaluated_at"] as string) ?? null,
+  };
+}
+/**
+ * Insert a row into `quality_gates` for the given milestone, slice and gate.
+ *
+ * The statement is `INSERT OR IGNORE`, so a row that conflicts with a table
+ * constraint (for example an already existing primary key) is skipped rather
+ * than raising an error. `taskId` is stored as "" when null/undefined and
+ * `status` is stored as "pending" when omitted.
+ *
+ * @throws GSDError with code GSD_STALE_STATE when no database is open.
+ */
+export function insertGateRow(g: {
+  milestoneId: string;
+  sliceId: string;
+  gateId: GateId;
+  scope: GateScope;
+  taskId?: string | null;
+  status?: GateStatus;
+}): void {
+  if (!currentDb) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open");
+  currentDb.prepare(
+    `INSERT OR IGNORE INTO quality_gates (milestone_id, slice_id, gate_id, scope, task_id, status)
+     VALUES (:mid, :sid, :gid, :scope, :tid, :status)`,
+  ).run({
+    ":mid": g.milestoneId,
+    ":sid": g.sliceId,
+    ":gid": g.gateId,
+    ":scope": g.scope,
+    ":tid": g.taskId ?? "",
+    ":status": g.status ?? "pending",
+  });
+}
+/**
+ * Store the evaluation result of a gate on its existing `quality_gates` row.
+ *
+ * Sets `status` to 'complete', writes `verdict`, `rationale` and `findings`,
+ * and stamps `evaluated_at` with the current time as an ISO-8601 string.
+ * The row is selected by milestone, slice, gate and task id; a null/undefined
+ * `taskId` is matched as "". This is a plain UPDATE and its result is not
+ * inspected, so nothing is reported here when no row matches.
+ *
+ * @throws GSDError with code GSD_STALE_STATE when no database is open.
+ */
+export function saveGateResult(g: {
+  milestoneId: string;
+  sliceId: string;
+  gateId: string;
+  taskId?: string | null;
+  verdict: GateVerdict;
+  rationale: string;
+  findings: string;
+}): void {
+  if (!currentDb) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open");
+  currentDb.prepare(
+    `UPDATE quality_gates
+     SET status = 'complete', verdict = :verdict, rationale = :rationale,
+         findings = :findings, evaluated_at = :evaluated_at
+     WHERE milestone_id = :mid AND slice_id = :sid AND gate_id = :gid
+       AND task_id = :tid`,
+  ).run({
+    ":mid": g.milestoneId,
+    ":sid": g.sliceId,
+    ":gid": g.gateId,
+    ":tid": g.taskId ?? "",
+    ":verdict": g.verdict,
+    ":rationale": g.rationale,
+    ":findings": g.findings,
+    ":evaluated_at": new Date().toISOString(),
+  });
+}
+/**
+ * List the gates of a slice whose `status` is still 'pending'.
+ *
+ * @param milestoneId Milestone the slice belongs to.
+ * @param sliceId Slice whose gates are queried.
+ * @param scope Optional filter; when given, only rows with this `scope` are returned.
+ * @returns The matching rows mapped through rowToGate, or an empty array when
+ *   no database is open.
+ */
+export function getPendingGates(milestoneId: string, sliceId: string, scope?: GateScope): GateRow[] {
+  if (!currentDb) return [];
+  const sql = scope
+    ? `SELECT * FROM quality_gates WHERE milestone_id = :mid AND slice_id = :sid AND scope = :scope AND status = 'pending'`
+    : `SELECT * FROM quality_gates WHERE milestone_id = :mid AND slice_id = :sid AND status = 'pending'`;
+  const params: Record<string, unknown> = { ":mid": milestoneId, ":sid": sliceId };
+  // Bind :scope only when the scoped query variant was selected above.
+  if (scope) params[":scope"] = scope;
+  return currentDb.prepare(sql).all(params).map(rowToGate);
+}
+/**
+ * List all gates of a slice regardless of their `status`.
+ *
+ * @param milestoneId Milestone the slice belongs to.
+ * @param sliceId Slice whose gates are queried.
+ * @param scope Optional filter; when given, only rows with this `scope` are returned.
+ * @returns The matching rows mapped through rowToGate, or an empty array when
+ *   no database is open.
+ */
+export function getGateResults(milestoneId: string, sliceId: string, scope?: GateScope): GateRow[] {
+  if (!currentDb) return [];
+  const sql = scope
+    ? `SELECT * FROM quality_gates WHERE milestone_id = :mid AND slice_id = :sid AND scope = :scope`
+    : `SELECT * FROM quality_gates WHERE milestone_id = :mid AND slice_id = :sid`;
+  const params: Record<string, unknown> = { ":mid": milestoneId, ":sid": sliceId };
+  // Bind :scope only when the scoped query variant was selected above.
+  if (scope) params[":scope"] = scope;
+  return currentDb.prepare(sql).all(params).map(rowToGate);
+}
+/**
+ * Mark every still-pending gate of a slice as omitted.
+ *
+ * Rows with `status = 'pending'` get `status` and `verdict` set to 'omitted'
+ * and `evaluated_at` set to the current time as an ISO-8601 string. Rows in
+ * any other status are left untouched. Does nothing when no database is open.
+ */
+export function markAllGatesOmitted(milestoneId: string, sliceId: string): void {
+  if (!currentDb) return;
+  currentDb.prepare(
+    `UPDATE quality_gates SET status = 'omitted', verdict = 'omitted', evaluated_at = :now
+     WHERE milestone_id = :mid AND slice_id = :sid AND status = 'pending'`,
+  ).run({
+    ":mid": milestoneId,
+    ":sid": sliceId,
+    ":now": new Date().toISOString(),
+  });
+}
+/**
+ * Count the gates of a slice that have `scope = 'slice'` and are still pending.
+ *
+ * @returns The count reported by the query, or 0 when no database is open or
+ *   the query yields no row.
+ */
+export function getPendingSliceGateCount(milestoneId: string, sliceId: string): number {
+  if (!currentDb) return 0;
+  const row = currentDb.prepare(
+    `SELECT COUNT(*) as cnt FROM quality_gates
+     WHERE milestone_id = :mid AND slice_id = :sid AND scope = 'slice' AND status = 'pending'`,
+  ).get({ ":mid": milestoneId, ":sid": sliceId });
+  return row ? (row["cnt"] as number) : 0;
+}

package/src/resources/extensions/gsd/guided-flow-queue.ts CHANGED Viewed

@@ -244,12 +244,22 @@ export async function buildExistingMilestonesContext(
     }
   }
-  // For each milestone, include context and status
+  // For each milestone, include context and status.
+  // Completed milestones get a compact summary line only — loading their full
+  // CONTEXT.md + SUMMARY.md files is expensive and triggers 429 rate limits on
+  // projects with many completed milestones (#2379).
   for (const mid of milestoneIds) {
     const registryEntry = state.registry.find(m => m.id === mid);
     const status = registryEntry?.status ?? "unknown";
     const title = registryEntry?.title ?? mid;
+    // Completed milestones: emit a one-liner — the LLM only needs to know
+    // they exist for dedup/dependency purposes, not their full content.
+    if (status === "complete") {
+      sections.push(`### ${mid}: ${title}\n**Status:** complete`);
+      continue;
+    }
     const parts: string[] = [];
     parts.push(`### ${mid}: ${title}\n**Status:** ${status}`);
@@ -271,17 +281,6 @@ export async function buildExistingMilestonesContext(
       }
     }
-    // For completed milestones, include the summary if it exists
-    if (status === "complete") {
-      const summaryFile = resolveMilestoneFile(basePath, mid, "SUMMARY");
-      if (summaryFile) {
-        const content = await loadFile(summaryFile);
-        if (content) {
-          parts.push(`\n**Summary:**\n${content.trim()}`);
-        }
-      }
-    }
     // For active/pending/parked milestones, include the roadmap if it exists
     // (shows what's planned but not yet built)
     if (status === "active" || status === "pending" || status === "parked") {

package/src/resources/extensions/gsd/markdown-renderer.ts CHANGED Viewed

@@ -20,8 +20,10 @@ import {
   getSlice,
   getArtifact,
   insertArtifact,
+  getGateResults,
 } from "./gsd-db.js";
 import type { MilestoneRow, SliceRow, TaskRow, ArtifactRow } from "./gsd-db.js";
+import type { GateRow } from "./types.js";
 import {
   resolveMilestoneFile,
   resolveSliceFile,
@@ -188,7 +190,7 @@ function renderRoadmapMarkdown(milestone: MilestoneRow, slices: SliceRow[]): str
   return `${lines.join("\n").trimEnd()}\n`;
 }
-function renderTaskPlanMarkdown(task: TaskRow): string {
+function renderTaskPlanMarkdown(task: TaskRow, taskGates: GateRow[] = []): string {
   const estimatedSteps = Math.max(1, task.description.trim().split(/\n+/).filter(Boolean).length || 1);
   const estimatedFiles = task.files.length > 0
     ? task.files.length
@@ -251,10 +253,22 @@ function renderTaskPlanMarkdown(task: TaskRow): string {
     lines.push("");
   }
+  // ── Quality Gate Sections (Q5/Q6/Q7) ──────────────────────────────────
+  const gateLabels: Record<string, string> = { Q5: "Failure Modes", Q6: "Load Profile", Q7: "Negative Tests" };
+  for (const [gid, label] of Object.entries(gateLabels)) {
+    const gate = taskGates.find(g => g.gate_id === gid && g.status === "complete");
+    if (gate && gate.verdict !== "omitted") {
+      lines.push(`## ${label}`);
+      lines.push("");
+      lines.push(gate.findings.trim() || `- **Verdict:** ${gate.verdict}\n- **Rationale:** ${gate.rationale}`);
+      lines.push("");
+    }
+  }
   return `${lines.join("\n").trimEnd()}\n`;
 }
-function renderSlicePlanMarkdown(slice: SliceRow, tasks: TaskRow[]): string {
+function renderSlicePlanMarkdown(slice: SliceRow, tasks: TaskRow[], gates: GateRow[] = []): string {
   const lines: string[] = [];
   lines.push(`# ${slice.id}: ${slice.title || slice.id}`);
@@ -274,6 +288,23 @@ function renderSlicePlanMarkdown(slice: SliceRow, tasks: TaskRow[]): string {
   }
   lines.push("");
+  // ── Quality Gate Sections (Q3/Q4) ────────────────────────────────────
+  const q3 = gates.find(g => g.gate_id === "Q3" && g.status === "complete");
+  if (q3 && q3.verdict !== "omitted") {
+    lines.push("## Threat Surface");
+    lines.push("");
+    lines.push(q3.findings.trim() || `- **Verdict:** ${q3.verdict}\n- **Rationale:** ${q3.rationale}`);
+    lines.push("");
+  }
+  const q4 = gates.find(g => g.gate_id === "Q4" && g.status === "complete");
+  if (q4 && q4.verdict !== "omitted") {
+    lines.push("## Requirement Impact");
+    lines.push("");
+    lines.push(q4.findings.trim() || `- **Verdict:** ${q4.verdict}\n- **Rationale:** ${q4.rationale}`);
+    lines.push("");
+  }
   if (slice.proof_level.trim()) {
     lines.push("## Proof Level");
     lines.push("");
@@ -354,7 +385,8 @@ export async function renderPlanFromDb(
   const absPath = resolveSliceFile(basePath, milestoneId, sliceId, "PLAN")
     ?? join(slicePath, `${sliceId}-PLAN.md`);
   const artifactPath = toArtifactPath(absPath, basePath);
-  const content = renderSlicePlanMarkdown(slice, tasks);
+  const sliceGates = getGateResults(milestoneId, sliceId, "slice");
+  const content = renderSlicePlanMarkdown(slice, tasks, sliceGates);
   await writeAndStore(absPath, artifactPath, content, {
     artifact_type: "PLAN",
@@ -387,7 +419,8 @@ export async function renderTaskPlanFromDb(
   mkdirSync(tasksDir, { recursive: true });
   const absPath = join(tasksDir, buildTaskFileName(taskId, "PLAN"));
   const artifactPath = toArtifactPath(absPath, basePath);
-  const content = task.full_plan_md.trim() ? task.full_plan_md : renderTaskPlanMarkdown(task);
+  const taskGates = getGateResults(milestoneId, sliceId, "task").filter(g => g.task_id === taskId);
+  const content = task.full_plan_md.trim() ? task.full_plan_md : renderTaskPlanMarkdown(task, taskGates);
   await writeAndStore(absPath, artifactPath, content, {
     artifact_type: "PLAN",

package/src/resources/extensions/gsd/preferences-types.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import type {
   ParallelConfig,
   ContextSelectionMode,
   ReactiveExecutionConfig,
+  GateEvaluationConfig,
 } from "./types.js";
 import type { DynamicRoutingConfig } from "./model-router.js";
 import type { GitHubSyncConfig } from "../github-sync/types.js";
@@ -87,6 +88,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
   "context_selection",
   "widget_mode",
   "reactive_execution",
+  "gate_evaluation",
   "github",
   "service_tier",
   "forensics_dedup",
@@ -96,7 +98,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set<string>([
 /** Canonical list of all dispatch unit types. */
 export const KNOWN_UNIT_TYPES = [
   "research-milestone", "plan-milestone", "research-slice", "plan-slice",
-  "execute-task", "reactive-execute", "complete-slice", "replan-slice", "reassess-roadmap",
+  "execute-task", "reactive-execute", "gate-evaluate", "complete-slice", "replan-slice", "reassess-roadmap",
   "run-uat", "complete-milestone",
 ] as const;
 export type UnitType = (typeof KNOWN_UNIT_TYPES)[number];
@@ -221,6 +223,8 @@ export interface GSDPreferences {
   widget_mode?: "full" | "small" | "min" | "off";
   /** Reactive (graph-derived parallel) task execution within slices. Disabled by default. */
   reactive_execution?: ReactiveExecutionConfig;
+  /** Parallel quality gate evaluation during slice planning. Disabled by default. */
+  gate_evaluation?: GateEvaluationConfig;
   /** GitHub sync configuration. Opt-in: syncs GSD events to GitHub Issues, Milestones, and PRs. */
   github?: GitHubSyncConfig;
   /** OpenAI service tier preference. "priority" = 2x cost, faster. "flex" = 0.5x cost, slower. Only affects gpt-5.4 models. */

package/src/resources/extensions/gsd/preferences-validation.ts CHANGED Viewed

@@ -538,6 +538,43 @@ export function validatePreferences(preferences: GSDPreferences): {
     }
   }
+  // ─── Gate Evaluation ─────────────────────────────────────────────────────
+  if (preferences.gate_evaluation !== undefined) {
+    if (typeof preferences.gate_evaluation === "object" && preferences.gate_evaluation !== null) {
+      const ge = preferences.gate_evaluation as unknown as Record<string, unknown>;
+      const validGe: Record<string, unknown> = {};
+      if (ge.enabled !== undefined) {
+        if (typeof ge.enabled === "boolean") validGe.enabled = ge.enabled;
+        else errors.push("gate_evaluation.enabled must be a boolean");
+      }
+      if (ge.slice_gates !== undefined) {
+        if (Array.isArray(ge.slice_gates) && ge.slice_gates.every((g: unknown) => typeof g === "string")) {
+          validGe.slice_gates = ge.slice_gates;
+        } else {
+          errors.push("gate_evaluation.slice_gates must be an array of strings");
+        }
+      }
+      if (ge.task_gates !== undefined) {
+        if (typeof ge.task_gates === "boolean") validGe.task_gates = ge.task_gates;
+        else errors.push("gate_evaluation.task_gates must be a boolean");
+      }
+      const knownGeKeys = new Set(["enabled", "slice_gates", "task_gates"]);
+      for (const key of Object.keys(ge)) {
+        if (!knownGeKeys.has(key)) {
+          warnings.push(`unknown gate_evaluation key "${key}" — ignored`);
+        }
+      }
+      if (Object.keys(validGe).length > 0) {
+        validated.gate_evaluation = validGe as unknown as import("./types.js").GateEvaluationConfig;
+      }
+    } else {
+      errors.push("gate_evaluation must be an object");
+    }
+  }
   // ─── Verification Preferences ───────────────────────────────────────────
   if (preferences.verification_commands !== undefined) {
     if (Array.isArray(preferences.verification_commands)) {

package/src/resources/extensions/gsd/prompts/complete-milestone.md CHANGED Viewed

@@ -20,11 +20,13 @@ Then:
 3. **Verify code changes exist.** Run `git diff --stat HEAD $(git merge-base HEAD main) -- ':!.gsd/'` (or the equivalent for the integration branch). If no non-`.gsd/` files appear in the diff, the milestone produced only planning artifacts and no actual code. Record this as a **verification failure**.
 4. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
 5. Verify the milestone's **definition of done** — all slices are `[x]`, all slice summaries exist, and any cross-slice integration points work correctly. Record any unmet items as a **verification failure**.
-6. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
+6. If the roadmap includes a **Horizontal Checklist**, verify each item was addressed during the milestone. Note unchecked items in the milestone summary.
+7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.gsd/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
+8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
 ### Verification Gate — STOP if verification failed
-**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 7.**
+**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 9.**
 **Failure path** (verification failed):
 - Do NOT call `gsd_complete_milestone` — the milestone must not be marked as complete.
@@ -33,13 +35,30 @@ Then:
 - Write a clear summary of what failed and why to help the next attempt.
 - Say: "Milestone {{milestoneId}} verification FAILED — not complete." and stop.
-**Success path** (all verifications passed — continue with steps 7–11):
+**Success path** (all verifications passed — continue with steps 9–13):
-7. **Persist completion through `gsd_complete_milestone`.** Call it with: `milestoneId`, `title`, `oneLiner`, `narrative`, `successCriteriaResults`, `definitionOfDoneResults`, `requirementOutcomes`, `keyDecisions`, `keyFiles`, `lessonsLearned`, `followUps`, `deviations`, `verificationPassed: true`. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
-8. For each requirement whose status changed in step 6, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically.
-9. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
-10. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.
-11. Do not commit manually — the system auto-commits your changes after this unit completes.
+9. **Persist completion through `gsd_complete_milestone`.** Call it with the parameters below. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
+   **Required parameters:**
+   - `milestoneId` (string) — Milestone ID (e.g. M001)
+   - `title` (string) — Milestone title
+   - `oneLiner` (string) — One-sentence summary of what the milestone achieved
+   - `narrative` (string) — Detailed narrative of what happened during the milestone
+   - `successCriteriaResults` (string) — Markdown detailing how each success criterion was met or not met
+   - `definitionOfDoneResults` (string) — Markdown detailing how each definition-of-done item was met
+   - `requirementOutcomes` (string) — Markdown detailing requirement status transitions with evidence
+   - `keyDecisions` (array of strings) — Key architectural/pattern decisions made during the milestone
+   - `keyFiles` (array of strings) — Key files created or modified during the milestone
+   - `lessonsLearned` (array of strings) — Lessons learned during the milestone
+   - `verificationPassed` (boolean) — Must be `true` — confirms that code change verification, success criteria, and definition of done checks all passed before completion
+   **Optional parameters:**
+   - `followUps` (string) — Follow-up items for future milestones
+   - `deviations` (string) — Deviations from the original plan
+10. For each requirement whose status changed in step 8, call `gsd_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.gsd/REQUIREMENTS.md` automatically.
+11. Update `.gsd/PROJECT.md` to reflect milestone completion and current project state.
+12. Review all slice summaries for cross-cutting lessons, patterns, or gotchas that emerged during this milestone. Append any non-obvious, reusable insights to `.gsd/KNOWLEDGE.md`.
+13. Do not commit manually — the system auto-commits your changes after this unit completes.
 - Say: "Milestone {{milestoneId}} complete."
 **Important:** Do NOT skip the code change verification, success criteria, or definition of done verification (steps 3-5). The milestone summary must reflect actual verified outcomes, not assumed success. Verification failures BLOCK completion — there is no override. The milestone stays in its current state until issues are resolved and verification is re-run.

package/src/resources/extensions/gsd/prompts/complete-slice.md CHANGED Viewed

@@ -23,14 +23,15 @@ Then:
 2. {{skillActivation}}
 3. Run all slice-level verification checks defined in the slice plan. All must pass before marking the slice done. If any fail, fix them first.
 4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections.
-5. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_save_decision` with scope="requirement", decision="{requirement-id}", choice="{new-status}", rationale="{evidence}". Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database.
-6. Write `{{sliceSummaryPath}}` (compress all task summaries).
-7. Write `{{sliceUatPath}}` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built.
-8. Review task summaries for `key_decisions`. Append any significant decisions to `.gsd/DECISIONS.md` if missing.
-9. Review task summaries for patterns, gotchas, or non-obvious lessons learned. If any would save future agents from repeating investigation or hitting the same issues, append them to `.gsd/KNOWLEDGE.md`. Only add entries that are genuinely useful — don't pad with obvious observations.
-10. Call `gsd_complete_slice` with milestone_id, slice_id, the slice summary, and the UAT result. Do NOT manually mark the roadmap checkbox — the tool writes to the DB and renders the ROADMAP.md projection automatically.
-11. Do not run git commands — the system commits your changes and handles any merge after this unit succeeds.
-12. Update `.gsd/PROJECT.md` if it exists — refresh current state if needed.
+5. If the slice involved runtime behavior, prepare the **Operational Readiness** section (Q8) to include in the slice summary you write in step 7: health signal, failure signal, recovery procedure, and monitoring gaps. Omit the section entirely for simple slices with no runtime concerns.
+6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_save_decision` with scope="requirement", decision="{requirement-id}", choice="{new-status}", rationale="{evidence}". Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database.
+7. Write `{{sliceSummaryPath}}` (compress all task summaries).
+8. Write `{{sliceUatPath}}` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built.
+9. Review task summaries for `key_decisions`. Append any significant decisions to `.gsd/DECISIONS.md` if missing.
+10. Review task summaries for patterns, gotchas, or non-obvious lessons learned. If any would save future agents from repeating investigation or hitting the same issues, append them to `.gsd/KNOWLEDGE.md`. Only add entries that are genuinely useful — don't pad with obvious observations.
+11. Call `gsd_complete_slice` with milestone_id, slice_id, the slice summary, and the UAT result. Do NOT manually mark the roadmap checkbox — the tool writes to the DB and renders the ROADMAP.md projection automatically.
+12. Do not run git commands — the system commits your changes and handles any merge after this unit succeeds.
+13. Update `.gsd/PROJECT.md` if it exists — refresh current state if needed.
 **You MUST call `gsd_complete_slice` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.**

package/src/resources/extensions/gsd/prompts/execute-task.md CHANGED Viewed

@@ -38,18 +38,21 @@ Then:
    - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &`
    - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`)
    - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
-6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
-7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
-8. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
-9. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
+6. If the task plan includes a **Failure Modes** section (Q5), implement the error/timeout/malformed handling specified. Verify each dependency's failure path is handled. Skip if the section is absent.
+7. If the task plan includes a **Load Profile** section (Q6), implement protections for the identified 10x breakpoint (connection pooling, rate limiting, pagination, etc.). Skip if absent.
+8. If the task plan includes a **Negative Tests** section (Q7), write the specified negative test cases alongside the happy-path tests — malformed inputs, error paths, and boundary conditions. Skip if absent.
+9. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
+10. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
+11. After the verification gate runs (you'll see gate results in stderr/notify output), populate the `## Verification Evidence` table in your task summary with the check results. Use the `formatEvidenceTable` format: one row per check with command, exit code, verdict (✅ pass / ❌ fail), and duration. If no verification commands were discovered, note that in the section.
+12. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
    - exercise the real flow in the browser
    - prefer `browser_batch` when the next few actions are obvious and sequential
    - prefer `browser_assert` for explicit pass/fail verification of the intended outcome
    - use `browser_diff` when an action's effect is ambiguous
    - use console/network/dialog diagnostics when validating async, stateful, or failure-prone UI
    - record verification in terms of explicit checks passed/failed, not only prose interpretation
-10. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
-11. **If execution is running long or verification fails:**
+13. If the task plan includes an Observability Impact section, verify those signals directly. Skip this step if the task plan omits the section.
+14. **If execution is running long or verification fails:**
     **Context budget:** You have approximately **{{verificationBudget}}** reserved for verification context. If you've used most of your context and haven't finished all steps, stop implementing and prioritize writing the task summary with clear notes on what's done and what remains. A partial summary that enables clean resumption is more valuable than one more half-finished step with no documentation. Never sacrifice summary quality for one more implementation step.
@@ -60,13 +63,13 @@ Then:
     - Distinguish "I know" from "I assume." Observable facts (the error says X) are strong evidence. Assumptions (this library should work this way) need verification.
     - Know when to stop. If you've tried 3+ fixes without progress, your mental model is probably wrong. Stop. List what you know for certain. List what you've ruled out. Form fresh hypotheses from there.
     - Don't fix symptoms. Understand *why* something fails before changing code. A test that passes after a change you don't understand is luck, not a fix.
-11. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice.
-12. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). Not every task produces decisions — only append when a meaningful choice was made.
-13. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things.
-14. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md`
-15. Write `{{taskSummaryPath}}`
-16. Call `gsd_complete_task` with milestone_id, slice_id, task_id, and a summary of what was accomplished. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, and renders PLAN.md automatically.
-17. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message.
+15. **Blocker discovery:** If execution reveals that the remaining slice plan is fundamentally invalid — not just a bug or minor deviation, but a plan-invalidating finding like a wrong API, missing capability, or architectural mismatch — set `blocker_discovered: true` in the task summary frontmatter and describe the blocker clearly in the summary narrative. Do NOT set `blocker_discovered: true` for ordinary debugging, minor deviations, or issues that can be fixed within the current task or the remaining plan. This flag triggers an automatic replan of the slice.
+16. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.gsd/DECISIONS.md` (read the template at `~/.gsd/agent/extensions/gsd/templates/decisions.md` if the file doesn't exist yet). Not every task produces decisions — only append when a meaningful choice was made.
+17. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.gsd/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things.
+18. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md`
+19. Write `{{taskSummaryPath}}`
+20. Call `gsd_complete_task` with milestone_id, slice_id, task_id, and a summary of what was accomplished. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, and renders PLAN.md automatically.
+21. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message.
 All work stays in your working directory: `{{workingDirectory}}`.