npm - work-kit-cli - Versions diffs - 0.4.1 → 0.5.0 - Mend

work-kit-cli 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/README.md +8 -1
package/cli/src/commands/bootstrap.test.ts +1 -1
package/cli/src/commands/bootstrap.ts +22 -14
package/cli/src/commands/complete.ts +76 -2
package/cli/src/commands/doctor.ts +51 -2
package/cli/src/commands/extract.ts +47 -25
package/cli/src/commands/init.test.ts +3 -1
package/cli/src/commands/init.ts +22 -15
package/cli/src/commands/learn.test.ts +2 -2
package/cli/src/commands/learn.ts +2 -1
package/cli/src/config/agent-map.ts +10 -2
package/cli/src/config/constants.ts +7 -0
package/cli/src/config/loopback-routes.ts +6 -0
package/cli/src/config/model-routing.ts +7 -1
package/cli/src/config/workflow.ts +12 -6
package/cli/src/index.ts +2 -2
package/cli/src/state/helpers.test.ts +1 -1
package/cli/src/state/schema.ts +11 -4
package/cli/src/state/validators.test.ts +21 -2
package/cli/src/state/validators.ts +2 -2
package/cli/src/utils/knowledge.ts +7 -1
package/cli/src/workflow/gates.ts +1 -0
package/cli/src/workflow/parallel.ts +6 -1
package/cli/src/workflow/transitions.test.ts +2 -2
package/package.json +2 -2
package/skills/auto-kit/SKILL.md +8 -1
package/skills/full-kit/SKILL.md +14 -7
package/skills/wk-bootstrap/SKILL.md +8 -0
package/skills/wk-debug/SKILL.md +127 -0
package/skills/wk-define/SKILL.md +87 -0
package/skills/wk-define/steps/refine.md +71 -0
package/skills/wk-define/steps/spec.md +70 -0
package/skills/wk-plan/steps/architecture.md +16 -0
package/skills/wk-test/steps/browser.md +92 -0
package/skills/wk-test/steps/e2e.md +45 -23
package/skills/wk-wrap-up/steps/knowledge.md +8 -3

package/README.md CHANGED Viewed

@@ -1,6 +1,13 @@
 # work-kit
-Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes, 6 phases, 27 steps — orchestrated by a TypeScript CLI with reusable skill files.
+Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes, 7 phases, 31 steps, plus auto-debug recovery — orchestrated by a TypeScript CLI with reusable skill files.
+## What's new in v0.5
+- **Define phase** runs before Plan to refine vague asks into a concrete spec (auto-skipped for bug fixes/refactors).
+- **wk-debug** triage skill auto-fires when any step reports `needs_debug`, then the originating step retries (max 2 iterations). Not user-invocable — fires from inside the pipeline.
+- **`test/browser`** drives the running app via Chrome DevTools MCP and verifies user-facing acceptance criteria in a real browser. Skips gracefully if the MCP isn't installed.
+- **`decision` knowledge type** auto-graduates `## Decisions` bullets into `.work-kit-knowledge/decisions.md` so future sessions don't re-litigate settled choices.
 ## Installation

package/cli/src/commands/bootstrap.test.ts CHANGED Viewed

@@ -48,7 +48,7 @@ describe("bootstrapCommand", () => {
     assert.equal(result.slug, "test-feature");
     assert.equal(result.mode, "full-kit");
     assert.equal(result.status, "in-progress");
-    assert.equal(result.phase, "plan");
+    assert.equal(result.phase, "define");
     assert.equal(result.recovery, null);
   });

package/cli/src/commands/bootstrap.ts CHANGED Viewed

@@ -2,12 +2,13 @@ import fs from "node:fs";
 import { findWorktreeRoot, readState, writeState, statePath } from "../state/store.js";
 import { unpause } from "../state/helpers.js";
 import { CLI_BINARY, STALE_THRESHOLD_MS } from "../config/constants.js";
-import { fileForType, readKnowledgeFile } from "../utils/knowledge.js";
+import { fileForType, readKnowledgeFile, KNOWLEDGE_TYPES, type KnowledgeType } from "../utils/knowledge.js";
 export interface BootstrapKnowledge {
   lessons?: string;
   conventions?: string;
   risks?: string;
+  decisions?: string;
 }
 export interface BootstrapResult {
@@ -24,8 +25,8 @@ export interface BootstrapResult {
   nextAction?: string;
   recovery?: string | null;
   /**
-   * Project-level knowledge files (lessons/conventions/risks) read from
-   * <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
+   * Project-level knowledge files (lessons/conventions/risks/decisions) read
+   * from <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
    * workflow.md is intentionally excluded — it's a write-only artifact for
    * human curators, not session context.
    */
@@ -89,26 +90,33 @@ export function bootstrapCommand(startDir?: string, options: BootstrapOptions =
     nextAction = `Continue ${state.currentPhase ?? "next phase"}${state.currentStep ? "/" + state.currentStep : ""}. Run \`${CLI_BINARY} next\` to get the agent prompt.`;
   }
-  // Load project-level knowledge files (best effort, never breaks bootstrap).
   // workflow.md is intentionally excluded — it's a write-only artifact for
   // human curators, not session context.
+  const INJECTED_TYPES: KnowledgeType[] = KNOWLEDGE_TYPES.filter(
+    (t) => t !== "workflow"
+  ) as KnowledgeType[];
+  // Map each knowledge type to its plural field name on BootstrapKnowledge.
+  const TYPE_TO_FIELD: Record<Exclude<KnowledgeType, "workflow">, keyof BootstrapKnowledge> = {
+    lesson: "lessons",
+    convention: "conventions",
+    risk: "risks",
+    decision: "decisions",
+  };
   let knowledge: BootstrapKnowledge | undefined;
   try {
     const mainRepoRoot = state.metadata?.mainRepoRoot;
     if (mainRepoRoot) {
-      const lessons = readKnowledgeFile(mainRepoRoot, fileForType("lesson"));
-      const conventions = readKnowledgeFile(mainRepoRoot, fileForType("convention"));
-      const risks = readKnowledgeFile(mainRepoRoot, fileForType("risk"));
-      if (lessons || conventions || risks) {
-        knowledge = {
-          ...(lessons && { lessons }),
-          ...(conventions && { conventions }),
-          ...(risks && { risks }),
-        };
+      const collected: BootstrapKnowledge = {};
+      for (const type of INJECTED_TYPES) {
+        const content = readKnowledgeFile(mainRepoRoot, fileForType(type));
+        if (content) {
+          collected[TYPE_TO_FIELD[type as Exclude<KnowledgeType, "workflow">]] = content;
+        }
       }
+      if (Object.keys(collected).length > 0) knowledge = collected;
     }
   } catch (err: any) {
-    // Non-fatal: log to stderr but don't break bootstrap
     process.stderr.write(`work-kit: failed to load knowledge files: ${err.message}\n`);
   }

package/cli/src/commands/complete.ts CHANGED Viewed

@@ -5,9 +5,12 @@ import { isPhaseComplete, nextStepInPhase } from "../workflow/transitions.js";
 import { checkLoopback, countLoopbacksForRoute } from "../workflow/loopbacks.js";
 import { PHASE_ORDER } from "../config/workflow.js";
 import { parseLocation, resetToLocation } from "../state/helpers.js";
-import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, CLI_BINARY } from "../config/constants.js";
-import { isStepOutcome, STEP_OUTCOMES, type Action, type PhaseName, type StepOutcome, type WorkKitState } from "../state/schema.js";
+import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, MAX_DEBUG_ITERATIONS, SKILL_DIR_PREFIX, CLI_BINARY } from "../config/constants.js";
+import { isStepOutcome, STEP_OUTCOMES, type Action, type Location, type PhaseName, type StepOutcome, type StepState, type WorkKitState } from "../state/schema.js";
 import { stateMdPath } from "../state/store.js";
+import { resolveModel } from "../config/model-routing.js";
+const DEBUG_SKILL_FILE = `.claude/skills/${SKILL_DIR_PREFIX}debug/SKILL.md`;
 export function completeCommand(target: string, outcome?: string, worktreeRoot?: string): Action {
   const root = worktreeRoot || findWorktreeRoot();
@@ -50,6 +53,10 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
     return { action: "error", message: `${phase}/${step} is skipped and cannot be completed. Add it to the workflow first.` };
   }
+  if (typedOutcome === "needs_debug") {
+    return handleNeedsDebug(root, state, stepState, { phase, step });
+  }
   stepState.status = "completed";
   stepState.completedAt = new Date().toISOString();
   if (typedOutcome) {
@@ -143,6 +150,73 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
 // ── Archive on completion ──────────────────────────────────────────
+/**
+ * Divert a step that reported `needs_debug` into the wk-debug skill. The
+ * originating step stays in-progress so the next `next()` call retries it
+ * after the debug agent finishes. Bails to `wait_for_user` once the per-step
+ * iteration cap is reached.
+ */
+function handleNeedsDebug(
+  root: string,
+  state: WorkKitState,
+  stepState: StepState,
+  origin: Location
+): Action {
+  const debugCount = state.loopbacks.filter(
+    (lb) => lb.kind === "debug" && lb.from.phase === origin.phase && lb.from.step === origin.step
+  ).length;
+  if (debugCount >= MAX_DEBUG_ITERATIONS) {
+    writeState(root, state);
+    return {
+      action: "wait_for_user",
+      message: `${origin.phase}/${origin.step} reported needs_debug but max debug iterations (${MAX_DEBUG_ITERATIONS}) reached. Surface to user — manual intervention required.`,
+    };
+  }
+  const iteration = debugCount + 1;
+  state.loopbacks.push({
+    from: origin,
+    to: origin,
+    reason: `Step reported needs_debug — invoking wk-debug (iteration ${iteration})`,
+    timestamp: new Date().toISOString(),
+    kind: "debug",
+  });
+  stepState.status = "in-progress";
+  delete stepState.outcome;
+  delete stepState.completedAt;
+  writeState(root, state);
+  const agentPrompt = [
+    `# Debug Triage`,
+    ``,
+    `**Origin:** ${origin.phase}/${origin.step}`,
+    `**Iteration:** ${iteration} of ${MAX_DEBUG_ITERATIONS}`,
+    `**Worktree:** ${root}`,
+    ``,
+    `## Instructions`,
+    `Read and follow the skill file: \`${DEBUG_SKILL_FILE}\``,
+    ``,
+    `The originating step (${origin.phase}/${origin.step}) hit something it cannot resolve.`,
+    `Read \`.work-kit/state.md\` and the originating agent's working notes for that step.`,
+    ``,
+    `Run the 5-step triage methodology. Write your full report to \`.work-kit/debug-<ISO-timestamp>.md\`.`,
+    `Do NOT call \`work-kit complete\` for the originating step — when you finish, the orchestrator will re-run \`work-kit next\` and the originating step will retry automatically.`,
+  ].join("\n");
+  const debugModel = resolveModel(state, origin.phase, origin.step);
+  return {
+    action: "spawn_debug_agent",
+    origin,
+    iteration,
+    skillFile: DEBUG_SKILL_FILE,
+    agentPrompt,
+    onComplete: `${CLI_BINARY} next`,
+    ...(debugModel && { model: debugModel }),
+  };
+}
 function archiveFolderName(slug: string, completedAt: string): string {
   return `${slug}-${completedAt.split("T")[0]}`;
 }

package/cli/src/commands/doctor.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
+import * as os from "node:os";
 import { execFileSync } from "node:child_process";
 import { findWorktreeRoot, readState, stateExists } from "../state/store.js";
@@ -35,7 +36,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
   }
   // 3. Phase skill files
-  const phases = ["wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up"];
+  const phases = ["wk-define", "wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up", "wk-debug"];
   let phasesMissing = 0;
   for (const phase of phases) {
     const phasePath = path.join(skillsDir, phase, "SKILL.md");
@@ -49,6 +50,26 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
     checks.push({ name: "skill:phases", status: "fail", message: `${phasesMissing} phase skill(s) missing from ${skillsDir}` });
   }
+  // 3b. Chrome DevTools MCP availability (used by test/browser).
+  // Warn-only: if missing, the browser step skips itself but the rest of the
+  // pipeline runs unaffected.
+  const cdpMcpAvailable = detectChromeDevtoolsMcp();
+  if (cdpMcpAvailable === "yes") {
+    checks.push({ name: "mcp:chrome-devtools", status: "pass", message: "Chrome DevTools MCP detected" });
+  } else if (cdpMcpAvailable === "unknown") {
+    checks.push({
+      name: "mcp:chrome-devtools",
+      status: "warn",
+      message: "Chrome DevTools MCP could not be detected. The test/browser step will be skipped if invoked.",
+    });
+  } else {
+    checks.push({
+      name: "mcp:chrome-devtools",
+      status: "warn",
+      message: "Chrome DevTools MCP not configured. test/browser will skip — install the MCP server to enable live browser verification.",
+    });
+  }
   // 4. Git available
   try {
     const gitVersion = execFileSync("git", ["--version"], { encoding: "utf-8" }).trim();
@@ -62,7 +83,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
   if (root && stateExists(root)) {
     try {
       const state = readState(root);
-      if (state.version === 2 && state.slug && state.status) {
+      if (state.version === 3 && state.slug && state.status) {
         checks.push({ name: "state", status: "pass", message: `Active work-kit: "${state.slug}" (${state.status})` });
       } else {
         checks.push({ name: "state", status: "warn", message: "tracker.json exists but has unexpected structure" });
@@ -77,3 +98,31 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
   const ok = checks.every((c) => c.status !== "fail");
   return { ok, checks };
 }
+/**
+ * Best-effort detection of the Chrome DevTools MCP server. We can't call MCP
+ * tools from the CLI, so we scan the most common config file locations for any
+ * chrome-devtools-flavored server entry. Returns "yes" on a hit, "no" if a
+ * config exists but doesn't mention it, and "unknown" if no config exists.
+ */
+function detectChromeDevtoolsMcp(): "yes" | "no" | "unknown" {
+  const claudeDir = path.join(os.homedir(), ".claude");
+  const candidates = [
+    path.join(claudeDir, "settings.json"),
+    path.join(claudeDir, "mcp.json"),
+    path.join(process.cwd(), ".mcp.json"),
+  ];
+  let sawAny = false;
+  for (const file of candidates) {
+    let raw: string;
+    try {
+      raw = fs.readFileSync(file, "utf-8");
+    } catch {
+      continue; // missing or unreadable — skip
+    }
+    sawAny = true;
+    if (/chrome[-_]?devtools/i.test(raw)) return "yes";
+  }
+  return sawAny ? "no" : "unknown";
+}

package/cli/src/commands/extract.ts CHANGED Viewed

@@ -43,47 +43,69 @@ function emptyByType(): Record<KnowledgeType, number> {
 const OBSERVATION_RE = /^-\s*\[([a-z]+)(?::([a-z0-9-]+\/[a-z0-9-]+))?\]\s*(.+)$/i;
 /**
- * Walk state.md once and emit raw entries from the three sections we know:
- * Observations (typed bullets), Decisions (any bullet → convention),
- * Deviations (any bullet → workflow with [deviation] prefix).
+ * A bullet under `## Decisions` is harvested when it follows the documented
+ * shape `**<context>**: chose <X> over <Y> — <why>`. Free-form lines are
+ * skipped (not errors). The leading `**context**:` becomes the entry's title.
+ */
+const DECISION_RE = /^-\s*\*\*([^*]+)\*\*\s*:\s*(.+)$/;
+/**
+ * Walk state.md once and emit raw entries from:
+ *   - `## Observations` — typed bullets (`- [lesson|convention|risk|workflow|decision] text`)
+ *   - `## Decisions`    — bullets matching `**<context>**: chose X over Y — <why>`
+ *
+ * `## Deviations` stays scratch — agents routinely dump test plans there.
  */
 function parseStateMd(stateMd: string): RawEntry[] {
   const out: RawEntry[] = [];
   if (!stateMd) return out;
-  // Only `## Observations` is auto-harvested. `## Decisions` and `## Deviations`
-  // are agent scratch space during normal phase work — they routinely contain
-  // test plans, acceptance-criteria checklists, and self-review dumps. Auto-
-  // routing them floods workflow.md with noise. Agents opt into harvesting by
-  // writing typed bullets (`- [lesson|convention|risk|workflow] text`) under
-  // `## Observations`.
-  let inObservations = false;
+  type Section = "observations" | "decisions" | "other";
+  let section: Section = "other";
   for (const rawLine of stateMd.split("\n")) {
     const trimmed = rawLine.trim();
     if (trimmed.startsWith("## ")) {
-      inObservations = trimmed.slice(3).trim().toLowerCase() === "observations";
+      const heading = trimmed.slice(3).trim().toLowerCase();
+      if (heading === "observations") section = "observations";
+      else if (heading === "decisions") section = "decisions";
+      else section = "other";
       continue;
     }
-    if (!inObservations) continue;
+    if (section === "other") continue;
     if (!trimmed.startsWith("-") || trimmed.startsWith("<!--")) continue;
-    const m = trimmed.match(OBSERVATION_RE);
-    if (!m) continue;
-    const tag = m[1].toLowerCase();
-    if (!isKnowledgeType(tag)) continue;
-    const phaseStep = m[2];
-    const text = m[3].trim();
-    if (text.length === 0) continue;
-    const entry: RawEntry = { type: tag, text, source: "auto-state-md" };
-    if (phaseStep) {
-      const [p, s] = phaseStep.split("/");
-      entry.phase = p;
-      entry.step = s;
+    if (section === "observations") {
+      const m = trimmed.match(OBSERVATION_RE);
+      if (!m) continue;
+      const tag = m[1].toLowerCase();
+      if (!isKnowledgeType(tag)) continue;
+      const phaseStep = m[2];
+      const text = m[3].trim();
+      if (text.length === 0) continue;
+      const entry: RawEntry = { type: tag, text, source: "auto-state-md" };
+      if (phaseStep) {
+        const [p, s] = phaseStep.split("/");
+        entry.phase = p;
+        entry.step = s;
+      }
+      out.push(entry);
+      continue;
     }
-    out.push(entry);
+    // section === "decisions"
+    const m = trimmed.match(DECISION_RE);
+    if (!m) continue;
+    const context = m[1].trim();
+    const rationale = m[2].trim();
+    if (context.length === 0 || rationale.length === 0) continue;
+    out.push({
+      type: "decision",
+      text: `**${context}**: ${rationale}`,
+      source: "auto-state-md",
+    });
   }
   return out;

package/cli/src/commands/init.test.ts CHANGED Viewed

@@ -40,7 +40,9 @@ describe("initCommand", () => {
     );
     assert.equal(state.slug, "add-user-login");
     assert.equal(state.status, "in-progress");
-    assert.equal(state.currentPhase, "plan");
+    assert.equal(state.currentPhase, "define");
+    assert.equal(state.currentStep, "refine");
+    assert.equal(state.version, 3);
   });
   it("returns spawn_agent action", () => {

package/cli/src/commands/init.ts CHANGED Viewed

@@ -44,7 +44,16 @@ function buildPhases(workflow?: WorkflowStep[]): Record<PhaseName, PhaseState> {
   return phases;
 }
-function generateStateMd(slug: string, branch: string, mode: string, description: string, classification?: string, workflow?: WorkflowStep[]): string {
+function generateStateMd(
+  slug: string,
+  branch: string,
+  mode: string,
+  description: string,
+  firstPhase: string,
+  firstStep: string,
+  classification?: string,
+  workflow?: WorkflowStep[]
+): string {
   const title = slug.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
   const date = new Date().toISOString().split("T")[0];
@@ -60,8 +69,8 @@ function generateStateMd(slug: string, branch: string, mode: string, description
     md += `**Classification:** ${classification}\n`;
   }
-  md += `**Phase:** plan
-**Step:** clarify
+  md += `**Phase:** ${firstPhase}
+**Step:** ${firstStep}
 **Status:** in-progress
 ## Description
@@ -182,21 +191,19 @@ export function initCommand(options: {
     workflow = buildFullWorkflow();
   }
-  // Find first active step
-  let firstPhase: PhaseName = "plan";
-  let firstStep = "clarify";
-  if (workflow) {
-    const first = workflow.find((s) => s.included);
-    if (first) {
-      firstPhase = first.phase;
-      firstStep = first.step;
-    }
+  // First active step is always the first `included` entry in the workflow.
+  // For full-kit that's define/refine; for auto-kit it depends on classification.
+  let firstPhase: PhaseName = "define";
+  let firstStep = "refine";
+  const first = workflow?.find((s) => s.included);
+  if (first) {
+    firstPhase = first.phase;
+    firstStep = first.step;
   }
   // Build state
   const state: WorkKitState = {
-    version: 2,
+    version: 3,
     slug,
     branch,
     started: new Date().toISOString(),
@@ -221,7 +228,7 @@ export function initCommand(options: {
   // Write state files
   writeState(worktreeRoot, state);
-  writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, classification, workflow));
+  writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, firstPhase, firstStep, classification, workflow));
   const model = resolveModel(state, firstPhase, firstStep);

package/cli/src/commands/learn.test.ts CHANGED Viewed

@@ -146,8 +146,8 @@ describe("learnCommand", () => {
       path.join(tmp, KNOWLEDGE_DIR, "lessons.md"),
       "utf-8"
     );
-    // Init starts at plan/clarify
-    assert.ok(content.includes("plan/clarify"));
+    // Full-kit init now starts at define/refine (Define is the new first phase)
+    assert.ok(content.includes("define/refine"));
   });
   it("extracts typed bullets from state.md ## Observations", () => {

package/cli/src/commands/learn.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import {
   ensureKnowledgeDir,
   fileForType,
   isKnowledgeType,
+  KNOWLEDGE_TYPES,
   redact,
   type KnowledgeEntry,
   type KnowledgeType,
@@ -36,7 +37,7 @@ export function learnCommand(opts: LearnOptions): LearnResult {
   if (!opts.type || !isKnowledgeType(opts.type)) {
     return {
       action: "error",
-      message: `Invalid --type "${opts.type}". Must be one of: lesson, convention, risk, workflow.`,
+      message: `Invalid --type "${opts.type}". Must be one of: ${KNOWLEDGE_TYPES.join(", ")}.`,
     };
   }
   if (!opts.text || opts.text.trim().length === 0) {

package/cli/src/config/agent-map.ts CHANGED Viewed

@@ -12,8 +12,11 @@ export interface AgentContext {
 // Phase-level context (what the phase runner agent reads)
 export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
+  define: {
+    sections: ["## Description"],
+  },
   plan: {
-    sections: ["## Description", "## Criteria"],
+    sections: ["## Description", "### Define: Final", "## Criteria"],
   },
   build: {
     sections: ["### Plan: Final", "## Criteria", "## Description"],
@@ -34,10 +37,15 @@ export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
 // Step-level context (for parallel sub-agents that need specific sections)
 export const STEP_CONTEXT: Record<string, AgentContext> = {
+  // Define steps
+  "define/refine": { sections: ["## Description"] },
+  "define/spec": { sections: ["## Description", "### Define: Refine"] },
   // Test steps
   "test/verify": { sections: ["### Build: Final", "## Criteria"] },
+  "test/browser": { sections: ["### Build: Final", "## Criteria", "### Plan: UX Flow"] },
   "test/e2e": { sections: ["### Build: Final", "### Plan: Final"] },
-  "test/validate": { sections: ["### Test: Verify", "### Test: E2E", "## Criteria"] },
+  "test/validate": { sections: ["### Test: Verify", "### Test: Browser", "### Test: E2E", "## Criteria"] },
   // Review steps
   "review/self-review": { sections: ["### Build: Final"], needsGitDiff: true },

package/cli/src/config/constants.ts CHANGED Viewed

@@ -48,6 +48,13 @@ export const KNOWLEDGE_LOCK = ".lock";
 export const MAX_LOOPBACKS_PER_ROUTE = 2;
+/**
+ * Max times wk-debug can be invoked for the same originating step before the
+ * orchestrator surfaces the failure to the user. Tracked separately from
+ * standard loopbacks via `LoopbackRecord.kind === "debug"`.
+ */
+export const MAX_DEBUG_ITERATIONS = 2;
 // ── Staleness ───────────────────────────────────────────────────────
 /** Threshold (ms) after which an in-progress state is considered stale. */

package/cli/src/config/loopback-routes.ts CHANGED Viewed

@@ -12,6 +12,12 @@ export interface LoopbackRoute {
 }
 export const LOOPBACK_ROUTES: LoopbackRoute[] = [
+  {
+    from: { phase: "define", step: "spec" },
+    triggerOutcome: "revise",
+    to: { phase: "define", step: "refine" },
+    reason: "Spec found ambiguity — looping back to Refine",
+  },
   {
     from: { phase: "plan", step: "audit" },
     triggerOutcome: "revise",

package/cli/src/config/model-routing.ts CHANGED Viewed

@@ -32,6 +32,7 @@ const HARD_DEFAULT: ModelTier = "sonnet";
 // ── Phase defaults ──────────────────────────────────────────────────
 export const BY_PHASE: Record<PhaseName, ModelTier> = {
+  define: "opus",
   plan: "sonnet",
   build: "sonnet",
   test: "sonnet",
@@ -43,6 +44,10 @@ export const BY_PHASE: Record<PhaseName, ModelTier> = {
 // ── Step-level overrides (phase/step keys) ──────────────────────────
 export const BY_STEP: Record<string, ModelTier> = {
+  // Define — refining a vague ask is reasoning-heavy
+  "define/refine": "opus",
+  "define/spec": "sonnet",
   // Plan — research/design-heavy steps benefit from opus
   "plan/clarify": "sonnet",
   "plan/investigate": "opus",
@@ -63,8 +68,9 @@ export const BY_STEP: Record<string, ModelTier> = {
   "build/integration": "sonnet",
   "build/commit": "haiku",
-  // Test — verify is mechanical, e2e/validate need judgment
+  // Test — verify is mechanical, browser/e2e/validate need judgment
   "test/verify": "haiku",
+  "test/browser": "sonnet",
   "test/e2e": "sonnet",
   "test/validate": "sonnet",