npm - work-kit-cli - Versions diffs - 0.4.1 → 0.5.0 - Mend

work-kit-cli 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +8 -1
package/cli/src/commands/bootstrap.test.ts +1 -1
package/cli/src/commands/bootstrap.ts +22 -14
package/cli/src/commands/complete.ts +76 -2
package/cli/src/commands/doctor.ts +51 -2
package/cli/src/commands/extract.ts +47 -25
package/cli/src/commands/init.test.ts +3 -1
package/cli/src/commands/init.ts +22 -15
package/cli/src/commands/learn.test.ts +2 -2
package/cli/src/commands/learn.ts +2 -1
package/cli/src/config/agent-map.ts +10 -2
package/cli/src/config/constants.ts +7 -0
package/cli/src/config/loopback-routes.ts +6 -0
package/cli/src/config/model-routing.ts +7 -1
package/cli/src/config/workflow.ts +12 -6
package/cli/src/index.ts +2 -2
package/cli/src/state/helpers.test.ts +1 -1
package/cli/src/state/schema.ts +11 -4
package/cli/src/state/validators.test.ts +21 -2
package/cli/src/state/validators.ts +2 -2
package/cli/src/utils/knowledge.ts +7 -1
package/cli/src/workflow/gates.ts +1 -0
package/cli/src/workflow/parallel.ts +6 -1
package/cli/src/workflow/transitions.test.ts +2 -2
package/package.json +2 -2
package/skills/auto-kit/SKILL.md +8 -1
package/skills/full-kit/SKILL.md +14 -7
package/skills/wk-bootstrap/SKILL.md +8 -0
package/skills/wk-debug/SKILL.md +127 -0
package/skills/wk-define/SKILL.md +87 -0
package/skills/wk-define/steps/refine.md +71 -0
package/skills/wk-define/steps/spec.md +70 -0
package/skills/wk-plan/steps/architecture.md +16 -0
package/skills/wk-test/steps/browser.md +92 -0
package/skills/wk-test/steps/e2e.md +45 -23
package/skills/wk-wrap-up/steps/knowledge.md +8 -3

package/cli/src/config/workflow.ts CHANGED Viewed

@@ -14,7 +14,8 @@ export const PHASE_ORDER: PhaseName[] = [...PHASE_NAMES];
 // ── Prerequisites ───────────────────────────────────────────────────
 export const PHASE_PREREQUISITES: Record<PhaseName, PhaseName | null> = {
-  plan: null,
+  define: null,
+  plan: "define", // when define is fully skipped, validators treat the skipped phase as satisfied
   build: "plan",
   test: "build",
   review: "test",
@@ -36,55 +37,60 @@ type InclusionRule = "YES" | "skip" | "if UI" | "if DB" | "optional";
 const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
   "bug-fix": {
+    "define/refine": "skip", "define/spec": "skip",
     "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
     "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
     "build/setup": "skip", "build/migration": "skip", "build/red": "YES", "build/core": "YES",
     "build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
-    "test/verify": "YES", "test/e2e": "skip", "test/validate": "YES",
+    "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "YES",
     "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
     "review/compliance": "skip", "review/handoff": "YES",
     "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
     "wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
   },
   "small-change": {
+    "define/refine": "skip", "define/spec": "skip",
     "plan/clarify": "YES", "plan/investigate": "skip", "plan/sketch": "skip", "plan/scope": "skip",
     "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
     "build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
     "build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
-    "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
+    "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
     "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
     "review/compliance": "skip", "review/handoff": "YES",
     "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
     "wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
   },
   refactor: {
+    "define/refine": "skip", "define/spec": "skip",
     "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
     "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
     "build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
     "build/ui": "if UI", "build/refactor": "YES", "build/integration": "skip", "build/commit": "YES",
-    "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
+    "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
     "review/self-review": "YES", "review/security": "skip", "review/performance": "YES",
     "review/compliance": "skip", "review/handoff": "YES",
     "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
     "wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
   },
   feature: {
+    "define/refine": "YES", "define/spec": "YES",
     "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
     "plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "skip",
     "build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
     "build/ui": "if UI", "build/refactor": "skip", "build/integration": "YES", "build/commit": "YES",
-    "test/verify": "YES", "test/e2e": "if UI", "test/validate": "YES",
+    "test/verify": "YES", "test/browser": "if UI", "test/e2e": "if UI", "test/validate": "YES",
     "review/self-review": "YES", "review/security": "YES", "review/performance": "skip",
     "review/compliance": "YES", "review/handoff": "YES",
     "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
     "wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
   },
   "large-feature": {
+    "define/refine": "YES", "define/spec": "YES",
     "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
     "plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "YES",
     "build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
     "build/ui": "if UI", "build/refactor": "YES", "build/integration": "YES", "build/commit": "YES",
-    "test/verify": "YES", "test/e2e": "YES", "test/validate": "YES",
+    "test/verify": "YES", "test/browser": "if UI", "test/e2e": "YES", "test/validate": "YES",
     "review/self-review": "YES", "review/security": "YES", "review/performance": "YES",
     "review/compliance": "YES", "review/handoff": "YES",
     "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",

package/cli/src/index.ts CHANGED Viewed

@@ -361,8 +361,8 @@ program
 program
   .command("learn")
-  .description("Append a knowledge entry (lesson/convention/risk/workflow) to .work-kit-knowledge/")
-  .requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow")
+  .description("Append a knowledge entry (lesson/convention/risk/workflow/decision) to .work-kit-knowledge/")
+  .requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow, decision")
   .requiredOption("--text <text>", "Free-form text. Secrets are auto-redacted at write time.")
   .option("--scope <glob>", "Optional path glob (stored, not yet used for filtering)")
   .option("--phase <phase>", "Override session phase auto-fill")

package/cli/src/state/helpers.test.ts CHANGED Viewed

@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
     phases[phase] = { status: "pending", steps };
   }
   return {
-    version: 2,
+    version: 3,
     slug: "test",
     branch: "feature/test",
     started: "2026-01-01",

package/cli/src/state/schema.ts CHANGED Viewed

@@ -1,15 +1,17 @@
 // ── Phase & Step Types ──────────────────────────────────────────────
-export const PHASE_NAMES = ["plan", "build", "test", "review", "deploy", "wrap-up"] as const;
+export const PHASE_NAMES = ["define", "plan", "build", "test", "review", "deploy", "wrap-up"] as const;
 export type PhaseName = (typeof PHASE_NAMES)[number];
+export const DEFINE_STEPS = ["refine", "spec"] as const;
 export const PLAN_STEPS = ["clarify", "investigate", "sketch", "scope", "ux-flow", "architecture", "blueprint", "audit"] as const;
 export const BUILD_STEPS = ["setup", "migration", "red", "core", "ui", "refactor", "integration", "commit"] as const;
-export const TEST_STEPS = ["verify", "e2e", "validate"] as const;
+export const TEST_STEPS = ["verify", "e2e", "browser", "validate"] as const;
 export const REVIEW_STEPS = ["self-review", "security", "performance", "compliance", "handoff"] as const;
 export const DEPLOY_STEPS = ["merge", "monitor", "remediate"] as const;
 export const WRAPUP_STEPS = ["summary", "knowledge"] as const;
+export type DefineStep = (typeof DEFINE_STEPS)[number];
 export type PlanStep = (typeof PLAN_STEPS)[number];
 export type BuildStep = (typeof BUILD_STEPS)[number];
 export type TestStep = (typeof TEST_STEPS)[number];
@@ -17,9 +19,10 @@ export type ReviewStep = (typeof REVIEW_STEPS)[number];
 export type DeployStep = (typeof DEPLOY_STEPS)[number];
 export type WrapUpStep = (typeof WRAPUP_STEPS)[number];
-export type StepName = PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
+export type StepName = DefineStep | PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
 export const STEPS_BY_PHASE: Record<PhaseName, readonly string[]> = {
+  define: DEFINE_STEPS,
   plan: PLAN_STEPS,
   build: BUILD_STEPS,
   test: TEST_STEPS,
@@ -87,6 +90,7 @@ export const STEP_OUTCOMES = [
   "changes_requested",   // review handoff requested changes
   "fix_needed",          // deploy merge blocked, fix required
   "fix_and_redeploy",    // remediation requires another deploy cycle
+  "needs_debug",         // step hit an error it can't resolve — invoke wk-debug, then return
   "blocked",             // step cannot proceed without external input
   "skipped",             // step intentionally skipped at runtime
 ] as const;
@@ -127,6 +131,8 @@ export interface LoopbackRecord {
   to: Location;
   reason: string;
   timestamp: string;
+  /** "debug" loopbacks are virtual: the agent spawns wk-debug then retries the same step. */
+  kind?: "standard" | "debug";
 }
 // ── Workflow (auto-kit) ─────────────────────────────────────────────
@@ -144,7 +150,7 @@ export type WorkStatus = "in-progress" | "paused" | "completed" | "failed";
 // ── Main State ──────────────────────────────────────────────────────
 export interface WorkKitState {
-  version: 2;
+  version: 3;
   slug: string;
   branch: string;
   started: string;
@@ -182,6 +188,7 @@ export interface AgentSpec {
 export type Action =
   | { action: "spawn_agent"; phase: PhaseName; step: string; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
   | { action: "spawn_parallel_agents"; agents: AgentSpec[]; thenSequential?: AgentSpec; onComplete: string }
+  | { action: "spawn_debug_agent"; origin: Location; iteration: number; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
   | { action: "wait_for_user"; message: string }
   | { action: "loopback"; from: Location; to: Location; reason: string }
   | { action: "complete"; message: string }

package/cli/src/state/validators.test.ts CHANGED Viewed

@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
     phases[phase] = { status: "pending", steps };
   }
   return {
-    version: 2,
+    version: 3,
     slug: "test",
     branch: "feature/test",
     started: "2026-01-01",
@@ -38,14 +38,32 @@ function completePhase(state: WorkKitState, phase: PhaseName): void {
 }
 describe("validatePhasePrerequisites", () => {
-  it("plan has no prerequisites — valid", () => {
+  it("define has no prerequisites — valid", () => {
     const state = makeState();
+    const result = validatePhasePrerequisites(state, "define");
+    assert.equal(result.valid, true);
+  });
+  it("plan with define complete — valid", () => {
+    const state = makeState();
+    completePhase(state, "define");
+    const result = validatePhasePrerequisites(state, "plan");
+    assert.equal(result.valid, true);
+  });
+  it("plan with define skipped — valid (skipped satisfies prerequisite)", () => {
+    const state = makeState();
+    state.phases.define.status = "skipped";
+    for (const s of Object.values(state.phases.define.steps)) {
+      s.status = "skipped";
+    }
     const result = validatePhasePrerequisites(state, "plan");
     assert.equal(result.valid, true);
   });
   it("build with plan incomplete — invalid", () => {
     const state = makeState();
+    completePhase(state, "define");
     const result = validatePhasePrerequisites(state, "build");
     assert.equal(result.valid, false);
     assert.equal(result.missingPrerequisite, "plan");
@@ -53,6 +71,7 @@ describe("validatePhasePrerequisites", () => {
   it("build with plan complete — valid", () => {
     const state = makeState();
+    completePhase(state, "define");
     completePhase(state, "plan");
     const result = validatePhasePrerequisites(state, "build");
     assert.equal(result.valid, true);

package/cli/src/state/validators.ts CHANGED Viewed

@@ -66,9 +66,9 @@ export function validatePhasePrerequisites(state: WorkKitState, phase: PhaseName
     return { valid: true, message: "Prerequisites met for deploy" };
   }
-  // General case
+  // General case: completed OR fully skipped both satisfy the prerequisite.
   const prereqState = state.phases[prereq];
-  if (prereqState.status !== "completed") {
+  if (prereqState.status !== "completed" && prereqState.status !== "skipped") {
     return {
       valid: false,
       message: `${phase} requires ${prereq} to be complete. Current: ${prereqState.status}`,

package/cli/src/utils/knowledge.ts CHANGED Viewed

@@ -13,7 +13,7 @@ export const AUTO_BLOCK_START = "<!-- work-kit:auto:start -->";
 export const AUTO_BLOCK_END = "<!-- work-kit:auto:end -->";
 export const MANUAL_HEADER = "## Manual";
-export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow"] as const;
+export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow", "decision"] as const;
 export type KnowledgeType = (typeof KNOWLEDGE_TYPES)[number];
 export function isKnowledgeType(value: string): value is KnowledgeType {
@@ -25,6 +25,7 @@ const TYPE_TO_FILE: Record<KnowledgeType, string> = {
   convention: "conventions.md",
   risk: "risks.md",
   workflow: "workflow.md",
+  decision: "decisions.md",
 };
 const FILE_TO_TITLE: Record<string, string> = {
@@ -32,6 +33,7 @@ const FILE_TO_TITLE: Record<string, string> = {
   "conventions.md": "Conventions",
   "risks.md": "Risks",
   "workflow.md": "Workflow Feedback",
+  "decisions.md": "Decisions",
 };
 const FILE_TO_BLURB: Record<string, string> = {
@@ -43,6 +45,8 @@ const FILE_TO_BLURB: Record<string, string> = {
     "Known fragile or dangerous areas. Touch these with care.",
   "workflow.md":
     "Feedback about the work-kit workflow itself as observed in this project — skill quality, step skips, loopbacks, failure modes. Mined manually to improve work-kit upstream.",
+  "decisions.md":
+    "Architectural and design decisions made during work-kit sessions: what was chosen, what was rejected, why. Format mirrors a lightweight ADR — read these before re-litigating a settled choice.",
 };
 // ── Path Resolvers ──────────────────────────────────────────────────
@@ -180,6 +184,8 @@ benefits.
 - **lessons.md** — things you learned about this codebase (project-specific).
 - **conventions.md** — codified rules this project follows.
 - **risks.md** — fragile or dangerous areas to handle with care.
+- **decisions.md** — architectural choices made during sessions: what was
+  picked, what was rejected, why. Read before re-litigating a settled choice.
 - **workflow.md** — feedback about the work-kit workflow itself as observed
   in this project. Mined manually across projects to improve work-kit.

package/cli/src/workflow/gates.ts CHANGED Viewed

@@ -14,6 +14,7 @@ export const WAIT_AFTER_PHASE: Set<PhaseName> = new Set([
 // ── Phase Display Names ──────────────────────────────────────────────
 export const PHASE_DISPLAY_NAMES: Record<PhaseName, string> = {
+  define: "Define",
   plan: "Plan",
   build: "Build",
   test: "Test",

package/cli/src/workflow/parallel.ts CHANGED Viewed

@@ -15,7 +15,12 @@ export interface ParallelGroup {
  */
 export const DEFAULT_PARALLEL_GROUPS: Record<string, ParallelGroup> = {
   test: {
-    parallel: ["verify", "e2e"],
+    // verify (test suite), e2e (Playwright/etc), and browser (Chrome DevTools
+    // MCP) all run as independent observations. validate consolidates them.
+    // browser is auto-skipped on non-UI classifications, so the parallel set
+    // shrinks naturally. browser.md is responsible for graceful behavior if
+    // the dev server is shared with e2e.
+    parallel: ["verify", "e2e", "browser"],
     thenSequential: "validate",
   },
   review: {

package/cli/src/workflow/transitions.test.ts CHANGED Viewed

@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
     phases[phase] = { status: "pending", steps };
   }
   return {
-    version: 2,
+    version: 3,
     slug: "test",
     branch: "feature/test",
     started: "2026-01-01",
@@ -91,7 +91,7 @@ describe("determineNextStep", () => {
     state.currentPhase = null;
     const result = determineNextStep(state);
     assert.equal(result.type, "phase-boundary");
-    assert.equal(result.phase, "plan");
+    assert.equal(result.phase, "define");
   });
   it("returns step for current phase with pending work", () => {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "work-kit-cli",
-  "version": "0.4.1",
-  "description": "Structured development workflow for Claude Code. Two modes, 6 phases, 27 steps.",
+  "version": "0.5.0",
+  "description": "Structured development workflow for Claude Code. Two modes, 7 phases, 31 steps, plus debug recovery.",
   "type": "module",
   "bin": {
     "work-kit": "cli/bin/work-kit.mjs",

package/skills/auto-kit/SKILL.md CHANGED Viewed

@@ -26,13 +26,16 @@ Do not proceed until `doctor` reports all checks passed.
 These are the building blocks you pick from:
+- **Define:** Refine, Spec  *(included for `feature` and `large-feature` only)*
 - **Plan:** Clarify, Investigate, Sketch, Scope, UX Flow, Architecture, Blueprint, Audit
 - **Build:** Setup, Migration, Red, Core, UI, Refactor, Integration, Commit
-- **Test:** Verify, E2E, Validate
+- **Test:** Verify, E2E, Browser, Validate  *(Browser uses Chrome DevTools MCP; included for `feature` and `large-feature` only when the work has a UI — `if UI` in the table below)*
 - **Review:** Self-Review, Security, Performance, Compliance, Handoff
 - **Deploy:** Merge, Monitor, Remediate (optional)
 - **Wrap-up**
+**Debug recovery:** any step can report outcome `needs_debug`. The CLI auto-spawns the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step.
 ## Starting New Work (`/auto-kit <description>`)
 ### Step 1: Analyze
@@ -55,6 +58,8 @@ Based on the classification, select steps. Use this table as a starting point, t
 | Step              | bug-fix | small-change | refactor | feature | large-feature |
 |------------------------|---------|--------------|----------|---------|---------------|
+| **Define: Refine**     | skip    | skip         | skip     | YES     | YES           |
+| **Define: Spec**       | skip    | skip         | skip     | YES     | YES           |
 | **Plan: Clarify**      | YES     | YES          | YES      | YES     | YES           |
 | **Plan: Investigate**  | YES     | skip         | YES      | YES     | YES           |
 | **Plan: Sketch**       | skip    | skip         | skip     | YES     | YES           |
@@ -73,6 +78,7 @@ Based on the classification, select steps. Use this table as a starting point, t
 | **Build: Commit**      | YES     | YES          | YES      | YES     | YES           |
 | **Test: Verify**       | YES     | YES          | YES      | YES     | YES           |
 | **Test: E2E**          | skip    | skip         | skip     | if UI   | YES           |
+| **Test: Browser**      | skip    | skip         | skip     | if UI   | if UI         |
 | **Test: Validate**     | YES     | skip         | skip     | YES     | YES           |
 | **Review: Self-Review**| YES     | YES          | YES      | YES     | YES           |
 | **Review: Security**   | skip    | skip         | skip     | YES     | YES           |
@@ -205,6 +211,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
 3. Follow the action type:
    - **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
    - **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
+   - **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes, simply run `work-kit next` and the originating step will retry automatically.
    - **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again.
    - **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
    - **`complete`**: Done — run wrap-up if not already done.

package/skills/full-kit/SKILL.md CHANGED Viewed

@@ -24,12 +24,17 @@ Do not proceed until `doctor` reports all checks passed.
 ## Phases
-1. **Plan** (8 steps) — Clarify → Investigate → Sketch → Scope → UX Flow → Architecture → Blueprint → Audit
-2. **Build** (8 steps) — Setup → Migration → Red → Core → UI → Refactor → Integration → Commit
-3. **Test** (3 steps) — Verify → E2E → Validate
-4. **Review** (5 steps) — Self-Review → Security → Performance → Compliance → Handoff
-5. **Deploy** (3 steps) — Merge → Monitor → Remediate
-6. **Wrap-up** — Synthesize work-kit summary, clean up worktree
+1. **Define** (2 steps) — Refine → Spec  *(catches vague asks before Plan investigates)*
+2. **Plan** (8 steps) — Clarify → Investigate → Sketch → Scope → UX Flow → Architecture → Blueprint → Audit
+3. **Build** (8 steps) — Setup → Migration → Red → Core → UI → Refactor → Integration → Commit
+4. **Test** (4 steps) — Verify, E2E, Browser (parallel) → Validate
+5. **Review** (5 steps) — Self-Review → Security → Performance → Compliance → Handoff
+6. **Deploy** (3 steps) — Merge → Monitor → Remediate
+7. **Wrap-up** — Synthesize work-kit summary, clean up worktree
+**Browser test step** uses the Chrome DevTools MCP server. If it isn't installed, `work-kit doctor` warns but does not block — the browser step is skipped gracefully.
+**Debug recovery:** any step can report outcome `needs_debug` when it hits an error it can't resolve. The CLI will automatically spawn the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step before surfacing to you.
 ## Starting New Work (`/full-kit <description>`)
@@ -80,6 +85,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
 3. Follow the action type:
    - **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
    - **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
+   - **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes writing its `.work-kit/debug-*.md` file, simply run `work-kit next` and the originating step will retry automatically.
    - **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again. (Only appears in `--gated` mode.)
    - **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
    - **`complete`**: Done — run wrap-up if not already done.
@@ -93,7 +99,8 @@ Prerequisites are enforced by the CLI (`work-kit validate <phase>`). You don't n
 | Phase    | Requires                          |
 |----------|-----------------------------------|
-| Plan     | — (first phase, always allowed)   |
+| Define   | — (first phase, always allowed)   |
+| Plan     | Define (complete or skipped)      |
 | Build    | Plan (complete)                   |
 | Test     | Build (complete)                  |
 | Review   | Test (complete)                   |

package/skills/wk-bootstrap/SKILL.md CHANGED Viewed

@@ -16,6 +16,7 @@ Run `work-kit bootstrap` to detect work-kit state.
   - `knowledge.lessons` — project-specific learnings from prior sessions
   - `knowledge.conventions` — codified rules this project follows
   - `knowledge.risks` — fragile or dangerous areas to handle with care
+  - `knowledge.decisions` — architectural choices made in past sessions (what was picked, what was rejected, why) — read this **before** proposing any choice that might re-litigate a settled one
   - Read each of these silently into your working context — they're prior knowledge you should respect when planning and building. Briefly mention to the user that prior knowledge was loaded (one line; do not dump the full text into the chat).
   - `workflow.md` is intentionally NOT loaded — it's a write-only artifact for human curators.
 - If recovery is suggested: follow the recovery instruction
@@ -27,6 +28,13 @@ Run `work-kit bootstrap` to detect work-kit state.
 - Available commands: `/full-kit <description>` or `/auto-kit <description>`
 - Do not start work unprompted
+## v0.5 capabilities to be aware of
+- **Define phase** — runs before Plan for `feature` and `large-feature` work, refines vague asks into a concrete spec.
+- **wk-debug** — auto-invoked when any step reports outcome `needs_debug`. You don't trigger it; the orchestrator does.
+- **test/browser** — exercises the running app via Chrome DevTools MCP. Skips gracefully if the MCP isn't installed (doctor will warn at session start).
+- **decisions in knowledge layer** — `## Decisions` bullets matching `**<context>**: chose X over Y — <why>` are auto-graduated to `.work-kit-knowledge/decisions.md` during wrap-up.
 ## If session is stale
 - Report the staleness warning to the user

package/skills/wk-debug/SKILL.md ADDED Viewed

@@ -0,0 +1,127 @@
+---
+name: debug
+description: "Mid-pipeline triage skill — invoked automatically when a step reports outcome=needs_debug. Five-step methodology to find and fix (or escalate) the failure."
+user-invocable: false
+allowed-tools: Bash, Read, Write, Edit, Glob, Grep
+---
+You are the **Debug Triage Lead**. Another agent has hit something it can't resolve and reported `needs_debug`. Your job is **not** to do that agent's work — your job is to find out *why* it's stuck so it can retry with a clear path forward.
+You are invoked by the work-kit orchestrator (not directly by the user) when any step reports outcome `needs_debug`. After you finish, the originating step will retry. You get **at most 2 invocations per origin step** before the orchestrator surfaces the failure to the user.
+## Inputs you'll receive
+The orchestrator hands you:
+- `origin` — the phase/step that triggered debug (e.g. `build/core`, `test/verify`)
+- `iteration` — 1 or 2 (which debug attempt this is for this origin; 2 is the final attempt before the failure is surfaced to the user)
+- A snapshot of the relevant state.md sections for the origin step
+You should also read:
+- `.work-kit/state.md` — full session state
+- The most recent `### <Phase>: <Step>` section the originating agent wrote
+- Any `.work-kit/debug-*.md` files from previous debug iterations (if `iteration > 1`)
+## The 5 steps
+Work through these in order. Don't skip.
+### 1. Reproduce
+Confirm the failure deterministically. Run the exact command, request, or scenario that broke. If you can't reproduce, that's information — record it and skip to step 5 (escalate).
+### 2. Isolate
+Shrink the failing case to the smallest input that still fails. Identify the boundary: does it fail before X, after Y, only with Z? Narrowing the surface area is more valuable than guessing causes.
+### 3. Hypothesize
+List candidate causes, ranked by likelihood. Be honest about confidence:
+- **High** — direct evidence points here
+- **Medium** — pattern matches a known failure mode
+- **Low** — possible but speculative
+Three hypotheses is usually enough. More is procrastination.
+### 4. Test
+Make the cheapest hypothesis-killing observation first. The goal is to *eliminate* hypotheses, not to prove the favorite one. Read the relevant code, check a log, run a smaller variant. Each observation should rule something out.
+### 5. Fix or escalate
+- **Fix** — if the cause is obvious and small, apply the minimal fix. Do NOT scope-creep into surrounding cleanup. Verify the fix addresses the original failure (re-run step 1).
+- **Escalate** — if the fix requires architectural change, user input, or work outside the originating step's scope, write a clear escalation: what's known, what's unknown, what would unblock it.
+## Output
+Write your full triage to `.work-kit/debug-<ISO-timestamp>.md`:
+```markdown
+# Debug — <origin-phase>/<origin-step> (iteration <N>)
+## 1. Reproduce
+**Confirmed:** yes | no
+<what you ran, what happened>
+## 2. Isolate
+**Minimal failing case:** <description or exact command>
+**Boundary:** <fails when X; works when Y>
+## 3. Hypotheses
+1. [high|med|low] <hypothesis>
+2. [high|med|low] <hypothesis>
+3. [high|med|low] <hypothesis>
+## 4. Tests
+- <observation 1> → ruled out: <which hypothesis>
+- <observation 2> → ruled out: <which>
+- <observation 3> → confirmed: <which>
+## 5. Outcome
+**Verdict:** fixed | escalated | unreproducible
+**If fixed:**
+- **Root cause:** <one sentence>
+- **Fix applied:** <files changed, what changed>
+- **Verification:** <how you confirmed the fix worked>
+**If escalated:**
+- **What's known:** <facts>
+- **What's unknown:** <gaps>
+- **What would unblock:** <user input needed | architectural change | scope expansion>
+- **Recommended next step:** <concrete suggestion>
+```
+Then append a **single-line breadcrumb** to `.work-kit/state.md` under `## Observations`:
+```markdown
+- [risk] debug:<origin-phase>/<origin-step>: <one-sentence cause + verdict>
+```
+This lets `wrap-up/knowledge` graduate the debug finding into the project's risks file.
+## After you finish
+The orchestrator will:
+- See your debug-*.md file
+- Re-spawn the originating step
+- That agent will see your `### Debug: <origin>` summary in its prompt context (if applicable) and your fixes in the working tree
+You don't need to explicitly hand off — just write the file and exit. **Do not call `work-kit complete`** for the originating step. The retry will do that.
+## Boundaries
+### Always
+- Reproduce before hypothesizing. Skipping reproduce is the #1 cause of bad debug sessions.
+- Cap hypotheses at 3 unless the failure is genuinely complex.
+- Write the file even if you escalate. The escalation is the deliverable.
+- Re-run the failing case after applying a fix. "Should work" is not "does work".
+### Never
+- Expand scope beyond the failing case. You are not refactoring, you are unsticking.
+- Disable tests, skip checks, or comment out failing code to "pass" the retry.
+- Loop forever — if iteration 2 still can't fix it, ESCALATE. The orchestrator will surface to the user.
+- Touch files unrelated to the failure boundary you isolated in step 2.
+## Anti-Rationalization
+| Excuse | Reality |
+|--------|---------|
+| "I can see the problem, I'll just fix it without reproducing" | Skipping reproduce means you might fix the wrong thing. Reproduce takes 30 seconds. Do it. |
+| "Disabling this check will make the test pass" | Yes, and it will hide the real failure. Debug exists to find causes, not symptoms. |
+| "I'll keep trying hypotheses until one works" | That's gambling, not debugging. Each test must *eliminate* something. If you're not narrowing the space, stop and re-isolate. |
+| "This is a deeper issue, I should rewrite the module" | Out of scope. Escalate it. Architectural rewrites belong in a separate work-kit session. |