work-kit-cli 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +8 -1
  2. package/cli/src/commands/bootstrap.test.ts +1 -1
  3. package/cli/src/commands/bootstrap.ts +22 -14
  4. package/cli/src/commands/complete.ts +76 -2
  5. package/cli/src/commands/doctor.ts +51 -2
  6. package/cli/src/commands/extract.ts +30 -18
  7. package/cli/src/commands/init.test.ts +3 -1
  8. package/cli/src/commands/init.ts +22 -15
  9. package/cli/src/commands/learn.test.ts +29 -2
  10. package/cli/src/commands/learn.ts +2 -1
  11. package/cli/src/commands/setup.ts +17 -1
  12. package/cli/src/config/agent-map.ts +10 -2
  13. package/cli/src/config/constants.ts +7 -0
  14. package/cli/src/config/loopback-routes.ts +6 -0
  15. package/cli/src/config/model-routing.ts +7 -1
  16. package/cli/src/config/workflow.ts +12 -6
  17. package/cli/src/index.ts +2 -2
  18. package/cli/src/state/helpers.test.ts +1 -1
  19. package/cli/src/state/schema.ts +11 -4
  20. package/cli/src/state/validators.test.ts +21 -2
  21. package/cli/src/state/validators.ts +2 -2
  22. package/cli/src/utils/knowledge.ts +7 -1
  23. package/cli/src/workflow/gates.ts +1 -0
  24. package/cli/src/workflow/parallel.ts +6 -1
  25. package/cli/src/workflow/transitions.test.ts +2 -2
  26. package/package.json +2 -2
  27. package/skills/auto-kit/SKILL.md +8 -1
  28. package/skills/full-kit/SKILL.md +14 -7
  29. package/skills/wk-bootstrap/SKILL.md +8 -0
  30. package/skills/wk-debug/SKILL.md +127 -0
  31. package/skills/wk-define/SKILL.md +87 -0
  32. package/skills/wk-define/steps/refine.md +71 -0
  33. package/skills/wk-define/steps/spec.md +70 -0
  34. package/skills/wk-plan/steps/architecture.md +16 -0
  35. package/skills/wk-test/steps/browser.md +92 -0
  36. package/skills/wk-test/steps/e2e.md +45 -23
  37. package/skills/wk-wrap-up/SKILL.md +1 -1
  38. package/skills/wk-wrap-up/steps/knowledge.md +9 -4
@@ -14,7 +14,8 @@ export const PHASE_ORDER: PhaseName[] = [...PHASE_NAMES];
14
14
  // ── Prerequisites ───────────────────────────────────────────────────
15
15
 
16
16
  export const PHASE_PREREQUISITES: Record<PhaseName, PhaseName | null> = {
17
- plan: null,
17
+ define: null,
18
+ plan: "define", // when define is fully skipped, validators treat the skipped phase as satisfied
18
19
  build: "plan",
19
20
  test: "build",
20
21
  review: "test",
@@ -36,55 +37,60 @@ type InclusionRule = "YES" | "skip" | "if UI" | "if DB" | "optional";
36
37
 
37
38
  const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
38
39
  "bug-fix": {
40
+ "define/refine": "skip", "define/spec": "skip",
39
41
  "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
40
42
  "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
41
43
  "build/setup": "skip", "build/migration": "skip", "build/red": "YES", "build/core": "YES",
42
44
  "build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
43
- "test/verify": "YES", "test/e2e": "skip", "test/validate": "YES",
45
+ "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "YES",
44
46
  "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
45
47
  "review/compliance": "skip", "review/handoff": "YES",
46
48
  "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
47
49
  "wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
48
50
  },
49
51
  "small-change": {
52
+ "define/refine": "skip", "define/spec": "skip",
50
53
  "plan/clarify": "YES", "plan/investigate": "skip", "plan/sketch": "skip", "plan/scope": "skip",
51
54
  "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
52
55
  "build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
53
56
  "build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
54
- "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
57
+ "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
55
58
  "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
56
59
  "review/compliance": "skip", "review/handoff": "YES",
57
60
  "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
58
61
  "wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
59
62
  },
60
63
  refactor: {
64
+ "define/refine": "skip", "define/spec": "skip",
61
65
  "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
62
66
  "plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
63
67
  "build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
64
68
  "build/ui": "if UI", "build/refactor": "YES", "build/integration": "skip", "build/commit": "YES",
65
- "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
69
+ "test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
66
70
  "review/self-review": "YES", "review/security": "skip", "review/performance": "YES",
67
71
  "review/compliance": "skip", "review/handoff": "YES",
68
72
  "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
69
73
  "wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
70
74
  },
71
75
  feature: {
76
+ "define/refine": "YES", "define/spec": "YES",
72
77
  "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
73
78
  "plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "skip",
74
79
  "build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
75
80
  "build/ui": "if UI", "build/refactor": "skip", "build/integration": "YES", "build/commit": "YES",
76
- "test/verify": "YES", "test/e2e": "if UI", "test/validate": "YES",
81
+ "test/verify": "YES", "test/browser": "if UI", "test/e2e": "if UI", "test/validate": "YES",
77
82
  "review/self-review": "YES", "review/security": "YES", "review/performance": "skip",
78
83
  "review/compliance": "YES", "review/handoff": "YES",
79
84
  "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
80
85
  "wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
81
86
  },
82
87
  "large-feature": {
88
+ "define/refine": "YES", "define/spec": "YES",
83
89
  "plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
84
90
  "plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "YES",
85
91
  "build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
86
92
  "build/ui": "if UI", "build/refactor": "YES", "build/integration": "YES", "build/commit": "YES",
87
- "test/verify": "YES", "test/e2e": "YES", "test/validate": "YES",
93
+ "test/verify": "YES", "test/browser": "if UI", "test/e2e": "YES", "test/validate": "YES",
88
94
  "review/self-review": "YES", "review/security": "YES", "review/performance": "YES",
89
95
  "review/compliance": "YES", "review/handoff": "YES",
90
96
  "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
package/cli/src/index.ts CHANGED
@@ -361,8 +361,8 @@ program
361
361
 
362
362
  program
363
363
  .command("learn")
364
- .description("Append a knowledge entry (lesson/convention/risk/workflow) to .work-kit-knowledge/")
365
- .requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow")
364
+ .description("Append a knowledge entry (lesson/convention/risk/workflow/decision) to .work-kit-knowledge/")
365
+ .requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow, decision")
366
366
  .requiredOption("--text <text>", "Free-form text. Secrets are auto-redacted at write time.")
367
367
  .option("--scope <glob>", "Optional path glob (stored, not yet used for filtering)")
368
368
  .option("--phase <phase>", "Override session phase auto-fill")
@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
14
14
  phases[phase] = { status: "pending", steps };
15
15
  }
16
16
  return {
17
- version: 2,
17
+ version: 3,
18
18
  slug: "test",
19
19
  branch: "feature/test",
20
20
  started: "2026-01-01",
@@ -1,15 +1,17 @@
1
1
  // ── Phase & Step Types ──────────────────────────────────────────────
2
2
 
3
- export const PHASE_NAMES = ["plan", "build", "test", "review", "deploy", "wrap-up"] as const;
3
+ export const PHASE_NAMES = ["define", "plan", "build", "test", "review", "deploy", "wrap-up"] as const;
4
4
  export type PhaseName = (typeof PHASE_NAMES)[number];
5
5
 
6
+ export const DEFINE_STEPS = ["refine", "spec"] as const;
6
7
  export const PLAN_STEPS = ["clarify", "investigate", "sketch", "scope", "ux-flow", "architecture", "blueprint", "audit"] as const;
7
8
  export const BUILD_STEPS = ["setup", "migration", "red", "core", "ui", "refactor", "integration", "commit"] as const;
8
- export const TEST_STEPS = ["verify", "e2e", "validate"] as const;
9
+ export const TEST_STEPS = ["verify", "e2e", "browser", "validate"] as const;
9
10
  export const REVIEW_STEPS = ["self-review", "security", "performance", "compliance", "handoff"] as const;
10
11
  export const DEPLOY_STEPS = ["merge", "monitor", "remediate"] as const;
11
12
  export const WRAPUP_STEPS = ["summary", "knowledge"] as const;
12
13
 
14
+ export type DefineStep = (typeof DEFINE_STEPS)[number];
13
15
  export type PlanStep = (typeof PLAN_STEPS)[number];
14
16
  export type BuildStep = (typeof BUILD_STEPS)[number];
15
17
  export type TestStep = (typeof TEST_STEPS)[number];
@@ -17,9 +19,10 @@ export type ReviewStep = (typeof REVIEW_STEPS)[number];
17
19
  export type DeployStep = (typeof DEPLOY_STEPS)[number];
18
20
  export type WrapUpStep = (typeof WRAPUP_STEPS)[number];
19
21
 
20
- export type StepName = PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
22
+ export type StepName = DefineStep | PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
21
23
 
22
24
  export const STEPS_BY_PHASE: Record<PhaseName, readonly string[]> = {
25
+ define: DEFINE_STEPS,
23
26
  plan: PLAN_STEPS,
24
27
  build: BUILD_STEPS,
25
28
  test: TEST_STEPS,
@@ -87,6 +90,7 @@ export const STEP_OUTCOMES = [
87
90
  "changes_requested", // review handoff requested changes
88
91
  "fix_needed", // deploy merge blocked, fix required
89
92
  "fix_and_redeploy", // remediation requires another deploy cycle
93
+ "needs_debug", // step hit an error it can't resolve — invoke wk-debug, then return
90
94
  "blocked", // step cannot proceed without external input
91
95
  "skipped", // step intentionally skipped at runtime
92
96
  ] as const;
@@ -127,6 +131,8 @@ export interface LoopbackRecord {
127
131
  to: Location;
128
132
  reason: string;
129
133
  timestamp: string;
134
+ /** "debug" loopbacks are virtual: the agent spawns wk-debug then retries the same step. */
135
+ kind?: "standard" | "debug";
130
136
  }
131
137
 
132
138
  // ── Workflow (auto-kit) ─────────────────────────────────────────────
@@ -144,7 +150,7 @@ export type WorkStatus = "in-progress" | "paused" | "completed" | "failed";
144
150
  // ── Main State ──────────────────────────────────────────────────────
145
151
 
146
152
  export interface WorkKitState {
147
- version: 2;
153
+ version: 3;
148
154
  slug: string;
149
155
  branch: string;
150
156
  started: string;
@@ -182,6 +188,7 @@ export interface AgentSpec {
182
188
  export type Action =
183
189
  | { action: "spawn_agent"; phase: PhaseName; step: string; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
184
190
  | { action: "spawn_parallel_agents"; agents: AgentSpec[]; thenSequential?: AgentSpec; onComplete: string }
191
+ | { action: "spawn_debug_agent"; origin: Location; iteration: number; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
185
192
  | { action: "wait_for_user"; message: string }
186
193
  | { action: "loopback"; from: Location; to: Location; reason: string }
187
194
  | { action: "complete"; message: string }
@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
14
14
  phases[phase] = { status: "pending", steps };
15
15
  }
16
16
  return {
17
- version: 2,
17
+ version: 3,
18
18
  slug: "test",
19
19
  branch: "feature/test",
20
20
  started: "2026-01-01",
@@ -38,14 +38,32 @@ function completePhase(state: WorkKitState, phase: PhaseName): void {
38
38
  }
39
39
 
40
40
  describe("validatePhasePrerequisites", () => {
41
- it("plan has no prerequisites — valid", () => {
41
+ it("define has no prerequisites — valid", () => {
42
42
  const state = makeState();
43
+ const result = validatePhasePrerequisites(state, "define");
44
+ assert.equal(result.valid, true);
45
+ });
46
+
47
+ it("plan with define complete — valid", () => {
48
+ const state = makeState();
49
+ completePhase(state, "define");
50
+ const result = validatePhasePrerequisites(state, "plan");
51
+ assert.equal(result.valid, true);
52
+ });
53
+
54
+ it("plan with define skipped — valid (skipped satisfies prerequisite)", () => {
55
+ const state = makeState();
56
+ state.phases.define.status = "skipped";
57
+ for (const s of Object.values(state.phases.define.steps)) {
58
+ s.status = "skipped";
59
+ }
43
60
  const result = validatePhasePrerequisites(state, "plan");
44
61
  assert.equal(result.valid, true);
45
62
  });
46
63
 
47
64
  it("build with plan incomplete — invalid", () => {
48
65
  const state = makeState();
66
+ completePhase(state, "define");
49
67
  const result = validatePhasePrerequisites(state, "build");
50
68
  assert.equal(result.valid, false);
51
69
  assert.equal(result.missingPrerequisite, "plan");
@@ -53,6 +71,7 @@ describe("validatePhasePrerequisites", () => {
53
71
 
54
72
  it("build with plan complete — valid", () => {
55
73
  const state = makeState();
74
+ completePhase(state, "define");
56
75
  completePhase(state, "plan");
57
76
  const result = validatePhasePrerequisites(state, "build");
58
77
  assert.equal(result.valid, true);
@@ -66,9 +66,9 @@ export function validatePhasePrerequisites(state: WorkKitState, phase: PhaseName
66
66
  return { valid: true, message: "Prerequisites met for deploy" };
67
67
  }
68
68
 
69
- // General case
69
+ // General case: completed OR fully skipped both satisfy the prerequisite.
70
70
  const prereqState = state.phases[prereq];
71
- if (prereqState.status !== "completed") {
71
+ if (prereqState.status !== "completed" && prereqState.status !== "skipped") {
72
72
  return {
73
73
  valid: false,
74
74
  message: `${phase} requires ${prereq} to be complete. Current: ${prereqState.status}`,
@@ -13,7 +13,7 @@ export const AUTO_BLOCK_START = "<!-- work-kit:auto:start -->";
13
13
  export const AUTO_BLOCK_END = "<!-- work-kit:auto:end -->";
14
14
  export const MANUAL_HEADER = "## Manual";
15
15
 
16
- export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow"] as const;
16
+ export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow", "decision"] as const;
17
17
  export type KnowledgeType = (typeof KNOWLEDGE_TYPES)[number];
18
18
 
19
19
  export function isKnowledgeType(value: string): value is KnowledgeType {
@@ -25,6 +25,7 @@ const TYPE_TO_FILE: Record<KnowledgeType, string> = {
25
25
  convention: "conventions.md",
26
26
  risk: "risks.md",
27
27
  workflow: "workflow.md",
28
+ decision: "decisions.md",
28
29
  };
29
30
 
30
31
  const FILE_TO_TITLE: Record<string, string> = {
@@ -32,6 +33,7 @@ const FILE_TO_TITLE: Record<string, string> = {
32
33
  "conventions.md": "Conventions",
33
34
  "risks.md": "Risks",
34
35
  "workflow.md": "Workflow Feedback",
36
+ "decisions.md": "Decisions",
35
37
  };
36
38
 
37
39
  const FILE_TO_BLURB: Record<string, string> = {
@@ -43,6 +45,8 @@ const FILE_TO_BLURB: Record<string, string> = {
43
45
  "Known fragile or dangerous areas. Touch these with care.",
44
46
  "workflow.md":
45
47
  "Feedback about the work-kit workflow itself as observed in this project — skill quality, step skips, loopbacks, failure modes. Mined manually to improve work-kit upstream.",
48
+ "decisions.md":
49
+ "Architectural and design decisions made during work-kit sessions: what was chosen, what was rejected, why. Format mirrors a lightweight ADR — read these before re-litigating a settled choice.",
46
50
  };
47
51
 
48
52
  // ── Path Resolvers ──────────────────────────────────────────────────
@@ -180,6 +184,8 @@ benefits.
180
184
  - **lessons.md** — things you learned about this codebase (project-specific).
181
185
  - **conventions.md** — codified rules this project follows.
182
186
  - **risks.md** — fragile or dangerous areas to handle with care.
187
+ - **decisions.md** — architectural choices made during sessions: what was
188
+ picked, what was rejected, why. Read before re-litigating a settled choice.
183
189
  - **workflow.md** — feedback about the work-kit workflow itself as observed
184
190
  in this project. Mined manually across projects to improve work-kit.
185
191
 
@@ -14,6 +14,7 @@ export const WAIT_AFTER_PHASE: Set<PhaseName> = new Set([
14
14
  // ── Phase Display Names ──────────────────────────────────────────────
15
15
 
16
16
  export const PHASE_DISPLAY_NAMES: Record<PhaseName, string> = {
17
+ define: "Define",
17
18
  plan: "Plan",
18
19
  build: "Build",
19
20
  test: "Test",
@@ -15,7 +15,12 @@ export interface ParallelGroup {
15
15
  */
16
16
  export const DEFAULT_PARALLEL_GROUPS: Record<string, ParallelGroup> = {
17
17
  test: {
18
- parallel: ["verify", "e2e"],
18
+ // verify (test suite), e2e (Playwright/etc), and browser (Chrome DevTools
19
+ // MCP) all run as independent observations. validate consolidates them.
20
+ // browser is auto-skipped on non-UI classifications, so the parallel set
21
+ // shrinks naturally. browser.md is responsible for graceful behavior if
22
+ // the dev server is shared with e2e.
23
+ parallel: ["verify", "e2e", "browser"],
19
24
  thenSequential: "validate",
20
25
  },
21
26
  review: {
@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
14
14
  phases[phase] = { status: "pending", steps };
15
15
  }
16
16
  return {
17
- version: 2,
17
+ version: 3,
18
18
  slug: "test",
19
19
  branch: "feature/test",
20
20
  started: "2026-01-01",
@@ -91,7 +91,7 @@ describe("determineNextStep", () => {
91
91
  state.currentPhase = null;
92
92
  const result = determineNextStep(state);
93
93
  assert.equal(result.type, "phase-boundary");
94
- assert.equal(result.phase, "plan");
94
+ assert.equal(result.phase, "define");
95
95
  });
96
96
 
97
97
  it("returns step for current phase with pending work", () => {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "work-kit-cli",
3
- "version": "0.4.0",
4
- "description": "Structured development workflow for Claude Code. Two modes, 6 phases, 27 steps.",
3
+ "version": "0.5.0",
4
+ "description": "Structured development workflow for Claude Code. Two modes, 7 phases, 31 steps, plus debug recovery.",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "work-kit": "cli/bin/work-kit.mjs",
@@ -26,13 +26,16 @@ Do not proceed until `doctor` reports all checks passed.
26
26
 
27
27
  These are the building blocks you pick from:
28
28
 
29
+ - **Define:** Refine, Spec *(included for `feature` and `large-feature` only)*
29
30
  - **Plan:** Clarify, Investigate, Sketch, Scope, UX Flow, Architecture, Blueprint, Audit
30
31
  - **Build:** Setup, Migration, Red, Core, UI, Refactor, Integration, Commit
31
- - **Test:** Verify, E2E, Validate
32
+ - **Test:** Verify, E2E, Browser, Validate *(Browser uses Chrome DevTools MCP, included for `if UI`)*
32
33
  - **Review:** Self-Review, Security, Performance, Compliance, Handoff
33
34
  - **Deploy:** Merge, Monitor, Remediate (optional)
34
35
  - **Wrap-up**
35
36
 
37
+ **Debug recovery:** any step can report outcome `needs_debug`. The CLI auto-spawns the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step.
38
+
36
39
  ## Starting New Work (`/auto-kit <description>`)
37
40
 
38
41
  ### Step 1: Analyze
@@ -55,6 +58,8 @@ Based on the classification, select steps. Use this table as a starting point, t
55
58
 
56
59
  | Step | bug-fix | small-change | refactor | feature | large-feature |
57
60
  |------------------------|---------|--------------|----------|---------|---------------|
61
+ | **Define: Refine** | skip | skip | skip | YES | YES |
62
+ | **Define: Spec** | skip | skip | skip | YES | YES |
58
63
  | **Plan: Clarify** | YES | YES | YES | YES | YES |
59
64
  | **Plan: Investigate** | YES | skip | YES | YES | YES |
60
65
  | **Plan: Sketch** | skip | skip | skip | YES | YES |
@@ -73,6 +78,7 @@ Based on the classification, select steps. Use this table as a starting point, t
73
78
  | **Build: Commit** | YES | YES | YES | YES | YES |
74
79
  | **Test: Verify** | YES | YES | YES | YES | YES |
75
80
  | **Test: E2E** | skip | skip | skip | if UI | YES |
81
+ | **Test: Browser** | skip | skip | skip | if UI | if UI |
76
82
  | **Test: Validate** | YES | skip | skip | YES | YES |
77
83
  | **Review: Self-Review**| YES | YES | YES | YES | YES |
78
84
  | **Review: Security** | skip | skip | skip | YES | YES |
@@ -205,6 +211,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
205
211
  3. Follow the action type:
206
212
  - **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
207
213
  - **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
214
+ - **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes, simply run `work-kit next` and the originating step will retry automatically.
208
215
  - **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again.
209
216
  - **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
210
217
  - **`complete`**: Done — run wrap-up if not already done.
@@ -24,12 +24,17 @@ Do not proceed until `doctor` reports all checks passed.
24
24
 
25
25
  ## Phases
26
26
 
27
- 1. **Plan** (8 steps) — Clarify → Investigate → Sketch → Scope → UX Flow → Architecture → Blueprint → Audit
28
- 2. **Build** (8 steps) — Setup → Migration → Red → Core → UI → Refactor → Integration → Commit
29
- 3. **Test** (3 steps) — Verify → E2E → Validate
30
- 4. **Review** (5 steps) — Self-Review → Security → Performance → Compliance → Handoff
31
- 5. **Deploy** (3 steps) — Merge → Monitor → Remediate
32
- 6. **Wrap-up** — Synthesize work-kit summary, clean up worktree
27
+ 1. **Define** (2 steps) — Refine → Spec *(catches vague asks before Plan investigates)*
28
+ 2. **Plan** (8 steps) — Clarify → Investigate → Sketch → Scope → UX Flow → Architecture → Blueprint → Audit
29
+ 3. **Build** (8 steps) — Setup → Migration → Red → Core → UI → Refactor → Integration → Commit
30
+ 4. **Test** (4 steps) — Verify, E2E, Browser (parallel) → Validate
31
+ 5. **Review** (5 steps) — Self-Review → Security → Performance → Compliance → Handoff
32
+ 6. **Deploy** (3 steps) — Merge → Monitor → Remediate
33
+ 7. **Wrap-up** — Synthesize work-kit summary, clean up worktree
34
+
35
+ **Browser test step** uses the Chrome DevTools MCP server. If it isn't installed, `work-kit doctor` warns but does not block — the browser step is skipped gracefully.
36
+
37
+ **Debug recovery:** any step can report outcome `needs_debug` when it hits an error it can't resolve. The CLI will automatically spawn the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step before surfacing to you.
33
38
 
34
39
  ## Starting New Work (`/full-kit <description>`)
35
40
 
@@ -80,6 +85,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
80
85
  3. Follow the action type:
81
86
  - **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
82
87
  - **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
88
+ - **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes writing its `.work-kit/debug-*.md` file, simply run `work-kit next` and the originating step will retry automatically.
83
89
  - **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again. (Only appears in `--gated` mode.)
84
90
  - **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
85
91
  - **`complete`**: Done — run wrap-up if not already done.
@@ -93,7 +99,8 @@ Prerequisites are enforced by the CLI (`work-kit validate <phase>`). You don't n
93
99
 
94
100
  | Phase | Requires |
95
101
  |----------|-----------------------------------|
96
- | Plan | — (first phase, always allowed) |
102
+ | Define | — (first phase, always allowed) |
103
+ | Plan | Define (complete or skipped) |
97
104
  | Build | Plan (complete) |
98
105
  | Test | Build (complete) |
99
106
  | Review | Test (complete) |
@@ -16,6 +16,7 @@ Run `work-kit bootstrap` to detect work-kit state.
16
16
  - `knowledge.lessons` — project-specific learnings from prior sessions
17
17
  - `knowledge.conventions` — codified rules this project follows
18
18
  - `knowledge.risks` — fragile or dangerous areas to handle with care
19
+ - `knowledge.decisions` — architectural choices made in past sessions (what was picked, what was rejected, why) — read this **before** proposing any choice that might re-litigate a settled one
19
20
  - Read each of these silently into your working context — they're prior knowledge you should respect when planning and building. Briefly mention to the user that prior knowledge was loaded (one line; do not dump the full text into the chat).
20
21
  - `workflow.md` is intentionally NOT loaded — it's a write-only artifact for human curators.
21
22
  - If recovery is suggested: follow the recovery instruction
@@ -27,6 +28,13 @@ Run `work-kit bootstrap` to detect work-kit state.
27
28
  - Available commands: `/full-kit <description>` or `/auto-kit <description>`
28
29
  - Do not start work unprompted
29
30
 
31
+ ## v0.5 capabilities to be aware of
32
+
33
+ - **Define phase** — runs before Plan for `feature` and `large-feature` work, refines vague asks into a concrete spec.
34
+ - **wk-debug** — auto-invoked when any step reports outcome `needs_debug`. You don't trigger it; the orchestrator does.
35
+ - **test/browser** — exercises the running app via Chrome DevTools MCP. Skips gracefully if the MCP isn't installed (doctor will warn at session start).
36
+ - **decisions in knowledge layer** — `## Decisions` bullets matching `**<context>**: chose X over Y — <why>` are auto-graduated to `.work-kit-knowledge/decisions.md` during wrap-up.
37
+
30
38
  ## If session is stale
31
39
 
32
40
  - Report the staleness warning to the user
@@ -0,0 +1,127 @@
1
+ ---
2
+ name: debug
3
+ description: "Mid-pipeline triage skill — invoked automatically when a step reports outcome=needs_debug. Five-step methodology to find and fix (or escalate) the failure."
4
+ user-invocable: false
5
+ allowed-tools: Bash, Read, Write, Edit, Glob, Grep
6
+ ---
7
+
8
+ You are the **Debug Triage Lead**. Another agent has hit something it can't resolve and reported `needs_debug`. Your job is **not** to do that agent's work — your job is to find out *why* it's stuck so it can retry with a clear path forward.
9
+
10
+ You are invoked by the work-kit orchestrator (not directly by the user) when any step reports outcome `needs_debug`. After you finish, the originating step will retry. You get **at most 2 invocations per origin step** before the orchestrator surfaces the failure to the user.
11
+
12
+ ## Inputs you'll receive
13
+
14
+ The orchestrator hands you:
15
+ - `origin` — the phase/step that triggered debug (e.g. `build/core`, `test/verify`)
16
+ - `iteration` — 1 or 2 (which debug attempt this is for this origin, counting the current one)
17
+ - A snapshot of the relevant state.md sections for the origin step
18
+
19
+ You should also read:
20
+ - `.work-kit/state.md` — full session state
21
+ - The most recent `### <Phase>: <Step>` section the originating agent wrote
22
+ - Any `.work-kit/debug-*.md` files from previous debug iterations (if `iteration > 1`)
23
+
24
+ ## The 5 steps
25
+
26
+ Work through these in order. Don't skip.
27
+
28
+ ### 1. Reproduce
29
+ Confirm the failure deterministically. Run the exact command, request, or scenario that broke. If you can't reproduce, that's information — record it and skip to step 5 (escalate).
30
+
31
+ ### 2. Isolate
32
+ Shrink the failing case to the smallest input that still fails. Identify the boundary: does it fail before X, after Y, only with Z? Narrowing the surface area is more valuable than guessing causes.
33
+
34
+ ### 3. Hypothesize
35
+ List candidate causes, ranked by likelihood. Be honest about confidence:
36
+ - **High** — direct evidence points here
37
+ - **Medium** — pattern matches a known failure mode
38
+ - **Low** — possible but speculative
39
+
40
+ Three hypotheses is usually enough. More is procrastination.
41
+
42
+ ### 4. Test
43
+ Make the cheapest hypothesis-killing observation first. The goal is to *eliminate* hypotheses, not to prove the favorite one. Read the relevant code, check a log, run a smaller variant. Each observation should rule something out.
44
+
45
+ ### 5. Fix or escalate
46
+ - **Fix** — if the cause is obvious and small, apply the minimal fix. Do NOT scope-creep into surrounding cleanup. Verify the fix addresses the original failure (re-run step 1).
47
+ - **Escalate** — if the fix requires architectural change, user input, or work outside the originating step's scope, write a clear escalation: what's known, what's unknown, what would unblock it.
48
+
49
+ ## Output
50
+
51
+ Write your full triage to `.work-kit/debug-<ISO-timestamp>.md`:
52
+
53
+ ```markdown
54
+ # Debug — <origin-phase>/<origin-step> (iteration <N>)
55
+
56
+ ## 1. Reproduce
57
+ **Confirmed:** yes | no
58
+ <what you ran, what happened>
59
+
60
+ ## 2. Isolate
61
+ **Minimal failing case:** <description or exact command>
62
+ **Boundary:** <fails when X; works when Y>
63
+
64
+ ## 3. Hypotheses
65
+ 1. [high|med|low] <hypothesis>
66
+ 2. [high|med|low] <hypothesis>
67
+ 3. [high|med|low] <hypothesis>
68
+
69
+ ## 4. Tests
70
+ - <observation 1> → ruled out: <which hypothesis>
71
+ - <observation 2> → ruled out: <which>
72
+ - <observation 3> → confirmed: <which>
73
+
74
+ ## 5. Outcome
75
+ **Verdict:** fixed | escalated | unreproducible
76
+
77
+ **If fixed:**
78
+ - **Root cause:** <one sentence>
79
+ - **Fix applied:** <files changed, what changed>
80
+ - **Verification:** <how you confirmed the fix worked>
81
+
82
+ **If escalated:**
83
+ - **What's known:** <facts>
84
+ - **What's unknown:** <gaps>
85
+ - **What would unblock:** <user input needed | architectural change | scope expansion>
86
+ - **Recommended next step:** <concrete suggestion>
87
+ ```
88
+
89
+ Then append a **single-line breadcrumb** to `.work-kit/state.md` under `## Observations`:
90
+
91
+ ```markdown
92
+ - [risk] debug:<origin-phase>/<origin-step>: <one-sentence cause + verdict>
93
+ ```
94
+
95
+ This lets `wrap-up/knowledge` graduate the debug finding into the project's risks file.
96
+
97
+ ## After you finish
98
+
99
+ The orchestrator will:
100
+ - See your debug-*.md file
101
+ - Re-spawn the originating step
102
+ - That agent will see your `### Debug: <origin>` summary in its prompt context (if applicable) and your fixes in the working tree
103
+
104
+ You don't need to explicitly hand off — just write the file and exit. **Do not call `work-kit complete`** for the originating step. The retry will do that.
105
+
106
+ ## Boundaries
107
+
108
+ ### Always
109
+ - Reproduce before hypothesizing. Skipping reproduce is the #1 cause of bad debug sessions.
110
+ - Cap hypotheses at 3 unless the failure is genuinely complex.
111
+ - Write the file even if you escalate. The escalation is the deliverable.
112
+ - Re-run the failing case after applying a fix. "Should work" is not "does work".
113
+
114
+ ### Never
115
+ - Expand scope beyond the failing case. You are not refactoring, you are unsticking.
116
+ - Disable tests, skip checks, or comment out failing code to "pass" the retry.
117
+ - Loop forever — if iteration 2 still can't fix it, ESCALATE. The orchestrator will surface to the user.
118
+ - Touch files unrelated to the failure boundary you isolated in step 2.
119
+
120
+ ## Anti-Rationalization
121
+
122
+ | Excuse | Reality |
123
+ |--------|---------|
124
+ | "I can see the problem, I'll just fix it without reproducing" | Skipping reproduce means you might fix the wrong thing. Reproduce takes 30 seconds. Do it. |
125
+ | "Disabling this check will make the test pass" | Yes, and it will hide the real failure. Debug exists to find causes, not symptoms. |
126
+ | "I'll keep trying hypotheses until one works" | That's gambling, not debugging. Each test must *eliminate* something. If you're not narrowing the space, stop and re-isolate. |
127
+ | "This is a deeper issue, I should rewrite the module" | Out of scope. Escalate it. Architectural rewrites belong in a separate work-kit session. |