work-kit-cli 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +8 -1
  2. package/cli/src/commands/bootstrap.test.ts +1 -1
  3. package/cli/src/commands/bootstrap.ts +22 -14
  4. package/cli/src/commands/complete.ts +76 -2
  5. package/cli/src/commands/doctor.ts +51 -2
  6. package/cli/src/commands/extract.ts +47 -25
  7. package/cli/src/commands/init.test.ts +3 -1
  8. package/cli/src/commands/init.ts +22 -15
  9. package/cli/src/commands/learn.test.ts +2 -2
  10. package/cli/src/commands/learn.ts +2 -1
  11. package/cli/src/config/agent-map.ts +10 -2
  12. package/cli/src/config/constants.ts +7 -0
  13. package/cli/src/config/loopback-routes.ts +6 -0
  14. package/cli/src/config/model-routing.ts +7 -1
  15. package/cli/src/config/workflow.ts +12 -6
  16. package/cli/src/index.ts +2 -2
  17. package/cli/src/state/helpers.test.ts +1 -1
  18. package/cli/src/state/schema.ts +11 -4
  19. package/cli/src/state/validators.test.ts +21 -2
  20. package/cli/src/state/validators.ts +2 -2
  21. package/cli/src/utils/knowledge.ts +7 -1
  22. package/cli/src/workflow/gates.ts +1 -0
  23. package/cli/src/workflow/parallel.ts +6 -1
  24. package/cli/src/workflow/transitions.test.ts +2 -2
  25. package/package.json +2 -2
  26. package/skills/auto-kit/SKILL.md +8 -1
  27. package/skills/full-kit/SKILL.md +14 -7
  28. package/skills/wk-bootstrap/SKILL.md +8 -0
  29. package/skills/wk-debug/SKILL.md +127 -0
  30. package/skills/wk-define/SKILL.md +87 -0
  31. package/skills/wk-define/steps/refine.md +71 -0
  32. package/skills/wk-define/steps/spec.md +70 -0
  33. package/skills/wk-plan/steps/architecture.md +16 -0
  34. package/skills/wk-test/steps/browser.md +92 -0
  35. package/skills/wk-test/steps/e2e.md +45 -23
  36. package/skills/wk-wrap-up/steps/knowledge.md +8 -3
package/README.md CHANGED
@@ -1,6 +1,13 @@
1
1
  # work-kit
2
2
 
3
- Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes, 6 phases, 27 steps — orchestrated by a TypeScript CLI with reusable skill files.
3
+ Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes, 7 phases, 31 steps, plus auto-debug recovery — orchestrated by a TypeScript CLI with reusable skill files.
4
+
5
+ ## What's new in v0.5
6
+
7
+ - **Define phase** runs before Plan to refine vague asks into a concrete spec (auto-skipped for bug fixes/refactors).
8
+ - **wk-debug** triage skill auto-fires when any step reports `needs_debug`, then the originating step retries (max 2 iterations). Not user-invocable — fires from inside the pipeline.
9
+ - **`test/browser`** drives the running app via Chrome DevTools MCP and verifies user-facing acceptance criteria in a real browser. Skips gracefully if the MCP isn't installed.
10
+ - **`decision` knowledge type** auto-graduates `## Decisions` bullets into `.work-kit-knowledge/decisions.md` so future sessions don't re-litigate settled choices.
4
11
 
5
12
  ## Installation
6
13
 
@@ -48,7 +48,7 @@ describe("bootstrapCommand", () => {
48
48
  assert.equal(result.slug, "test-feature");
49
49
  assert.equal(result.mode, "full-kit");
50
50
  assert.equal(result.status, "in-progress");
51
- assert.equal(result.phase, "plan");
51
+ assert.equal(result.phase, "define");
52
52
  assert.equal(result.recovery, null);
53
53
  });
54
54
 
@@ -2,12 +2,13 @@ import fs from "node:fs";
2
2
  import { findWorktreeRoot, readState, writeState, statePath } from "../state/store.js";
3
3
  import { unpause } from "../state/helpers.js";
4
4
  import { CLI_BINARY, STALE_THRESHOLD_MS } from "../config/constants.js";
5
- import { fileForType, readKnowledgeFile } from "../utils/knowledge.js";
5
+ import { fileForType, readKnowledgeFile, KNOWLEDGE_TYPES, type KnowledgeType } from "../utils/knowledge.js";
6
6
 
7
7
  export interface BootstrapKnowledge {
8
8
  lessons?: string;
9
9
  conventions?: string;
10
10
  risks?: string;
11
+ decisions?: string;
11
12
  }
12
13
 
13
14
  export interface BootstrapResult {
@@ -24,8 +25,8 @@ export interface BootstrapResult {
24
25
  nextAction?: string;
25
26
  recovery?: string | null;
26
27
  /**
27
- * Project-level knowledge files (lessons/conventions/risks) read from
28
- * <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
28
+ * Project-level knowledge files (lessons/conventions/risks/decisions) read
29
+ * from <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
29
30
  * workflow.md is intentionally excluded — it's a write-only artifact for
30
31
  * human curators, not session context.
31
32
  */
@@ -89,26 +90,33 @@ export function bootstrapCommand(startDir?: string, options: BootstrapOptions =
89
90
  nextAction = `Continue ${state.currentPhase ?? "next phase"}${state.currentStep ? "/" + state.currentStep : ""}. Run \`${CLI_BINARY} next\` to get the agent prompt.`;
90
91
  }
91
92
 
92
- // Load project-level knowledge files (best effort, never breaks bootstrap).
93
93
  // workflow.md is intentionally excluded — it's a write-only artifact for
94
94
  // human curators, not session context.
95
+ const INJECTED_TYPES: KnowledgeType[] = KNOWLEDGE_TYPES.filter(
96
+ (t) => t !== "workflow"
97
+ ) as KnowledgeType[];
98
+ // Map each knowledge type to its plural field name on BootstrapKnowledge.
99
+ const TYPE_TO_FIELD: Record<Exclude<KnowledgeType, "workflow">, keyof BootstrapKnowledge> = {
100
+ lesson: "lessons",
101
+ convention: "conventions",
102
+ risk: "risks",
103
+ decision: "decisions",
104
+ };
105
+
95
106
  let knowledge: BootstrapKnowledge | undefined;
96
107
  try {
97
108
  const mainRepoRoot = state.metadata?.mainRepoRoot;
98
109
  if (mainRepoRoot) {
99
- const lessons = readKnowledgeFile(mainRepoRoot, fileForType("lesson"));
100
- const conventions = readKnowledgeFile(mainRepoRoot, fileForType("convention"));
101
- const risks = readKnowledgeFile(mainRepoRoot, fileForType("risk"));
102
- if (lessons || conventions || risks) {
103
- knowledge = {
104
- ...(lessons && { lessons }),
105
- ...(conventions && { conventions }),
106
- ...(risks && { risks }),
107
- };
110
+ const collected: BootstrapKnowledge = {};
111
+ for (const type of INJECTED_TYPES) {
112
+ const content = readKnowledgeFile(mainRepoRoot, fileForType(type));
113
+ if (content) {
114
+ collected[TYPE_TO_FIELD[type as Exclude<KnowledgeType, "workflow">]] = content;
115
+ }
108
116
  }
117
+ if (Object.keys(collected).length > 0) knowledge = collected;
109
118
  }
110
119
  } catch (err: any) {
111
- // Non-fatal: log to stderr but don't break bootstrap
112
120
  process.stderr.write(`work-kit: failed to load knowledge files: ${err.message}\n`);
113
121
  }
114
122
 
@@ -5,9 +5,12 @@ import { isPhaseComplete, nextStepInPhase } from "../workflow/transitions.js";
5
5
  import { checkLoopback, countLoopbacksForRoute } from "../workflow/loopbacks.js";
6
6
  import { PHASE_ORDER } from "../config/workflow.js";
7
7
  import { parseLocation, resetToLocation } from "../state/helpers.js";
8
- import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, CLI_BINARY } from "../config/constants.js";
9
- import { isStepOutcome, STEP_OUTCOMES, type Action, type PhaseName, type StepOutcome, type WorkKitState } from "../state/schema.js";
8
+ import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, MAX_DEBUG_ITERATIONS, SKILL_DIR_PREFIX, CLI_BINARY } from "../config/constants.js";
9
+ import { isStepOutcome, STEP_OUTCOMES, type Action, type Location, type PhaseName, type StepOutcome, type StepState, type WorkKitState } from "../state/schema.js";
10
10
  import { stateMdPath } from "../state/store.js";
11
+ import { resolveModel } from "../config/model-routing.js";
12
+
13
+ const DEBUG_SKILL_FILE = `.claude/skills/${SKILL_DIR_PREFIX}debug/SKILL.md`;
11
14
 
12
15
  export function completeCommand(target: string, outcome?: string, worktreeRoot?: string): Action {
13
16
  const root = worktreeRoot || findWorktreeRoot();
@@ -50,6 +53,10 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
50
53
  return { action: "error", message: `${phase}/${step} is skipped and cannot be completed. Add it to the workflow first.` };
51
54
  }
52
55
 
56
+ if (typedOutcome === "needs_debug") {
57
+ return handleNeedsDebug(root, state, stepState, { phase, step });
58
+ }
59
+
53
60
  stepState.status = "completed";
54
61
  stepState.completedAt = new Date().toISOString();
55
62
  if (typedOutcome) {
@@ -143,6 +150,73 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
143
150
 
144
151
  // ── Archive on completion ──────────────────────────────────────────
145
152
 
153
/**
 * Divert a step that reported `needs_debug` into the wk-debug skill.
 *
 * Debug attempts are recorded as loopback entries with `kind === "debug"`
 * whose `from` matches the originating step; the number of such entries is
 * the iteration count for that step.
 *
 * - While the count is below `MAX_DEBUG_ITERATIONS`, a new debug loopback is
 *   appended, the originating step is put back to `in-progress` (any recorded
 *   outcome / completion timestamp is removed) and a `spawn_debug_agent`
 *   action is returned. The step is intended to be retried by the next
 *   `next` call once the debug agent finishes.
 * - Once the cap is reached, no new loopback is recorded and a
 *   `wait_for_user` action is returned so the failure is surfaced.
 *
 * @param root      Worktree root the state is persisted under.
 * @param state     Current tracker state; mutated and written back.
 * @param stepState State record of the originating step; mutated in place.
 * @param origin    Phase/step that reported `needs_debug`.
 * @returns The action the orchestrator should perform next.
 */
function handleNeedsDebug(
  root: string,
  state: WorkKitState,
  stepState: StepState,
  origin: Location
): Action {
  const originLabel = `${origin.phase}/${origin.step}`;

  // Count earlier debug diversions that started from this exact step.
  const debugCount = state.loopbacks.filter(
    (lb) => lb.kind === "debug" && lb.from.phase === origin.phase && lb.from.step === origin.step
  ).length;

  if (debugCount >= MAX_DEBUG_ITERATIONS) {
    writeState(root, state);
    return {
      action: "wait_for_user",
      message: `${originLabel} reported needs_debug but max debug iterations (${MAX_DEBUG_ITERATIONS}) reached. Surface to user — manual intervention required.`,
    };
  }

  const iteration = debugCount + 1;
  state.loopbacks.push({
    from: origin,
    to: origin,
    reason: `Step reported needs_debug — invoking wk-debug (iteration ${iteration})`,
    timestamp: new Date().toISOString(),
    kind: "debug",
  });

  // Keep the originating step open so it is picked up again after triage.
  stepState.status = "in-progress";
  delete stepState.outcome;
  delete stepState.completedAt;
  writeState(root, state);

  // The prompt names the CLI through CLI_BINARY so it always agrees with the
  // `onComplete` command returned below.
  const agentPrompt = [
    `# Debug Triage`,
    ``,
    `**Origin:** ${originLabel}`,
    `**Iteration:** ${iteration} of ${MAX_DEBUG_ITERATIONS}`,
    `**Worktree:** ${root}`,
    ``,
    `## Instructions`,
    `Read and follow the skill file: \`${DEBUG_SKILL_FILE}\``,
    ``,
    `The originating step (${originLabel}) hit something it cannot resolve.`,
    `Read \`.work-kit/state.md\` and the originating agent's working notes for that step.`,
    ``,
    `Run the 5-step triage methodology. Write your full report to \`.work-kit/debug-<ISO-timestamp>.md\`.`,
    `Do NOT call \`${CLI_BINARY} complete\` for the originating step — when you finish, the orchestrator will re-run \`${CLI_BINARY} next\` and the originating step will retry automatically.`,
  ].join("\n");

  // The debug agent runs on whatever model is resolved for the originating step.
  const debugModel = resolveModel(state, origin.phase, origin.step);

  return {
    action: "spawn_debug_agent",
    origin,
    iteration,
    skillFile: DEBUG_SKILL_FILE,
    agentPrompt,
    onComplete: `${CLI_BINARY} next`,
    ...(debugModel && { model: debugModel }),
  };
}
219
+
146
220
  function archiveFolderName(slug: string, completedAt: string): string {
147
221
  return `${slug}-${completedAt.split("T")[0]}`;
148
222
  }
@@ -1,5 +1,6 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
+ import * as os from "node:os";
3
4
  import { execFileSync } from "node:child_process";
4
5
  import { findWorktreeRoot, readState, stateExists } from "../state/store.js";
5
6
 
@@ -35,7 +36,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
35
36
  }
36
37
 
37
38
  // 3. Phase skill files
38
- const phases = ["wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up"];
39
+ const phases = ["wk-define", "wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up", "wk-debug"];
39
40
  let phasesMissing = 0;
40
41
  for (const phase of phases) {
41
42
  const phasePath = path.join(skillsDir, phase, "SKILL.md");
@@ -49,6 +50,26 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
49
50
  checks.push({ name: "skill:phases", status: "fail", message: `${phasesMissing} phase skill(s) missing from ${skillsDir}` });
50
51
  }
51
52
 
53
+ // 3b. Chrome DevTools MCP availability (used by test/browser).
54
+ // Warn-only: if missing, the browser step skips itself but the rest of the
55
+ // pipeline runs unaffected.
56
+ const cdpMcpAvailable = detectChromeDevtoolsMcp();
57
+ if (cdpMcpAvailable === "yes") {
58
+ checks.push({ name: "mcp:chrome-devtools", status: "pass", message: "Chrome DevTools MCP detected" });
59
+ } else if (cdpMcpAvailable === "unknown") {
60
+ checks.push({
61
+ name: "mcp:chrome-devtools",
62
+ status: "warn",
63
+ message: "Chrome DevTools MCP could not be detected. The test/browser step will be skipped if invoked.",
64
+ });
65
+ } else {
66
+ checks.push({
67
+ name: "mcp:chrome-devtools",
68
+ status: "warn",
69
+ message: "Chrome DevTools MCP not configured. test/browser will skip — install the MCP server to enable live browser verification.",
70
+ });
71
+ }
72
+
52
73
  // 4. Git available
53
74
  try {
54
75
  const gitVersion = execFileSync("git", ["--version"], { encoding: "utf-8" }).trim();
@@ -62,7 +83,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
62
83
  if (root && stateExists(root)) {
63
84
  try {
64
85
  const state = readState(root);
65
- if (state.version === 2 && state.slug && state.status) {
86
+ if (state.version === 3 && state.slug && state.status) {
66
87
  checks.push({ name: "state", status: "pass", message: `Active work-kit: "${state.slug}" (${state.status})` });
67
88
  } else {
68
89
  checks.push({ name: "state", status: "warn", message: "tracker.json exists but has unexpected structure" });
@@ -77,3 +98,31 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
77
98
  const ok = checks.every((c) => c.status !== "fail");
78
99
  return { ok, checks };
79
100
  }
101
+
102
/**
 * Best-effort detection of the Chrome DevTools MCP server.
 *
 * MCP tools cannot be called from the CLI, so this scans the usual config
 * files for any chrome-devtools-flavored text:
 *   - `<homeDir>/.claude.json` (where Claude Code keeps user/local-scope MCP
 *     servers)
 *   - `<homeDir>/.claude/settings.json`
 *   - `<homeDir>/.claude/mcp.json`
 *   - `<projectDir>/.mcp.json` (project-scope MCP servers)
 *
 * The match is a plain substring-style regex over the raw file text, so it
 * can report a false positive if the name appears in unrelated content;
 * callers should treat the result as advisory only.
 *
 * @param homeDir    Home directory to probe; defaults to `os.homedir()`.
 * @param projectDir Project directory to probe; defaults to `process.cwd()`.
 * @returns `"yes"` when any readable config mentions chrome-devtools, `"no"`
 *          when at least one config was readable but none mention it, and
 *          `"unknown"` when no config file could be read at all.
 */
function detectChromeDevtoolsMcp(
  homeDir: string = os.homedir(),
  projectDir: string = process.cwd()
): "yes" | "no" | "unknown" {
  const claudeDir = path.join(homeDir, ".claude");
  const candidates = [
    path.join(homeDir, ".claude.json"),
    path.join(claudeDir, "settings.json"),
    path.join(claudeDir, "mcp.json"),
    path.join(projectDir, ".mcp.json"),
  ];

  let sawAny = false;
  for (const file of candidates) {
    let raw: string;
    try {
      raw = fs.readFileSync(file, "utf-8");
    } catch {
      continue; // missing or unreadable — skip
    }
    sawAny = true;
    if (/chrome[-_]?devtools/i.test(raw)) return "yes";
  }
  return sawAny ? "no" : "unknown";
}
@@ -43,47 +43,69 @@ function emptyByType(): Record<KnowledgeType, number> {
43
43
  const OBSERVATION_RE = /^-\s*\[([a-z]+)(?::([a-z0-9-]+\/[a-z0-9-]+))?\]\s*(.+)$/i;
44
44
 
45
45
  /**
46
- * Walk state.md once and emit raw entries from the three sections we know:
47
- * Observations (typed bullets), Decisions (any bullet convention),
48
- * Deviations (any bullet workflow with [deviation] prefix).
46
+ * A bullet under `## Decisions` is harvested when it follows the documented
47
+ * shape `**<context>**: chose <X> over <Y> — <why>`. Free-form lines are
48
+ * skipped (not errors). The leading `**context**:` becomes the entry's title.
49
+ */
50
+ const DECISION_RE = /^-\s*\*\*([^*]+)\*\*\s*:\s*(.+)$/;
51
+
52
+ /**
53
+ * Walk state.md once and emit raw entries from:
54
+ * - `## Observations` — typed bullets (`- [lesson|convention|risk|workflow|decision] text`)
55
+ * - `## Decisions` — bullets matching `**<context>**: chose X over Y — <why>`
56
+ *
57
+ * `## Deviations` stays scratch — agents routinely dump test plans there.
49
58
  */
50
59
  function parseStateMd(stateMd: string): RawEntry[] {
51
60
  const out: RawEntry[] = [];
52
61
  if (!stateMd) return out;
53
62
 
54
- // Only `## Observations` is auto-harvested. `## Decisions` and `## Deviations`
55
- // are agent scratch space during normal phase work — they routinely contain
56
- // test plans, acceptance-criteria checklists, and self-review dumps. Auto-
57
- // routing them floods workflow.md with noise. Agents opt into harvesting by
58
- // writing typed bullets (`- [lesson|convention|risk|workflow] text`) under
59
- // `## Observations`.
60
- let inObservations = false;
63
+ type Section = "observations" | "decisions" | "other";
64
+ let section: Section = "other";
61
65
 
62
66
  for (const rawLine of stateMd.split("\n")) {
63
67
  const trimmed = rawLine.trim();
64
68
 
65
69
  if (trimmed.startsWith("## ")) {
66
- inObservations = trimmed.slice(3).trim().toLowerCase() === "observations";
70
+ const heading = trimmed.slice(3).trim().toLowerCase();
71
+ if (heading === "observations") section = "observations";
72
+ else if (heading === "decisions") section = "decisions";
73
+ else section = "other";
67
74
  continue;
68
75
  }
69
76
 
70
- if (!inObservations) continue;
77
+ if (section === "other") continue;
71
78
  if (!trimmed.startsWith("-") || trimmed.startsWith("<!--")) continue;
72
79
 
73
- const m = trimmed.match(OBSERVATION_RE);
74
- if (!m) continue;
75
- const tag = m[1].toLowerCase();
76
- if (!isKnowledgeType(tag)) continue;
77
- const phaseStep = m[2];
78
- const text = m[3].trim();
79
- if (text.length === 0) continue;
80
- const entry: RawEntry = { type: tag, text, source: "auto-state-md" };
81
- if (phaseStep) {
82
- const [p, s] = phaseStep.split("/");
83
- entry.phase = p;
84
- entry.step = s;
80
+ if (section === "observations") {
81
+ const m = trimmed.match(OBSERVATION_RE);
82
+ if (!m) continue;
83
+ const tag = m[1].toLowerCase();
84
+ if (!isKnowledgeType(tag)) continue;
85
+ const phaseStep = m[2];
86
+ const text = m[3].trim();
87
+ if (text.length === 0) continue;
88
+ const entry: RawEntry = { type: tag, text, source: "auto-state-md" };
89
+ if (phaseStep) {
90
+ const [p, s] = phaseStep.split("/");
91
+ entry.phase = p;
92
+ entry.step = s;
93
+ }
94
+ out.push(entry);
95
+ continue;
85
96
  }
86
- out.push(entry);
97
+
98
+ // section === "decisions"
99
+ const m = trimmed.match(DECISION_RE);
100
+ if (!m) continue;
101
+ const context = m[1].trim();
102
+ const rationale = m[2].trim();
103
+ if (context.length === 0 || rationale.length === 0) continue;
104
+ out.push({
105
+ type: "decision",
106
+ text: `**${context}**: ${rationale}`,
107
+ source: "auto-state-md",
108
+ });
87
109
  }
88
110
 
89
111
  return out;
@@ -40,7 +40,9 @@ describe("initCommand", () => {
40
40
  );
41
41
  assert.equal(state.slug, "add-user-login");
42
42
  assert.equal(state.status, "in-progress");
43
- assert.equal(state.currentPhase, "plan");
43
+ assert.equal(state.currentPhase, "define");
44
+ assert.equal(state.currentStep, "refine");
45
+ assert.equal(state.version, 3);
44
46
  });
45
47
 
46
48
  it("returns spawn_agent action", () => {
@@ -44,7 +44,16 @@ function buildPhases(workflow?: WorkflowStep[]): Record<PhaseName, PhaseState> {
44
44
  return phases;
45
45
  }
46
46
 
47
- function generateStateMd(slug: string, branch: string, mode: string, description: string, classification?: string, workflow?: WorkflowStep[]): string {
47
+ function generateStateMd(
48
+ slug: string,
49
+ branch: string,
50
+ mode: string,
51
+ description: string,
52
+ firstPhase: string,
53
+ firstStep: string,
54
+ classification?: string,
55
+ workflow?: WorkflowStep[]
56
+ ): string {
48
57
  const title = slug.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
49
58
  const date = new Date().toISOString().split("T")[0];
50
59
 
@@ -60,8 +69,8 @@ function generateStateMd(slug: string, branch: string, mode: string, description
60
69
  md += `**Classification:** ${classification}\n`;
61
70
  }
62
71
 
63
- md += `**Phase:** plan
64
- **Step:** clarify
72
+ md += `**Phase:** ${firstPhase}
73
+ **Step:** ${firstStep}
65
74
  **Status:** in-progress
66
75
 
67
76
  ## Description
@@ -182,21 +191,19 @@ export function initCommand(options: {
182
191
  workflow = buildFullWorkflow();
183
192
  }
184
193
 
185
- // Find first active step
186
- let firstPhase: PhaseName = "plan";
187
- let firstStep = "clarify";
188
-
189
- if (workflow) {
190
- const first = workflow.find((s) => s.included);
191
- if (first) {
192
- firstPhase = first.phase;
193
- firstStep = first.step;
194
- }
194
+ // First active step is always the first `included` entry in the workflow.
195
+ // For full-kit that's define/refine; for auto-kit it depends on classification.
196
+ let firstPhase: PhaseName = "define";
197
+ let firstStep = "refine";
198
+ const first = workflow?.find((s) => s.included);
199
+ if (first) {
200
+ firstPhase = first.phase;
201
+ firstStep = first.step;
195
202
  }
196
203
 
197
204
  // Build state
198
205
  const state: WorkKitState = {
199
- version: 2,
206
+ version: 3,
200
207
  slug,
201
208
  branch,
202
209
  started: new Date().toISOString(),
@@ -221,7 +228,7 @@ export function initCommand(options: {
221
228
 
222
229
  // Write state files
223
230
  writeState(worktreeRoot, state);
224
- writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, classification, workflow));
231
+ writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, firstPhase, firstStep, classification, workflow));
225
232
 
226
233
  const model = resolveModel(state, firstPhase, firstStep);
227
234
 
@@ -146,8 +146,8 @@ describe("learnCommand", () => {
146
146
  path.join(tmp, KNOWLEDGE_DIR, "lessons.md"),
147
147
  "utf-8"
148
148
  );
149
- // Init starts at plan/clarify
150
- assert.ok(content.includes("plan/clarify"));
149
+ // Full-kit init now starts at define/refine (Define is the new first phase)
150
+ assert.ok(content.includes("define/refine"));
151
151
  });
152
152
 
153
153
  it("extracts typed bullets from state.md ## Observations", () => {
@@ -4,6 +4,7 @@ import {
4
4
  ensureKnowledgeDir,
5
5
  fileForType,
6
6
  isKnowledgeType,
7
+ KNOWLEDGE_TYPES,
7
8
  redact,
8
9
  type KnowledgeEntry,
9
10
  type KnowledgeType,
@@ -36,7 +37,7 @@ export function learnCommand(opts: LearnOptions): LearnResult {
36
37
  if (!opts.type || !isKnowledgeType(opts.type)) {
37
38
  return {
38
39
  action: "error",
39
- message: `Invalid --type "${opts.type}". Must be one of: lesson, convention, risk, workflow.`,
40
+ message: `Invalid --type "${opts.type}". Must be one of: ${KNOWLEDGE_TYPES.join(", ")}.`,
40
41
  };
41
42
  }
42
43
  if (!opts.text || opts.text.trim().length === 0) {
@@ -12,8 +12,11 @@ export interface AgentContext {
12
12
 
13
13
  // Phase-level context (what the phase runner agent reads)
14
14
  export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
15
+ define: {
16
+ sections: ["## Description"],
17
+ },
15
18
  plan: {
16
- sections: ["## Description", "## Criteria"],
19
+ sections: ["## Description", "### Define: Final", "## Criteria"],
17
20
  },
18
21
  build: {
19
22
  sections: ["### Plan: Final", "## Criteria", "## Description"],
@@ -34,10 +37,15 @@ export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
34
37
 
35
38
  // Step-level context (for parallel sub-agents that need specific sections)
36
39
  export const STEP_CONTEXT: Record<string, AgentContext> = {
40
+ // Define steps
41
+ "define/refine": { sections: ["## Description"] },
42
+ "define/spec": { sections: ["## Description", "### Define: Refine"] },
43
+
37
44
  // Test steps
38
45
  "test/verify": { sections: ["### Build: Final", "## Criteria"] },
46
+ "test/browser": { sections: ["### Build: Final", "## Criteria", "### Plan: UX Flow"] },
39
47
  "test/e2e": { sections: ["### Build: Final", "### Plan: Final"] },
40
- "test/validate": { sections: ["### Test: Verify", "### Test: E2E", "## Criteria"] },
48
+ "test/validate": { sections: ["### Test: Verify", "### Test: Browser", "### Test: E2E", "## Criteria"] },
41
49
 
42
50
  // Review steps
43
51
  "review/self-review": { sections: ["### Build: Final"], needsGitDiff: true },
@@ -48,6 +48,13 @@ export const KNOWLEDGE_LOCK = ".lock";
48
48
 
49
49
  export const MAX_LOOPBACKS_PER_ROUTE = 2;
50
50
 
51
+ /**
52
+ * Max times wk-debug can be invoked for the same originating step before the
53
+ * orchestrator surfaces the failure to the user. Tracked separately from
54
+ * standard loopbacks via `LoopbackRecord.kind === "debug"`.
55
+ */
56
+ export const MAX_DEBUG_ITERATIONS = 2;
57
+
51
58
  // ── Staleness ───────────────────────────────────────────────────────
52
59
 
53
60
  /** Threshold (ms) after which an in-progress state is considered stale. */
@@ -12,6 +12,12 @@ export interface LoopbackRoute {
12
12
  }
13
13
 
14
14
  export const LOOPBACK_ROUTES: LoopbackRoute[] = [
15
+ {
16
+ from: { phase: "define", step: "spec" },
17
+ triggerOutcome: "revise",
18
+ to: { phase: "define", step: "refine" },
19
+ reason: "Spec found ambiguity — looping back to Refine",
20
+ },
15
21
  {
16
22
  from: { phase: "plan", step: "audit" },
17
23
  triggerOutcome: "revise",
@@ -32,6 +32,7 @@ const HARD_DEFAULT: ModelTier = "sonnet";
32
32
  // ── Phase defaults ──────────────────────────────────────────────────
33
33
 
34
34
  export const BY_PHASE: Record<PhaseName, ModelTier> = {
35
+ define: "opus",
35
36
  plan: "sonnet",
36
37
  build: "sonnet",
37
38
  test: "sonnet",
@@ -43,6 +44,10 @@ export const BY_PHASE: Record<PhaseName, ModelTier> = {
43
44
  // ── Step-level overrides (phase/step keys) ──────────────────────────
44
45
 
45
46
  export const BY_STEP: Record<string, ModelTier> = {
47
+ // Define — refining a vague ask is reasoning-heavy
48
+ "define/refine": "opus",
49
+ "define/spec": "sonnet",
50
+
46
51
  // Plan — research/design-heavy steps benefit from opus
47
52
  "plan/clarify": "sonnet",
48
53
  "plan/investigate": "opus",
@@ -63,8 +68,9 @@ export const BY_STEP: Record<string, ModelTier> = {
63
68
  "build/integration": "sonnet",
64
69
  "build/commit": "haiku",
65
70
 
66
- // Test — verify is mechanical, e2e/validate need judgment
71
+ // Test — verify is mechanical, browser/e2e/validate need judgment
67
72
  "test/verify": "haiku",
73
+ "test/browser": "sonnet",
68
74
  "test/e2e": "sonnet",
69
75
  "test/validate": "sonnet",
70
76