work-kit-cli 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/cli/src/commands/bootstrap.test.ts +117 -0
  2. package/cli/src/commands/bootstrap.ts +65 -0
  3. package/cli/src/commands/complete.ts +27 -2
  4. package/cli/src/config/phases.ts +6 -8
  5. package/cli/src/context/extractor.ts +9 -4
  6. package/cli/src/context/prompt-builder.ts +2 -1
  7. package/cli/src/context/redactor.test.ts +111 -0
  8. package/cli/src/context/redactor.ts +38 -0
  9. package/cli/src/index.ts +17 -0
  10. package/cli/src/observer/renderer.ts +12 -11
  11. package/package.json +1 -1
  12. package/skills/auto-kit/SKILL.md +8 -4
  13. package/skills/full-kit/SKILL.md +8 -4
  14. package/skills/wk-bootstrap/SKILL.md +29 -0
  15. package/skills/wk-build/SKILL.md +24 -0
  16. package/skills/wk-build/stages/commit.md +8 -0
  17. package/skills/wk-build/stages/core.md +10 -0
  18. package/skills/wk-build/stages/integration.md +8 -0
  19. package/skills/wk-build/stages/red.md +10 -0
  20. package/skills/wk-build/stages/refactor.md +10 -0
  21. package/skills/wk-deploy/SKILL.md +20 -0
  22. package/skills/wk-deploy/stages/merge.md +9 -1
  23. package/skills/wk-plan/SKILL.md +23 -0
  24. package/skills/wk-plan/stages/audit.md +14 -0
  25. package/skills/wk-plan/stages/blueprint.md +8 -0
  26. package/skills/wk-plan/stages/clarify.md +8 -0
  27. package/skills/wk-plan/stages/investigate.md +10 -0
  28. package/skills/wk-plan/stages/scope.md +8 -0
  29. package/skills/wk-review/SKILL.md +22 -1
  30. package/skills/wk-review/stages/compliance.md +11 -3
  31. package/skills/wk-review/stages/handoff.md +11 -0
  32. package/skills/wk-review/stages/performance.md +1 -0
  33. package/skills/wk-review/stages/security.md +11 -0
  34. package/skills/wk-review/stages/self-review.md +11 -0
  35. package/skills/wk-test/SKILL.md +20 -0
  36. package/skills/wk-test/stages/e2e.md +9 -0
  37. package/skills/wk-test/stages/validate.md +9 -0
  38. package/skills/wk-test/stages/verify.md +9 -0
  39. package/skills/wk-wrap-up/SKILL.md +18 -0
@@ -0,0 +1,117 @@
1
+ import { describe, it, afterEach } from "node:test";
2
+ import * as assert from "node:assert/strict";
3
+ import * as fs from "node:fs";
4
+ import * as path from "node:path";
5
+ import * as os from "node:os";
6
+ import { randomUUID } from "node:crypto";
7
+ import { bootstrapCommand } from "./bootstrap.js";
8
+ import { initCommand } from "./init.js";
9
+
10
+ function makeTmpDir(): string {
11
+ const dir = path.join(os.tmpdir(), `work-kit-test-${randomUUID()}`);
12
+ fs.mkdirSync(dir, { recursive: true });
13
+ return dir;
14
+ }
15
+
16
+ let tmpDirs: string[] = [];
17
+
18
+ afterEach(() => {
19
+ for (const dir of tmpDirs) {
20
+ fs.rmSync(dir, { recursive: true, force: true });
21
+ }
22
+ tmpDirs = [];
23
+ });
24
+
25
+ describe("bootstrapCommand", () => {
26
+ it("returns inactive when no state exists", () => {
27
+ const tmp = makeTmpDir();
28
+ tmpDirs.push(tmp);
29
+
30
+ const result = bootstrapCommand(tmp);
31
+ assert.equal(result.active, false);
32
+ assert.ok(result.nextAction?.includes("/full-kit"));
33
+ });
34
+
35
+ it("returns active state after init", () => {
36
+ const tmp = makeTmpDir();
37
+ tmpDirs.push(tmp);
38
+
39
+ initCommand({
40
+ mode: "full",
41
+ description: "Test feature",
42
+ worktreeRoot: tmp,
43
+ });
44
+
45
+ const result = bootstrapCommand(tmp);
46
+ assert.equal(result.active, true);
47
+ assert.equal(result.slug, "test-feature");
48
+ assert.equal(result.mode, "full-kit");
49
+ assert.equal(result.status, "in-progress");
50
+ assert.equal(result.phase, "plan");
51
+ assert.equal(result.recovery, null);
52
+ });
53
+
54
+ it("detects stale state", () => {
55
+ const tmp = makeTmpDir();
56
+ tmpDirs.push(tmp);
57
+
58
+ initCommand({
59
+ mode: "full",
60
+ description: "Stale test",
61
+ worktreeRoot: tmp,
62
+ });
63
+
64
+ // Backdate the state.json file to 3 hours ago
65
+ const stateFile = path.join(tmp, ".work-kit", "state.json");
66
+ const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000);
67
+ fs.utimesSync(stateFile, threeHoursAgo, threeHoursAgo);
68
+
69
+ const result = bootstrapCommand(tmp);
70
+ assert.equal(result.active, true);
71
+ assert.ok(result.recovery !== null);
72
+ assert.ok(result.recovery?.includes("stale"));
73
+ });
74
+
75
+ it("reports completed state", () => {
76
+ const tmp = makeTmpDir();
77
+ tmpDirs.push(tmp);
78
+
79
+ initCommand({
80
+ mode: "full",
81
+ description: "Done feature",
82
+ worktreeRoot: tmp,
83
+ });
84
+
85
+ // Manually set status to completed
86
+ const stateFile = path.join(tmp, ".work-kit", "state.json");
87
+ const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
88
+ state.status = "completed";
89
+ fs.writeFileSync(stateFile, JSON.stringify(state, null, 2));
90
+
91
+ const result = bootstrapCommand(tmp);
92
+ assert.equal(result.active, true);
93
+ assert.equal(result.status, "completed");
94
+ assert.ok(result.nextAction?.includes("complete"));
95
+ });
96
+
97
+ it("reports failed state", () => {
98
+ const tmp = makeTmpDir();
99
+ tmpDirs.push(tmp);
100
+
101
+ initCommand({
102
+ mode: "full",
103
+ description: "Failed feature",
104
+ worktreeRoot: tmp,
105
+ });
106
+
107
+ const stateFile = path.join(tmp, ".work-kit", "state.json");
108
+ const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
109
+ state.status = "failed";
110
+ fs.writeFileSync(stateFile, JSON.stringify(state, null, 2));
111
+
112
+ const result = bootstrapCommand(tmp);
113
+ assert.equal(result.active, true);
114
+ assert.equal(result.status, "failed");
115
+ assert.ok(result.nextAction?.includes("failed"));
116
+ });
117
+ });
@@ -0,0 +1,65 @@
1
+ import fs from "node:fs";
2
+ import { findWorktreeRoot, readState, statePath } from "../state/store.js";
3
+
/**
 * JSON-serialisable summary of the work-kit session found by `bootstrapCommand`.
 *
 * When `active` is false only `nextAction` is populated; every other field is
 * copied from the persisted state when a session exists.
 */
export interface BootstrapResult {
  /** True when a worktree root with work-kit state was located. */
  active: boolean;
  /** `state.slug` of the located session. */
  slug?: string;
  /** `state.branch` of the located session. */
  branch?: string;
  /** `state.mode` of the located session. */
  mode?: string;
  /** `state.currentPhase`; may be null. */
  phase?: string | null;
  /** `state.currentSubStage`; may be null. */
  subStage?: string | null;
  /** `state.status` of the located session. */
  status?: string;
  /** Human-readable suggestion for what to do next. */
  nextAction?: string;
  /** Staleness warning for an unfinished session, or null when none applies. */
  recovery?: string | null;
}

/** One hour in milliseconds; also the age after which an unfinished session counts as stale. */
const STALE_AFTER_MS = 60 * 60 * 1000;

/**
 * Detect the work-kit session reachable from `startDir` and describe how to proceed.
 *
 * @param startDir Directory handed to `findWorktreeRoot`; forwarded as-is when omitted.
 * @returns `{ active: false, nextAction }` when no worktree root is found, otherwise
 *          a snapshot of the persisted state plus `nextAction` and `recovery`.
 */
export function bootstrapCommand(startDir?: string): BootstrapResult {
  const root = findWorktreeRoot(startDir);

  if (!root) {
    return {
      active: false,
      nextAction:
        "No active work-kit session. Start one with /full-kit <description> or /auto-kit <description>.",
    };
  }

  const state = readState(root);
  const finished = state.status === "completed" || state.status === "failed";

  // Staleness only makes sense for a session that is still expected to move:
  // a completed or failed session is naturally old, and advising the caller to
  // "resume" it would contradict the nextAction reported below.
  let recovery: string | null = null;
  if (!finished) {
    try {
      const ageMs = Date.now() - fs.statSync(statePath(root)).mtimeMs;
      if (ageMs > STALE_AFTER_MS) {
        const hoursAgo = Math.round(ageMs / STALE_AFTER_MS);
        recovery = `State appears stale (last update ~${hoursAgo}h ago). Run \`npx work-kit-cli status\` to diagnose. If the agent crashed mid-stage, run \`npx work-kit-cli next\` to resume.`;
      }
    } catch {
      // Best-effort: if the state file cannot be stat'ed, report no staleness
      // rather than failing the whole bootstrap.
    }
  }

  let nextAction: string;
  if (state.status === "completed") {
    nextAction = "Work-kit session is complete. Run wrap-up or start a new session.";
  } else if (state.status === "failed") {
    nextAction = "Work-kit session failed. Run `npx work-kit-cli status` to see details.";
  } else if (recovery) {
    nextAction = recovery;
  } else {
    nextAction = `Continue ${state.currentPhase ?? "next phase"}${state.currentSubStage ? "/" + state.currentSubStage : ""}. Run \`npx work-kit-cli next\` to get the agent prompt.`;
  }

  return {
    active: true,
    slug: state.slug,
    branch: state.branch,
    mode: state.mode,
    phase: state.currentPhase,
    subStage: state.currentSubStage,
    status: state.status,
    nextAction,
    recovery,
  };
}
@@ -1,7 +1,8 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
+ import { execFileSync } from "node:child_process";
3
4
  import { readState, writeState, findWorktreeRoot, readStateMd } from "../state/store.js";
4
- import { isPhaseComplete } from "../engine/transitions.js";
5
+ import { isPhaseComplete, nextSubStageInPhase } from "../engine/transitions.js";
5
6
  import { checkLoopback } from "../engine/loopbacks.js";
6
7
  import { PHASE_ORDER } from "../config/phases.js";
7
8
  import { parseLocation, resetToLocation } from "../state/helpers.js";
@@ -117,6 +118,14 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
117
118
  };
118
119
  }
119
120
 
121
+ // Advance currentSubStage to the next pending sub-stage so the observer refreshes
122
+ const nextSS = nextSubStageInPhase(state, phase);
123
+ if (nextSS) {
124
+ state.currentSubStage = nextSS;
125
+ } else {
126
+ state.currentSubStage = null;
127
+ }
128
+
120
129
  writeState(root, state);
121
130
 
122
131
  return {
@@ -127,8 +136,24 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
127
136
 
128
137
  // ── Archive on completion ──────────────────────────────────────────
129
138
 
/**
 * Resolve the main repository root for a (possibly linked) git worktree.
 *
 * Runs `git worktree list --porcelain` inside `worktreeRoot` and returns the
 * path from the first `worktree <path>` line, which git lists first for the
 * main worktree.
 *
 * @param worktreeRoot Directory to run git in; also the fallback return value.
 * @returns The main worktree path, or `worktreeRoot` unchanged when git fails,
 *          times out, or prints no usable `worktree` line.
 */
function resolveMainRepoRoot(worktreeRoot: string): string {
  const prefix = "worktree ";
  try {
    const output = execFileSync("git", ["worktree", "list", "--porcelain"], {
      cwd: worktreeRoot,
      encoding: "utf-8",
      timeout: 5000,
      // Capture stdout only. Without this, git's stderr (e.g. "fatal: not a git
      // repository") is forwarded to the parent process even though the failure
      // is handled by the silent fallback below.
      stdio: ["ignore", "pipe", "ignore"],
    });
    const firstEntry = output.split("\n").find((line) => line.startsWith(prefix));
    const mainRoot = firstEntry?.slice(prefix.length).trim();
    // An empty path would make later path.join calls resolve relative to cwd,
    // so treat it the same as "not found".
    if (mainRoot) return mainRoot;
  } catch {
    // Best-effort lookup: any git failure falls through to the worktree root.
  }
  return worktreeRoot;
}
154
+
130
155
  function archiveCompleted(worktreeRoot: string, state: WorkKitState): void {
131
- const mainRoot = state.metadata.mainRepoRoot || worktreeRoot;
156
+ const mainRoot = resolveMainRepoRoot(worktreeRoot);
132
157
  const date = new Date().toISOString().split("T")[0];
133
158
  const slug = state.slug;
134
159
  const wkDir = path.join(mainRoot, ".claude", "work-kit");
@@ -41,7 +41,7 @@ const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
41
41
  "test/verify": "YES", "test/e2e": "skip", "test/validate": "YES",
42
42
  "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
43
43
  "review/compliance": "skip", "review/handoff": "YES",
44
- "deploy/merge": "optional", "deploy/monitor": "optional", "deploy/remediate": "optional",
44
+ "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
45
45
  "wrap-up/wrap-up": "YES",
46
46
  },
47
47
  "small-change": {
@@ -52,7 +52,7 @@ const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
52
52
  "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
53
53
  "review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
54
54
  "review/compliance": "skip", "review/handoff": "YES",
55
- "deploy/merge": "optional", "deploy/monitor": "optional", "deploy/remediate": "optional",
55
+ "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
56
56
  "wrap-up/wrap-up": "YES",
57
57
  },
58
58
  refactor: {
@@ -63,7 +63,7 @@ const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
63
63
  "test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
64
64
  "review/self-review": "YES", "review/security": "skip", "review/performance": "YES",
65
65
  "review/compliance": "skip", "review/handoff": "YES",
66
- "deploy/merge": "optional", "deploy/monitor": "optional", "deploy/remediate": "optional",
66
+ "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
67
67
  "wrap-up/wrap-up": "YES",
68
68
  },
69
69
  feature: {
@@ -74,7 +74,7 @@ const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
74
74
  "test/verify": "YES", "test/e2e": "if UI", "test/validate": "YES",
75
75
  "review/self-review": "YES", "review/security": "YES", "review/performance": "skip",
76
76
  "review/compliance": "YES", "review/handoff": "YES",
77
- "deploy/merge": "optional", "deploy/monitor": "optional", "deploy/remediate": "optional",
77
+ "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
78
78
  "wrap-up/wrap-up": "YES",
79
79
  },
80
80
  "large-feature": {
@@ -85,7 +85,7 @@ const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
85
85
  "test/verify": "YES", "test/e2e": "YES", "test/validate": "YES",
86
86
  "review/self-review": "YES", "review/security": "YES", "review/performance": "YES",
87
87
  "review/compliance": "YES", "review/handoff": "YES",
88
- "deploy/merge": "optional", "deploy/monitor": "optional", "deploy/remediate": "optional",
88
+ "deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
89
89
  "wrap-up/wrap-up": "YES",
90
90
  },
91
91
  };
@@ -110,9 +110,7 @@ export function buildFullWorkflow(): WorkflowStep[] {
110
110
  const steps: WorkflowStep[] = [];
111
111
  for (const phase of PHASE_ORDER) {
112
112
  for (const subStage of SUBSTAGES_BY_PHASE[phase]) {
113
- // Deploy is optional by default
114
- const included = phase !== "deploy";
115
- steps.push({ phase, subStage, included });
113
+ steps.push({ phase, subStage, included: true });
116
114
  }
117
115
  }
118
116
  return steps;
@@ -1,10 +1,11 @@
1
1
  import { readStateMd } from "../state/store.js";
2
+ import { redactIgnoredBlocks } from "./redactor.js";
2
3
 
3
4
  /**
4
5
  * Extract a specific ### section from state.md by heading.
5
6
  * Returns the content between the heading and the next ### heading (or end of file).
6
7
  */
7
- export function extractSection(stateMd: string, heading: string): string | null {
8
+ export function extractSection(stateMd: string, heading: string, redact?: boolean): string | null {
8
9
  // Normalize heading — ensure it starts with ###
9
10
  const prefix = heading.startsWith("###") ? heading : `### ${heading}`;
10
11
  const lines = stateMd.split("\n");
@@ -25,13 +26,15 @@ export function extractSection(stateMd: string, heading: string): string | null
25
26
  }
26
27
  }
27
28
 
28
- return captured.length > 0 ? captured.join("\n").trim() : null;
29
+ if (captured.length === 0) return null;
30
+ const content = captured.join("\n").trim();
31
+ return redact ? redactIgnoredBlocks(content) : content;
29
32
  }
30
33
 
31
34
  /**
32
35
  * Extract a ## section (top-level section like Description, Criteria).
33
36
  */
34
- export function extractTopSection(stateMd: string, heading: string): string | null {
37
+ export function extractTopSection(stateMd: string, heading: string, redact?: boolean): string | null {
35
38
  const prefix = heading.startsWith("##") ? heading : `## ${heading}`;
36
39
  const lines = stateMd.split("\n");
37
40
  let capturing = false;
@@ -51,7 +54,9 @@ export function extractTopSection(stateMd: string, heading: string): string | nu
51
54
  }
52
55
  }
53
56
 
54
- return captured.length > 0 ? captured.join("\n").trim() : null;
57
+ if (captured.length === 0) return null;
58
+ const content = captured.join("\n").trim();
59
+ return redact ? redactIgnoredBlocks(content) : content;
55
60
  }
56
61
 
57
62
  /**
@@ -3,6 +3,7 @@ import { getContextFor } from "../config/agent-map.js";
3
3
  import { extractSection, extractTopSection } from "./extractor.js";
4
4
  import { readStateMd } from "../state/store.js";
5
5
  import { skillFilePath } from "../config/phases.js";
6
+ import { redactIgnoredBlocks } from "./redactor.js";
6
7
 
7
8
  /**
8
9
  * Build a complete agent prompt for a given phase/sub-stage.
@@ -66,5 +67,5 @@ export function buildAgentPrompt(
66
67
  parts.push(`When done, report your outcome so the orchestrator can run: \`npx work-kit-cli complete ${phase}/${subStage} --outcome <outcome>\``);
67
68
  }
68
69
 
69
- return parts.join("\n");
70
+ return redactIgnoredBlocks(parts.join("\n"));
70
71
  }
@@ -0,0 +1,111 @@
1
+ import { describe, it } from "node:test";
2
+ import * as assert from "node:assert/strict";
3
+ import { redactIgnoredBlocks } from "./redactor.js";
4
+
5
+ describe("redactIgnoredBlocks", () => {
6
+ it("passes through content with no markers", () => {
7
+ const input = "line 1\nline 2\nline 3";
8
+ assert.equal(redactIgnoredBlocks(input), input);
9
+ });
10
+
11
+ it("redacts a block between start and end markers (// style)", () => {
12
+ const input = [
13
+ "before",
14
+ "// @wk-ignore-start",
15
+ "secret line 1",
16
+ "secret line 2",
17
+ "// @wk-ignore-end",
18
+ "after",
19
+ ].join("\n");
20
+
21
+ const result = redactIgnoredBlocks(input);
22
+ assert.ok(result.includes("before"));
23
+ assert.ok(result.includes("after"));
24
+ assert.ok(!result.includes("secret"));
25
+ assert.ok(result.includes("[redacted: 4 lines — @wk-ignore]"));
26
+ });
27
+
28
+ it("redacts a block with # comment style", () => {
29
+ const input = [
30
+ "before",
31
+ "# @wk-ignore-start",
32
+ "hidden = true",
33
+ "# @wk-ignore-end",
34
+ "after",
35
+ ].join("\n");
36
+
37
+ const result = redactIgnoredBlocks(input);
38
+ assert.ok(!result.includes("hidden"));
39
+ assert.ok(result.includes("[redacted: 3 lines — @wk-ignore]"));
40
+ });
41
+
42
+ it("redacts a block with HTML comment style", () => {
43
+ const input = [
44
+ "before",
45
+ "<!-- @wk-ignore-start -->",
46
+ "<div>secret</div>",
47
+ "<!-- @wk-ignore-end -->",
48
+ "after",
49
+ ].join("\n");
50
+
51
+ const result = redactIgnoredBlocks(input);
52
+ assert.ok(!result.includes("secret"));
53
+ assert.ok(result.includes("[redacted: 3 lines — @wk-ignore]"));
54
+ });
55
+
56
+ it("handles unclosed marker by redacting to EOF", () => {
57
+ const input = [
58
+ "before",
59
+ "// @wk-ignore-start",
60
+ "line 1",
61
+ "line 2",
62
+ "line 3",
63
+ ].join("\n");
64
+
65
+ const result = redactIgnoredBlocks(input);
66
+ assert.ok(result.includes("before"));
67
+ assert.ok(!result.includes("line 1"));
68
+ assert.ok(result.includes("(unclosed marker)"));
69
+ assert.ok(result.includes("[redacted: 4 lines"));
70
+ });
71
+
72
+ it("handles multiple separate blocks", () => {
73
+ const input = [
74
+ "top",
75
+ "// @wk-ignore-start",
76
+ "hidden1",
77
+ "// @wk-ignore-end",
78
+ "middle",
79
+ "// @wk-ignore-start",
80
+ "hidden2",
81
+ "// @wk-ignore-end",
82
+ "bottom",
83
+ ].join("\n");
84
+
85
+ const result = redactIgnoredBlocks(input);
86
+ assert.ok(result.includes("top"));
87
+ assert.ok(result.includes("middle"));
88
+ assert.ok(result.includes("bottom"));
89
+ assert.ok(!result.includes("hidden1"));
90
+ assert.ok(!result.includes("hidden2"));
91
+ // Two separate redaction placeholders
92
+ const matches = result.match(/\[redacted:/g);
93
+ assert.equal(matches?.length, 2);
94
+ });
95
+
96
+ it("handles single-line block (start and end on same concept)", () => {
97
+ const input = [
98
+ "before",
99
+ "// @wk-ignore-start",
100
+ "// @wk-ignore-end",
101
+ "after",
102
+ ].join("\n");
103
+
104
+ const result = redactIgnoredBlocks(input);
105
+ assert.ok(result.includes("[redacted: 2 lines — @wk-ignore]"));
106
+ });
107
+
108
+ it("returns empty string for empty input", () => {
109
+ assert.equal(redactIgnoredBlocks(""), "");
110
+ });
111
+ });
@@ -0,0 +1,38 @@
/**
 * Redact blocks between @wk-ignore-start and @wk-ignore-end markers.
 * Supports comment styles: //, #, --, <!-- -->
 * Replaces annotated blocks with a placeholder.
 */

// Markers are matched anywhere in a line, so the surrounding comment syntax
// does not matter.
const IGNORE_START = /@wk-ignore-start/;
const IGNORE_END = /@wk-ignore-end/;

/**
 * Replace every `@wk-ignore-start` … `@wk-ignore-end` region with one placeholder line.
 *
 * The marker lines themselves are part of the redacted region and are counted
 * in the placeholder's line total. A start marker with no matching end marker
 * redacts through the end of the input and the placeholder is tagged
 * `(unclosed marker)`. An end marker outside a block is passed through as-is.
 *
 * @param content Text to scan, split on `\n`.
 * @returns The text with each ignored region collapsed to
 *          `// [redacted: N lines — @wk-ignore]`. When the line that closes a
 *          region ends in `\r`, the placeholder keeps it so CRLF input stays CRLF.
 */
export function redactIgnoredBlocks(content: string): string {
  const lines = content.split("\n");
  const lastIndex = lines.length - 1;
  const result: string[] = [];
  let inBlock = false;
  let blockLineCount = 0;

  for (const [i, line] of lines.entries()) {
    if (!inBlock) {
      if (!IGNORE_START.test(line)) {
        result.push(line);
        continue;
      }
      // A start marker opens a new block; the marker line itself is redacted.
      inBlock = true;
      blockLineCount = 0;
    }

    blockLineCount++;
    const closed = IGNORE_END.test(line);
    if (closed || i === lastIndex) {
      const warning = closed ? "" : " (unclosed marker)";
      // Splitting on "\n" leaves a trailing "\r" on CRLF lines; carry it over
      // so the output does not end up with mixed line endings.
      const eol = line.endsWith("\r") ? "\r" : "";
      result.push(`// [redacted: ${blockLineCount} lines — @wk-ignore${warning}]${eol}`);
      inBlock = false;
    }
  }

  return result.join("\n");
}
package/cli/src/index.ts CHANGED
@@ -15,6 +15,7 @@ import { upgradeCommand } from "./commands/upgrade.js";
15
15
  import { completionsCommand } from "./commands/completions.js";
16
16
  import { observeCommand } from "./commands/observe.js";
17
17
  import { uninstallCommand } from "./commands/uninstall.js";
18
+ import { bootstrapCommand } from "./commands/bootstrap.js";
18
19
  import { bold, green, yellow, red } from "./utils/colors.js";
19
20
  import type { Classification, PhaseName } from "./state/schema.js";
20
21
 
@@ -250,4 +251,20 @@ program
250
251
  await uninstallCommand(targetPath);
251
252
  });
252
253
 
254
+ // ── bootstrap ───────────────────────────────────────────────────────
255
+
// Registers `work-kit-cli bootstrap`: prints the BootstrapResult as pretty JSON
// on stdout, or a one-line JSON error on stderr with exit code 1.
program
  .command("bootstrap")
  .description("Detect work-kit state and output session orientation")
  .option("--json", "Output as JSON", true)
  .action(() => {
    try {
      const result = bootstrapCommand();
      console.log(JSON.stringify(result, null, 2));
    } catch (e: unknown) {
      // Narrow before reading `.message`: a thrown non-Error value would
      // otherwise serialise to an error payload with no message at all.
      const message = e instanceof Error ? e.message : String(e);
      console.error(JSON.stringify({ action: "error", message }));
      process.exit(1);
    }
  });
269
+
253
270
  program.parse();
@@ -144,16 +144,8 @@ function renderWorkItem(item: WorkItemView, innerWidth: number): string[] {
144
144
  const gap2 = Math.max(2, innerWidth - modePlainLen - timingPlainLen);
145
145
  lines.push(modeStr + " ".repeat(gap2) + timingText);
146
146
 
147
- // Line 3: progress bar with phase label + substage position
148
- let phaseLabel = "—";
149
- if (item.currentPhase) {
150
- phaseLabel = item.currentSubStage
151
- ? `${item.currentPhase}/${item.currentSubStage}`
152
- : item.currentPhase;
153
- if (item.currentSubStageIndex != null && item.currentPhaseTotal != null) {
154
- phaseLabel += ` (${item.currentSubStageIndex}/${item.currentPhaseTotal})`;
155
- }
156
- }
147
+ // Line 3: progress bar with phase label only (no sub-stage inline)
148
+ const phaseLabel = item.currentPhase || "—";
157
149
  const barMaxWidth = Math.max(20, Math.min(40, innerWidth - 30));
158
150
  lines.push(" " + renderProgressBar(
159
151
  item.progress.completed,
@@ -163,10 +155,19 @@ function renderWorkItem(item: WorkItemView, innerWidth: number): string[] {
163
155
  barMaxWidth
164
156
  ));
165
157
 
166
- // Line 4: phase indicators
158
+ // Line 4: phase indicators with sub-stage shown under current phase
167
159
  const phaseStrs = item.phases.map(p => `${p.name} ${phaseIndicator(p.status)}`);
168
160
  lines.push(" " + phaseStrs.join(" "));
169
161
 
162
+ // Line 5 (optional): current sub-stage detail under the phase line
163
+ if (item.currentSubStage && item.currentPhase) {
164
+ let subLabel = `↳ ${item.currentSubStage}`;
165
+ if (item.currentSubStageIndex != null && item.currentPhaseTotal != null) {
166
+ subLabel += ` (${item.currentSubStageIndex}/${item.currentPhaseTotal})`;
167
+ }
168
+ lines.push(" " + dim(subLabel));
169
+ }
170
+
170
171
  // Line 5 (optional): loopbacks
171
172
  if (item.loopbacks.count > 0) {
172
173
  const lb = item.loopbacks;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "work-kit-cli",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "Structured development workflow for Claude Code. Two modes, 6 phases, 27 sub-stages.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -104,10 +104,14 @@ The table is a guide, not a rigid rule. Adjust based on the actual request:
104
104
 
105
105
  ## Continuing Work (`/auto-kit` with no args)
106
106
 
107
- 1. Find the active worktree — check `git worktree list` or look for `.work-kit/state.md`
108
- 2. Run `npx work-kit-cli status` to see current state
109
- 3. Run `npx work-kit-cli next` to get the next action
110
- 4. Follow the execution loop below
107
+ 1. Run `npx work-kit-cli bootstrap` to detect session state
108
+ 2. Parse the JSON response:
109
+ - If `active: false` — no session found; ask the user for a description and start new work
110
+ - If `recovery` is set — report the recovery suggestion to the user before continuing
111
+ - If `active: true` — report current state (slug, phase, sub-stage) to the user
112
+ 3. `cd` into the worktree directory
113
+ 4. Run `npx work-kit-cli next` to get the next action
114
+ 5. Follow the execution loop below
111
115
 
112
116
  ## Step Validation
113
117
 
@@ -44,10 +44,14 @@ Do not proceed until `doctor` reports all checks passed.
44
44
 
45
45
  ## Continuing Work (`/full-kit` with no args)
46
46
 
47
- 1. Find the active worktree — check `git worktree list` or look for `.work-kit/state.md`
48
- 2. Run `npx work-kit-cli status` to see current state
49
- 3. Run `npx work-kit-cli next` to get the next action
50
- 4. Follow the execution loop below
47
+ 1. Run `npx work-kit-cli bootstrap` to detect session state
48
+ 2. Parse the JSON response:
49
+ - If `active: false` — no session found; ask the user for a description and start new work
50
+ - If `recovery` is set — report the recovery suggestion to the user before continuing
51
+ - If `active: true` — report current state (slug, phase, sub-stage) to the user
52
+ 3. `cd` into the worktree directory
53
+ 4. Run `npx work-kit-cli next` to get the next action
54
+ 5. Follow the execution loop below
51
55
 
52
56
  ## Execution Loop
53
57
 
@@ -0,0 +1,29 @@
1
+ ---
2
+ name: bootstrap
3
+ description: "Session bootstrap — detect work-kit state and orient the agent at session start."
4
+ user-invocable: false
5
+ allowed-tools: Bash, Read
6
+ ---
7
+
8
+ # Session Bootstrap
9
+
10
+ Run `npx work-kit-cli bootstrap` to detect work-kit state.
11
+
12
+ ## If active work exists
13
+
14
+ - Report current state to the user: slug, phase, sub-stage, status
15
+ - If recovery is suggested: follow the recovery instruction
16
+ - Otherwise: run `npx work-kit-cli next` to continue the workflow
17
+
18
+ ## If no active work
19
+
20
+ - Inform the user that work-kit is available
21
+ - Available commands: `/full-kit <description>` or `/auto-kit <description>`
22
+ - Do not start work unprompted
23
+
24
+ ## If session is stale
25
+
26
+ - Report the staleness warning to the user
27
+ - Run `npx work-kit-cli status` to get full diagnostics
28
+ - If the state is recoverable, run `npx work-kit-cli next` to resume
29
+ - If the state is corrupted, suggest starting fresh
@@ -40,6 +40,29 @@ Throughout every sub-stage, capture three things in the shared state.md sections
40
40
 
41
41
  These feed into the final work-kit log summary. If you don't record them here, they're lost.
42
42
 
43
+ ## Boundaries
44
+
45
+ ### Always
46
+ - Follow the Blueprint step order unless a dependency requires reordering
47
+ - Run the test suite after every sub-stage that changes code
48
+ - Record every deviation from the Blueprint in the ## Deviations section
49
+ - Match existing codebase patterns found during Plan/Investigate
50
+ - Commit .work-kit/ files separately from feature code
51
+
52
+ ### Ask First
53
+ - Redesigning any part of the Blueprint (adapt minimally, don't redesign)
54
+ - Adding dependencies not specified in the Blueprint
55
+ - Changing the data model beyond what Architecture specified
56
+ - Skipping the Red (failing tests) sub-stage
57
+
58
+ ### Never
59
+ - Write implementation code before writing failing tests (Red comes before Core)
60
+ - Introduce new conventions that differ from existing codebase patterns
61
+ - Refactor code you did not write or modify in this feature
62
+ - Force push to any branch
63
+ - Include .env files, secrets, or credentials in commits
64
+ - Proceed with failing pre-existing tests without explaining why they changed
65
+
43
66
  ## Loop-back
44
67
 
45
68
  If **Refactor** returns "broken" (tests failing after refactor):
@@ -64,6 +87,7 @@ After all sub-stages are done, append a `### Build: Final` section to state.md.
64
87
  ```markdown
65
88
  ### Build: Final
66
89
 
90
+ **Verdict:** complete | complete_with_issues
67
91
  **PR:** #<number> — <title>
68
92
  **PR URL:** <url>
69
93
  **Branch:** feature/<slug>
@@ -41,3 +41,11 @@ description: "Build sub-stage: Create clean commits, push branch, create PR."
41
41
  - If there are secrets or env files staged, remove them
42
42
  - Prefer multiple focused commits over one giant commit
43
43
  - PR description should be useful to a reviewer — not a wall of text
44
+
45
+ ## Anti-Rationalization
46
+
47
+ | Excuse | Reality |
48
+ |--------|---------|
49
+ | "One big commit is fine for this feature" | Atomic commits make review possible, bisection useful, and reverts safe. A 500-line single commit is a review nightmare and an unrevertable blob. |
50
+ | "The PR description can be minimal since reviewers have context" | Reviewers do not have your context. The PR description is the first thing they read — it determines whether they understand or misunderstand every line of your diff. |
51
+ | "I'll clean up the commit history later" | You will not. Commit hygiene happens at commit time or not at all. Write the message as if you are explaining this change to someone six months from now. |
@@ -46,3 +46,13 @@ description: "Build sub-stage: Make failing tests pass — service layer, API, b
46
46
  - Don't build UI yet — that's the UI sub-stage
47
47
  - If a test expectation seems wrong, fix the test only if the Blueprint supports it
48
48
  - Match existing code patterns exactly — naming, file structure, error handling
49
+
50
+ ## Anti-Rationalization
51
+
52
+ | Excuse | Reality |
53
+ |--------|---------|
54
+ | "The Blueprint approach won't work, let me redesign" | Adapt, don't redesign. If the Blueprint truly cannot work, record a Deviation and make the minimal change needed. Full redesigns during Build invalidate the entire Plan phase. |
55
+ | "I should add this extra feature while I'm here" | Scope creep disguised as efficiency. Every addition not in the Blueprint is untested, unreviewed, and unplanned. Add it to a follow-up task instead. |
56
+ | "This pattern is better than what the codebase uses" | Consistency beats local optimization. The next developer expects the established pattern. Introduce new patterns in dedicated refactoring work, not mid-feature. |
57
+
58
+ > **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
@@ -42,3 +42,11 @@ description: "Build sub-stage: Wire everything together, verify full data flow e
42
42
  - Check TypeScript types across boundaries — mismatches here cause runtime bugs
43
43
  - If the dev server is available, actually navigate the flow
44
44
  - Document any issues found — they indicate gaps in the Blueprint for future reference
45
+
46
+ ## Anti-Rationalization
47
+
48
+ | Excuse | Reality |
49
+ |--------|---------|
50
+ | "Unit tests passing means integration is fine" | Unit tests mock boundaries. Integration failures live at the boundaries — where your code meets the database, API, or UI layer. |
51
+ | "I already verified data flow during Core" | Core verified that individual pieces work. Integration verifies they work together. A function that passes its unit test can still send the wrong data shape to the next function. |
52
+ | "Integration testing is the Test phase's job" | The Test phase runs automated suites. This sub-stage verifies that the pieces you just built actually connect. Finding a wiring bug here takes 5 minutes; finding it in Test takes 30. |
@@ -30,6 +30,8 @@ description: "Build sub-stage: Write failing tests BEFORE implementation (TDD re
30
30
  **Test Output:**
31
31
  <summary of test run — X tests, Y failing, Z passing (pre-existing)>
32
32
 
33
+ **Coverage:** <N>/<total> criteria have failing tests
34
+
33
35
  **Criteria Coverage:**
34
36
  - "<criterion>" → tested by <test name>
35
37
  ```
@@ -42,3 +44,11 @@ description: "Build sub-stage: Write failing tests BEFORE implementation (TDD re
42
44
  - Existing tests must still pass — only NEW tests should fail
43
45
  - Match the project's existing test patterns and frameworks
44
46
  - If the project has no test framework set up, set one up as part of this step
47
+
48
+ ## Anti-Rationalization
49
+
50
+ | Excuse | Reality |
51
+ |--------|---------|
52
+ | "Writing tests after implementation is more efficient" | Writing tests first defines the contract. Tests written after implementation test what you built, not what you should have built — they encode bugs as features. |
53
+ | "The code is simple enough it doesn't need tests" | Simple code is the easiest to test. If you skip tests here, you will also skip them for complex code with a different excuse. The Red stage exists to build the safety net before you need it. |
54
+ | "The Test phase will cover this" | The Test phase verifies the feature works end-to-end. Unit-level coverage must exist before that. Discovering a logic error in Test means rebuilding from Core. |
@@ -27,6 +27,8 @@ description: "Build sub-stage: Improve code quality while keeping all tests gree
27
27
  **Refactoring Summary:**
28
28
  - <what was improved and why>
29
29
 
30
+ **Changes Made:** <N> files touched
31
+ **Tests:** before=<N> passing, after=<N> passing
30
32
  **Test Status:** passing | broken
31
33
 
32
34
  **If broken:**
@@ -46,3 +48,11 @@ description: "Build sub-stage: Improve code quality while keeping all tests gree
46
48
  - Don't refactor code you didn't write/modify in this feature
47
49
  - If code is already clean, say so and move on — don't refactor for its own sake
48
50
  - Small, incremental changes — not a big-bang rewrite
51
+
52
+ ## Anti-Rationalization
53
+
54
+ | Excuse | Reality |
55
+ |--------|---------|
56
+ | "The code is fine as-is, nothing to refactor" | Fresh code almost always has cleanup opportunities — redundant variables, unclear names, duplicated logic. Read your code as if reviewing someone else's PR before concluding it is already clean. |
57
+ | "I should refactor this unrelated code too" | The Refactor stage touches only code you wrote or modified in this feature. Unrelated refactoring expands the diff, makes review harder, and risks regressions in code you don't fully understand. |
58
+ | "Tests are flaky, the refactoring didn't really break them" | If tests fail after refactoring, the refactoring changed behavior. Flaky tests that suddenly fail consistently are not flaky — they caught something. Investigate before dismissing. |
@@ -42,6 +42,25 @@ This phase runs as a **fresh agent**. Read only these sections from `.work-kit/s
42
42
  - `### Build: Final` — PR URL, branch
43
43
  - `## Criteria` — for final confirmation
44
44
 
45
+ ## Boundaries
46
+
47
+ ### Always
48
+ - Check CI status before merging
49
+ - Rebase on the default branch before merging to catch integration issues
50
+ - Verify the merge actually completed successfully
51
+ - Monitor deployment status after merge (where applicable)
52
+
53
+ ### Ask First
54
+ - Resolving non-trivial rebase conflicts (show conflicts to the user first)
55
+ - Rolling back a deployment (except for data corruption or a security incident — roll back immediately in those cases)
56
+
57
+ ### Never
58
+ - Force push to the default branch (main or master)
59
+ - Merge with failing CI checks
60
+ - Skip the rebase step to "save time"
61
+ - Delete branches before confirming the merge succeeded
62
+ - Proceed past a failed deployment without fixing or rolling back
63
+
45
64
  ## Final Output
46
65
 
47
66
  After all sub-stages are done, append a `### Deploy: Final` section to state.md. This is what **Wrap-up reads**.
@@ -49,6 +68,7 @@ After all sub-stages are done, append a `### Deploy: Final` section to state.md.
49
68
  ```markdown
50
69
  ### Deploy: Final
51
70
 
71
+ **Verdict:** shipped | fix_needed | rolled_back
52
72
  **PR:** #<number>
53
73
  **Merge status:** merged | fix_needed | abort
54
74
  **Deploy status:** deployed | failed | not_applicable
@@ -35,8 +35,8 @@ description: "Deploy sub-stage: Get the PR merged safely."
35
35
  ```markdown
36
36
  ### Deploy: Merge
37
37
 
38
+ **CI Status:** passing | failing | N/A
38
39
  **PR:** #<number>
39
- **CI Status:** passing | failing
40
40
  **Conflicts:** none | resolved
41
41
  **Merge Method:** squash | merge | rebase
42
42
  **Result:** merged | fix_needed | abort
@@ -57,3 +57,11 @@ description: "Deploy sub-stage: Get the PR merged safely."
57
57
  - Merge is fully autonomous — do NOT ask the user for permission at any step (review phase already approved it)
58
58
  - Push, create PR, and merge without stopping for confirmation
59
59
  - The entire sync → push → PR → merge flow should complete in one agent pass
60
+
61
+ ## Anti-Rationalization
62
+
63
+ | Excuse | Reality |
64
+ |--------|---------|
65
+ | "CI is probably fine, no need to wait for the check" | "Probably" is not evidence. CI exists to catch what you missed. Wait for the green check — it takes minutes and prevents shipping broken code. |
66
+ | "The conflict is trivial, I'll just force through" | Trivial conflicts still need manual resolution. Force-merging overwrites someone else's work. Resolve conflicts properly — if they are truly trivial, it takes 30 seconds. |
67
+ | "Rebasing will mess up my history, better to merge directly" | A clean rebase on the default branch catches integration issues before they reach main. The few minutes spent rebasing prevent broken builds that affect the entire team. |
@@ -54,6 +54,8 @@ After all sub-stages are done, append a `### Plan: Final` section to state.md. T
54
54
  ```markdown
55
55
  ### Plan: Final
56
56
 
57
+ **Verdict:** ready | revised_with_caveats
58
+
57
59
  **Blueprint:**
58
60
  <the full ordered implementation plan from Blueprint sub-stage — copy it here>
59
61
 
@@ -75,3 +77,24 @@ After all sub-stages are done, append a `### Plan: Final` section to state.md. T
75
77
  Then:
76
78
  - Update state: `**Phase:** plan (complete)`
77
79
  - Commit state: `git add .work-kit/ && git commit -m "work-kit: complete plan"`
80
+
81
+ ## Boundaries
82
+
83
+ ### Always
84
+ - Read every file referenced in the Description before proposing solutions
85
+ - Ask clarifying questions when requirements have multiple valid interpretations
86
+ - Map blast radius by tracing actual code paths, not guessing from file names
87
+ - Include exact file paths in Blueprint steps
88
+ - Map every acceptance criterion to at least one Blueprint step
89
+
90
+ ### Ask First
91
+ - Changing the scope after Clarify (user must approve scope changes)
92
+ - Adding acceptance criteria the user did not request
93
+ - Recommending a complexity rating of x-large (confirm before proceeding)
94
+
95
+ ### Never
96
+ - Propose solutions during Clarify (that is Sketch's job)
97
+ - Skip Investigate to "save time" — code understanding prevents rework
98
+ - Write vague Blueprint steps like "update relevant files" without exact paths
99
+ - Assume the codebase follows standard patterns without verifying in Investigate
100
+ - Proceed past Audit with unresolved gaps in the Blueprint
@@ -25,6 +25,12 @@ description: "Plan sub-stage: Audit the Blueprint for gaps, contradictions, and
25
25
  ### Plan: Audit
26
26
 
27
27
  **Result:** proceed | revise
28
+ **Checklist:**
29
+ - [ ] Every criterion maps to at least one Blueprint step
30
+ - [ ] Every Blueprint step has exact file paths
31
+ - [ ] Dependencies are ordered correctly
32
+ - [ ] Error/edge cases are addressed
33
+ - [ ] No scope creep beyond what Scope defined
28
34
 
29
35
  **Gaps Found:**
30
36
  - <gap description — or "None">
@@ -56,3 +62,11 @@ description: "Plan sub-stage: Audit the Blueprint for gaps, contradictions, and
56
62
  - "Proceed" means you'd bet money this plan works
57
63
  - "Revise" is not failure — it's the audit doing its job
58
64
  - Max 2 revision loops — after that, proceed with noted caveats
65
+
66
+ ## Anti-Rationalization
67
+
68
+ | Excuse | Reality |
69
+ |--------|---------|
70
+ | "The Blueprint looks complete, proceed without nitpicking" | Audit exists because plans always have gaps. If you cannot find any, you are not looking hard enough — check criterion coverage, missing error paths, and dependency order. |
71
+ | "Revising would waste time, the gaps are minor" | A 'minor' gap in the plan becomes a major blocker in Build. Sending back to Blueprint now costs minutes; discovering the gap mid-implementation costs hours. |
72
+ | "I already wrote the Blueprint, so I know it's correct" | Self-review bias is real. Audit requires you to read the Blueprint as if someone else wrote it. Check each criterion against the steps — does every criterion have a step that delivers it? |
@@ -58,3 +58,11 @@ description: "Plan sub-stage: Produce a full ordered step-by-step implementation
58
58
  - If a step is "update 5 files", break it into 5 steps
59
59
  - The Blueprint is the contract — Build phase follows it literally
60
60
  - Include commands to run (migrations, test commands) as steps
61
+
62
+ ## Anti-Rationalization
63
+
64
+ | Excuse | Reality |
65
+ |--------|---------|
66
+ | "High-level steps are sufficient, exact paths aren't needed" | Vague steps like "update the API" become ambiguous during Build. Exact file paths eliminate guesswork and prevent the wrong file from being modified. |
67
+ | "The Architecture section already covers the implementation plan" | Architecture describes structure. Blueprint describes execution order. Without a step-by-step plan, the Build agent will invent its own order — often wrong. |
68
+ | "Adding more detail will just slow things down" | A detailed Blueprint is the single highest-leverage artifact in the entire pipeline. Every minute here saves ten in Build. |
@@ -59,3 +59,11 @@ Update the `## Criteria` section with acceptance criteria, then append:
59
59
  - **Do ask questions** — ambiguity caught here saves hours later
60
60
  - If the request is crystal clear, don't invent questions just to ask them
61
61
  - Ask questions only when the answer materially changes what gets built
62
+
63
+ ## Anti-Rationalization
64
+
65
+ | Excuse | Reality |
66
+ |--------|---------|
67
+ | "The request is clear enough, no questions needed" | Ambiguity hides in assumptions. One clarifying question now prevents a wrong turn that wastes an entire Build phase. |
68
+ | "I should start reading code to understand better" | That is Investigate's job. Clarify defines *what* to build; Investigate discovers *how*. Mixing them leads to solution-driven requirements. |
69
+ | "Acceptance criteria can be refined later" | Vague criteria produce vague implementations. If you cannot write a testable criterion now, you do not understand the request yet. |
@@ -45,3 +45,13 @@ description: "Plan sub-stage: Read codebase systematically, trace paths, map bla
45
45
  - Do NOT propose solutions yet — that's Sketch
46
46
  - Note file paths precisely — these will be referenced in Blueprint
47
47
  - If the codebase has no tests for affected areas, note that as a risk
48
+
49
+ ## Anti-Rationalization
50
+
51
+ | Excuse | Reality |
52
+ |--------|---------|
53
+ | "I already understand the codebase from the description" | You understand the *intent*, not the *implementation*. Blast radius, existing patterns, and hidden dependencies live in the code, not the request. |
54
+ | "Checking more files would waste context" | Skipping investigation wastes far more context when you discover mid-Build that your assumptions were wrong and must restart. |
55
+ | "The blast radius is obvious, no need to trace paths" | Obvious blast radius is the most common source of missed side-effects. Trace the actual call chain — surprises live one hop beyond what seems obvious. |
56
+
57
+ > **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
@@ -44,3 +44,11 @@ description: "Plan sub-stage: Define in/out scope, estimate complexity, refine c
44
44
  - If Clarify criteria are too vague, sharpen them now
45
45
  - "Out of scope" is a decision, not a deferral — explain why
46
46
  - Complexity estimate should factor in blast radius from Investigate
47
+
48
+ ## Anti-Rationalization
49
+
50
+ | Excuse | Reality |
51
+ |--------|---------|
52
+ | "Everything is in scope, no need to exclude anything" | Unbounded scope is how features balloon. Explicitly listing what is out of scope prevents drift during Build. |
53
+ | "This is too small to scope formally" | Small tasks with unclear boundaries grow silently. A 2-line scope section costs nothing and prevents "while I'm here" additions. |
54
+ | "The scope is implied by the acceptance criteria" | Criteria say what must work. Scope says what you will and will not touch. A criterion can be met many ways — scope constrains which way. |
@@ -64,6 +64,27 @@ Each writes its own `### Review: <sub-stage>` section to state.md.
64
64
 
65
65
  **Handoff agent** reads all 4 review sections + Test: Final → makes the ship decision.
66
66
 
67
+ ## Boundaries
68
+
69
+ ### Always
70
+ - Read the full git diff before making any review judgments
71
+ - Fix issues directly when fixable in under 5 minutes
72
+ - Run the test suite after any fixes made during review
73
+ - Check every Blueprint step in the Compliance review
74
+ - Produce a clear ship/no-ship verdict with specific reasoning
75
+
76
+ ### Ask First
77
+ - Approving with known failing criteria (explain which and why acceptable)
78
+ - Rejecting a PR (confirm the fundamental problem is not fixable)
79
+ - Making architectural changes during review
80
+
81
+ ### Never
82
+ - Approve a PR with critical or high severity security issues
83
+ - Approve without checking acceptance criteria status
84
+ - Rubber-stamp without reading the diff ("looks good" is not a review)
85
+ - Issue a changes_requested verdict without specifying exactly what needs to change
86
+ - Skip any of the 4 parallel review sub-stages
87
+
67
88
  ## Final Output
68
89
 
69
90
  After Handoff completes, append a `### Review: Final` section to state.md. This is what **Deploy and Wrap-up read**.
@@ -71,7 +92,7 @@ After Handoff completes, append a `### Review: Final` section to state.md. This
71
92
  ```markdown
72
93
  ### Review: Final
73
94
 
74
- **Decision:** approved | changes_requested | rejected
95
+ **Verdict:** approved | changes_requested | rejected
75
96
 
76
97
  **Summary:** <1-2 sentences — overall assessment>
77
98
 
@@ -28,9 +28,9 @@ description: "Review sub-stage: Compare final code against Blueprint."
28
28
 
29
29
  **Result:** compliant | deviations_found
30
30
 
31
- **Blueprint Steps:**
32
- - Step 1: <implemented | deviated | missing>
33
- - Step 2: <implemented | deviated | missing>
31
+ **Blueprint Steps:** (every step MUST appear with a status)
32
+ - Step 1: <done | deviated | skipped>
33
+ - Step 2: <done | deviated | skipped>
34
34
  - ...
35
35
 
36
36
  **Deviations:**
@@ -46,3 +46,11 @@ description: "Review sub-stage: Compare final code against Blueprint."
46
46
  - But deviations need justification — "I felt like it" is not acceptable
47
47
  - Skipped or missing steps are a red flag — they need to be implemented or explicitly dropped with a reason
48
48
  - Scope creep should be called out even if the extra code is good
49
+
50
+ ## Anti-Rationalization
51
+
52
+ | Excuse | Reality |
53
+ |--------|---------|
54
+ | "The deviations are improvements over the Blueprint" | Improvements still need documentation. If the implementation differs from the plan, record why — future readers need to know the deviation was intentional, not accidental. |
55
+ | "The Blueprint was wrong, so compliance doesn't apply" | If the Blueprint was wrong, that is itself a finding worth recording. Compliance review catches plan-vs-reality drift — both accidental deviations and deliberate corrections need documentation. |
56
+ | "Minor scope additions don't count as scope creep" | Minor additions compound. Each one is "just a small thing" until the PR is 3x the original scope. If it was not in the Blueprint, it is scope creep — document it as a Deviation. |
@@ -35,6 +35,9 @@ description: "Review sub-stage: Finalize PR, make ship/no-ship decision."
35
35
  **Concerns:**
36
36
  - <any remaining concerns — or "None">
37
37
 
38
+ **Criteria Met:** <N>/<total>
39
+ **Blockers:** <N> (list each if > 0)
40
+
38
41
  **Decision:** approved | changes_requested | rejected
39
42
 
40
43
  **If changes_requested:**
@@ -57,3 +60,11 @@ description: "Review sub-stage: Finalize PR, make ship/no-ship decision."
57
60
  - Don't block on cosmetic issues — fix them directly before finalizing
58
61
  - The PR should be ready for a human reviewer after this step
59
62
  - If you're unsure between approved and changes_requested, ask the user
63
+
64
+ ## Anti-Rationalization
65
+
66
+ | Excuse | Reality |
67
+ |--------|---------|
68
+ | "Requesting changes would slow things down, it's good enough" | Shipping known issues to "save time" moves the cost to production users and the next developer. Requesting changes now is faster than a hotfix later. |
69
+ | "The gaps are minor, we can fix them after merge" | After merge, the context is gone, the branch is deleted, and the priority shifts. Post-merge fixes have a completion rate near zero. Fix it now or accept it will never be fixed. |
70
+ | "Requesting changes will frustrate the developer" | A clear, specific change request is more respectful than silently approving broken code. Developers prefer honest feedback over discovering issues in production. |
@@ -27,6 +27,7 @@ Fix what you can. Document what needs deeper investigation.
27
27
  ```markdown
28
28
  ### Review: Performance
29
29
 
30
+ **Verdict:** clear | issues_noted
30
31
  **Findings:**
31
32
  - <finding — or "None">
32
33
 
@@ -29,6 +29,9 @@ Fix issues directly when possible. Document what you can't fix.
29
29
  ```markdown
30
30
  ### Review: Security
31
31
 
32
+ > **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders, these blocks are excluded from security review. If you suspect a security issue may exist within a redacted area, flag it for human review rather than attempting to reconstruct the code.
33
+
34
+ **Verdict:** clear | risks_noted | blocked
32
35
  **Findings:**
33
36
  - <finding with severity: critical/high/medium/low — or "None">
34
37
 
@@ -47,3 +50,11 @@ Fix issues directly when possible. Document what you can't fix.
47
50
  - Not every feature touches all 10 categories — skip irrelevant ones
48
51
  - Don't add security theater (unnecessary complexity for non-existent threats)
49
52
  - If you find a critical issue, fix it immediately and note it prominently
53
+
54
+ ## Anti-Rationalization
55
+
56
+ | Excuse | Reality |
57
+ |--------|---------|
58
+ | "This feature doesn't touch auth, so there are no security concerns" | Security is not just authentication. Input validation, data exposure, injection, CSRF, and insecure defaults exist in every feature that handles user data or external input. |
59
+ | "Input validation is handled elsewhere" | Verify that claim. "Handled elsewhere" is the most common source of security gaps — each layer assumes another layer validates. Check the actual validation at every boundary. |
60
+ | "This is internal-only, security doesn't matter" | Internal APIs become external when architectures change. Internal networks get compromised. Treat every input as potentially hostile — the cost of basic validation is negligible. |
@@ -27,6 +27,9 @@ description: "Review sub-stage: Review your own diff for obvious issues."
27
27
  ```markdown
28
28
  ### Review: Self-Review
29
29
 
30
+ > **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
31
+
32
+ **Verdict:** clean | issues_remain
30
33
  **Issues Found:** <N>
31
34
  **Issues Fixed:** <M>
32
35
  **Remaining Concerns:**
@@ -39,3 +42,11 @@ description: "Review sub-stage: Review your own diff for obvious issues."
39
42
  - Remove ALL debug code (console.log, debugger statements, etc.)
40
43
  - This is about catching careless mistakes, not redesigning the architecture
41
44
  - Be honest — pretending your code is perfect helps no one
45
+
46
+ ## Anti-Rationalization
47
+
48
+ | Excuse | Reality |
49
+ |--------|---------|
50
+ | "My code is already clean, nothing to review" | You wrote it minutes ago — you cannot objectively review your own fresh code. Read it as if someone else wrote it. Look for naming issues, missing error handling, and unclear logic. |
51
+ | "These are minor style issues, not worth fixing" | Accumulated minor issues make code hard to read and maintain. Fix them now while the context is fresh — they take seconds each but compound into significant tech debt. |
52
+ | "The linter didn't flag anything, so the code is fine" | Linters catch syntax and formatting. They do not catch unclear names, missing edge cases, redundant logic, or poor abstractions. Self-review catches what linters cannot. |
@@ -53,6 +53,25 @@ Agent: E2E ──┘
53
53
 
54
54
  Each sub-agent reads the same Context Input sections and writes its own `### Test: <sub-stage>` section to state.md.
55
55
 
56
+ ## Boundaries
57
+
58
+ ### Always
59
+ - Run the full test suite, not just new tests
60
+ - Provide explicit evidence for every satisfied criterion (test name, output, or code reference)
61
+ - Report honest confidence levels — do not inflate confidence
62
+ - Fix regressions immediately rather than documenting them for later
63
+
64
+ ### Ask First
65
+ - Marking a criterion as "not testable" (explain why and get confirmation)
66
+ - Changing or reinterpreting acceptance criteria discovered during testing
67
+ - Disabling or modifying pre-existing tests
68
+
69
+ ### Never
70
+ - Skip failing tests or disable them to make the suite pass
71
+ - Claim a criterion is satisfied without specific evidence
72
+ - Write E2E tests that test implementation details rather than user behavior
73
+ - Modify feature code during Test phase for anything other than fixing a regression or a failing flow (report other issues, don't fix)
74
+
56
75
  ## Final Output
57
76
 
58
77
  After all sub-stages are done, append a `### Test: Final` section to state.md. This is what **Review agents read**.
@@ -60,6 +79,7 @@ After all sub-stages are done, append a `### Test: Final` section to state.md. T
60
79
  ```markdown
61
80
  ### Test: Final
62
81
 
82
+ **Verdict:** pass | gaps_found
63
83
  **Suite status:** all passing | <N> failures
64
84
  **Total tests:** <count> (passing: <N>, failing: <N>)
65
85
 
@@ -22,6 +22,7 @@ description: "Test sub-stage: Test user flows end-to-end."
22
22
  ```markdown
23
23
  ### Test: E2E
24
24
 
25
+ **Verdict:** pass | fail
25
26
  **Tests Written:**
26
27
  - `<test file>`: <flow description>
27
28
 
@@ -42,3 +43,11 @@ description: "Test sub-stage: Test user flows end-to-end."
42
43
  - Focus on user-visible behavior, not internal implementation
43
44
  - Screenshots are evidence — capture them for key states
44
45
  - If a flow fails, fix the implementation (not the test) unless the test expectation is wrong
46
+
47
+ ## Anti-Rationalization
48
+
49
+ | Excuse | Reality |
50
+ |--------|---------|
51
+ | "Manual verification counts as E2E testing" | Manual verification is not repeatable, not documented, and not run in CI. If you cannot automate it, at minimum document the exact manual steps with expected results. |
52
+ | "Unit tests already cover this flow" | Unit tests mock boundaries. E2E tests verify the real flow across boundaries — database, API, UI. A function can pass its unit test and still fail in the real pipeline. |
53
+ | "E2E tests are slow and fragile, not worth the effort" | Slow tests that catch real bugs are more valuable than fast tests that miss them. Write focused E2E tests for critical paths, not exhaustive ones for every edge case. |
@@ -32,6 +32,7 @@ Also append:
32
32
  ```markdown
33
33
  ### Test: Validate
34
34
 
35
+ **Verdict:** pass | gaps_found
35
36
  **Criteria Status:**
36
37
  - Satisfied: <N> / <total>
37
38
  - Gaps: <list of unsatisfied criteria>
@@ -49,3 +50,11 @@ Also append:
49
50
  - If a criterion is genuinely not testable, explain why
50
51
  - Low confidence should trigger concern in the Review phase
51
52
  - Criteria should not change during Test — if a new criterion is discovered, note it but don't add it to the checklist mid-test
53
+
54
+ ## Anti-Rationalization
55
+
56
+ | Excuse | Reality |
57
+ |--------|---------|
58
+ | "The test suite passing counts as evidence for all criteria" | A passing suite proves the tests pass, not that the criteria are met. Each criterion needs a specific test or evidence mapped to it — "tests pass" is not a mapping. |
59
+ | "This criterion is obviously satisfied, no explicit evidence needed" | If it is obvious, it is easy to provide evidence. If you cannot point to specific evidence, the criterion might not actually be met — your confidence is based on assumption, not proof. |
60
+ | "Low confidence is fine because the tests pass" | Low confidence means you are not sure the criterion is met. That is a signal to investigate further, not to accept and move on. The purpose of Validate is to resolve uncertainty, not document it. |
@@ -24,6 +24,7 @@ description: "Test sub-stage: Run existing test suite, check for regressions."
24
24
  ```markdown
25
25
  ### Test: Verify
26
26
 
27
+ **Verdict:** pass | fail
27
28
  **Suite Result:** pass | fail
28
29
  **Total Tests:** <N> passing, <M> failing
29
30
  **Regressions Found:**
@@ -39,3 +40,11 @@ description: "Test sub-stage: Run existing test suite, check for regressions."
39
40
  - Do NOT disable tests to make the suite pass
40
41
  - If a pre-existing test fails and it's a legitimate behavior change, update the test with a comment explaining why
41
42
  - Run the suite at least twice — once to find issues, once to confirm fixes
43
+
44
+ ## Anti-Rationalization
45
+
46
+ | Excuse | Reality |
47
+ |--------|---------|
48
+ | "Disabling this flaky test is easier than fixing it" | Disabling tests erodes the safety net. A flaky test is a test with a real problem — intermittent failures often reveal race conditions or state leaks that will bite production. |
49
+ | "The failing test was testing the old behavior" | Then update the test to match the new behavior and verify the update is intentional. Deleting a test because it fails is destroying evidence — it might be catching a real regression. |
50
+ | "All tests pass, so everything works" | Tests only prove what they test. Check the criteria — does each one have a test that would fail if the criterion were not met? Passing tests with missing coverage give a false sense of security. |
@@ -63,6 +63,24 @@ status: <completed | partial | rolled-back>
63
63
  - Internal process notes ("ran tests 3 times before they passed")
64
64
  - Anything derivable from the git diff or PR description
65
65
 
66
+ ## Boundaries
67
+
68
+ ### Always
69
+ - Read the full state.md before writing the summary
70
+ - Include every non-obvious decision in the Key Decisions section
71
+ - Include every deviation from the Blueprint in the Deviations section
72
+ - Write the archive to the main branch, not the worktree
73
+
74
+ ### Ask First
75
+ - Deleting the worktree and feature branch (confirm with user)
76
+ - Omitting sections from the summary
77
+
78
+ ### Never
79
+ - Copy-paste full phase outputs into the summary (distill, don't dump)
80
+ - Include routine implementation details (file lists, command logs)
81
+ - Skip the criteria checklist in the summary
82
+ - Commit the archive on the feature branch instead of main
83
+
66
84
  ## Cleanup
67
85
 
68
86
  After writing the summary: