work-kit-cli 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/src/commands/bootstrap.test.ts +117 -0
- package/cli/src/commands/bootstrap.ts +65 -0
- package/cli/src/commands/complete.ts +18 -1
- package/cli/src/context/extractor.ts +9 -4
- package/cli/src/context/prompt-builder.ts +2 -1
- package/cli/src/context/redactor.test.ts +111 -0
- package/cli/src/context/redactor.ts +38 -0
- package/cli/src/index.ts +17 -0
- package/package.json +1 -1
- package/skills/auto-kit/SKILL.md +8 -4
- package/skills/full-kit/SKILL.md +8 -4
- package/skills/wk-bootstrap/SKILL.md +29 -0
- package/skills/wk-build/SKILL.md +24 -0
- package/skills/wk-build/stages/commit.md +8 -0
- package/skills/wk-build/stages/core.md +10 -0
- package/skills/wk-build/stages/integration.md +8 -0
- package/skills/wk-build/stages/red.md +10 -0
- package/skills/wk-build/stages/refactor.md +10 -0
- package/skills/wk-deploy/SKILL.md +20 -0
- package/skills/wk-deploy/stages/merge.md +9 -1
- package/skills/wk-plan/SKILL.md +23 -0
- package/skills/wk-plan/stages/audit.md +14 -0
- package/skills/wk-plan/stages/blueprint.md +8 -0
- package/skills/wk-plan/stages/clarify.md +8 -0
- package/skills/wk-plan/stages/investigate.md +10 -0
- package/skills/wk-plan/stages/scope.md +8 -0
- package/skills/wk-review/SKILL.md +22 -1
- package/skills/wk-review/stages/compliance.md +11 -3
- package/skills/wk-review/stages/handoff.md +11 -0
- package/skills/wk-review/stages/performance.md +1 -0
- package/skills/wk-review/stages/security.md +11 -0
- package/skills/wk-review/stages/self-review.md +11 -0
- package/skills/wk-test/SKILL.md +20 -0
- package/skills/wk-test/stages/e2e.md +9 -0
- package/skills/wk-test/stages/validate.md +9 -0
- package/skills/wk-test/stages/verify.md +9 -0
- package/skills/wk-wrap-up/SKILL.md +18 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { describe, it, afterEach } from "node:test";
|
|
2
|
+
import * as assert from "node:assert/strict";
|
|
3
|
+
import * as fs from "node:fs";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
import * as os from "node:os";
|
|
6
|
+
import { randomUUID } from "node:crypto";
|
|
7
|
+
import { bootstrapCommand } from "./bootstrap.js";
|
|
8
|
+
import { initCommand } from "./init.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Create a uniquely named scratch directory under the OS temp directory.
 *
 * The directory name is `work-kit-test-<uuid>`; the caller is responsible
 * for removing it.
 *
 * @returns Path of the directory that was created.
 */
function makeTmpDir(): string {
  const uniqueName = `work-kit-test-${randomUUID()}`;
  const target = path.join(os.tmpdir(), uniqueName);
  fs.mkdirSync(target, { recursive: true });
  return target;
}
|
|
15
|
+
|
|
16
|
+
// Scratch directories registered by the tests so the hook below can delete them.
let tmpDirs: string[] = [];

// After every test: delete each registered directory, then start with an empty registry.
afterEach(() => {
  tmpDirs.forEach((dir) => {
    fs.rmSync(dir, { recursive: true, force: true });
  });
  tmpDirs = [];
});
|
|
24
|
+
|
|
25
|
+
describe("bootstrapCommand", () => {
  // Create a scratch worktree and register it for removal after the test.
  const newWorktree = (): string => {
    const root = makeTmpDir();
    tmpDirs.push(root);
    return root;
  };

  // Initialise a full-mode session with the given description inside `root`.
  const startSession = (root: string, description: string): void => {
    initCommand({
      mode: "full",
      description,
      worktreeRoot: root,
    });
  };

  // Path of the state file the tests tamper with.
  const stateFileOf = (root: string): string => path.join(root, ".work-kit", "state.json");

  // Rewrite the persisted `status` field in place.
  const overwriteStatus = (root: string, status: string): void => {
    const file = stateFileOf(root);
    const persisted = JSON.parse(fs.readFileSync(file, "utf-8"));
    persisted.status = status;
    fs.writeFileSync(file, JSON.stringify(persisted, null, 2));
  };

  it("returns inactive when no state exists", () => {
    const root = newWorktree();

    const result = bootstrapCommand(root);
    assert.equal(result.active, false);
    assert.ok(result.nextAction?.includes("/full-kit"));
  });

  it("returns active state after init", () => {
    const root = newWorktree();
    startSession(root, "Test feature");

    const result = bootstrapCommand(root);
    assert.equal(result.active, true);
    assert.equal(result.slug, "test-feature");
    assert.equal(result.mode, "full-kit");
    assert.equal(result.status, "in-progress");
    assert.equal(result.phase, "plan");
    assert.equal(result.recovery, null);
  });

  it("detects stale state", () => {
    const root = newWorktree();
    startSession(root, "Stale test");

    // Backdate the state.json file to 3 hours ago
    const backdated = new Date(Date.now() - 3 * 60 * 60 * 1000);
    fs.utimesSync(stateFileOf(root), backdated, backdated);

    const result = bootstrapCommand(root);
    assert.equal(result.active, true);
    assert.ok(result.recovery !== null);
    assert.ok(result.recovery?.includes("stale"));
  });

  it("reports completed state", () => {
    const root = newWorktree();
    startSession(root, "Done feature");

    // Manually set status to completed
    overwriteStatus(root, "completed");

    const result = bootstrapCommand(root);
    assert.equal(result.active, true);
    assert.equal(result.status, "completed");
    assert.ok(result.nextAction?.includes("complete"));
  });

  it("reports failed state", () => {
    const root = newWorktree();
    startSession(root, "Failed feature");

    // Manually set status to failed
    overwriteStatus(root, "failed");

    const result = bootstrapCommand(root);
    assert.equal(result.active, true);
    assert.equal(result.status, "failed");
    assert.ok(result.nextAction?.includes("failed"));
  });
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import { findWorktreeRoot, readState, statePath } from "../state/store.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Session orientation returned by `bootstrapCommand`.
 *
 * When no worktree root is found only `active` and `nextAction` are set;
 * otherwise the remaining fields are populated from the persisted state.
 */
export interface BootstrapResult {
  /** `false` when no worktree root was found, `true` otherwise. */
  active: boolean;
  /** Session slug, copied from the persisted state. */
  slug?: string;
  /** Branch recorded in the persisted state. */
  branch?: string;
  /** Session mode recorded in the persisted state. */
  mode?: string;
  /** The state's `currentPhase`. */
  phase?: string | null;
  /** The state's `currentSubStage`. */
  subStage?: string | null;
  /** Session status recorded in the persisted state. */
  status?: string;
  /** Human-readable instruction describing what to do next. */
  nextAction?: string;
  /** Staleness warning with suggested commands, or `null` when none applies. */
  recovery?: string | null;
}
|
|
15
|
+
|
|
16
|
+
/** Milliseconds in one hour; used to express the state file's age in hours. */
const MS_PER_HOUR = 60 * 60 * 1000;

/** A state file untouched for longer than this marks an unfinished session as stale. */
const STALE_AFTER_MS = MS_PER_HOUR;

/**
 * Detect the work-kit session for a directory and describe what to do next.
 *
 * @param startDir Directory passed to `findWorktreeRoot` to locate the session;
 *   when omitted, `findWorktreeRoot` applies its own default.
 * @returns `{ active: false, nextAction }` when no worktree root is found.
 *   Otherwise the session's identifying fields, its status, a `nextAction`
 *   instruction and a `recovery` hint (`null` unless the session looks stale).
 */
export function bootstrapCommand(startDir?: string): BootstrapResult {
  const root = findWorktreeRoot(startDir);

  if (!root) {
    return {
      active: false,
      nextAction:
        "No active work-kit session. Start one with /full-kit <description> or /auto-kit <description>.",
    };
  }

  const state = readState(root);

  // Finished sessions are expected to sit idle, so an old state file only
  // signals a problem (e.g. a crashed agent) while work is still pending.
  // Without this guard a completed session would be told to "resume".
  const isTerminal = state.status === "completed" || state.status === "failed";

  let recovery: string | null = null;
  if (!isTerminal) {
    try {
      const ageMs = Date.now() - fs.statSync(statePath(root)).mtimeMs;
      if (ageMs > STALE_AFTER_MS) {
        const hoursAgo = Math.round(ageMs / MS_PER_HOUR);
        recovery = `State appears stale (last update ~${hoursAgo}h ago). Run \`npx work-kit-cli status\` to diagnose. If the agent crashed mid-stage, run \`npx work-kit-cli next\` to resume.`;
      }
    } catch {
      // Best effort: if the state file cannot be stat'ed, skip the staleness hint.
    }
  }

  let nextAction: string;
  if (state.status === "completed") {
    nextAction = "Work-kit session is complete. Run wrap-up or start a new session.";
  } else if (state.status === "failed") {
    nextAction = "Work-kit session failed. Run `npx work-kit-cli status` to see details.";
  } else if (recovery) {
    nextAction = recovery;
  } else {
    nextAction = `Continue ${state.currentPhase ?? "next phase"}${state.currentSubStage ? "/" + state.currentSubStage : ""}. Run \`npx work-kit-cli next\` to get the agent prompt.`;
  }

  return {
    active: true,
    slug: state.slug,
    branch: state.branch,
    mode: state.mode,
    phase: state.currentPhase,
    subStage: state.currentSubStage,
    status: state.status,
    nextAction,
    recovery,
  };
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
3
4
|
import { readState, writeState, findWorktreeRoot, readStateMd } from "../state/store.js";
|
|
4
5
|
import { isPhaseComplete, nextSubStageInPhase } from "../engine/transitions.js";
|
|
5
6
|
import { checkLoopback } from "../engine/loopbacks.js";
|
|
@@ -135,8 +136,24 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
|
|
|
135
136
|
|
|
136
137
|
// ── Archive on completion ──────────────────────────────────────────
|
|
137
138
|
|
|
139
|
+
/**
 * Resolve the main repository root for a (possibly linked) git worktree.
 *
 * Runs `git worktree list --porcelain` in `worktreeRoot` and returns the path
 * from the first `worktree ` line, which git lists for the main worktree.
 *
 * @param worktreeRoot Directory in which git is invoked.
 * @returns The main worktree path, or `worktreeRoot` unchanged when git fails,
 *   times out, or prints no `worktree ` line.
 */
function resolveMainRepoRoot(worktreeRoot: string): string {
  try {
    const output = execFileSync("git", ["worktree", "list", "--porcelain"], {
      cwd: worktreeRoot,
      encoding: "utf-8",
      timeout: 5000,
      // Failure is handled by the fallback below, so keep git's own error
      // text (e.g. "fatal: not a git repository") out of the CLI's stderr.
      stdio: ["ignore", "pipe", "ignore"],
    });
    const firstEntry = output.split("\n").find((line) => line.startsWith("worktree "));
    if (firstEntry) return firstEntry.slice("worktree ".length).trim();
  } catch {
    // Best effort: fall through and use the given directory.
  }
  return worktreeRoot;
}
|
|
154
|
+
|
|
138
155
|
function archiveCompleted(worktreeRoot: string, state: WorkKitState): void {
|
|
139
|
-
const mainRoot =
|
|
156
|
+
const mainRoot = resolveMainRepoRoot(worktreeRoot);
|
|
140
157
|
const date = new Date().toISOString().split("T")[0];
|
|
141
158
|
const slug = state.slug;
|
|
142
159
|
const wkDir = path.join(mainRoot, ".claude", "work-kit");
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { readStateMd } from "../state/store.js";
|
|
2
|
+
import { redactIgnoredBlocks } from "./redactor.js";
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Extract a specific ### section from state.md by heading.
|
|
5
6
|
* Returns the content between the heading and the next ### heading (or end of file).
|
|
6
7
|
*/
|
|
7
|
-
export function extractSection(stateMd: string, heading: string): string | null {
|
|
8
|
+
export function extractSection(stateMd: string, heading: string, redact?: boolean): string | null {
|
|
8
9
|
// Normalize heading — ensure it starts with ###
|
|
9
10
|
const prefix = heading.startsWith("###") ? heading : `### ${heading}`;
|
|
10
11
|
const lines = stateMd.split("\n");
|
|
@@ -25,13 +26,15 @@ export function extractSection(stateMd: string, heading: string): string | null
|
|
|
25
26
|
}
|
|
26
27
|
}
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
if (captured.length === 0) return null;
|
|
30
|
+
const content = captured.join("\n").trim();
|
|
31
|
+
return redact ? redactIgnoredBlocks(content) : content;
|
|
29
32
|
}
|
|
30
33
|
|
|
31
34
|
/**
|
|
32
35
|
* Extract a ## section (top-level section like Description, Criteria).
|
|
33
36
|
*/
|
|
34
|
-
export function extractTopSection(stateMd: string, heading: string): string | null {
|
|
37
|
+
export function extractTopSection(stateMd: string, heading: string, redact?: boolean): string | null {
|
|
35
38
|
const prefix = heading.startsWith("##") ? heading : `## ${heading}`;
|
|
36
39
|
const lines = stateMd.split("\n");
|
|
37
40
|
let capturing = false;
|
|
@@ -51,7 +54,9 @@ export function extractTopSection(stateMd: string, heading: string): string | nu
|
|
|
51
54
|
}
|
|
52
55
|
}
|
|
53
56
|
|
|
54
|
-
|
|
57
|
+
if (captured.length === 0) return null;
|
|
58
|
+
const content = captured.join("\n").trim();
|
|
59
|
+
return redact ? redactIgnoredBlocks(content) : content;
|
|
55
60
|
}
|
|
56
61
|
|
|
57
62
|
/**
|
|
@@ -3,6 +3,7 @@ import { getContextFor } from "../config/agent-map.js";
|
|
|
3
3
|
import { extractSection, extractTopSection } from "./extractor.js";
|
|
4
4
|
import { readStateMd } from "../state/store.js";
|
|
5
5
|
import { skillFilePath } from "../config/phases.js";
|
|
6
|
+
import { redactIgnoredBlocks } from "./redactor.js";
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* Build a complete agent prompt for a given phase/sub-stage.
|
|
@@ -66,5 +67,5 @@ export function buildAgentPrompt(
|
|
|
66
67
|
parts.push(`When done, report your outcome so the orchestrator can run: \`npx work-kit-cli complete ${phase}/${subStage} --outcome <outcome>\``);
|
|
67
68
|
}
|
|
68
69
|
|
|
69
|
-
return parts.join("\n");
|
|
70
|
+
return redactIgnoredBlocks(parts.join("\n"));
|
|
70
71
|
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { describe, it } from "node:test";
|
|
2
|
+
import * as assert from "node:assert/strict";
|
|
3
|
+
import { redactIgnoredBlocks } from "./redactor.js";
|
|
4
|
+
|
|
5
|
+
describe("redactIgnoredBlocks", () => {
  // Build a multi-line fixture from individual rows.
  const text = (...rows: string[]): string => rows.join("\n");

  it("passes through content with no markers", () => {
    const input = "line 1\nline 2\nline 3";
    assert.equal(redactIgnoredBlocks(input), input);
  });

  it("redacts a block between start and end markers (// style)", () => {
    const result = redactIgnoredBlocks(
      text(
        "before",
        "// @wk-ignore-start",
        "secret line 1",
        "secret line 2",
        "// @wk-ignore-end",
        "after",
      ),
    );

    assert.ok(result.includes("before"));
    assert.ok(result.includes("after"));
    assert.ok(!result.includes("secret"));
    assert.ok(result.includes("[redacted: 4 lines — @wk-ignore]"));
  });

  it("redacts a block with # comment style", () => {
    const result = redactIgnoredBlocks(
      text("before", "# @wk-ignore-start", "hidden = true", "# @wk-ignore-end", "after"),
    );

    assert.ok(!result.includes("hidden"));
    assert.ok(result.includes("[redacted: 3 lines — @wk-ignore]"));
  });

  it("redacts a block with HTML comment style", () => {
    const result = redactIgnoredBlocks(
      text(
        "before",
        "<!-- @wk-ignore-start -->",
        "<div>secret</div>",
        "<!-- @wk-ignore-end -->",
        "after",
      ),
    );

    assert.ok(!result.includes("secret"));
    assert.ok(result.includes("[redacted: 3 lines — @wk-ignore]"));
  });

  it("handles unclosed marker by redacting to EOF", () => {
    const result = redactIgnoredBlocks(
      text("before", "// @wk-ignore-start", "line 1", "line 2", "line 3"),
    );

    assert.ok(result.includes("before"));
    assert.ok(!result.includes("line 1"));
    assert.ok(result.includes("(unclosed marker)"));
    assert.ok(result.includes("[redacted: 4 lines"));
  });

  it("handles multiple separate blocks", () => {
    const result = redactIgnoredBlocks(
      text(
        "top",
        "// @wk-ignore-start",
        "hidden1",
        "// @wk-ignore-end",
        "middle",
        "// @wk-ignore-start",
        "hidden2",
        "// @wk-ignore-end",
        "bottom",
      ),
    );

    for (const kept of ["top", "middle", "bottom"]) {
      assert.ok(result.includes(kept));
    }
    assert.ok(!result.includes("hidden1"));
    assert.ok(!result.includes("hidden2"));
    // Two separate redaction placeholders
    const placeholders = result.match(/\[redacted:/g);
    assert.equal(placeholders?.length, 2);
  });

  it("handles single-line block (start and end on same concept)", () => {
    const result = redactIgnoredBlocks(
      text("before", "// @wk-ignore-start", "// @wk-ignore-end", "after"),
    );

    assert.ok(result.includes("[redacted: 2 lines — @wk-ignore]"));
  });

  it("returns empty string for empty input", () => {
    assert.equal(redactIgnoredBlocks(""), "");
  });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
 * Redact blocks between @wk-ignore-start and @wk-ignore-end markers.
 * Supports comment styles: //, #, --, <!-- --> (a marker is recognised
 * anywhere on a line, so the surrounding comment syntax does not matter).
 * Replaces annotated blocks with a placeholder.
 */

const IGNORE_START = /@wk-ignore-start/;
const IGNORE_END = /@wk-ignore-end/;

/**
 * Replace every ignored block in `content` with a single placeholder line.
 *
 * A block runs from a line containing the start marker through the next line
 * containing the end marker, both marker lines included. If the end marker
 * never appears, the block extends to the last line and the placeholder is
 * tagged `(unclosed marker)`. An end marker outside a block is left untouched.
 *
 * @param content Text to scan, split on `\n`.
 * @returns The text with each block collapsed to
 *   `// [redacted: N lines — @wk-ignore]`, where N counts the marker lines too.
 */
export function redactIgnoredBlocks(content: string): string {
  const lines = content.split("\n");
  const lastIndex = lines.length - 1;
  const result: string[] = [];
  let inBlock = false;
  let blockLineCount = 0;

  for (const [index, line] of lines.entries()) {
    if (!inBlock && IGNORE_START.test(line)) {
      inBlock = true;
      blockLineCount = 0;
    }

    if (!inBlock) {
      result.push(line);
      continue;
    }

    blockLineCount++;
    // Start and end markers may share a line, so the end check runs on every block line.
    const closed = IGNORE_END.test(line);
    if (closed || index === lastIndex) {
      const warning = closed ? "" : " (unclosed marker)";
      result.push(`// [redacted: ${blockLineCount} lines — @wk-ignore${warning}]`);
      inBlock = false;
    }
  }

  return result.join("\n");
}
|
package/cli/src/index.ts
CHANGED
|
@@ -15,6 +15,7 @@ import { upgradeCommand } from "./commands/upgrade.js";
|
|
|
15
15
|
import { completionsCommand } from "./commands/completions.js";
|
|
16
16
|
import { observeCommand } from "./commands/observe.js";
|
|
17
17
|
import { uninstallCommand } from "./commands/uninstall.js";
|
|
18
|
+
import { bootstrapCommand } from "./commands/bootstrap.js";
|
|
18
19
|
import { bold, green, yellow, red } from "./utils/colors.js";
|
|
19
20
|
import type { Classification, PhaseName } from "./state/schema.js";
|
|
20
21
|
|
|
@@ -250,4 +251,20 @@ program
|
|
|
250
251
|
await uninstallCommand(targetPath);
|
|
251
252
|
});
|
|
252
253
|
|
|
254
|
+
// ── bootstrap ───────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
program
  .command("bootstrap")
  .description("Detect work-kit state and output session orientation")
  // Output is always JSON; the flag defaults to true and is accepted so callers may pass it explicitly.
  .option("--json", "Output as JSON", true)
  .action(() => {
    try {
      const result = bootstrapCommand();
      console.log(JSON.stringify(result, null, 2));
    } catch (e: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      const message = e instanceof Error ? e.message : String(e);
      console.error(JSON.stringify({ action: "error", message }));
      process.exit(1);
    }
  });
|
|
269
|
+
|
|
253
270
|
program.parse();
|
package/package.json
CHANGED
package/skills/auto-kit/SKILL.md
CHANGED
|
@@ -104,10 +104,14 @@ The table is a guide, not a rigid rule. Adjust based on the actual request:
|
|
|
104
104
|
|
|
105
105
|
## Continuing Work (`/auto-kit` with no args)
|
|
106
106
|
|
|
107
|
-
1.
|
|
108
|
-
2.
|
|
109
|
-
|
|
110
|
-
|
|
107
|
+
1. Run `npx work-kit-cli bootstrap` to detect session state
|
|
108
|
+
2. Parse the JSON response:
|
|
109
|
+
- If `active: false` — no session found, ask the user for a description and start new work
|
|
110
|
+
- If `recovery` is set — report the recovery suggestion to the user before continuing
|
|
111
|
+
- If `active: true` — report current state (slug, phase, sub-stage) to the user
|
|
112
|
+
3. `cd` into the worktree directory
|
|
113
|
+
4. Run `npx work-kit-cli next` to get the next action
|
|
114
|
+
5. Follow the execution loop below
|
|
111
115
|
|
|
112
116
|
## Step Validation
|
|
113
117
|
|
package/skills/full-kit/SKILL.md
CHANGED
|
@@ -44,10 +44,14 @@ Do not proceed until `doctor` reports all checks passed.
|
|
|
44
44
|
|
|
45
45
|
## Continuing Work (`/full-kit` with no args)
|
|
46
46
|
|
|
47
|
-
1.
|
|
48
|
-
2.
|
|
49
|
-
|
|
50
|
-
|
|
47
|
+
1. Run `npx work-kit-cli bootstrap` to detect session state
|
|
48
|
+
2. Parse the JSON response:
|
|
49
|
+
- If `active: false` — no session found, ask the user for a description and start new work
|
|
50
|
+
- If `recovery` is set — report the recovery suggestion to the user before continuing
|
|
51
|
+
- If `active: true` — report current state (slug, phase, sub-stage) to the user
|
|
52
|
+
3. `cd` into the worktree directory
|
|
53
|
+
4. Run `npx work-kit-cli next` to get the next action
|
|
54
|
+
5. Follow the execution loop below
|
|
51
55
|
|
|
52
56
|
## Execution Loop
|
|
53
57
|
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: bootstrap
|
|
3
|
+
description: "Session bootstrap — detect work-kit state and orient the agent at session start."
|
|
4
|
+
user-invocable: false
|
|
5
|
+
allowed-tools: Bash, Read
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Session Bootstrap
|
|
9
|
+
|
|
10
|
+
Run `npx work-kit-cli bootstrap` to detect work-kit state.
|
|
11
|
+
|
|
12
|
+
## If active work exists
|
|
13
|
+
|
|
14
|
+
- Report current state to the user: slug, phase, sub-stage, status
|
|
15
|
+
- If recovery is suggested: follow the recovery instruction
|
|
16
|
+
- Otherwise: run `npx work-kit-cli next` to continue the workflow
|
|
17
|
+
|
|
18
|
+
## If no active work
|
|
19
|
+
|
|
20
|
+
- Inform the user that work-kit is available
|
|
21
|
+
- Available commands: `/full-kit <description>` or `/auto-kit <description>`
|
|
22
|
+
- Do not start work unprompted
|
|
23
|
+
|
|
24
|
+
## If session is stale
|
|
25
|
+
|
|
26
|
+
- Report the staleness warning to the user
|
|
27
|
+
- Run `npx work-kit-cli status` to get full diagnostics
|
|
28
|
+
- If the state is recoverable, run `npx work-kit-cli next` to resume
|
|
29
|
+
- If the state is corrupted, suggest starting fresh
|
package/skills/wk-build/SKILL.md
CHANGED
|
@@ -40,6 +40,29 @@ Throughout every sub-stage, capture three things in the shared state.md sections
|
|
|
40
40
|
|
|
41
41
|
These feed into the final work-kit log summary. If you don't record them here, they're lost.
|
|
42
42
|
|
|
43
|
+
## Boundaries
|
|
44
|
+
|
|
45
|
+
### Always
|
|
46
|
+
- Follow the Blueprint step order unless a dependency requires reordering
|
|
47
|
+
- Run the test suite after every sub-stage that changes code
|
|
48
|
+
- Record every deviation from the Blueprint in the ## Deviations section
|
|
49
|
+
- Match existing codebase patterns found during Plan/Investigate
|
|
50
|
+
- Commit .work-kit/ files separately from feature code
|
|
51
|
+
|
|
52
|
+
### Ask First
|
|
53
|
+
- Redesigning any part of the Blueprint (adapt minimally, don't redesign)
|
|
54
|
+
- Adding dependencies not specified in the Blueprint
|
|
55
|
+
- Changing the data model beyond what Architecture specified
|
|
56
|
+
- Skipping the Red (failing tests) sub-stage
|
|
57
|
+
|
|
58
|
+
### Never
|
|
59
|
+
- Write implementation code before writing failing tests (Red comes before Core)
|
|
60
|
+
- Introduce new conventions that differ from existing codebase patterns
|
|
61
|
+
- Refactor code you did not write or modify in this feature
|
|
62
|
+
- Force push to any branch
|
|
63
|
+
- Include .env files, secrets, or credentials in commits
|
|
64
|
+
- Proceed with failing pre-existing tests without explaining why they changed
|
|
65
|
+
|
|
43
66
|
## Loop-back
|
|
44
67
|
|
|
45
68
|
If **Refactor** returns "broken" (tests failing after refactor):
|
|
@@ -64,6 +87,7 @@ After all sub-stages are done, append a `### Build: Final` section to state.md.
|
|
|
64
87
|
```markdown
|
|
65
88
|
### Build: Final
|
|
66
89
|
|
|
90
|
+
**Verdict:** complete | complete_with_issues
|
|
67
91
|
**PR:** #<number> — <title>
|
|
68
92
|
**PR URL:** <url>
|
|
69
93
|
**Branch:** feature/<slug>
|
|
@@ -41,3 +41,11 @@ description: "Build sub-stage: Create clean commits, push branch, create PR."
|
|
|
41
41
|
- If there are secrets or env files staged, remove them
|
|
42
42
|
- Prefer multiple focused commits over one giant commit
|
|
43
43
|
- PR description should be useful to a reviewer — not a wall of text
|
|
44
|
+
|
|
45
|
+
## Anti-Rationalization
|
|
46
|
+
|
|
47
|
+
| Excuse | Reality |
|
|
48
|
+
|--------|---------|
|
|
49
|
+
| "One big commit is fine for this feature" | Atomic commits make review possible, bisection useful, and reverts safe. A 500-line single commit is a review nightmare and an unrevertable blob. |
|
|
50
|
+
| "The PR description can be minimal since reviewers have context" | Reviewers do not have your context. The PR description is the first thing they read — it determines whether they understand or misunderstand every line of your diff. |
|
|
51
|
+
| "I'll clean up the commit history later" | You will not. Commit hygiene happens at commit time or not at all. Write the message as if you are explaining this change to someone six months from now. |
|
|
@@ -46,3 +46,13 @@ description: "Build sub-stage: Make failing tests pass — service layer, API, b
|
|
|
46
46
|
- Don't build UI yet — that's the UI sub-stage
|
|
47
47
|
- If a test expectation seems wrong, fix the test only if the Blueprint supports it
|
|
48
48
|
- Match existing code patterns exactly — naming, file structure, error handling
|
|
49
|
+
|
|
50
|
+
## Anti-Rationalization
|
|
51
|
+
|
|
52
|
+
| Excuse | Reality |
|
|
53
|
+
|--------|---------|
|
|
54
|
+
| "The Blueprint approach won't work, let me redesign" | Adapt, don't redesign. If the Blueprint truly cannot work, record a Deviation and make the minimal change needed. Full redesigns during Build invalidate the entire Plan phase. |
|
|
55
|
+
| "I should add this extra feature while I'm here" | Scope creep disguised as efficiency. Every addition not in the Blueprint is untested, unreviewed, and unplanned. Add it to a follow-up task instead. |
|
|
56
|
+
| "This pattern is better than what the codebase uses" | Consistency beats local optimization. The next developer expects the established pattern. Introduce new patterns in dedicated refactoring work, not mid-feature. |
|
|
57
|
+
|
|
58
|
+
> **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
|
|
@@ -42,3 +42,11 @@ description: "Build sub-stage: Wire everything together, verify full data flow e
|
|
|
42
42
|
- Check TypeScript types across boundaries — mismatches here cause runtime bugs
|
|
43
43
|
- If the dev server is available, actually navigate the flow
|
|
44
44
|
- Document any issues found — they indicate gaps in the Blueprint for future reference
|
|
45
|
+
|
|
46
|
+
## Anti-Rationalization
|
|
47
|
+
|
|
48
|
+
| Excuse | Reality |
|
|
49
|
+
|--------|---------|
|
|
50
|
+
| "Unit tests passing means integration is fine" | Unit tests mock boundaries. Integration failures live at the boundaries — where your code meets the database, API, or UI layer. |
|
|
51
|
+
| "I already verified data flow during Core" | Core verified that individual pieces work. Integration verifies they work together. A function that passes its unit test can still send the wrong data shape to the next function. |
|
|
52
|
+
| "Integration testing is the Test phase's job" | The Test phase runs automated suites. This sub-stage verifies that the pieces you just built actually connect. Finding a wiring bug here takes 5 minutes; finding it in Test takes 30. |
|
|
@@ -30,6 +30,8 @@ description: "Build sub-stage: Write failing tests BEFORE implementation (TDD re
|
|
|
30
30
|
**Test Output:**
|
|
31
31
|
<summary of test run — X tests, Y failing, Z passing (pre-existing)>
|
|
32
32
|
|
|
33
|
+
**Coverage:** <N>/<total> criteria have failing tests
|
|
34
|
+
|
|
33
35
|
**Criteria Coverage:**
|
|
34
36
|
- "<criterion>" → tested by <test name>
|
|
35
37
|
```
|
|
@@ -42,3 +44,11 @@ description: "Build sub-stage: Write failing tests BEFORE implementation (TDD re
|
|
|
42
44
|
- Existing tests must still pass — only NEW tests should fail
|
|
43
45
|
- Match the project's existing test patterns and frameworks
|
|
44
46
|
- If the project has no test framework set up, set one up as part of this step
|
|
47
|
+
|
|
48
|
+
## Anti-Rationalization
|
|
49
|
+
|
|
50
|
+
| Excuse | Reality |
|
|
51
|
+
|--------|---------|
|
|
52
|
+
| "Writing tests after implementation is more efficient" | Writing tests first defines the contract. Tests written after implementation test what you built, not what you should have built — they encode bugs as features. |
|
|
53
|
+
| "The code is simple enough it doesn't need tests" | Simple code is the easiest to test. If you skip tests here, you will also skip them for complex code with a different excuse. The Red stage exists to build the safety net before you need it. |
|
|
54
|
+
| "The Test phase will cover this" | The Test phase verifies the feature works end-to-end. Unit-level coverage must exist before that. Discovering a logic error in Test means rebuilding from Core. |
|
|
@@ -27,6 +27,8 @@ description: "Build sub-stage: Improve code quality while keeping all tests gree
|
|
|
27
27
|
**Refactoring Summary:**
|
|
28
28
|
- <what was improved and why>
|
|
29
29
|
|
|
30
|
+
**Changes Made:** <N> files touched
|
|
31
|
+
**Tests:** before=<N> passing, after=<N> passing
|
|
30
32
|
**Test Status:** passing | broken
|
|
31
33
|
|
|
32
34
|
**If broken:**
|
|
@@ -46,3 +48,11 @@ description: "Build sub-stage: Improve code quality while keeping all tests gree
|
|
|
46
48
|
- Don't refactor code you didn't write/modify in this feature
|
|
47
49
|
- If code is already clean, say so and move on — don't refactor for its own sake
|
|
48
50
|
- Small, incremental changes — not a big-bang rewrite
|
|
51
|
+
|
|
52
|
+
## Anti-Rationalization
|
|
53
|
+
|
|
54
|
+
| Excuse | Reality |
|
|
55
|
+
|--------|---------|
|
|
56
|
+
| "The code is fine as-is, nothing to refactor" | Fresh code always has cleanup opportunities — redundant variables, unclear names, duplicated logic. Read your code as if reviewing someone else's PR. |
|
|
57
|
+
| "I should refactor this unrelated code too" | Refactor only touches code you wrote or modified in this feature. Unrelated refactoring expands the diff, makes review harder, and risks regressions in code you don't fully understand. |
|
|
58
|
+
| "Tests are flaky, the refactoring didn't really break them" | If tests fail after refactoring, the refactoring changed behavior. Flaky tests that suddenly fail consistently are not flaky — they caught something. Investigate before dismissing. |
|
|
@@ -42,6 +42,25 @@ This phase runs as a **fresh agent**. Read only these sections from `.work-kit/s
|
|
|
42
42
|
- `### Build: Final` — PR URL, branch
|
|
43
43
|
- `## Criteria` — for final confirmation
|
|
44
44
|
|
|
45
|
+
## Boundaries
|
|
46
|
+
|
|
47
|
+
### Always
|
|
48
|
+
- Check CI status before merging
|
|
49
|
+
- Rebase on the default branch before merging to catch integration issues
|
|
50
|
+
- Verify the merge actually completed successfully
|
|
51
|
+
- Monitor deployment status after merge (where applicable)
|
|
52
|
+
|
|
53
|
+
### Ask First
|
|
54
|
+
- Resolving non-trivial rebase conflicts (show conflicts to the user first)
|
|
55
|
+
- Rolling back a deployment (except for data corruption or security incidents — roll those back immediately)
|
|
56
|
+
|
|
57
|
+
### Never
|
|
58
|
+
- Force push to main or master
|
|
59
|
+
- Merge with failing CI checks
|
|
60
|
+
- Skip the rebase step to "save time"
|
|
61
|
+
- Delete branches before confirming the merge succeeded
|
|
62
|
+
- Proceed past a failed deployment without fixing or rolling back
|
|
63
|
+
|
|
45
64
|
## Final Output
|
|
46
65
|
|
|
47
66
|
After all sub-stages are done, append a `### Deploy: Final` section to state.md. This is what **Wrap-up reads**.
|
|
@@ -49,6 +68,7 @@ After all sub-stages are done, append a `### Deploy: Final` section to state.md.
|
|
|
49
68
|
```markdown
|
|
50
69
|
### Deploy: Final
|
|
51
70
|
|
|
71
|
+
**Verdict:** shipped | fix_needed | rolled_back
|
|
52
72
|
**PR:** #<number>
|
|
53
73
|
**Merge status:** merged | fix_needed | abort
|
|
54
74
|
**Deploy status:** deployed | failed | not_applicable
|
|
@@ -35,8 +35,8 @@ description: "Deploy sub-stage: Get the PR merged safely."
|
|
|
35
35
|
```markdown
|
|
36
36
|
### Deploy: Merge
|
|
37
37
|
|
|
38
|
+
**CI Status:** passing | failing | N/A
|
|
38
39
|
**PR:** #<number>
|
|
39
|
-
**CI Status:** passing | failing
|
|
40
40
|
**Conflicts:** none | resolved
|
|
41
41
|
**Merge Method:** squash | merge | rebase
|
|
42
42
|
**Result:** merged | fix_needed | abort
|
|
@@ -57,3 +57,11 @@ description: "Deploy sub-stage: Get the PR merged safely."
|
|
|
57
57
|
- Merge is fully autonomous — do NOT ask the user for permission at any step (review phase already approved it)
|
|
58
58
|
- Push, create PR, and merge without stopping for confirmation
|
|
59
59
|
- The entire sync → push → PR → merge flow should complete in one agent pass
|
|
60
|
+
|
|
61
|
+
## Anti-Rationalization
|
|
62
|
+
|
|
63
|
+
| Excuse | Reality |
|
|
64
|
+
|--------|---------|
|
|
65
|
+
| "CI is probably fine, no need to wait for the check" | "Probably" is not evidence. CI exists to catch what you missed. Wait for the green check — it takes minutes and prevents shipping broken code. |
|
|
66
|
+
| "The conflict is trivial, I'll just force through" | Trivial conflicts still need manual resolution. Force-merging overwrites someone else's work. Resolve conflicts properly — if they are truly trivial, it takes 30 seconds. |
|
|
67
|
+
| "Rebasing will mess up my history, better to merge directly" | A clean rebase on the default branch catches integration issues before they reach main. The few minutes spent rebasing prevent broken builds that affect the entire team. |
|
package/skills/wk-plan/SKILL.md
CHANGED
|
@@ -54,6 +54,8 @@ After all sub-stages are done, append a `### Plan: Final` section to state.md. T
|
|
|
54
54
|
```markdown
|
|
55
55
|
### Plan: Final
|
|
56
56
|
|
|
57
|
+
**Verdict:** ready | revised_with_caveats
|
|
58
|
+
|
|
57
59
|
**Blueprint:**
|
|
58
60
|
<the full ordered implementation plan from Blueprint sub-stage — copy it here>
|
|
59
61
|
|
|
@@ -75,3 +77,24 @@ After all sub-stages are done, append a `### Plan: Final` section to state.md. T
|
|
|
75
77
|
Then:
|
|
76
78
|
- Update state: `**Phase:** plan (complete)`
|
|
77
79
|
- Commit state: `git add .work-kit/ && git commit -m "work-kit: complete plan"`
|
|
80
|
+
|
|
81
|
+
## Boundaries
|
|
82
|
+
|
|
83
|
+
### Always
|
|
84
|
+
- Read every file referenced in the Description before proposing solutions
|
|
85
|
+
- Ask clarifying questions when requirements have multiple valid interpretations
|
|
86
|
+
- Map blast radius by tracing actual code paths, not guessing from file names
|
|
87
|
+
- Include exact file paths in Blueprint steps
|
|
88
|
+
- Map every acceptance criterion to at least one Blueprint step
|
|
89
|
+
|
|
90
|
+
### Ask First
|
|
91
|
+
- Changing the scope after Clarify (user must approve scope changes)
|
|
92
|
+
- Adding acceptance criteria the user did not request
|
|
93
|
+
- Recommending a complexity rating of x-large (confirm before proceeding)
|
|
94
|
+
|
|
95
|
+
### Never
|
|
96
|
+
- Propose solutions during Clarify (that is Sketch's job)
|
|
97
|
+
- Skip Investigate to "save time" — code understanding prevents rework
|
|
98
|
+
- Write vague Blueprint steps like "update relevant files" without exact paths
|
|
99
|
+
- Assume the codebase follows standard patterns without verifying in Investigate
|
|
100
|
+
- Proceed past Audit with unresolved gaps in the Blueprint
|
|
@@ -25,6 +25,12 @@ description: "Plan sub-stage: Audit the Blueprint for gaps, contradictions, and
|
|
|
25
25
|
### Plan: Audit
|
|
26
26
|
|
|
27
27
|
**Result:** proceed | revise
|
|
28
|
+
**Checklist:**
|
|
29
|
+
- [ ] Every criterion maps to at least one Blueprint step
|
|
30
|
+
- [ ] Every Blueprint step has exact file paths
|
|
31
|
+
- [ ] Dependencies are ordered correctly
|
|
32
|
+
- [ ] Error/edge cases are addressed
|
|
33
|
+
- [ ] No scope creep beyond what Scope defined
|
|
28
34
|
|
|
29
35
|
**Gaps Found:**
|
|
30
36
|
- <gap description — or "None">
|
|
@@ -56,3 +62,11 @@ description: "Plan sub-stage: Audit the Blueprint for gaps, contradictions, and
|
|
|
56
62
|
- "Proceed" means you'd bet money this plan works
|
|
57
63
|
- "Revise" is not failure — it's the audit doing its job
|
|
58
64
|
- Max 2 revision loops — after that, proceed with noted caveats
|
|
65
|
+
|
|
66
|
+
## Anti-Rationalization
|
|
67
|
+
|
|
68
|
+
| Excuse | Reality |
|
|
69
|
+
|--------|---------|
|
|
70
|
+
| "The Blueprint looks complete, proceed without nitpicking" | Audit exists because plans always have gaps. If you cannot find any, you are not looking hard enough — check criterion coverage, missing error paths, and dependency order. |
|
|
71
|
+
| "Revising would waste time, the gaps are minor" | A 'minor' gap in the plan becomes a major blocker in Build. Sending back to Blueprint now costs minutes; discovering the gap mid-implementation costs hours. |
|
|
72
|
+
| "I already wrote the Blueprint, so I know it's correct" | Self-review bias is real. Audit requires you to read the Blueprint as if someone else wrote it. Check each criterion against the steps — does every criterion have a step that delivers it? |
|
|
@@ -58,3 +58,11 @@ description: "Plan sub-stage: Produce a full ordered step-by-step implementation
|
|
|
58
58
|
- If a step is "update 5 files", break it into 5 steps
|
|
59
59
|
- The Blueprint is the contract — Build phase follows it literally
|
|
60
60
|
- Include commands to run (migrations, test commands) as steps
|
|
61
|
+
|
|
62
|
+
## Anti-Rationalization
|
|
63
|
+
|
|
64
|
+
| Excuse | Reality |
|
|
65
|
+
|--------|---------|
|
|
66
|
+
| "High-level steps are sufficient, exact paths aren't needed" | Vague steps like "update the API" become ambiguous during Build. Exact file paths eliminate guesswork and prevent the wrong file from being modified. |
|
|
67
|
+
| "The Architecture section already covers the implementation plan" | Architecture describes structure. Blueprint describes execution order. Without a step-by-step plan, the Build agent will invent its own order — often wrong. |
|
|
68
|
+
| "Adding more detail will just slow things down" | A detailed Blueprint is the single highest-leverage artifact in the entire pipeline. Every minute here saves ten in Build. |
|
|
@@ -59,3 +59,11 @@ Update the `## Criteria` section with acceptance criteria, then append:
|
|
|
59
59
|
- **Do ask questions** — ambiguity caught here saves hours later
|
|
60
60
|
- If the request is crystal clear, don't invent questions just to ask them
|
|
61
61
|
- Ask questions only when the answer materially changes what gets built
|
|
62
|
+
|
|
63
|
+
## Anti-Rationalization
|
|
64
|
+
|
|
65
|
+
| Excuse | Reality |
|
|
66
|
+
|--------|---------|
|
|
67
|
+
| "The request is clear enough, no questions needed" | Ambiguity hides in assumptions. One clarifying question now prevents a wrong turn that wastes an entire Build phase. |
|
|
68
|
+
| "I should start reading code to understand better" | That is Investigate's job. Clarify defines *what* to build; Investigate discovers *how*. Mixing them leads to solution-driven requirements. |
|
|
69
|
+
| "Acceptance criteria can be refined later" | Vague criteria produce vague implementations. If you cannot write a testable criterion now, you do not understand the request yet. |
|
|
@@ -45,3 +45,13 @@ description: "Plan sub-stage: Read codebase systematically, trace paths, map bla
|
|
|
45
45
|
- Do NOT propose solutions yet — that's Sketch
|
|
46
46
|
- Note file paths precisely — these will be referenced in Blueprint
|
|
47
47
|
- If the codebase has no tests for affected areas, note that as a risk
|
|
48
|
+
|
|
49
|
+
## Anti-Rationalization
|
|
50
|
+
|
|
51
|
+
| Excuse | Reality |
|
|
52
|
+
|--------|---------|
|
|
53
|
+
| "I already understand the codebase from the description" | You understand the *intent*, not the *implementation*. Blast radius, existing patterns, and hidden dependencies live in the code, not the request. |
|
|
54
|
+
| "Checking more files would waste context" | Skipping investigation wastes far more context when you discover mid-Build that your assumptions were wrong and must restart. |
|
|
55
|
+
| "The blast radius is obvious, no need to trace paths" | An "obvious" blast radius is the most common source of missed side effects. Trace the actual call chain — surprises live one hop beyond what seems obvious. |
|
|
56
|
+
|
|
57
|
+
> **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
|
|
@@ -44,3 +44,11 @@ description: "Plan sub-stage: Define in/out scope, estimate complexity, refine c
|
|
|
44
44
|
- If Clarify criteria are too vague, sharpen them now
|
|
45
45
|
- "Out of scope" is a decision, not a deferral — explain why
|
|
46
46
|
- Complexity estimate should factor in blast radius from Investigate
|
|
47
|
+
|
|
48
|
+
## Anti-Rationalization
|
|
49
|
+
|
|
50
|
+
| Excuse | Reality |
|
|
51
|
+
|--------|---------|
|
|
52
|
+
| "Everything is in scope, no need to exclude anything" | Unbounded scope is how features balloon. Explicitly listing what is out of scope prevents drift during Build. |
|
|
53
|
+
| "This is too small to scope formally" | Small tasks with unclear boundaries grow silently. A 2-line scope section costs nothing and prevents "while I'm here" additions. |
|
|
54
|
+
| "The scope is implied by the acceptance criteria" | Criteria say what must work. Scope says what you will and will not touch. A criterion can be met many ways — scope constrains which way. |
|
|
@@ -64,6 +64,27 @@ Each writes its own `### Review: <sub-stage>` section to state.md.
|
|
|
64
64
|
|
|
65
65
|
**Handoff agent** reads all 4 review sections + Test: Final → makes the ship decision.
|
|
66
66
|
|
|
67
|
+
## Boundaries
|
|
68
|
+
|
|
69
|
+
### Always
|
|
70
|
+
- Read the full git diff before making any review judgments
|
|
71
|
+
- Fix issues directly when fixable in under 5 minutes
|
|
72
|
+
- Run the test suite after any fixes made during review
|
|
73
|
+
- Check every Blueprint step in the Compliance review
|
|
74
|
+
- Produce a clear ship/no-ship verdict with specific reasoning
|
|
75
|
+
|
|
76
|
+
### Ask First
|
|
77
|
+
- Approving with known failing criteria (explain which and why acceptable)
|
|
78
|
+
- Rejecting a PR (confirm the fundamental problem is not fixable)
|
|
79
|
+
- Making architectural changes during review
|
|
80
|
+
|
|
81
|
+
### Never
|
|
82
|
+
- Approve a PR with critical or high severity security issues
|
|
83
|
+
- Approve without checking acceptance criteria status
|
|
84
|
+
- Rubber-stamp without reading the diff ("looks good" is not a review)
|
|
85
|
+
- Issue a changes_requested verdict without specifying exactly what needs to change
|
|
86
|
+
- Skip any of the 4 parallel review sub-stages
|
|
87
|
+
|
|
67
88
|
## Final Output
|
|
68
89
|
|
|
69
90
|
After Handoff completes, append a `### Review: Final` section to state.md. This is what **Deploy and Wrap-up read**.
|
|
@@ -71,7 +92,7 @@ After Handoff completes, append a `### Review: Final` section to state.md. This
|
|
|
71
92
|
```markdown
|
|
72
93
|
### Review: Final
|
|
73
94
|
|
|
74
|
-
**
|
|
95
|
+
**Verdict:** approved | changes_requested | rejected
|
|
75
96
|
|
|
76
97
|
**Summary:** <1-2 sentences — overall assessment>
|
|
77
98
|
|
|
@@ -28,9 +28,9 @@ description: "Review sub-stage: Compare final code against Blueprint."
|
|
|
28
28
|
|
|
29
29
|
**Result:** compliant | deviations_found
|
|
30
30
|
|
|
31
|
-
**Blueprint Steps:**
|
|
32
|
-
- Step 1: <
|
|
33
|
-
- Step 2: <
|
|
31
|
+
**Blueprint Steps:** (every step MUST appear with a status)
|
|
32
|
+
- Step 1: <done | deviated | skipped>
|
|
33
|
+
- Step 2: <done | deviated | skipped>
|
|
34
34
|
- ...
|
|
35
35
|
|
|
36
36
|
**Deviations:**
|
|
@@ -46,3 +46,11 @@ description: "Review sub-stage: Compare final code against Blueprint."
|
|
|
46
46
|
- But deviations need justification — "I felt like it" is not acceptable
|
|
47
47
|
- Missing steps are a red flag — they need to be implemented or explicitly dropped with reason
|
|
48
48
|
- Scope creep should be called out even if the extra code is good
|
|
49
|
+
|
|
50
|
+
## Anti-Rationalization
|
|
51
|
+
|
|
52
|
+
| Excuse | Reality |
|
|
53
|
+
|--------|---------|
|
|
54
|
+
| "The deviations are improvements over the Blueprint" | Improvements still need documentation. If the implementation differs from the plan, record why — future readers need to know the deviation was intentional, not accidental. |
|
|
55
|
+
| "The Blueprint was wrong, so compliance doesn't apply" | If the Blueprint was wrong, that is itself a finding worth recording. Compliance review catches plan-vs-reality drift — both accidental deviations and deliberate corrections need documentation. |
|
|
56
|
+
| "Minor scope additions don't count as scope creep" | Minor additions compound. Each one is "just a small thing" until the PR is 3x the original scope. If it was not in the Blueprint, it is scope creep — document it as a Deviation. |
|
|
@@ -35,6 +35,9 @@ description: "Review sub-stage: Finalize PR, make ship/no-ship decision."
|
|
|
35
35
|
**Concerns:**
|
|
36
36
|
- <any remaining concerns — or "None">
|
|
37
37
|
|
|
38
|
+
**Criteria Met:** <N>/<total>
|
|
39
|
+
**Blockers:** <N> (list each if > 0)
|
|
40
|
+
|
|
38
41
|
**Decision:** approved | changes_requested | rejected
|
|
39
42
|
|
|
40
43
|
**If changes_requested:**
|
|
@@ -57,3 +60,11 @@ description: "Review sub-stage: Finalize PR, make ship/no-ship decision."
|
|
|
57
60
|
- Don't block on cosmetic issues — fix them directly before finalizing
|
|
58
61
|
- The PR should be ready for a human reviewer after this step
|
|
59
62
|
- If you're unsure between approved and changes_requested, ask the user
|
|
63
|
+
|
|
64
|
+
## Anti-Rationalization
|
|
65
|
+
|
|
66
|
+
| Excuse | Reality |
|
|
67
|
+
|--------|---------|
|
|
68
|
+
| "Requesting changes would slow things down, it's good enough" | Shipping known issues to "save time" moves the cost to production users and the next developer. Requesting changes now is faster than a hotfix later. |
|
|
69
|
+
| "The gaps are minor, we can fix them after merge" | After merge, the context is gone, the branch is deleted, and the priority shifts. Post-merge fixes have a completion rate near zero. Fix it now or accept it will never be fixed. |
|
|
70
|
+
| "Requesting changes will frustrate the developer" | A clear, specific change request is more respectful than silently approving broken code. Developers prefer honest feedback over discovering issues in production. |
|
|
@@ -29,6 +29,9 @@ Fix issues directly when possible. Document what you can't fix.
|
|
|
29
29
|
```markdown
|
|
30
30
|
### Review: Security
|
|
31
31
|
|
|
32
|
+
> **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders, these blocks are excluded from security review. If you suspect a security issue may exist within a redacted area, flag it for human review rather than attempting to reconstruct the code.
|
|
33
|
+
|
|
34
|
+
**Verdict:** clear | risks_noted | blocked
|
|
32
35
|
**Findings:**
|
|
33
36
|
- <finding with severity: critical/high/medium/low — or "None">
|
|
34
37
|
|
|
@@ -47,3 +50,11 @@ Fix issues directly when possible. Document what you can't fix.
|
|
|
47
50
|
- Not every feature touches all 10 categories — skip irrelevant ones
|
|
48
51
|
- Don't add security theater (unnecessary complexity for non-existent threats)
|
|
49
52
|
- If you find a critical issue, fix it immediately and note it prominently
|
|
53
|
+
|
|
54
|
+
## Anti-Rationalization
|
|
55
|
+
|
|
56
|
+
| Excuse | Reality |
|
|
57
|
+
|--------|---------|
|
|
58
|
+
| "This feature doesn't touch auth, so there are no security concerns" | Security is not just authentication. Input validation, data exposure, injection, CSRF, and insecure defaults exist in every feature that handles user data or external input. |
|
|
59
|
+
| "Input validation is handled elsewhere" | Verify that claim. "Handled elsewhere" is the most common source of security gaps — each layer assumes another layer validates. Check the actual validation at every boundary. |
|
|
60
|
+
| "This is internal-only, security doesn't matter" | Internal APIs become external when architectures change. Internal networks get compromised. Treat every input as potentially hostile — the cost of basic validation is negligible. |
|
|
@@ -27,6 +27,9 @@ description: "Review sub-stage: Review your own diff for obvious issues."
|
|
|
27
27
|
```markdown
|
|
28
28
|
### Review: Self-Review
|
|
29
29
|
|
|
30
|
+
> **Note:** If you encounter `[redacted: N lines — @wk-ignore]` placeholders in source code, these blocks are intentionally hidden. Do not attempt to reconstruct or work around them.
|
|
31
|
+
|
|
32
|
+
**Verdict:** clean | issues_remain
|
|
30
33
|
**Issues Found:** <N>
|
|
31
34
|
**Issues Fixed:** <M>
|
|
32
35
|
**Remaining Concerns:**
|
|
@@ -39,3 +42,11 @@ description: "Review sub-stage: Review your own diff for obvious issues."
|
|
|
39
42
|
- Remove ALL debug code (console.log, debugger statements, etc.)
|
|
40
43
|
- This is about catching careless mistakes, not redesigning the architecture
|
|
41
44
|
- Be honest — pretending your code is perfect helps no one
|
|
45
|
+
|
|
46
|
+
## Anti-Rationalization
|
|
47
|
+
|
|
48
|
+
| Excuse | Reality |
|
|
49
|
+
|--------|---------|
|
|
50
|
+
| "My code is already clean, nothing to review" | You wrote it minutes ago — you cannot objectively review your own fresh code. Read it as if someone else wrote it. Look for naming issues, missing error handling, and unclear logic. |
|
|
51
|
+
| "These are minor style issues, not worth fixing" | Accumulated minor issues make code hard to read and maintain. Fix them now while the context is fresh — they take seconds each but compound into significant tech debt. |
|
|
52
|
+
| "The linter didn't flag anything, so the code is fine" | Linters catch syntax and formatting. They do not catch unclear names, missing edge cases, redundant logic, or poor abstractions. Self-review catches what linters cannot. |
|
package/skills/wk-test/SKILL.md
CHANGED
|
@@ -53,6 +53,25 @@ Agent: E2E ──┘
|
|
|
53
53
|
|
|
54
54
|
Each sub-agent reads the same Context Input sections and writes its own `### Test: <sub-stage>` section to state.md.
|
|
55
55
|
|
|
56
|
+
## Boundaries
|
|
57
|
+
|
|
58
|
+
### Always
|
|
59
|
+
- Run the full test suite, not just new tests
|
|
60
|
+
- Provide explicit evidence for every satisfied criterion (test name, output, or code reference)
|
|
61
|
+
- Report honest confidence levels — do not inflate confidence
|
|
62
|
+
- Fix regressions immediately rather than documenting them for later
|
|
63
|
+
|
|
64
|
+
### Ask First
|
|
65
|
+
- Marking a criterion as "not testable" (explain why and get confirmation)
|
|
66
|
+
- Changing or reinterpreting acceptance criteria discovered during testing
|
|
67
|
+
- Disabling or modifying pre-existing tests
|
|
68
|
+
|
|
69
|
+
### Never
|
|
70
|
+
- Skip failing tests or disable them to make the suite pass
|
|
71
|
+
- Claim a criterion is satisfied without specific evidence
|
|
72
|
+
- Write E2E tests that test implementation details rather than user behavior
|
|
73
|
+
- Modify feature code during Test phase for anything other than fixing a regression or a failing flow (report other issues, don't fix them)
|
|
74
|
+
|
|
56
75
|
## Final Output
|
|
57
76
|
|
|
58
77
|
After all sub-stages are done, append a `### Test: Final` section to state.md. This is what **Review agents read**.
|
|
@@ -60,6 +79,7 @@ After all sub-stages are done, append a `### Test: Final` section to state.md. T
|
|
|
60
79
|
```markdown
|
|
61
80
|
### Test: Final
|
|
62
81
|
|
|
82
|
+
**Verdict:** pass | gaps_found
|
|
63
83
|
**Suite status:** all passing | <N> failures
|
|
64
84
|
**Total tests:** <count> (passing: <N>, failing: <N>)
|
|
65
85
|
|
|
@@ -22,6 +22,7 @@ description: "Test sub-stage: Test user flows end-to-end."
|
|
|
22
22
|
```markdown
|
|
23
23
|
### Test: E2E
|
|
24
24
|
|
|
25
|
+
**Verdict:** pass | fail
|
|
25
26
|
**Tests Written:**
|
|
26
27
|
- `<test file>`: <flow description>
|
|
27
28
|
|
|
@@ -42,3 +43,11 @@ description: "Test sub-stage: Test user flows end-to-end."
|
|
|
42
43
|
- Focus on user-visible behavior, not internal implementation
|
|
43
44
|
- Screenshots are evidence — capture them for key states
|
|
44
45
|
- If a flow fails, fix the implementation (not the test) unless the test expectation is wrong
|
|
46
|
+
|
|
47
|
+
## Anti-Rationalization
|
|
48
|
+
|
|
49
|
+
| Excuse | Reality |
|
|
50
|
+
|--------|---------|
|
|
51
|
+
| "Manual verification counts as E2E testing" | Manual verification is not repeatable, not documented, and not run in CI. If you cannot automate it, at minimum document the exact manual steps with expected results. |
|
|
52
|
+
| "Unit tests already cover this flow" | Unit tests mock boundaries. E2E tests verify the real flow across boundaries — database, API, UI. A function can pass its unit test and still fail in the real pipeline. |
|
|
53
|
+
| "E2E tests are slow and fragile, not worth the effort" | Slow tests that catch real bugs are more valuable than fast tests that miss them. Write focused E2E tests for critical paths, not exhaustive ones for every edge case. |
|
|
@@ -32,6 +32,7 @@ Also append:
|
|
|
32
32
|
```markdown
|
|
33
33
|
### Test: Validate
|
|
34
34
|
|
|
35
|
+
**Verdict:** pass | gaps_found
|
|
35
36
|
**Criteria Status:**
|
|
36
37
|
- Satisfied: <N> / <total>
|
|
37
38
|
- Gaps: <list of unsatisfied criteria>
|
|
@@ -49,3 +50,11 @@ Also append:
|
|
|
49
50
|
- If a criterion is genuinely not testable, explain why
|
|
50
51
|
- Low confidence should trigger concern in the Review phase
|
|
51
52
|
- Criteria should not change during Test — if a new criterion is discovered, note it but don't add it to the checklist mid-test
|
|
53
|
+
|
|
54
|
+
## Anti-Rationalization
|
|
55
|
+
|
|
56
|
+
| Excuse | Reality |
|
|
57
|
+
|--------|---------|
|
|
58
|
+
| "The test suite passing counts as evidence for all criteria" | A passing suite proves the tests pass, not that the criteria are met. Each criterion needs a specific test or evidence mapped to it — "tests pass" is not a mapping. |
|
|
59
|
+
| "This criterion is obviously satisfied, no explicit evidence needed" | If it is obvious, it is easy to provide evidence. If you cannot point to specific evidence, the criterion might not actually be met — your confidence is based on assumption, not proof. |
|
|
60
|
+
| "Low confidence is fine because the tests pass" | Low confidence means you are not sure the criterion is met. That is a signal to investigate further, not to accept and move on. The purpose of Validate is to resolve uncertainty, not document it. |
|
|
@@ -24,6 +24,7 @@ description: "Test sub-stage: Run existing test suite, check for regressions."
|
|
|
24
24
|
```markdown
|
|
25
25
|
### Test: Verify
|
|
26
26
|
|
|
27
|
+
**Verdict:** pass | fail
|
|
27
28
|
**Suite Result:** pass | fail
|
|
28
29
|
**Total Tests:** <N> passing, <M> failing
|
|
29
30
|
**Regressions Found:**
|
|
@@ -39,3 +40,11 @@ description: "Test sub-stage: Run existing test suite, check for regressions."
|
|
|
39
40
|
- Do NOT disable tests to make the suite pass
|
|
40
41
|
- If a pre-existing test fails and it's a legitimate behavior change, update the test with a comment explaining why
|
|
41
42
|
- Run the suite at least twice — once to find issues, once to confirm fixes
|
|
43
|
+
|
|
44
|
+
## Anti-Rationalization
|
|
45
|
+
|
|
46
|
+
| Excuse | Reality |
|
|
47
|
+
|--------|---------|
|
|
48
|
+
| "Disabling this flaky test is easier than fixing it" | Disabling tests erodes the safety net. A flaky test is a test with a real problem — intermittent failures often reveal race conditions or state leaks that will bite production. |
|
|
49
|
+
| "The failing test was testing the old behavior" | Then update the test to match the new behavior and verify the update is intentional. Deleting a test because it fails is destroying evidence — it might be catching a real regression. |
|
|
50
|
+
| "All tests pass, so everything works" | Tests only prove what they test. Check the criteria — does each one have a test that would fail if the criterion were not met? Passing tests with missing coverage give a false sense of security. |
|
|
@@ -63,6 +63,24 @@ status: <completed | partial | rolled-back>
|
|
|
63
63
|
- Internal process notes ("ran tests 3 times before they passed")
|
|
64
64
|
- Anything derivable from the git diff or PR description
|
|
65
65
|
|
|
66
|
+
## Boundaries
|
|
67
|
+
|
|
68
|
+
### Always
|
|
69
|
+
- Read the full state.md before writing the summary
|
|
70
|
+
- Include every non-obvious decision in the Key Decisions section
|
|
71
|
+
- Include every deviation from the Blueprint in the Deviations section
|
|
72
|
+
- Write the archive to the main branch, not the worktree
|
|
73
|
+
|
|
74
|
+
### Ask First
|
|
75
|
+
- Deleting the worktree and feature branch (confirm with user)
|
|
76
|
+
- Omitting sections from the summary
|
|
77
|
+
|
|
78
|
+
### Never
|
|
79
|
+
- Copy-paste full phase outputs into the summary (distill, don't dump)
|
|
80
|
+
- Include routine implementation details (file lists, command logs)
|
|
81
|
+
- Skip the criteria checklist in the summary
|
|
82
|
+
- Commit the archive on the feature branch instead of main
|
|
83
|
+
|
|
66
84
|
## Cleanup
|
|
67
85
|
|
|
68
86
|
After writing the summary:
|