work-kit-cli 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/cli/src/commands/bootstrap.test.ts +1 -1
- package/cli/src/commands/bootstrap.ts +22 -14
- package/cli/src/commands/complete.ts +76 -2
- package/cli/src/commands/doctor.ts +51 -2
- package/cli/src/commands/extract.ts +47 -25
- package/cli/src/commands/init.test.ts +3 -1
- package/cli/src/commands/init.ts +22 -15
- package/cli/src/commands/learn.test.ts +2 -2
- package/cli/src/commands/learn.ts +2 -1
- package/cli/src/config/agent-map.ts +10 -2
- package/cli/src/config/constants.ts +7 -0
- package/cli/src/config/loopback-routes.ts +6 -0
- package/cli/src/config/model-routing.ts +7 -1
- package/cli/src/config/workflow.ts +12 -6
- package/cli/src/index.ts +2 -2
- package/cli/src/state/helpers.test.ts +1 -1
- package/cli/src/state/schema.ts +11 -4
- package/cli/src/state/validators.test.ts +21 -2
- package/cli/src/state/validators.ts +2 -2
- package/cli/src/utils/knowledge.ts +7 -1
- package/cli/src/workflow/gates.ts +1 -0
- package/cli/src/workflow/parallel.ts +6 -1
- package/cli/src/workflow/transitions.test.ts +2 -2
- package/package.json +2 -2
- package/skills/auto-kit/SKILL.md +8 -1
- package/skills/full-kit/SKILL.md +14 -7
- package/skills/wk-bootstrap/SKILL.md +8 -0
- package/skills/wk-debug/SKILL.md +127 -0
- package/skills/wk-define/SKILL.md +87 -0
- package/skills/wk-define/steps/refine.md +71 -0
- package/skills/wk-define/steps/spec.md +70 -0
- package/skills/wk-plan/steps/architecture.md +16 -0
- package/skills/wk-test/steps/browser.md +92 -0
- package/skills/wk-test/steps/e2e.md +45 -23
- package/skills/wk-wrap-up/steps/knowledge.md +8 -3
|
@@ -14,7 +14,8 @@ export const PHASE_ORDER: PhaseName[] = [...PHASE_NAMES];
|
|
|
14
14
|
// ── Prerequisites ───────────────────────────────────────────────────
|
|
15
15
|
|
|
16
16
|
export const PHASE_PREREQUISITES: Record<PhaseName, PhaseName | null> = {
|
|
17
|
-
|
|
17
|
+
define: null,
|
|
18
|
+
plan: "define", // when define is fully skipped, validators treat the skipped phase as satisfied
|
|
18
19
|
build: "plan",
|
|
19
20
|
test: "build",
|
|
20
21
|
review: "test",
|
|
@@ -36,55 +37,60 @@ type InclusionRule = "YES" | "skip" | "if UI" | "if DB" | "optional";
|
|
|
36
37
|
|
|
37
38
|
const WORKFLOW_MATRIX: Record<Classification, Record<string, InclusionRule>> = {
|
|
38
39
|
"bug-fix": {
|
|
40
|
+
"define/refine": "skip", "define/spec": "skip",
|
|
39
41
|
"plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
|
|
40
42
|
"plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
|
|
41
43
|
"build/setup": "skip", "build/migration": "skip", "build/red": "YES", "build/core": "YES",
|
|
42
44
|
"build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
|
|
43
|
-
"test/verify": "YES", "test/e2e": "skip", "test/validate": "YES",
|
|
45
|
+
"test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "YES",
|
|
44
46
|
"review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
|
|
45
47
|
"review/compliance": "skip", "review/handoff": "YES",
|
|
46
48
|
"deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
|
|
47
49
|
"wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
|
|
48
50
|
},
|
|
49
51
|
"small-change": {
|
|
52
|
+
"define/refine": "skip", "define/spec": "skip",
|
|
50
53
|
"plan/clarify": "YES", "plan/investigate": "skip", "plan/sketch": "skip", "plan/scope": "skip",
|
|
51
54
|
"plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
|
|
52
55
|
"build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
|
|
53
56
|
"build/ui": "if UI", "build/refactor": "skip", "build/integration": "skip", "build/commit": "YES",
|
|
54
|
-
"test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
|
|
57
|
+
"test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
|
|
55
58
|
"review/self-review": "YES", "review/security": "skip", "review/performance": "skip",
|
|
56
59
|
"review/compliance": "skip", "review/handoff": "YES",
|
|
57
60
|
"deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
|
|
58
61
|
"wrap-up/summary": "YES", "wrap-up/knowledge": "skip",
|
|
59
62
|
},
|
|
60
63
|
refactor: {
|
|
64
|
+
"define/refine": "skip", "define/spec": "skip",
|
|
61
65
|
"plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "skip", "plan/scope": "skip",
|
|
62
66
|
"plan/ux-flow": "skip", "plan/architecture": "skip", "plan/blueprint": "skip", "plan/audit": "skip",
|
|
63
67
|
"build/setup": "skip", "build/migration": "skip", "build/red": "skip", "build/core": "YES",
|
|
64
68
|
"build/ui": "if UI", "build/refactor": "YES", "build/integration": "skip", "build/commit": "YES",
|
|
65
|
-
"test/verify": "YES", "test/e2e": "skip", "test/validate": "skip",
|
|
69
|
+
"test/verify": "YES", "test/browser": "skip", "test/e2e": "skip", "test/validate": "skip",
|
|
66
70
|
"review/self-review": "YES", "review/security": "skip", "review/performance": "YES",
|
|
67
71
|
"review/compliance": "skip", "review/handoff": "YES",
|
|
68
72
|
"deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
|
|
69
73
|
"wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
|
|
70
74
|
},
|
|
71
75
|
feature: {
|
|
76
|
+
"define/refine": "YES", "define/spec": "YES",
|
|
72
77
|
"plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
|
|
73
78
|
"plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "skip",
|
|
74
79
|
"build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
|
|
75
80
|
"build/ui": "if UI", "build/refactor": "skip", "build/integration": "YES", "build/commit": "YES",
|
|
76
|
-
"test/verify": "YES", "test/e2e": "if UI", "test/validate": "YES",
|
|
81
|
+
"test/verify": "YES", "test/browser": "if UI", "test/e2e": "if UI", "test/validate": "YES",
|
|
77
82
|
"review/self-review": "YES", "review/security": "YES", "review/performance": "skip",
|
|
78
83
|
"review/compliance": "YES", "review/handoff": "YES",
|
|
79
84
|
"deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
|
|
80
85
|
"wrap-up/summary": "YES", "wrap-up/knowledge": "YES",
|
|
81
86
|
},
|
|
82
87
|
"large-feature": {
|
|
88
|
+
"define/refine": "YES", "define/spec": "YES",
|
|
83
89
|
"plan/clarify": "YES", "plan/investigate": "YES", "plan/sketch": "YES", "plan/scope": "YES",
|
|
84
90
|
"plan/ux-flow": "if UI", "plan/architecture": "YES", "plan/blueprint": "YES", "plan/audit": "YES",
|
|
85
91
|
"build/setup": "YES", "build/migration": "if DB", "build/red": "YES", "build/core": "YES",
|
|
86
92
|
"build/ui": "if UI", "build/refactor": "YES", "build/integration": "YES", "build/commit": "YES",
|
|
87
|
-
"test/verify": "YES", "test/e2e": "YES", "test/validate": "YES",
|
|
93
|
+
"test/verify": "YES", "test/browser": "if UI", "test/e2e": "YES", "test/validate": "YES",
|
|
88
94
|
"review/self-review": "YES", "review/security": "YES", "review/performance": "YES",
|
|
89
95
|
"review/compliance": "YES", "review/handoff": "YES",
|
|
90
96
|
"deploy/merge": "YES", "deploy/monitor": "optional", "deploy/remediate": "optional",
|
package/cli/src/index.ts
CHANGED
|
@@ -361,8 +361,8 @@ program
|
|
|
361
361
|
|
|
362
362
|
program
|
|
363
363
|
.command("learn")
|
|
364
|
-
.description("Append a knowledge entry (lesson/convention/risk/workflow) to .work-kit-knowledge/")
|
|
365
|
-
.requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow")
|
|
364
|
+
.description("Append a knowledge entry (lesson/convention/risk/workflow/decision) to .work-kit-knowledge/")
|
|
365
|
+
.requiredOption("--type <type>", "Entry type: lesson, convention, risk, workflow, decision")
|
|
366
366
|
.requiredOption("--text <text>", "Free-form text. Secrets are auto-redacted at write time.")
|
|
367
367
|
.option("--scope <glob>", "Optional path glob (stored, not yet used for filtering)")
|
|
368
368
|
.option("--phase <phase>", "Override session phase auto-fill")
|
package/cli/src/state/schema.ts
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
// ── Phase & Step Types ──────────────────────────────────────────────
|
|
2
2
|
|
|
3
|
-
export const PHASE_NAMES = ["plan", "build", "test", "review", "deploy", "wrap-up"] as const;
|
|
3
|
+
export const PHASE_NAMES = ["define", "plan", "build", "test", "review", "deploy", "wrap-up"] as const;
|
|
4
4
|
export type PhaseName = (typeof PHASE_NAMES)[number];
|
|
5
5
|
|
|
6
|
+
export const DEFINE_STEPS = ["refine", "spec"] as const;
|
|
6
7
|
export const PLAN_STEPS = ["clarify", "investigate", "sketch", "scope", "ux-flow", "architecture", "blueprint", "audit"] as const;
|
|
7
8
|
export const BUILD_STEPS = ["setup", "migration", "red", "core", "ui", "refactor", "integration", "commit"] as const;
|
|
8
|
-
export const TEST_STEPS = ["verify", "e2e", "validate"] as const;
|
|
9
|
+
export const TEST_STEPS = ["verify", "e2e", "browser", "validate"] as const;
|
|
9
10
|
export const REVIEW_STEPS = ["self-review", "security", "performance", "compliance", "handoff"] as const;
|
|
10
11
|
export const DEPLOY_STEPS = ["merge", "monitor", "remediate"] as const;
|
|
11
12
|
export const WRAPUP_STEPS = ["summary", "knowledge"] as const;
|
|
12
13
|
|
|
14
|
+
export type DefineStep = (typeof DEFINE_STEPS)[number];
|
|
13
15
|
export type PlanStep = (typeof PLAN_STEPS)[number];
|
|
14
16
|
export type BuildStep = (typeof BUILD_STEPS)[number];
|
|
15
17
|
export type TestStep = (typeof TEST_STEPS)[number];
|
|
@@ -17,9 +19,10 @@ export type ReviewStep = (typeof REVIEW_STEPS)[number];
|
|
|
17
19
|
export type DeployStep = (typeof DEPLOY_STEPS)[number];
|
|
18
20
|
export type WrapUpStep = (typeof WRAPUP_STEPS)[number];
|
|
19
21
|
|
|
20
|
-
export type StepName = PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
|
|
22
|
+
export type StepName = DefineStep | PlanStep | BuildStep | TestStep | ReviewStep | DeployStep | WrapUpStep;
|
|
21
23
|
|
|
22
24
|
export const STEPS_BY_PHASE: Record<PhaseName, readonly string[]> = {
|
|
25
|
+
define: DEFINE_STEPS,
|
|
23
26
|
plan: PLAN_STEPS,
|
|
24
27
|
build: BUILD_STEPS,
|
|
25
28
|
test: TEST_STEPS,
|
|
@@ -87,6 +90,7 @@ export const STEP_OUTCOMES = [
|
|
|
87
90
|
"changes_requested", // review handoff requested changes
|
|
88
91
|
"fix_needed", // deploy merge blocked, fix required
|
|
89
92
|
"fix_and_redeploy", // remediation requires another deploy cycle
|
|
93
|
+
"needs_debug", // step hit an error it can't resolve — invoke wk-debug, then return
|
|
90
94
|
"blocked", // step cannot proceed without external input
|
|
91
95
|
"skipped", // step intentionally skipped at runtime
|
|
92
96
|
] as const;
|
|
@@ -127,6 +131,8 @@ export interface LoopbackRecord {
|
|
|
127
131
|
to: Location;
|
|
128
132
|
reason: string;
|
|
129
133
|
timestamp: string;
|
|
134
|
+
/** "debug" loopbacks are virtual: the agent spawns wk-debug then retries the same step. */
|
|
135
|
+
kind?: "standard" | "debug";
|
|
130
136
|
}
|
|
131
137
|
|
|
132
138
|
// ── Workflow (auto-kit) ─────────────────────────────────────────────
|
|
@@ -144,7 +150,7 @@ export type WorkStatus = "in-progress" | "paused" | "completed" | "failed";
|
|
|
144
150
|
// ── Main State ──────────────────────────────────────────────────────
|
|
145
151
|
|
|
146
152
|
export interface WorkKitState {
|
|
147
|
-
version:
|
|
153
|
+
version: 3;
|
|
148
154
|
slug: string;
|
|
149
155
|
branch: string;
|
|
150
156
|
started: string;
|
|
@@ -182,6 +188,7 @@ export interface AgentSpec {
|
|
|
182
188
|
export type Action =
|
|
183
189
|
| { action: "spawn_agent"; phase: PhaseName; step: string; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
|
|
184
190
|
| { action: "spawn_parallel_agents"; agents: AgentSpec[]; thenSequential?: AgentSpec; onComplete: string }
|
|
191
|
+
| { action: "spawn_debug_agent"; origin: Location; iteration: number; skillFile: string; agentPrompt: string; onComplete: string; model?: ModelTier }
|
|
185
192
|
| { action: "wait_for_user"; message: string }
|
|
186
193
|
| { action: "loopback"; from: Location; to: Location; reason: string }
|
|
187
194
|
| { action: "complete"; message: string }
|
|
@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
|
|
|
14
14
|
phases[phase] = { status: "pending", steps };
|
|
15
15
|
}
|
|
16
16
|
return {
|
|
17
|
-
version:
|
|
17
|
+
version: 3,
|
|
18
18
|
slug: "test",
|
|
19
19
|
branch: "feature/test",
|
|
20
20
|
started: "2026-01-01",
|
|
@@ -38,14 +38,32 @@ function completePhase(state: WorkKitState, phase: PhaseName): void {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
describe("validatePhasePrerequisites", () => {
|
|
41
|
-
it("
|
|
41
|
+
it("define has no prerequisites — valid", () => {
|
|
42
42
|
const state = makeState();
|
|
43
|
+
const result = validatePhasePrerequisites(state, "define");
|
|
44
|
+
assert.equal(result.valid, true);
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it("plan with define complete — valid", () => {
|
|
48
|
+
const state = makeState();
|
|
49
|
+
completePhase(state, "define");
|
|
50
|
+
const result = validatePhasePrerequisites(state, "plan");
|
|
51
|
+
assert.equal(result.valid, true);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("plan with define skipped — valid (skipped satisfies prerequisite)", () => {
|
|
55
|
+
const state = makeState();
|
|
56
|
+
state.phases.define.status = "skipped";
|
|
57
|
+
for (const s of Object.values(state.phases.define.steps)) {
|
|
58
|
+
s.status = "skipped";
|
|
59
|
+
}
|
|
43
60
|
const result = validatePhasePrerequisites(state, "plan");
|
|
44
61
|
assert.equal(result.valid, true);
|
|
45
62
|
});
|
|
46
63
|
|
|
47
64
|
it("build with plan incomplete — invalid", () => {
|
|
48
65
|
const state = makeState();
|
|
66
|
+
completePhase(state, "define");
|
|
49
67
|
const result = validatePhasePrerequisites(state, "build");
|
|
50
68
|
assert.equal(result.valid, false);
|
|
51
69
|
assert.equal(result.missingPrerequisite, "plan");
|
|
@@ -53,6 +71,7 @@ describe("validatePhasePrerequisites", () => {
|
|
|
53
71
|
|
|
54
72
|
it("build with plan complete — valid", () => {
|
|
55
73
|
const state = makeState();
|
|
74
|
+
completePhase(state, "define");
|
|
56
75
|
completePhase(state, "plan");
|
|
57
76
|
const result = validatePhasePrerequisites(state, "build");
|
|
58
77
|
assert.equal(result.valid, true);
|
|
@@ -66,9 +66,9 @@ export function validatePhasePrerequisites(state: WorkKitState, phase: PhaseName
|
|
|
66
66
|
return { valid: true, message: "Prerequisites met for deploy" };
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
// General case
|
|
69
|
+
// General case: completed OR fully skipped both satisfy the prerequisite.
|
|
70
70
|
const prereqState = state.phases[prereq];
|
|
71
|
-
if (prereqState.status !== "completed") {
|
|
71
|
+
if (prereqState.status !== "completed" && prereqState.status !== "skipped") {
|
|
72
72
|
return {
|
|
73
73
|
valid: false,
|
|
74
74
|
message: `${phase} requires ${prereq} to be complete. Current: ${prereqState.status}`,
|
|
@@ -13,7 +13,7 @@ export const AUTO_BLOCK_START = "<!-- work-kit:auto:start -->";
|
|
|
13
13
|
export const AUTO_BLOCK_END = "<!-- work-kit:auto:end -->";
|
|
14
14
|
export const MANUAL_HEADER = "## Manual";
|
|
15
15
|
|
|
16
|
-
export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow"] as const;
|
|
16
|
+
export const KNOWLEDGE_TYPES = ["lesson", "convention", "risk", "workflow", "decision"] as const;
|
|
17
17
|
export type KnowledgeType = (typeof KNOWLEDGE_TYPES)[number];
|
|
18
18
|
|
|
19
19
|
export function isKnowledgeType(value: string): value is KnowledgeType {
|
|
@@ -25,6 +25,7 @@ const TYPE_TO_FILE: Record<KnowledgeType, string> = {
|
|
|
25
25
|
convention: "conventions.md",
|
|
26
26
|
risk: "risks.md",
|
|
27
27
|
workflow: "workflow.md",
|
|
28
|
+
decision: "decisions.md",
|
|
28
29
|
};
|
|
29
30
|
|
|
30
31
|
const FILE_TO_TITLE: Record<string, string> = {
|
|
@@ -32,6 +33,7 @@ const FILE_TO_TITLE: Record<string, string> = {
|
|
|
32
33
|
"conventions.md": "Conventions",
|
|
33
34
|
"risks.md": "Risks",
|
|
34
35
|
"workflow.md": "Workflow Feedback",
|
|
36
|
+
"decisions.md": "Decisions",
|
|
35
37
|
};
|
|
36
38
|
|
|
37
39
|
const FILE_TO_BLURB: Record<string, string> = {
|
|
@@ -43,6 +45,8 @@ const FILE_TO_BLURB: Record<string, string> = {
|
|
|
43
45
|
"Known fragile or dangerous areas. Touch these with care.",
|
|
44
46
|
"workflow.md":
|
|
45
47
|
"Feedback about the work-kit workflow itself as observed in this project — skill quality, step skips, loopbacks, failure modes. Mined manually to improve work-kit upstream.",
|
|
48
|
+
"decisions.md":
|
|
49
|
+
"Architectural and design decisions made during work-kit sessions: what was chosen, what was rejected, why. Format mirrors a lightweight ADR — read these before re-litigating a settled choice.",
|
|
46
50
|
};
|
|
47
51
|
|
|
48
52
|
// ── Path Resolvers ──────────────────────────────────────────────────
|
|
@@ -180,6 +184,8 @@ benefits.
|
|
|
180
184
|
- **lessons.md** — things you learned about this codebase (project-specific).
|
|
181
185
|
- **conventions.md** — codified rules this project follows.
|
|
182
186
|
- **risks.md** — fragile or dangerous areas to handle with care.
|
|
187
|
+
- **decisions.md** — architectural choices made during sessions: what was
|
|
188
|
+
picked, what was rejected, why. Read before re-litigating a settled choice.
|
|
183
189
|
- **workflow.md** — feedback about the work-kit workflow itself as observed
|
|
184
190
|
in this project. Mined manually across projects to improve work-kit.
|
|
185
191
|
|
|
@@ -14,6 +14,7 @@ export const WAIT_AFTER_PHASE: Set<PhaseName> = new Set([
|
|
|
14
14
|
// ── Phase Display Names ──────────────────────────────────────────────
|
|
15
15
|
|
|
16
16
|
export const PHASE_DISPLAY_NAMES: Record<PhaseName, string> = {
|
|
17
|
+
define: "Define",
|
|
17
18
|
plan: "Plan",
|
|
18
19
|
build: "Build",
|
|
19
20
|
test: "Test",
|
|
@@ -15,7 +15,12 @@ export interface ParallelGroup {
|
|
|
15
15
|
*/
|
|
16
16
|
export const DEFAULT_PARALLEL_GROUPS: Record<string, ParallelGroup> = {
|
|
17
17
|
test: {
|
|
18
|
-
|
|
18
|
+
// verify (test suite), e2e (Playwright/etc), and browser (Chrome DevTools
|
|
19
|
+
// MCP) all run as independent observations. validate consolidates them.
|
|
20
|
+
// browser is auto-skipped on non-UI classifications, so the parallel set
|
|
21
|
+
// shrinks naturally. browser.md is responsible for graceful behavior if
|
|
22
|
+
// the dev server is shared with e2e.
|
|
23
|
+
parallel: ["verify", "e2e", "browser"],
|
|
19
24
|
thenSequential: "validate",
|
|
20
25
|
},
|
|
21
26
|
review: {
|
|
@@ -14,7 +14,7 @@ function makeState(): WorkKitState {
|
|
|
14
14
|
phases[phase] = { status: "pending", steps };
|
|
15
15
|
}
|
|
16
16
|
return {
|
|
17
|
-
version:
|
|
17
|
+
version: 3,
|
|
18
18
|
slug: "test",
|
|
19
19
|
branch: "feature/test",
|
|
20
20
|
started: "2026-01-01",
|
|
@@ -91,7 +91,7 @@ describe("determineNextStep", () => {
|
|
|
91
91
|
state.currentPhase = null;
|
|
92
92
|
const result = determineNextStep(state);
|
|
93
93
|
assert.equal(result.type, "phase-boundary");
|
|
94
|
-
assert.equal(result.phase, "plan");
|
|
94
|
+
assert.equal(result.phase, "define");
|
|
95
95
|
});
|
|
96
96
|
|
|
97
97
|
it("returns step for current phase with pending work", () => {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "work-kit-cli",
|
|
3
|
-
"version": "0.4.1",
|
|
4
|
-
"description": "Structured development workflow for Claude Code. Two modes,
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "Structured development workflow for Claude Code. Two modes, 7 phases, 31 steps, plus debug recovery.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"work-kit": "cli/bin/work-kit.mjs",
|
package/skills/auto-kit/SKILL.md
CHANGED
|
@@ -26,13 +26,16 @@ Do not proceed until `doctor` reports all checks passed.
|
|
|
26
26
|
|
|
27
27
|
These are the building blocks you pick from:
|
|
28
28
|
|
|
29
|
+
- **Define:** Refine, Spec *(included for `feature` and `large-feature` only)*
|
|
29
30
|
- **Plan:** Clarify, Investigate, Sketch, Scope, UX Flow, Architecture, Blueprint, Audit
|
|
30
31
|
- **Build:** Setup, Migration, Red, Core, UI, Refactor, Integration, Commit
|
|
31
|
-
- **Test:** Verify, E2E, Validate
|
|
32
|
+
- **Test:** Verify, E2E, Browser, Validate *(Browser uses Chrome DevTools MCP; it is an `if UI` step, so it is included only when the work touches UI)*
|
|
32
33
|
- **Review:** Self-Review, Security, Performance, Compliance, Handoff
|
|
33
34
|
- **Deploy:** Merge, Monitor, Remediate (optional)
|
|
34
35
|
- **Wrap-up**
|
|
35
36
|
|
|
37
|
+
**Debug recovery:** any step can report outcome `needs_debug`. The CLI auto-spawns the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step.
|
|
38
|
+
|
|
36
39
|
## Starting New Work (`/auto-kit <description>`)
|
|
37
40
|
|
|
38
41
|
### Step 1: Analyze
|
|
@@ -55,6 +58,8 @@ Based on the classification, select steps. Use this table as a starting point, t
|
|
|
55
58
|
|
|
56
59
|
| Step | bug-fix | small-change | refactor | feature | large-feature |
|
|
57
60
|
|------------------------|---------|--------------|----------|---------|---------------|
|
|
61
|
+
| **Define: Refine** | skip | skip | skip | YES | YES |
|
|
62
|
+
| **Define: Spec** | skip | skip | skip | YES | YES |
|
|
58
63
|
| **Plan: Clarify** | YES | YES | YES | YES | YES |
|
|
59
64
|
| **Plan: Investigate** | YES | skip | YES | YES | YES |
|
|
60
65
|
| **Plan: Sketch** | skip | skip | skip | YES | YES |
|
|
@@ -73,6 +78,7 @@ Based on the classification, select steps. Use this table as a starting point, t
|
|
|
73
78
|
| **Build: Commit** | YES | YES | YES | YES | YES |
|
|
74
79
|
| **Test: Verify** | YES | YES | YES | YES | YES |
|
|
75
80
|
| **Test: E2E** | skip | skip | skip | if UI | YES |
|
|
81
|
+
| **Test: Browser** | skip | skip | skip | if UI | if UI |
|
|
76
82
|
| **Test: Validate** | YES | skip | skip | YES | YES |
|
|
77
83
|
| **Review: Self-Review**| YES | YES | YES | YES | YES |
|
|
78
84
|
| **Review: Security** | skip | skip | skip | YES | YES |
|
|
@@ -205,6 +211,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
|
|
|
205
211
|
3. Follow the action type:
|
|
206
212
|
- **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
|
|
207
213
|
- **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
|
|
214
|
+
- **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes, simply run `work-kit next` and the originating step will retry automatically.
|
|
208
215
|
- **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again.
|
|
209
216
|
- **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
|
|
210
217
|
- **`complete`**: Done — run wrap-up if not already done.
|
package/skills/full-kit/SKILL.md
CHANGED
|
@@ -24,12 +24,17 @@ Do not proceed until `doctor` reports all checks passed.
|
|
|
24
24
|
|
|
25
25
|
## Phases
|
|
26
26
|
|
|
27
|
-
1. **
|
|
28
|
-
2. **
|
|
29
|
-
3. **
|
|
30
|
-
4. **
|
|
31
|
-
5. **
|
|
32
|
-
6. **
|
|
27
|
+
1. **Define** (2 steps) — Refine → Spec *(catches vague asks before Plan investigates)*
|
|
28
|
+
2. **Plan** (8 steps) — Clarify → Investigate → Sketch → Scope → UX Flow → Architecture → Blueprint → Audit
|
|
29
|
+
3. **Build** (8 steps) — Setup → Migration → Red → Core → UI → Refactor → Integration → Commit
|
|
30
|
+
4. **Test** (4 steps) — Verify, E2E, Browser (parallel) → Validate
|
|
31
|
+
5. **Review** (5 steps) — Self-Review → Security → Performance → Compliance → Handoff
|
|
32
|
+
6. **Deploy** (3 steps) — Merge → Monitor → Remediate
|
|
33
|
+
7. **Wrap-up** — Synthesize work-kit summary, clean up worktree
|
|
34
|
+
|
|
35
|
+
**Browser test step** uses the Chrome DevTools MCP server. If it isn't installed, `work-kit doctor` warns but does not block — the browser step is skipped gracefully.
|
|
36
|
+
|
|
37
|
+
**Debug recovery:** any step can report outcome `needs_debug` when it hits an error it can't resolve. The CLI will automatically spawn the **wk-debug** skill (5-step triage), then the originating step retries. Max 2 debug attempts per step before surfacing to you.
|
|
33
38
|
|
|
34
39
|
## Starting New Work (`/full-kit <description>`)
|
|
35
40
|
|
|
@@ -80,6 +85,7 @@ The CLI manages all state transitions, prerequisites, and loopbacks. Follow this
|
|
|
80
85
|
3. Follow the action type:
|
|
81
86
|
- **`spawn_agent`**: Use the Agent tool with the provided `agentPrompt`. Pass `skillFile` path for reference. **If the action includes a `model` field, pass it as the Agent tool's `model` parameter; if the field is absent, do not set `model` (let Claude Code's default pick).** After the agent completes: `work-kit complete <phase>/<step> --outcome <outcome>`
|
|
82
87
|
- **`spawn_parallel_agents`**: Spawn all agents in the `agents` array in parallel using the Agent tool. **For each agent, pass its `model` field as the Agent tool's `model` parameter when present; omit when absent.** Wait for all to complete. Then spawn `thenSequential` if provided (same rule for its `model` field). After all complete: `work-kit complete <onComplete target>`
|
|
88
|
+
- **`spawn_debug_agent`**: A previous step reported `needs_debug`. Spawn the **wk-debug** skill via the Agent tool with the provided `agentPrompt` and `skillFile`. Use the `model` field if present. Do **not** call `work-kit complete` for the debug agent — when it finishes writing its `.work-kit/debug-*.md` file, simply run `work-kit next` and the originating step will retry automatically.
|
|
83
89
|
- **`wait_for_user`**: Report the message to the user and stop. Wait for them to say "proceed" before running `work-kit next` again. (Only appears in `--gated` mode.)
|
|
84
90
|
- **`loopback`**: Report the loopback to the user, then run `work-kit next` to continue from the target.
|
|
85
91
|
- **`complete`**: Done — run wrap-up if not already done.
|
|
@@ -93,7 +99,8 @@ Prerequisites are enforced by the CLI (`work-kit validate <phase>`). You don't n
|
|
|
93
99
|
|
|
94
100
|
| Phase | Requires |
|
|
95
101
|
|----------|-----------------------------------|
|
|
96
|
-
|
|
|
102
|
+
| Define | — (first phase, always allowed) |
|
|
103
|
+
| Plan | Define (complete or skipped) |
|
|
97
104
|
| Build | Plan (complete) |
|
|
98
105
|
| Test | Build (complete) |
|
|
99
106
|
| Review | Test (complete) |
|
|
@@ -16,6 +16,7 @@ Run `work-kit bootstrap` to detect work-kit state.
|
|
|
16
16
|
- `knowledge.lessons` — project-specific learnings from prior sessions
|
|
17
17
|
- `knowledge.conventions` — codified rules this project follows
|
|
18
18
|
- `knowledge.risks` — fragile or dangerous areas to handle with care
|
|
19
|
+
- `knowledge.decisions` — architectural choices made in past sessions (what was picked, what was rejected, why) — read this **before** proposing any choice that might re-litigate a settled one
|
|
19
20
|
- Read each of these silently into your working context — they're prior knowledge you should respect when planning and building. Briefly mention to the user that prior knowledge was loaded (one line; do not dump the full text into the chat).
|
|
20
21
|
- `workflow.md` is intentionally NOT loaded — it's a write-only artifact for human curators.
|
|
21
22
|
- If recovery is suggested: follow the recovery instruction
|
|
@@ -27,6 +28,13 @@ Run `work-kit bootstrap` to detect work-kit state.
|
|
|
27
28
|
- Available commands: `/full-kit <description>` or `/auto-kit <description>`
|
|
28
29
|
- Do not start work unprompted
|
|
29
30
|
|
|
31
|
+
## v0.5 capabilities to be aware of
|
|
32
|
+
|
|
33
|
+
- **Define phase** — runs before Plan for `feature` and `large-feature` work, refines vague asks into a concrete spec.
|
|
34
|
+
- **wk-debug** — auto-invoked when any step reports outcome `needs_debug`. You don't trigger it; the orchestrator does.
|
|
35
|
+
- **test/browser** — exercises the running app via Chrome DevTools MCP. Skips gracefully if the MCP isn't installed (doctor will warn at session start).
|
|
36
|
+
- **decisions in knowledge layer** — `## Decisions` bullets matching `**<context>**: chose X over Y — <why>` are auto-graduated to `.work-kit-knowledge/decisions.md` during wrap-up.
|
|
37
|
+
|
|
30
38
|
## If session is stale
|
|
31
39
|
|
|
32
40
|
- Report the staleness warning to the user
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: debug
|
|
3
|
+
description: "Mid-pipeline triage skill — invoked automatically when a step reports outcome=needs_debug. Five-step methodology to find and fix (or escalate) the failure."
|
|
4
|
+
user-invocable: false
|
|
5
|
+
allowed-tools: Bash, Read, Write, Edit, Glob, Grep
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the **Debug Triage Lead**. Another agent has hit something it can't resolve and reported `needs_debug`. Your job is **not** to do that agent's work — your job is to find out *why* it's stuck so it can retry with a clear path forward.
|
|
9
|
+
|
|
10
|
+
You are invoked by the work-kit orchestrator (not directly by the user) when any step reports outcome `needs_debug`. After you finish, the originating step will retry. You get **at most 2 invocations per origin step** before the orchestrator surfaces the failure to the user.
|
|
11
|
+
|
|
12
|
+
## Inputs you'll receive
|
|
13
|
+
|
|
14
|
+
The orchestrator hands you:
|
|
15
|
+
- `origin` — the phase/step that triggered debug (e.g. `build/core`, `test/verify`)
|
|
16
|
+
- `iteration` — 1 or 2 (which debug attempt this is for this origin; 1 means no earlier debug attempt exists)
|
|
17
|
+
- A snapshot of the relevant state.md sections for the origin step
|
|
18
|
+
|
|
19
|
+
You should also read:
|
|
20
|
+
- `.work-kit/state.md` — full session state
|
|
21
|
+
- The most recent `### <Phase>: <Step>` section the originating agent wrote
|
|
22
|
+
- Any `.work-kit/debug-*.md` files from previous debug iterations (if `iteration > 1`)
|
|
23
|
+
|
|
24
|
+
## The 5 steps
|
|
25
|
+
|
|
26
|
+
Work through these in order. Don't skip.
|
|
27
|
+
|
|
28
|
+
### 1. Reproduce
|
|
29
|
+
Confirm the failure deterministically. Run the exact command, request, or scenario that broke. If you can't reproduce, that's information — record it, skip to step 5, and escalate with the verdict `unreproducible`.
|
|
30
|
+
|
|
31
|
+
### 2. Isolate
|
|
32
|
+
Shrink the failing case to the smallest input that still fails. Identify the boundary: does it fail before X, after Y, only with Z? Narrowing the surface area is more valuable than guessing causes.
|
|
33
|
+
|
|
34
|
+
### 3. Hypothesize
|
|
35
|
+
List candidate causes, ranked by likelihood. Be honest about confidence:
|
|
36
|
+
- **High** — direct evidence points here
|
|
37
|
+
- **Medium** — pattern matches a known failure mode
|
|
38
|
+
- **Low** — possible but speculative
|
|
39
|
+
|
|
40
|
+
Three hypotheses is usually enough. More is procrastination.
|
|
41
|
+
|
|
42
|
+
### 4. Test
|
|
43
|
+
Make the cheapest hypothesis-killing observation first. The goal is to *eliminate* hypotheses, not to prove the favorite one. Read the relevant code, check a log, run a smaller variant. Each observation should rule something out.
|
|
44
|
+
|
|
45
|
+
### 5. Fix or escalate
|
|
46
|
+
- **Fix** — if the cause is obvious and small, apply the minimal fix. Do NOT scope-creep into surrounding cleanup. Verify the fix addresses the original failure (re-run step 1).
|
|
47
|
+
- **Escalate** — if the fix requires architectural change, user input, or work outside the originating step's scope, write a clear escalation: what's known, what's unknown, what would unblock it.
|
|
48
|
+
|
|
49
|
+
## Output
|
|
50
|
+
|
|
51
|
+
Write your full triage to `.work-kit/debug-<ISO-timestamp>.md`:
|
|
52
|
+
|
|
53
|
+
```markdown
|
|
54
|
+
# Debug — <origin-phase>/<origin-step> (iteration <N>)
|
|
55
|
+
|
|
56
|
+
## 1. Reproduce
|
|
57
|
+
**Confirmed:** yes | no
|
|
58
|
+
<what you ran, what happened>
|
|
59
|
+
|
|
60
|
+
## 2. Isolate
|
|
61
|
+
**Minimal failing case:** <description or exact command>
|
|
62
|
+
**Boundary:** <fails when X; works when Y>
|
|
63
|
+
|
|
64
|
+
## 3. Hypotheses
|
|
65
|
+
1. [high|med|low] <hypothesis>
|
|
66
|
+
2. [high|med|low] <hypothesis>
|
|
67
|
+
3. [high|med|low] <hypothesis>
|
|
68
|
+
|
|
69
|
+
## 4. Tests
|
|
70
|
+
- <observation 1> → ruled out: <which hypothesis>
|
|
71
|
+
- <observation 2> → ruled out: <which>
|
|
72
|
+
- <observation 3> → confirmed: <which>
|
|
73
|
+
|
|
74
|
+
## 5. Outcome
|
|
75
|
+
**Verdict:** fixed | escalated | unreproducible
|
|
76
|
+
|
|
77
|
+
**If fixed:**
|
|
78
|
+
- **Root cause:** <one sentence>
|
|
79
|
+
- **Fix applied:** <files changed, what changed>
|
|
80
|
+
- **Verification:** <how you confirmed the fix worked>
|
|
81
|
+
|
|
82
|
+
**If escalated:**
|
|
83
|
+
- **What's known:** <facts>
|
|
84
|
+
- **What's unknown:** <gaps>
|
|
85
|
+
- **What would unblock:** <user input needed | architectural change | scope expansion>
|
|
86
|
+
- **Recommended next step:** <concrete suggestion>
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Then append a **single-line breadcrumb** to `.work-kit/state.md` under `## Observations`:
|
|
90
|
+
|
|
91
|
+
```markdown
|
|
92
|
+
- [risk] debug:<origin-phase>/<origin-step>: <one-sentence cause + verdict>
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
This lets `wrap-up/knowledge` graduate the debug finding into the project's risks file.
|
|
96
|
+
|
|
97
|
+
## After you finish
|
|
98
|
+
|
|
99
|
+
The orchestrator will:
|
|
100
|
+
- See your debug-*.md file
|
|
101
|
+
- Re-spawn the originating step
|
|
102
|
+
- That agent will see a `### Debug: <origin>` summary of your triage in its prompt context (if applicable) and your fixes in the working tree
|
|
103
|
+
|
|
104
|
+
You don't need to explicitly hand off — just write the file and exit. **Do not call `work-kit complete`** for the originating step. The retry will do that.
|
|
105
|
+
|
|
106
|
+
## Boundaries
|
|
107
|
+
|
|
108
|
+
### Always
|
|
109
|
+
- Reproduce before hypothesizing. Skipping reproduce is the #1 cause of bad debug sessions.
|
|
110
|
+
- Cap hypotheses at 3 unless the failure is genuinely complex.
|
|
111
|
+
- Write the file even if you escalate. The escalation is the deliverable.
|
|
112
|
+
- Re-run the failing case after applying a fix. "Should work" is not "does work".
|
|
113
|
+
|
|
114
|
+
### Never
|
|
115
|
+
- Expand scope beyond the failing case. You are not refactoring, you are unsticking.
|
|
116
|
+
- Disable tests, skip checks, or comment out failing code to "pass" the retry.
|
|
117
|
+
- Loop forever — if iteration 2 still can't fix it, ESCALATE. The orchestrator will surface to the user.
|
|
118
|
+
- Touch files unrelated to the failure boundary you isolated in step 2.
|
|
119
|
+
|
|
120
|
+
## Anti-Rationalization
|
|
121
|
+
|
|
122
|
+
| Excuse | Reality |
|
|
123
|
+
|--------|---------|
|
|
124
|
+
| "I can see the problem, I'll just fix it without reproducing" | Skipping reproduce means you might fix the wrong thing. Reproduce takes 30 seconds. Do it. |
|
|
125
|
+
| "Disabling this check will make the test pass" | Yes, and it will hide the real failure. Debug exists to find causes, not symptoms. |
|
|
126
|
+
| "I'll keep trying hypotheses until one works" | That's gambling, not debugging. Each test must *eliminate* something. If you're not narrowing the space, stop and re-isolate. |
|
|
127
|
+
| "This is a deeper issue, I should rewrite the module" | Out of scope. Escalate it. Architectural rewrites belong in a separate work-kit session. |
|