work-kit-cli 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/cli/src/commands/bootstrap.test.ts +1 -1
- package/cli/src/commands/bootstrap.ts +22 -14
- package/cli/src/commands/complete.ts +76 -2
- package/cli/src/commands/doctor.ts +51 -2
- package/cli/src/commands/extract.ts +47 -25
- package/cli/src/commands/init.test.ts +3 -1
- package/cli/src/commands/init.ts +22 -15
- package/cli/src/commands/learn.test.ts +2 -2
- package/cli/src/commands/learn.ts +2 -1
- package/cli/src/config/agent-map.ts +10 -2
- package/cli/src/config/constants.ts +7 -0
- package/cli/src/config/loopback-routes.ts +6 -0
- package/cli/src/config/model-routing.ts +7 -1
- package/cli/src/config/workflow.ts +12 -6
- package/cli/src/index.ts +2 -2
- package/cli/src/state/helpers.test.ts +1 -1
- package/cli/src/state/schema.ts +11 -4
- package/cli/src/state/validators.test.ts +21 -2
- package/cli/src/state/validators.ts +2 -2
- package/cli/src/utils/knowledge.ts +7 -1
- package/cli/src/workflow/gates.ts +1 -0
- package/cli/src/workflow/parallel.ts +6 -1
- package/cli/src/workflow/transitions.test.ts +2 -2
- package/package.json +2 -2
- package/skills/auto-kit/SKILL.md +8 -1
- package/skills/full-kit/SKILL.md +14 -7
- package/skills/wk-bootstrap/SKILL.md +8 -0
- package/skills/wk-debug/SKILL.md +127 -0
- package/skills/wk-define/SKILL.md +87 -0
- package/skills/wk-define/steps/refine.md +71 -0
- package/skills/wk-define/steps/spec.md +70 -0
- package/skills/wk-plan/steps/architecture.md +16 -0
- package/skills/wk-test/steps/browser.md +92 -0
- package/skills/wk-test/steps/e2e.md +45 -23
- package/skills/wk-wrap-up/steps/knowledge.md +8 -3
package/README.md
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
# work-kit
|
|
2
2
|
|
|
3
|
-
Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes,
|
|
3
|
+
Structured development workflow for [Claude Code](https://claude.com/claude-code). Two modes, 7 phases, 31 steps, plus auto-debug recovery — orchestrated by a TypeScript CLI with reusable skill files.
|
|
4
|
+
|
|
5
|
+
## What's new in v0.5
|
|
6
|
+
|
|
7
|
+
- **Define phase** runs before Plan to refine vague asks into a concrete spec (auto-skipped for bug fixes/refactors).
|
|
8
|
+
- **wk-debug** triage skill auto-fires when any step reports `needs_debug`, then the originating step retries (max 2 iterations). Not user-invocable — fires from inside the pipeline.
|
|
9
|
+
- **`test/browser`** drives the running app via Chrome DevTools MCP and verifies user-facing acceptance criteria in a real browser. Skips gracefully if the MCP isn't installed.
|
|
10
|
+
- **`decision` knowledge type** auto-graduates `## Decisions` bullets into `.work-kit-knowledge/decisions.md` so future sessions don't re-litigate settled choices.
|
|
4
11
|
|
|
5
12
|
## Installation
|
|
6
13
|
|
|
@@ -48,7 +48,7 @@ describe("bootstrapCommand", () => {
|
|
|
48
48
|
assert.equal(result.slug, "test-feature");
|
|
49
49
|
assert.equal(result.mode, "full-kit");
|
|
50
50
|
assert.equal(result.status, "in-progress");
|
|
51
|
-
assert.equal(result.phase, "
|
|
51
|
+
assert.equal(result.phase, "define");
|
|
52
52
|
assert.equal(result.recovery, null);
|
|
53
53
|
});
|
|
54
54
|
|
|
@@ -2,12 +2,13 @@ import fs from "node:fs";
|
|
|
2
2
|
import { findWorktreeRoot, readState, writeState, statePath } from "../state/store.js";
|
|
3
3
|
import { unpause } from "../state/helpers.js";
|
|
4
4
|
import { CLI_BINARY, STALE_THRESHOLD_MS } from "../config/constants.js";
|
|
5
|
-
import { fileForType, readKnowledgeFile } from "../utils/knowledge.js";
|
|
5
|
+
import { fileForType, readKnowledgeFile, KNOWLEDGE_TYPES, type KnowledgeType } from "../utils/knowledge.js";
|
|
6
6
|
|
|
7
7
|
export interface BootstrapKnowledge {
|
|
8
8
|
lessons?: string;
|
|
9
9
|
conventions?: string;
|
|
10
10
|
risks?: string;
|
|
11
|
+
decisions?: string;
|
|
11
12
|
}
|
|
12
13
|
|
|
13
14
|
export interface BootstrapResult {
|
|
@@ -24,8 +25,8 @@ export interface BootstrapResult {
|
|
|
24
25
|
nextAction?: string;
|
|
25
26
|
recovery?: string | null;
|
|
26
27
|
/**
|
|
27
|
-
* Project-level knowledge files (lessons/conventions/risks) read
|
|
28
|
-
* <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
|
|
28
|
+
* Project-level knowledge files (lessons/conventions/risks/decisions) read
|
|
29
|
+
* from <mainRepoRoot>/.work-kit-knowledge/. Capped at 200 lines per file.
|
|
29
30
|
* workflow.md is intentionally excluded — it's a write-only artifact for
|
|
30
31
|
* human curators, not session context.
|
|
31
32
|
*/
|
|
@@ -89,26 +90,33 @@ export function bootstrapCommand(startDir?: string, options: BootstrapOptions =
|
|
|
89
90
|
nextAction = `Continue ${state.currentPhase ?? "next phase"}${state.currentStep ? "/" + state.currentStep : ""}. Run \`${CLI_BINARY} next\` to get the agent prompt.`;
|
|
90
91
|
}
|
|
91
92
|
|
|
92
|
-
// Load project-level knowledge files (best effort, never breaks bootstrap).
|
|
93
93
|
// workflow.md is intentionally excluded — it's a write-only artifact for
|
|
94
94
|
// human curators, not session context.
|
|
95
|
+
const INJECTED_TYPES: KnowledgeType[] = KNOWLEDGE_TYPES.filter(
|
|
96
|
+
(t) => t !== "workflow"
|
|
97
|
+
) as KnowledgeType[];
|
|
98
|
+
// Map each knowledge type to its plural field name on BootstrapKnowledge.
|
|
99
|
+
const TYPE_TO_FIELD: Record<Exclude<KnowledgeType, "workflow">, keyof BootstrapKnowledge> = {
|
|
100
|
+
lesson: "lessons",
|
|
101
|
+
convention: "conventions",
|
|
102
|
+
risk: "risks",
|
|
103
|
+
decision: "decisions",
|
|
104
|
+
};
|
|
105
|
+
|
|
95
106
|
let knowledge: BootstrapKnowledge | undefined;
|
|
96
107
|
try {
|
|
97
108
|
const mainRepoRoot = state.metadata?.mainRepoRoot;
|
|
98
109
|
if (mainRepoRoot) {
|
|
99
|
-
const
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
...(conventions && { conventions }),
|
|
106
|
-
...(risks && { risks }),
|
|
107
|
-
};
|
|
110
|
+
const collected: BootstrapKnowledge = {};
|
|
111
|
+
for (const type of INJECTED_TYPES) {
|
|
112
|
+
const content = readKnowledgeFile(mainRepoRoot, fileForType(type));
|
|
113
|
+
if (content) {
|
|
114
|
+
collected[TYPE_TO_FIELD[type as Exclude<KnowledgeType, "workflow">]] = content;
|
|
115
|
+
}
|
|
108
116
|
}
|
|
117
|
+
if (Object.keys(collected).length > 0) knowledge = collected;
|
|
109
118
|
}
|
|
110
119
|
} catch (err: any) {
|
|
111
|
-
// Non-fatal: log to stderr but don't break bootstrap
|
|
112
120
|
process.stderr.write(`work-kit: failed to load knowledge files: ${err.message}\n`);
|
|
113
121
|
}
|
|
114
122
|
|
|
@@ -5,9 +5,12 @@ import { isPhaseComplete, nextStepInPhase } from "../workflow/transitions.js";
|
|
|
5
5
|
import { checkLoopback, countLoopbacksForRoute } from "../workflow/loopbacks.js";
|
|
6
6
|
import { PHASE_ORDER } from "../config/workflow.js";
|
|
7
7
|
import { parseLocation, resetToLocation } from "../state/helpers.js";
|
|
8
|
-
import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, CLI_BINARY } from "../config/constants.js";
|
|
9
|
-
import { isStepOutcome, STEP_OUTCOMES, type Action, type PhaseName, type StepOutcome, type WorkKitState } from "../state/schema.js";
|
|
8
|
+
import { TRACKER_DIR, ARCHIVE_DIR, INDEX_FILE, SUMMARY_FILE, MAX_LOOPBACKS_PER_ROUTE, MAX_DEBUG_ITERATIONS, SKILL_DIR_PREFIX, CLI_BINARY } from "../config/constants.js";
|
|
9
|
+
import { isStepOutcome, STEP_OUTCOMES, type Action, type Location, type PhaseName, type StepOutcome, type StepState, type WorkKitState } from "../state/schema.js";
|
|
10
10
|
import { stateMdPath } from "../state/store.js";
|
|
11
|
+
import { resolveModel } from "../config/model-routing.js";
|
|
12
|
+
|
|
13
|
+
const DEBUG_SKILL_FILE = `.claude/skills/${SKILL_DIR_PREFIX}debug/SKILL.md`;
|
|
11
14
|
|
|
12
15
|
export function completeCommand(target: string, outcome?: string, worktreeRoot?: string): Action {
|
|
13
16
|
const root = worktreeRoot || findWorktreeRoot();
|
|
@@ -50,6 +53,10 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
|
|
|
50
53
|
return { action: "error", message: `${phase}/${step} is skipped and cannot be completed. Add it to the workflow first.` };
|
|
51
54
|
}
|
|
52
55
|
|
|
56
|
+
if (typedOutcome === "needs_debug") {
|
|
57
|
+
return handleNeedsDebug(root, state, stepState, { phase, step });
|
|
58
|
+
}
|
|
59
|
+
|
|
53
60
|
stepState.status = "completed";
|
|
54
61
|
stepState.completedAt = new Date().toISOString();
|
|
55
62
|
if (typedOutcome) {
|
|
@@ -143,6 +150,73 @@ export function completeCommand(target: string, outcome?: string, worktreeRoot?:
|
|
|
143
150
|
|
|
144
151
|
// ── Archive on completion ──────────────────────────────────────────
|
|
145
152
|
|
|
153
|
+
/**
|
|
154
|
+
* Divert a step that reported `needs_debug` into the wk-debug skill. The
|
|
155
|
+
* originating step stays in-progress so the next `next()` call retries it
|
|
156
|
+
* after the debug agent finishes. Bails to `wait_for_user` once the per-step
|
|
157
|
+
* iteration cap is reached.
|
|
158
|
+
*/
|
|
159
|
+
function handleNeedsDebug(
|
|
160
|
+
root: string,
|
|
161
|
+
state: WorkKitState,
|
|
162
|
+
stepState: StepState,
|
|
163
|
+
origin: Location
|
|
164
|
+
): Action {
|
|
165
|
+
const debugCount = state.loopbacks.filter(
|
|
166
|
+
(lb) => lb.kind === "debug" && lb.from.phase === origin.phase && lb.from.step === origin.step
|
|
167
|
+
).length;
|
|
168
|
+
|
|
169
|
+
if (debugCount >= MAX_DEBUG_ITERATIONS) {
|
|
170
|
+
writeState(root, state);
|
|
171
|
+
return {
|
|
172
|
+
action: "wait_for_user",
|
|
173
|
+
message: `${origin.phase}/${origin.step} reported needs_debug but max debug iterations (${MAX_DEBUG_ITERATIONS}) reached. Surface to user — manual intervention required.`,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
const iteration = debugCount + 1;
|
|
178
|
+
state.loopbacks.push({
|
|
179
|
+
from: origin,
|
|
180
|
+
to: origin,
|
|
181
|
+
reason: `Step reported needs_debug — invoking wk-debug (iteration ${iteration})`,
|
|
182
|
+
timestamp: new Date().toISOString(),
|
|
183
|
+
kind: "debug",
|
|
184
|
+
});
|
|
185
|
+
stepState.status = "in-progress";
|
|
186
|
+
delete stepState.outcome;
|
|
187
|
+
delete stepState.completedAt;
|
|
188
|
+
writeState(root, state);
|
|
189
|
+
|
|
190
|
+
const agentPrompt = [
|
|
191
|
+
`# Debug Triage`,
|
|
192
|
+
``,
|
|
193
|
+
`**Origin:** ${origin.phase}/${origin.step}`,
|
|
194
|
+
`**Iteration:** ${iteration} of ${MAX_DEBUG_ITERATIONS}`,
|
|
195
|
+
`**Worktree:** ${root}`,
|
|
196
|
+
``,
|
|
197
|
+
`## Instructions`,
|
|
198
|
+
`Read and follow the skill file: \`${DEBUG_SKILL_FILE}\``,
|
|
199
|
+
``,
|
|
200
|
+
`The originating step (${origin.phase}/${origin.step}) hit something it cannot resolve.`,
|
|
201
|
+
`Read \`.work-kit/state.md\` and the originating agent's working notes for that step.`,
|
|
202
|
+
``,
|
|
203
|
+
`Run the 5-step triage methodology. Write your full report to \`.work-kit/debug-<ISO-timestamp>.md\`.`,
|
|
204
|
+
`Do NOT call \`work-kit complete\` for the originating step — when you finish, the orchestrator will re-run \`work-kit next\` and the originating step will retry automatically.`,
|
|
205
|
+
].join("\n");
|
|
206
|
+
|
|
207
|
+
const debugModel = resolveModel(state, origin.phase, origin.step);
|
|
208
|
+
|
|
209
|
+
return {
|
|
210
|
+
action: "spawn_debug_agent",
|
|
211
|
+
origin,
|
|
212
|
+
iteration,
|
|
213
|
+
skillFile: DEBUG_SKILL_FILE,
|
|
214
|
+
agentPrompt,
|
|
215
|
+
onComplete: `${CLI_BINARY} next`,
|
|
216
|
+
...(debugModel && { model: debugModel }),
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
|
|
146
220
|
function archiveFolderName(slug: string, completedAt: string): string {
|
|
147
221
|
return `${slug}-${completedAt.split("T")[0]}`;
|
|
148
222
|
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
+
import * as os from "node:os";
|
|
3
4
|
import { execFileSync } from "node:child_process";
|
|
4
5
|
import { findWorktreeRoot, readState, stateExists } from "../state/store.js";
|
|
5
6
|
|
|
@@ -35,7 +36,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
|
|
|
35
36
|
}
|
|
36
37
|
|
|
37
38
|
// 3. Phase skill files
|
|
38
|
-
const phases = ["wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up"];
|
|
39
|
+
const phases = ["wk-define", "wk-plan", "wk-build", "wk-test", "wk-review", "wk-deploy", "wk-wrap-up", "wk-debug"];
|
|
39
40
|
let phasesMissing = 0;
|
|
40
41
|
for (const phase of phases) {
|
|
41
42
|
const phasePath = path.join(skillsDir, phase, "SKILL.md");
|
|
@@ -49,6 +50,26 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
|
|
|
49
50
|
checks.push({ name: "skill:phases", status: "fail", message: `${phasesMissing} phase skill(s) missing from ${skillsDir}` });
|
|
50
51
|
}
|
|
51
52
|
|
|
53
|
+
// 3b. Chrome DevTools MCP availability (used by test/browser).
|
|
54
|
+
// Warn-only: if missing, the browser step skips itself but the rest of the
|
|
55
|
+
// pipeline runs unaffected.
|
|
56
|
+
const cdpMcpAvailable = detectChromeDevtoolsMcp();
|
|
57
|
+
if (cdpMcpAvailable === "yes") {
|
|
58
|
+
checks.push({ name: "mcp:chrome-devtools", status: "pass", message: "Chrome DevTools MCP detected" });
|
|
59
|
+
} else if (cdpMcpAvailable === "unknown") {
|
|
60
|
+
checks.push({
|
|
61
|
+
name: "mcp:chrome-devtools",
|
|
62
|
+
status: "warn",
|
|
63
|
+
message: "Chrome DevTools MCP could not be detected. The test/browser step will be skipped if invoked.",
|
|
64
|
+
});
|
|
65
|
+
} else {
|
|
66
|
+
checks.push({
|
|
67
|
+
name: "mcp:chrome-devtools",
|
|
68
|
+
status: "warn",
|
|
69
|
+
message: "Chrome DevTools MCP not configured. test/browser will skip — install the MCP server to enable live browser verification.",
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
52
73
|
// 4. Git available
|
|
53
74
|
try {
|
|
54
75
|
const gitVersion = execFileSync("git", ["--version"], { encoding: "utf-8" }).trim();
|
|
@@ -62,7 +83,7 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
|
|
|
62
83
|
if (root && stateExists(root)) {
|
|
63
84
|
try {
|
|
64
85
|
const state = readState(root);
|
|
65
|
-
if (state.version ===
|
|
86
|
+
if (state.version === 3 && state.slug && state.status) {
|
|
66
87
|
checks.push({ name: "state", status: "pass", message: `Active work-kit: "${state.slug}" (${state.status})` });
|
|
67
88
|
} else {
|
|
68
89
|
checks.push({ name: "state", status: "warn", message: "tracker.json exists but has unexpected structure" });
|
|
@@ -77,3 +98,31 @@ export function doctorCommand(worktreeRoot?: string): { ok: boolean; checks: Che
|
|
|
77
98
|
const ok = checks.every((c) => c.status !== "fail");
|
|
78
99
|
return { ok, checks };
|
|
79
100
|
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Best-effort detection of the Chrome DevTools MCP server. We can't call MCP
|
|
104
|
+
* tools from the CLI, so we scan the most common config file locations for any
|
|
105
|
+
* chrome-devtools-flavored server entry. Returns "yes" on a hit, "no" if a
|
|
106
|
+
* config exists but doesn't mention it, and "unknown" if no config exists.
|
|
107
|
+
*/
|
|
108
|
+
function detectChromeDevtoolsMcp(): "yes" | "no" | "unknown" {
|
|
109
|
+
const claudeDir = path.join(os.homedir(), ".claude");
|
|
110
|
+
const candidates = [
|
|
111
|
+
path.join(claudeDir, "settings.json"),
|
|
112
|
+
path.join(claudeDir, "mcp.json"),
|
|
113
|
+
path.join(process.cwd(), ".mcp.json"),
|
|
114
|
+
];
|
|
115
|
+
|
|
116
|
+
let sawAny = false;
|
|
117
|
+
for (const file of candidates) {
|
|
118
|
+
let raw: string;
|
|
119
|
+
try {
|
|
120
|
+
raw = fs.readFileSync(file, "utf-8");
|
|
121
|
+
} catch {
|
|
122
|
+
continue; // missing or unreadable — skip
|
|
123
|
+
}
|
|
124
|
+
sawAny = true;
|
|
125
|
+
if (/chrome[-_]?devtools/i.test(raw)) return "yes";
|
|
126
|
+
}
|
|
127
|
+
return sawAny ? "no" : "unknown";
|
|
128
|
+
}
|
|
@@ -43,47 +43,69 @@ function emptyByType(): Record<KnowledgeType, number> {
|
|
|
43
43
|
const OBSERVATION_RE = /^-\s*\[([a-z]+)(?::([a-z0-9-]+\/[a-z0-9-]+))?\]\s*(.+)$/i;
|
|
44
44
|
|
|
45
45
|
/**
|
|
46
|
-
*
|
|
47
|
-
*
|
|
48
|
-
*
|
|
46
|
+
* A bullet under `## Decisions` is harvested when it follows the documented
|
|
47
|
+
* shape `**<context>**: chose <X> over <Y> — <why>`. Free-form lines are
|
|
48
|
+
* skipped (not errors). The leading `**context**:` becomes the entry's title.
|
|
49
|
+
*/
|
|
50
|
+
const DECISION_RE = /^-\s*\*\*([^*]+)\*\*\s*:\s*(.+)$/;
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Walk state.md once and emit raw entries from:
|
|
54
|
+
* - `## Observations` — typed bullets (`- [lesson|convention|risk|workflow|decision] text`)
|
|
55
|
+
* - `## Decisions` — bullets matching `**<context>**: chose X over Y — <why>`
|
|
56
|
+
*
|
|
57
|
+
* `## Deviations` stays scratch — agents routinely dump test plans there.
|
|
49
58
|
*/
|
|
50
59
|
function parseStateMd(stateMd: string): RawEntry[] {
|
|
51
60
|
const out: RawEntry[] = [];
|
|
52
61
|
if (!stateMd) return out;
|
|
53
62
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
// test plans, acceptance-criteria checklists, and self-review dumps. Auto-
|
|
57
|
-
// routing them floods workflow.md with noise. Agents opt into harvesting by
|
|
58
|
-
// writing typed bullets (`- [lesson|convention|risk|workflow] text`) under
|
|
59
|
-
// `## Observations`.
|
|
60
|
-
let inObservations = false;
|
|
63
|
+
type Section = "observations" | "decisions" | "other";
|
|
64
|
+
let section: Section = "other";
|
|
61
65
|
|
|
62
66
|
for (const rawLine of stateMd.split("\n")) {
|
|
63
67
|
const trimmed = rawLine.trim();
|
|
64
68
|
|
|
65
69
|
if (trimmed.startsWith("## ")) {
|
|
66
|
-
|
|
70
|
+
const heading = trimmed.slice(3).trim().toLowerCase();
|
|
71
|
+
if (heading === "observations") section = "observations";
|
|
72
|
+
else if (heading === "decisions") section = "decisions";
|
|
73
|
+
else section = "other";
|
|
67
74
|
continue;
|
|
68
75
|
}
|
|
69
76
|
|
|
70
|
-
if (
|
|
77
|
+
if (section === "other") continue;
|
|
71
78
|
if (!trimmed.startsWith("-") || trimmed.startsWith("<!--")) continue;
|
|
72
79
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
80
|
+
if (section === "observations") {
|
|
81
|
+
const m = trimmed.match(OBSERVATION_RE);
|
|
82
|
+
if (!m) continue;
|
|
83
|
+
const tag = m[1].toLowerCase();
|
|
84
|
+
if (!isKnowledgeType(tag)) continue;
|
|
85
|
+
const phaseStep = m[2];
|
|
86
|
+
const text = m[3].trim();
|
|
87
|
+
if (text.length === 0) continue;
|
|
88
|
+
const entry: RawEntry = { type: tag, text, source: "auto-state-md" };
|
|
89
|
+
if (phaseStep) {
|
|
90
|
+
const [p, s] = phaseStep.split("/");
|
|
91
|
+
entry.phase = p;
|
|
92
|
+
entry.step = s;
|
|
93
|
+
}
|
|
94
|
+
out.push(entry);
|
|
95
|
+
continue;
|
|
85
96
|
}
|
|
86
|
-
|
|
97
|
+
|
|
98
|
+
// section === "decisions"
|
|
99
|
+
const m = trimmed.match(DECISION_RE);
|
|
100
|
+
if (!m) continue;
|
|
101
|
+
const context = m[1].trim();
|
|
102
|
+
const rationale = m[2].trim();
|
|
103
|
+
if (context.length === 0 || rationale.length === 0) continue;
|
|
104
|
+
out.push({
|
|
105
|
+
type: "decision",
|
|
106
|
+
text: `**${context}**: ${rationale}`,
|
|
107
|
+
source: "auto-state-md",
|
|
108
|
+
});
|
|
87
109
|
}
|
|
88
110
|
|
|
89
111
|
return out;
|
|
@@ -40,7 +40,9 @@ describe("initCommand", () => {
|
|
|
40
40
|
);
|
|
41
41
|
assert.equal(state.slug, "add-user-login");
|
|
42
42
|
assert.equal(state.status, "in-progress");
|
|
43
|
-
assert.equal(state.currentPhase, "
|
|
43
|
+
assert.equal(state.currentPhase, "define");
|
|
44
|
+
assert.equal(state.currentStep, "refine");
|
|
45
|
+
assert.equal(state.version, 3);
|
|
44
46
|
});
|
|
45
47
|
|
|
46
48
|
it("returns spawn_agent action", () => {
|
package/cli/src/commands/init.ts
CHANGED
|
@@ -44,7 +44,16 @@ function buildPhases(workflow?: WorkflowStep[]): Record<PhaseName, PhaseState> {
|
|
|
44
44
|
return phases;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
function generateStateMd(
|
|
47
|
+
function generateStateMd(
|
|
48
|
+
slug: string,
|
|
49
|
+
branch: string,
|
|
50
|
+
mode: string,
|
|
51
|
+
description: string,
|
|
52
|
+
firstPhase: string,
|
|
53
|
+
firstStep: string,
|
|
54
|
+
classification?: string,
|
|
55
|
+
workflow?: WorkflowStep[]
|
|
56
|
+
): string {
|
|
48
57
|
const title = slug.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
|
|
49
58
|
const date = new Date().toISOString().split("T")[0];
|
|
50
59
|
|
|
@@ -60,8 +69,8 @@ function generateStateMd(slug: string, branch: string, mode: string, description
|
|
|
60
69
|
md += `**Classification:** ${classification}\n`;
|
|
61
70
|
}
|
|
62
71
|
|
|
63
|
-
md += `**Phase:**
|
|
64
|
-
**Step:**
|
|
72
|
+
md += `**Phase:** ${firstPhase}
|
|
73
|
+
**Step:** ${firstStep}
|
|
65
74
|
**Status:** in-progress
|
|
66
75
|
|
|
67
76
|
## Description
|
|
@@ -182,21 +191,19 @@ export function initCommand(options: {
|
|
|
182
191
|
workflow = buildFullWorkflow();
|
|
183
192
|
}
|
|
184
193
|
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
let
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
firstStep = first.step;
|
|
194
|
-
}
|
|
194
|
+
// First active step is always the first `included` entry in the workflow.
|
|
195
|
+
// For full-kit that's define/refine; for auto-kit it depends on classification.
|
|
196
|
+
let firstPhase: PhaseName = "define";
|
|
197
|
+
let firstStep = "refine";
|
|
198
|
+
const first = workflow?.find((s) => s.included);
|
|
199
|
+
if (first) {
|
|
200
|
+
firstPhase = first.phase;
|
|
201
|
+
firstStep = first.step;
|
|
195
202
|
}
|
|
196
203
|
|
|
197
204
|
// Build state
|
|
198
205
|
const state: WorkKitState = {
|
|
199
|
-
version:
|
|
206
|
+
version: 3,
|
|
200
207
|
slug,
|
|
201
208
|
branch,
|
|
202
209
|
started: new Date().toISOString(),
|
|
@@ -221,7 +228,7 @@ export function initCommand(options: {
|
|
|
221
228
|
|
|
222
229
|
// Write state files
|
|
223
230
|
writeState(worktreeRoot, state);
|
|
224
|
-
writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, classification, workflow));
|
|
231
|
+
writeStateMd(worktreeRoot, generateStateMd(slug, branch, modeLabel, description, firstPhase, firstStep, classification, workflow));
|
|
225
232
|
|
|
226
233
|
const model = resolveModel(state, firstPhase, firstStep);
|
|
227
234
|
|
|
@@ -146,8 +146,8 @@ describe("learnCommand", () => {
|
|
|
146
146
|
path.join(tmp, KNOWLEDGE_DIR, "lessons.md"),
|
|
147
147
|
"utf-8"
|
|
148
148
|
);
|
|
149
|
-
//
|
|
150
|
-
assert.ok(content.includes("
|
|
149
|
+
// Full-kit init now starts at define/refine (Define is the new first phase)
|
|
150
|
+
assert.ok(content.includes("define/refine"));
|
|
151
151
|
});
|
|
152
152
|
|
|
153
153
|
it("extracts typed bullets from state.md ## Observations", () => {
|
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
ensureKnowledgeDir,
|
|
5
5
|
fileForType,
|
|
6
6
|
isKnowledgeType,
|
|
7
|
+
KNOWLEDGE_TYPES,
|
|
7
8
|
redact,
|
|
8
9
|
type KnowledgeEntry,
|
|
9
10
|
type KnowledgeType,
|
|
@@ -36,7 +37,7 @@ export function learnCommand(opts: LearnOptions): LearnResult {
|
|
|
36
37
|
if (!opts.type || !isKnowledgeType(opts.type)) {
|
|
37
38
|
return {
|
|
38
39
|
action: "error",
|
|
39
|
-
message: `Invalid --type "${opts.type}". Must be one of:
|
|
40
|
+
message: `Invalid --type "${opts.type}". Must be one of: ${KNOWLEDGE_TYPES.join(", ")}.`,
|
|
40
41
|
};
|
|
41
42
|
}
|
|
42
43
|
if (!opts.text || opts.text.trim().length === 0) {
|
|
@@ -12,8 +12,11 @@ export interface AgentContext {
|
|
|
12
12
|
|
|
13
13
|
// Phase-level context (what the phase runner agent reads)
|
|
14
14
|
export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
|
|
15
|
+
define: {
|
|
16
|
+
sections: ["## Description"],
|
|
17
|
+
},
|
|
15
18
|
plan: {
|
|
16
|
-
sections: ["## Description", "## Criteria"],
|
|
19
|
+
sections: ["## Description", "### Define: Final", "## Criteria"],
|
|
17
20
|
},
|
|
18
21
|
build: {
|
|
19
22
|
sections: ["### Plan: Final", "## Criteria", "## Description"],
|
|
@@ -34,10 +37,15 @@ export const PHASE_CONTEXT: Record<PhaseName, AgentContext> = {
|
|
|
34
37
|
|
|
35
38
|
// Step-level context (for parallel sub-agents that need specific sections)
|
|
36
39
|
export const STEP_CONTEXT: Record<string, AgentContext> = {
|
|
40
|
+
// Define steps
|
|
41
|
+
"define/refine": { sections: ["## Description"] },
|
|
42
|
+
"define/spec": { sections: ["## Description", "### Define: Refine"] },
|
|
43
|
+
|
|
37
44
|
// Test steps
|
|
38
45
|
"test/verify": { sections: ["### Build: Final", "## Criteria"] },
|
|
46
|
+
"test/browser": { sections: ["### Build: Final", "## Criteria", "### Plan: UX Flow"] },
|
|
39
47
|
"test/e2e": { sections: ["### Build: Final", "### Plan: Final"] },
|
|
40
|
-
"test/validate": { sections: ["### Test: Verify", "### Test: E2E", "## Criteria"] },
|
|
48
|
+
"test/validate": { sections: ["### Test: Verify", "### Test: Browser", "### Test: E2E", "## Criteria"] },
|
|
41
49
|
|
|
42
50
|
// Review steps
|
|
43
51
|
"review/self-review": { sections: ["### Build: Final"], needsGitDiff: true },
|
|
@@ -48,6 +48,13 @@ export const KNOWLEDGE_LOCK = ".lock";
|
|
|
48
48
|
|
|
49
49
|
export const MAX_LOOPBACKS_PER_ROUTE = 2;
|
|
50
50
|
|
|
51
|
+
/**
|
|
52
|
+
* Max times wk-debug can be invoked for the same originating step before the
|
|
53
|
+
* orchestrator surfaces the failure to the user. Tracked separately from
|
|
54
|
+
* standard loopbacks via `LoopbackRecord.kind === "debug"`.
|
|
55
|
+
*/
|
|
56
|
+
export const MAX_DEBUG_ITERATIONS = 2;
|
|
57
|
+
|
|
51
58
|
// ── Staleness ───────────────────────────────────────────────────────
|
|
52
59
|
|
|
53
60
|
/** Threshold (ms) after which an in-progress state is considered stale. */
|
|
@@ -12,6 +12,12 @@ export interface LoopbackRoute {
|
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
export const LOOPBACK_ROUTES: LoopbackRoute[] = [
|
|
15
|
+
{
|
|
16
|
+
from: { phase: "define", step: "spec" },
|
|
17
|
+
triggerOutcome: "revise",
|
|
18
|
+
to: { phase: "define", step: "refine" },
|
|
19
|
+
reason: "Spec found ambiguity — looping back to Refine",
|
|
20
|
+
},
|
|
15
21
|
{
|
|
16
22
|
from: { phase: "plan", step: "audit" },
|
|
17
23
|
triggerOutcome: "revise",
|
|
@@ -32,6 +32,7 @@ const HARD_DEFAULT: ModelTier = "sonnet";
|
|
|
32
32
|
// ── Phase defaults ──────────────────────────────────────────────────
|
|
33
33
|
|
|
34
34
|
export const BY_PHASE: Record<PhaseName, ModelTier> = {
|
|
35
|
+
define: "opus",
|
|
35
36
|
plan: "sonnet",
|
|
36
37
|
build: "sonnet",
|
|
37
38
|
test: "sonnet",
|
|
@@ -43,6 +44,10 @@ export const BY_PHASE: Record<PhaseName, ModelTier> = {
|
|
|
43
44
|
// ── Step-level overrides (phase/step keys) ──────────────────────────
|
|
44
45
|
|
|
45
46
|
export const BY_STEP: Record<string, ModelTier> = {
|
|
47
|
+
// Define — refining a vague ask is reasoning-heavy
|
|
48
|
+
"define/refine": "opus",
|
|
49
|
+
"define/spec": "sonnet",
|
|
50
|
+
|
|
46
51
|
// Plan — research/design-heavy steps benefit from opus
|
|
47
52
|
"plan/clarify": "sonnet",
|
|
48
53
|
"plan/investigate": "opus",
|
|
@@ -63,8 +68,9 @@ export const BY_STEP: Record<string, ModelTier> = {
|
|
|
63
68
|
"build/integration": "sonnet",
|
|
64
69
|
"build/commit": "haiku",
|
|
65
70
|
|
|
66
|
-
// Test — verify is mechanical, e2e/validate need judgment
|
|
71
|
+
// Test — verify is mechanical, browser/e2e/validate need judgment
|
|
67
72
|
"test/verify": "haiku",
|
|
73
|
+
"test/browser": "sonnet",
|
|
68
74
|
"test/e2e": "sonnet",
|
|
69
75
|
"test/validate": "sonnet",
|
|
70
76
|
|