work-kit-cli 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +11 -0
  2. package/cli/src/commands/bootstrap.test.ts +40 -0
  3. package/cli/src/commands/bootstrap.ts +38 -0
  4. package/cli/src/commands/extract.ts +217 -0
  5. package/cli/src/commands/init.test.ts +50 -0
  6. package/cli/src/commands/init.ts +32 -5
  7. package/cli/src/commands/learn.test.ts +217 -0
  8. package/cli/src/commands/learn.ts +104 -0
  9. package/cli/src/commands/next.ts +30 -10
  10. package/cli/src/commands/observe.ts +16 -21
  11. package/cli/src/commands/pause-resume.test.ts +2 -2
  12. package/cli/src/commands/resume.ts +95 -7
  13. package/cli/src/commands/setup.ts +144 -0
  14. package/cli/src/commands/status.ts +2 -0
  15. package/cli/src/commands/workflow.ts +19 -9
  16. package/cli/src/config/constants.ts +10 -0
  17. package/cli/src/config/model-routing.test.ts +190 -0
  18. package/cli/src/config/model-routing.ts +208 -0
  19. package/cli/src/config/workflow.ts +5 -5
  20. package/cli/src/index.ts +70 -5
  21. package/cli/src/observer/data.ts +132 -9
  22. package/cli/src/observer/renderer.ts +34 -36
  23. package/cli/src/observer/watcher.ts +28 -16
  24. package/cli/src/state/schema.ts +50 -3
  25. package/cli/src/state/store.ts +39 -4
  26. package/cli/src/utils/fs.ts +13 -0
  27. package/cli/src/utils/knowledge.ts +471 -0
  28. package/package.json +1 -1
  29. package/skills/auto-kit/SKILL.md +27 -10
  30. package/skills/full-kit/SKILL.md +25 -8
  31. package/skills/resume-kit/SKILL.md +44 -8
  32. package/skills/wk-bootstrap/SKILL.md +6 -0
  33. package/skills/wk-build/SKILL.md +3 -2
  34. package/skills/wk-deploy/SKILL.md +1 -0
  35. package/skills/wk-plan/SKILL.md +3 -2
  36. package/skills/wk-review/SKILL.md +1 -0
  37. package/skills/wk-test/SKILL.md +1 -0
  38. package/skills/wk-test/steps/e2e.md +15 -12
  39. package/skills/wk-wrap-up/SKILL.md +15 -2
  40. package/skills/wk-wrap-up/steps/knowledge.md +76 -0
@@ -1,8 +1,11 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import * as readline from "node:readline";
4
+ import { spawnSync } from "node:child_process";
4
5
  import { doctorCommand } from "./doctor.js";
5
6
  import { bold, dim, green, yellow, red, cyan } from "../utils/colors.js";
7
+ import { ensureKnowledgeDir, KNOWLEDGE_DIR, KNOWLEDGE_LOCK } from "../utils/knowledge.js";
8
+ import { ensureGitignored } from "./init.js";
6
9
 
7
10
  const SKILLS_SOURCE = path.resolve(import.meta.dirname, "..", "..", "..", "skills");
8
11
 
@@ -195,6 +198,141 @@ function installHooks(projectDir: string): { added: number; file: string } {
195
198
  return { added: WK_HOOKS.length, file: settingsFile };
196
199
  }
197
200
 
201
+ // ── Playwright detection / install ─────────────────────────────────
202
+ //
203
+ // Work-kit's Test phase requires a real E2E framework. We standardize on
204
+ // Playwright. setup/upgrade detect whether the target project already has
205
+ // it; if not, we offer to install it (and optionally scaffold a config).
206
+
207
/** Package managers work-kit knows how to drive when installing Playwright. */
type PackageManager = "pnpm" | "yarn" | "npm";

/**
 * Guess which package manager the target project uses.
 *
 * Lockfiles win, in the order pnpm, yarn, npm. When no lockfile is present
 * (for example, lockfiles are not committed), the `packageManager` field of
 * package.json — the Corepack convention, e.g. "pnpm@9.1.0" — is consulted
 * before defaulting to npm.
 *
 * @param projectDir Root directory of the target project.
 * @returns The detected package manager; "npm" when nothing else matches.
 */
function detectPackageManager(projectDir: string): PackageManager {
  if (fs.existsSync(path.join(projectDir, "pnpm-lock.yaml"))) return "pnpm";
  if (fs.existsSync(path.join(projectDir, "yarn.lock"))) return "yarn";
  // An npm lockfile is an explicit signal; do not let the manifest field override it.
  if (fs.existsSync(path.join(projectDir, "package-lock.json"))) return "npm";

  try {
    const raw = fs.readFileSync(path.join(projectDir, "package.json"), "utf-8");
    const manifest: unknown = JSON.parse(raw);
    const declared =
      manifest !== null && typeof manifest === "object"
        ? (manifest as { packageManager?: unknown }).packageManager
        : undefined;
    if (typeof declared === "string") {
      // Format is "<name>@<version>"; only the name matters here.
      const name = declared.split("@")[0];
      if (name === "pnpm" || name === "yarn") return name;
    }
  } catch {
    // Missing or unparsable package.json — fall through to the npm default.
  }
  return "npm";
}
214
+
215
/**
 * Report whether the project's package.json declares Playwright.
 *
 * Looks for `@playwright/test` or `playwright` under `dependencies` and
 * `devDependencies`. Only the manifest is inspected; a missing, unreadable
 * or unparsable package.json counts as "not installed".
 *
 * @param projectDir Root directory of the target project.
 */
function hasPlaywrightInstalled(projectDir: string): boolean {
  const manifestPath = path.join(projectDir, "package.json");
  if (!fs.existsSync(manifestPath)) {
    return false;
  }

  type Manifest = {
    dependencies?: Record<string, string>;
    devDependencies?: Record<string, string>;
  };

  try {
    const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf-8")) as Manifest;
    const declared = { ...(manifest.dependencies || {}), ...(manifest.devDependencies || {}) };
    if (declared["@playwright/test"]) {
      return true;
    }
    return Boolean(declared["playwright"]);
  } catch {
    return false;
  }
}
229
+
230
/**
 * Report whether a Playwright config file exists at the project root.
 *
 * Checks the .ts, .js, .mts, .mjs, .cts and .cjs variants of
 * `playwright.config`, so a project using the ESM/CJS-specific TypeScript
 * extensions is not treated as unconfigured.
 *
 * @param projectDir Root directory of the target project.
 */
function hasPlaywrightConfig(projectDir: string): boolean {
  const extensions = ["ts", "js", "mts", "mjs", "cts", "cjs"];
  return extensions.some((ext) => fs.existsSync(path.join(projectDir, `playwright.config.${ext}`)));
}
234
+
235
/**
 * Run a command synchronously with its stdio attached to this process.
 *
 * @param cmd  Executable to run (e.g. "npm", "npx", "pnpm", "yarn").
 * @param args Arguments passed to the executable.
 * @param cwd  Working directory for the child process.
 * @returns true only when the command exits with status 0; a spawn failure,
 *          a signal-terminated child, or a non-zero exit all yield false.
 */
function runStreamed(cmd: string, args: string[], cwd: string): boolean {
  // On Windows npm/npx/pnpm/yarn are .cmd shims, which cannot be spawned
  // without a shell. The arguments passed by this file are fixed literals,
  // so routing through the shell does not expose user input to it.
  const result = spawnSync(cmd, args, {
    cwd,
    stdio: "inherit",
    shell: process.platform === "win32",
  });
  if (result.error) {
    // Surface spawn failures (e.g. ENOENT) instead of failing silently.
    console.error(` ${cmd}: ${result.error.message}`);
    return false;
  }
  return result.status === 0;
}
239
+
240
/**
 * Add `@playwright/test` as a dev dependency using the given package manager.
 *
 * Echoes the command line to stderr, then runs it with streamed output.
 *
 * @returns true when the install command exits successfully.
 */
function installPlaywrightPackage(pm: PackageManager, projectDir: string): boolean {
  // pnpm and yarn share the `add` verb; anything else uses npm's `install`.
  const verb = pm === "pnpm" || pm === "yarn" ? "add" : "install";
  const args = [verb, "-D", "@playwright/test"];
  const commandLine = `$ ${pm} ${args.join(" ")}`;
  console.error(` ${dim(commandLine)}`);
  return runStreamed(pm, args, projectDir);
}
248
+
249
/**
 * Download the Chromium browser via `npx playwright install chromium`.
 *
 * Only Chromium is installed: it is the fastest install and covers most
 * E2E needs.
 *
 * @returns true when the install command exits successfully.
 */
function installPlaywrightBrowsers(projectDir: string): boolean {
  const browserArgs = ["playwright", "install", "chromium"];
  console.error(` ${dim("$ npx playwright install chromium")}`);
  return runStreamed("npx", browserArgs, projectDir);
}
254
+
255
/**
 * Scaffold a Playwright config by running `npm init playwright@latest`.
 *
 * npm is used whatever `pm` is: `npm init` simply runs the create-playwright
 * bin and is universally available, even though yarn and pnpm have their own
 * equivalents. The `pm` parameter is accepted but not consulted.
 *
 * @returns true when the scaffolding command exits successfully.
 */
function scaffoldPlaywrightConfig(pm: PackageManager, projectDir: string): boolean {
  const initArgs = ["init", "playwright@latest", "--", "--quiet", "--browser=chromium", "--no-examples"];
  console.error(` ${dim("$ npm init playwright@latest -- --quiet --browser=chromium --no-examples")}`);
  return runStreamed("npm", initArgs, projectDir);
}
265
+
266
/**
 * Interactively make sure the target project has Playwright installed and
 * configured.
 *
 * Flow:
 *  - no package.json: report and return (non-Node project);
 *  - already installed and configured: report and return;
 *  - not installed: ask, then install the package and the Chromium browser;
 *  - no config file: ask, then scaffold one.
 *
 * Every declined prompt or failed command prints a message to stderr and
 * ends the flow early; nothing is thrown by this function itself.
 */
async function ensurePlaywright(projectDir: string): Promise<void> {
  // Only an explicit "y" / "yes" (any letter case) counts as consent.
  const confirmed = async (question: string): Promise<boolean> => {
    const reply = (await promptUser(question)).toLowerCase();
    return reply === "y" || reply === "yes";
  };

  console.error(`\nChecking Playwright (required for work-kit's E2E test step)...`);

  // Non-Node project — nothing to do.
  const hasManifest = fs.existsSync(path.join(projectDir, "package.json"));
  if (!hasManifest) {
    console.error(` ${dim("No package.json found — skipping Playwright setup.")}`);
    return;
  }

  const pm = detectPackageManager(projectDir);
  const installed = hasPlaywrightInstalled(projectDir);
  const configured = hasPlaywrightConfig(projectDir);

  if (installed && configured) {
    console.error(` ${green("\u2713")} Playwright already installed and configured.`);
    return;
  }

  if (!installed) {
    const wantsInstall = await confirmed(` Install Playwright (@playwright/test) via ${pm}? [y/N]: `);
    if (!wantsInstall) {
      console.error(` ${yellow("!")} Skipped. The wk-test E2E step will fail until Playwright is installed.`);
      return;
    }
    if (!installPlaywrightPackage(pm, projectDir)) {
      console.error(` ${red("\u2717")} Failed to install @playwright/test.`);
      return;
    }
    if (!installPlaywrightBrowsers(projectDir)) {
      console.error(` ${red("\u2717")} Failed to install Chromium browser.`);
      return;
    }
    console.error(` ${green("+")} Installed @playwright/test and Chromium.`);
  }

  // Checked again on disk rather than reusing `configured`.
  if (hasPlaywrightConfig(projectDir)) {
    return;
  }

  const wantsScaffold = await confirmed(` No playwright.config found. Scaffold one now? [y/N]: `);
  if (!wantsScaffold) {
    console.error(` ${yellow("!")} Skipped scaffolding. Create a playwright.config.ts before running wk-test.`);
    return;
  }
  if (!scaffoldPlaywrightConfig(pm, projectDir)) {
    console.error(` ${red("\u2717")} Scaffolding failed. Run \`npm init playwright@latest\` manually.`);
    return;
  }
  console.error(` ${green("+")} Playwright config scaffolded.`);
}
314
+
315
/**
 * Scaffold the project knowledge directory and report what was created.
 *
 * The knowledge files (lessons/conventions/risks/workflow) are committed to
 * the repo; only the lockfile inside the directory is added to .gitignore.
 * Any error is printed to stderr rather than propagated.
 */
function setupKnowledgeDir(projectDir: string): void {
  console.error(`\nScaffolding ${KNOWLEDGE_DIR}/ (project knowledge files)...`);
  try {
    const { created: newFiles } = ensureKnowledgeDir(projectDir);
    if (newFiles.length === 0) {
      console.error(` ${dim("Already scaffolded.")}`);
    } else {
      for (const file of newFiles) {
        console.error(` ${green("+")} ${KNOWLEDGE_DIR}/${file}`);
      }
      // Warn once, right after listing the new files.
      console.error(` ${yellow("!")} ${bold("These files are committed to your repo.")} Don't write secrets in them.`);
      console.error(` ${dim("work-kit redacts known secret shapes at write time, but the regex sweep is best-effort.")}`);
    }
    const lockPath = `${KNOWLEDGE_DIR}/${KNOWLEDGE_LOCK}`;
    ensureGitignored(projectDir, lockPath);
  } catch (err) {
    console.error(` ${red("\u2717")} ${(err as Error).message}`);
  }
}
335
+
198
336
  async function promptUser(question: string): Promise<string> {
199
337
  const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
200
338
  return new Promise((resolve) => {
@@ -278,6 +416,12 @@ export async function setupCommand(targetPath?: string): Promise<void> {
278
416
  console.error(` ${red("✗")} ${(err as Error).message}`);
279
417
  }
280
418
 
419
+ // Ensure Playwright is available — wk-test's E2E step requires it.
420
+ await ensurePlaywright(projectDir);
421
+
422
+ // Scaffold the project-level knowledge directory.
423
+ setupKnowledgeDir(projectDir);
424
+
281
425
  // Run doctor against the target project
282
426
  console.error("\nRunning doctor...");
283
427
  const result = doctorCommand(projectDir);
@@ -6,6 +6,7 @@ interface StatusOutput {
6
6
  branch: string;
7
7
  mode: string;
8
8
  classification?: string;
9
+ modelPolicy: string;
9
10
  status: string;
10
11
  currentPhase: string | null;
11
12
  currentStep: string | null;
@@ -40,6 +41,7 @@ export function statusCommand(worktreeRoot?: string): StatusOutput {
40
41
  branch: state.branch,
41
42
  mode: state.mode,
42
43
  ...(state.classification && { classification: state.classification }),
44
+ modelPolicy: state.modelPolicy ?? "auto",
43
45
  status: state.status,
44
46
  currentPhase: state.currentPhase,
45
47
  currentStep: state.currentStep,
@@ -1,11 +1,13 @@
1
1
  import { readState, writeState, findWorktreeRoot } from "../state/store.js";
2
- import { STEPS_BY_PHASE, PHASE_NAMES, MODE_AUTO } from "../state/schema.js";
2
+ import { STEPS_BY_PHASE, PHASE_NAMES, MODE_AUTO, PhaseName } from "../state/schema.js";
3
3
  import { parseLocation } from "../state/helpers.js";
4
+ import { resolveModel } from "../config/model-routing.js";
4
5
  import type { Action } from "../state/schema.js";
5
6
 
6
7
  interface WorkflowStatus {
7
8
  action: "workflow_status";
8
- workflow: { step: string; status: string }[];
9
+ modelPolicy: string;
10
+ workflow: { step: string; status: string; model?: string }[];
9
11
  }
10
12
 
11
13
  export type WorkflowResult = Action | WorkflowStatus;
@@ -113,13 +115,21 @@ export function workflowCommand(opts: {
113
115
  return { action: "wait_for_user", message: `Removed ${opts.remove} from workflow.` };
114
116
  }
115
117
 
116
- // No add/remove — show current workflow
118
+ // No add/remove — show current workflow with resolved model per step
117
119
  const workflow = state.workflow
118
120
  .filter((s) => s.included)
119
- .map((s) => ({
120
- step: `${s.phase}/${s.step}`,
121
- status: state.phases[s.phase]?.steps[s.step]?.status || "unknown",
122
- }));
123
-
124
- return { action: "workflow_status", workflow };
121
+ .map((s) => {
122
+ const model = resolveModel(state, s.phase as PhaseName, s.step);
123
+ return {
124
+ step: `${s.phase}/${s.step}`,
125
+ status: state.phases[s.phase]?.steps[s.step]?.status || "unknown",
126
+ ...(model && { model }),
127
+ };
128
+ });
129
+
130
+ return {
131
+ action: "workflow_status",
132
+ modelPolicy: state.modelPolicy ?? "auto",
133
+ workflow,
134
+ };
125
135
  }
@@ -34,6 +34,16 @@ export const CLI_NPX_BINARY = "npx work-kit-cli";
34
34
  */
35
35
  export const PROJECT_CONFIG_FILE = ".work-kit-config.json";
36
36
 
37
+ // ── Knowledge ───────────────────────────────────────────────────────
38
+
39
+ /**
40
+ * Project knowledge directory at the main repo root. Holds curated
41
+ * lessons/conventions/risks/workflow files. Committed to git; only
42
+ * KNOWLEDGE_LOCK is gitignored.
43
+ */
44
+ export const KNOWLEDGE_DIR = ".work-kit-knowledge";
45
+ export const KNOWLEDGE_LOCK = ".lock";
46
+
37
47
  // ── Limits ──────────────────────────────────────────────────────────
38
48
 
39
49
  export const MAX_LOOPBACKS_PER_ROUTE = 2;
@@ -0,0 +1,190 @@
1
+ import { describe, it, afterEach } from "node:test";
2
+ import * as assert from "node:assert/strict";
3
+ import * as fs from "node:fs";
4
+ import * as path from "node:path";
5
+ import * as os from "node:os";
6
+ import { randomUUID } from "node:crypto";
7
+ import { resolveModel, BY_PHASE, BY_STEP } from "./model-routing.js";
8
+ import type { WorkKitState, ModelPolicy, Classification } from "../state/schema.js";
9
+
10
/**
 * Create a unique temporary worktree root containing an empty `.work-kit/`
 * directory, and return its path.
 *
 * Uses `fs.mkdtempSync`, which both picks the unique suffix and creates the
 * directory, so an existing directory is never silently reused.
 */
function makeTmpDir(): string {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "wk-model-routing-"));
  fs.mkdirSync(path.join(dir, ".work-kit"));
  return dir;
}
15
+
16
// Temp directories created by the current test; emptied after each test.
let tmpDirs: string[] = [];

afterEach(() => {
  // Remove every sandbox the test registered, then start the next test clean.
  tmpDirs.forEach((dir) => {
    fs.rmSync(dir, { recursive: true, force: true });
  });
  tmpDirs = [];
});
22
+
23
/**
 * Build the minimal state slice that resolveModel() reads.
 *
 * `policy` and `classification` are passed through as given (possibly
 * undefined); `mode` defaults to "full-kit".
 */
function fakeState(opts: {
  worktreeRoot: string;
  policy?: ModelPolicy;
  classification?: Classification;
  mode?: "auto-kit" | "full-kit";
}): Pick<WorkKitState, "modelPolicy" | "classification" | "mode"> & { metadata: { worktreeRoot: string } } {
  const mode = opts.mode ?? "full-kit";
  const metadata = { worktreeRoot: opts.worktreeRoot };
  return {
    modelPolicy: opts.policy,
    classification: opts.classification,
    // NOTE(review): `as any` sidesteps the WorkKitState["mode"] type — narrow it once the schema type is confirmed.
    mode: mode as any,
    metadata,
  };
}
36
+
37
describe("resolveModel — defaults", () => {
  // Fresh sandbox per test, registered so afterEach removes it.
  const sandbox = (): string => {
    const dir = makeTmpDir();
    tmpDirs.push(dir);
    return dir;
  };

  it("uses step default when no policy or override", () => {
    const state = fakeState({ worktreeRoot: sandbox() });

    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
    assert.equal(resolveModel(state, "build", "commit"), "haiku");
    assert.equal(resolveModel(state, "review", "security"), "opus");
  });

  it("falls back to phase default for unknown step", () => {
    const state = fakeState({ worktreeRoot: sandbox() });

    // Guard: the step really has no BY_STEP entry, so the phase layer decides.
    const unknownStep = "__nonexistent__";
    assert.ok(!(`plan/${unknownStep}` in BY_STEP));
    assert.equal(resolveModel(state, "plan", unknownStep), BY_PHASE.plan);
  });
});
55
+
56
describe("resolveModel — session policy", () => {
  // Fresh sandbox per test, registered so afterEach removes it.
  const sandbox = (): string => {
    const dir = makeTmpDir();
    tmpDirs.push(dir);
    return dir;
  };

  it("policy=opus forces opus for every step, even mechanical ones", () => {
    const state = fakeState({ worktreeRoot: sandbox(), policy: "opus" });

    assert.equal(resolveModel(state, "build", "commit"), "opus");
    assert.equal(resolveModel(state, "deploy", "monitor"), "opus");
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("policy=haiku forces haiku everywhere", () => {
    const state = fakeState({ worktreeRoot: sandbox(), policy: "haiku" });

    assert.equal(resolveModel(state, "plan", "investigate"), "haiku");
    assert.equal(resolveModel(state, "review", "security"), "haiku");
  });

  it("policy=inherit returns undefined so no model is passed", () => {
    const state = fakeState({ worktreeRoot: sandbox(), policy: "inherit" });

    assert.equal(resolveModel(state, "plan", "investigate"), undefined);
    assert.equal(resolveModel(state, "build", "core"), undefined);
    assert.equal(resolveModel(state, "deploy", "merge"), undefined);
  });

  it("policy=auto is equivalent to omitting it", () => {
    // Both states share one worktree so only the policy differs.
    const root = sandbox();
    const withAuto = fakeState({ worktreeRoot: root, policy: "auto" });
    const withoutPolicy = fakeState({ worktreeRoot: root });

    const resolvedAuto = resolveModel(withAuto, "plan", "investigate");
    const resolvedUnset = resolveModel(withoutPolicy, "plan", "investigate");
    assert.equal(resolvedAuto, resolvedUnset);
  });
});
90
+
91
describe("resolveModel — classification", () => {
  // Fresh sandbox per test, registered so afterEach removes it.
  const sandbox = (): string => {
    const dir = makeTmpDir();
    tmpDirs.push(dir);
    return dir;
  };

  // Auto-kit state with the given classification and no session policy.
  const autoKit = (classification: Classification) =>
    fakeState({ worktreeRoot: sandbox(), classification, mode: "auto-kit" });

  it("small-change knocks plan/investigate down to haiku in auto-kit mode", () => {
    const state = autoKit("small-change");
    assert.equal(resolveModel(state, "plan", "investigate"), "haiku");
  });

  it("bug-fix keeps plan/investigate on opus (not in its override map)", () => {
    const state = autoKit("bug-fix");
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
    assert.equal(resolveModel(state, "plan", "blueprint"), "sonnet");
  });

  it("refactor promotes review/performance to opus", () => {
    const state = autoKit("refactor");
    assert.equal(resolveModel(state, "review", "performance"), "opus");
  });

  it("classification overrides are ignored in full-kit mode", () => {
    const state = fakeState({
      worktreeRoot: sandbox(),
      classification: "small-change",
      mode: "full-kit",
    });
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("session policy beats classification override", () => {
    const state = fakeState({
      worktreeRoot: sandbox(),
      classification: "small-change",
      mode: "auto-kit",
      policy: "opus",
    });
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });
});
144
+
145
describe("resolveModel — workspace JSON override", () => {
  // Create a sandbox whose .work-kit/model-config.json holds `contents`
  // verbatim, and register it for afterEach cleanup.
  const sandboxWithConfig = (contents: string): string => {
    const dir = makeTmpDir();
    tmpDirs.push(dir);
    fs.writeFileSync(path.join(dir, ".work-kit", "model-config.json"), contents);
    return dir;
  };

  it("workspace model-config.json beats session policy", () => {
    const root = sandboxWithConfig(JSON.stringify({ "build/commit": "sonnet" }));
    const state = fakeState({ worktreeRoot: root, policy: "opus" });

    assert.equal(resolveModel(state, "build", "commit"), "sonnet");
    // Other steps still forced to opus by the policy
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("workspace JSON beats step default", () => {
    const root = sandboxWithConfig(JSON.stringify({ "plan/investigate": "haiku" }));
    const state = fakeState({ worktreeRoot: root });

    assert.equal(resolveModel(state, "plan", "investigate"), "haiku");
  });

  it("malformed JSON falls back silently to defaults", () => {
    const root = sandboxWithConfig("{not json");
    const state = fakeState({ worktreeRoot: root });

    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("invalid tier values in JSON are ignored", () => {
    const root = sandboxWithConfig(
      JSON.stringify({ "plan/investigate": "turbo", "build/core": "opus" })
    );
    const state = fakeState({ worktreeRoot: root });

    // Bad value ignored → falls back to step default
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
    // Good value applied
    assert.equal(resolveModel(state, "build", "core"), "opus");
  });
});
@@ -0,0 +1,208 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import * as os from "node:os";
4
+ import {
5
+ ModelTier,
6
+ PhaseName,
7
+ WorkKitState,
8
+ Classification,
9
+ isModelTier,
10
+ } from "../state/schema.js";
11
+ import { STATE_DIR } from "../state/store.js";
12
+
13
+ /**
14
+ * Per-phase/step model routing.
15
+ *
16
+ * Resolution order (highest precedence first):
17
+ * 1. Workspace override .work-kit/model-config.json (per-session, per-step map)
18
+ * 2. User global override ~/.claude/work-kit/models.json (per-user, all projects)
19
+ * 3. Session model policy state.modelPolicy (set at init via skill flag)
20
+ * 4. Classification BY_CLASSIFICATION (auto-kit only)
21
+ * 5. Step default BY_STEP
22
+ * 6. Phase default BY_PHASE
23
+ * 7. Hard default "sonnet"
24
+ *
25
+ * When state.modelPolicy is "inherit", resolveModel() returns `undefined`
26
+ * before any override file is consulted, so the orchestrator skill omits
27
+ * the `model` parameter on the Agent tool — identical to pre-routing behavior.
28
+ */
29
+
30
+ const HARD_DEFAULT: ModelTier = "sonnet";
31
+
32
+ // ── Phase defaults ──────────────────────────────────────────────────
33
+
34
/**
 * Default model tier for each workflow phase.
 *
 * Every phase runs on sonnet except deploy, which runs on haiku.
 */
export const BY_PHASE: Record<PhaseName, ModelTier> = {
  "plan": "sonnet",
  "build": "sonnet",
  "test": "sonnet",
  "review": "sonnet",
  "deploy": "haiku",
  "wrap-up": "sonnet",
};
42
+
43
+ // ── Step-level overrides (phase/step keys) ──────────────────────────
44
+
45
/**
 * Default model tier per step, keyed by "<phase>/<step>".
 */
export const BY_STEP: Record<string, ModelTier> = {
  // ── plan: research- and design-heavy steps get opus ──
  "plan/clarify": "sonnet",
  "plan/investigate": "opus",
  "plan/sketch": "sonnet",
  "plan/scope": "sonnet",
  "plan/ux-flow": "sonnet",
  "plan/architecture": "opus",
  "plan/blueprint": "opus",
  "plan/audit": "opus",

  // ── build: mechanical steps use haiku, implementation stays on sonnet ──
  "build/setup": "haiku",
  "build/migration": "sonnet",
  "build/red": "sonnet",
  "build/core": "sonnet",
  "build/ui": "sonnet",
  "build/refactor": "sonnet",
  "build/integration": "sonnet",
  "build/commit": "haiku",

  // ── test: verify is mechanical; e2e and validate need judgment ──
  "test/verify": "haiku",
  "test/e2e": "sonnet",
  "test/validate": "sonnet",

  // ── review: security and compliance get opus, the rest sonnet ──
  "review/self-review": "sonnet",
  "review/security": "opus",
  "review/performance": "sonnet",
  "review/compliance": "opus",
  "review/handoff": "sonnet",

  // ── deploy: mostly mechanical ──
  "deploy/merge": "haiku",
  "deploy/monitor": "haiku",
  "deploy/remediate": "sonnet",

  // ── wrap-up: synthesis ──
  "wrap-up/summary": "sonnet",
};
86
+
87
+ // ── Classification overrides (auto-kit only) ────────────────────────
88
+
89
/**
 * Per-classification step overrides, keyed by classification and then by
 * "<phase>/<step>". A classification with an empty map overrides nothing.
 */
export const BY_CLASSIFICATION: Record<Classification, Partial<Record<string, ModelTier>>> = {
  // Trivial work: every plan step runs on haiku, and the security and
  // compliance reviews run on sonnet.
  "small-change": {
    "plan/clarify": "haiku",
    "plan/investigate": "haiku",
    "plan/sketch": "haiku",
    "plan/scope": "haiku",
    "plan/ux-flow": "haiku",
    "plan/architecture": "haiku",
    "plan/blueprint": "haiku",
    "plan/audit": "haiku",
    "review/security": "sonnet",
    "review/compliance": "sonnet",
  },

  // Bug fixes: plan/investigate is deliberately absent so it is not
  // overridden here; the listed plan steps run on sonnet.
  "bug-fix": {
    "plan/clarify": "sonnet",
    "plan/sketch": "sonnet",
    "plan/scope": "sonnet",
    "plan/architecture": "sonnet",
    "plan/blueprint": "sonnet",
    "plan/audit": "sonnet",
  },

  // Refactors: the performance review matters most, so it is promoted.
  "refactor": {
    "review/performance": "opus",
  },

  "feature": {},
  "large-feature": {},
};
119
+
120
+ // ── JSON override loading ───────────────────────────────────────────
121
+
122
+ type OverrideMap = Partial<Record<string, ModelTier>>;
123
+
124
+ interface LoadedOverrides {
125
+ workspace: OverrideMap;
126
+ userGlobal: OverrideMap;
127
+ }
128
+
129
/**
 * Load both optional JSON override maps for a session.
 *
 * - `workspace`:  <worktreeRoot>/<STATE_DIR>/model-config.json
 * - `userGlobal`: ~/.claude/work-kit/models.json
 *
 * Each file is read through readJsonMap, which yields an empty map on any
 * read, parse or validation problem — overrides are strictly opt-in and
 * must never block the workflow.
 */
export function loadOverrides(worktreeRoot: string): LoadedOverrides {
  const workspaceFile = path.join(worktreeRoot, STATE_DIR, "model-config.json");
  const userGlobalFile = path.join(os.homedir(), ".claude", "work-kit", "models.json");

  const workspace = readJsonMap(workspaceFile);
  const userGlobal = readJsonMap(userGlobalFile);
  return { workspace, userGlobal };
}
140
+
141
/**
 * Read a JSON file as a "<phase>/<step>" → tier map.
 *
 * Returns an empty map when the file is missing, cannot be read or parsed,
 * or does not hold a plain (non-array) object. Entries whose value is not a
 * string accepted by isModelTier are dropped; the rest are kept.
 */
function readJsonMap(filePath: string): OverrideMap {
  try {
    if (!fs.existsSync(filePath)) {
      return {};
    }

    const parsed = JSON.parse(fs.readFileSync(filePath, "utf-8"));
    const isPlainObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    if (!isPlainObject) {
      return {};
    }

    const accepted: OverrideMap = {};
    Object.entries(parsed).forEach(([stepKey, tier]) => {
      // Keep only recognised tiers; anything else is silently ignored.
      if (typeof tier === "string" && isModelTier(tier)) {
        accepted[stepKey] = tier;
      }
    });
    return accepted;
  } catch {
    return {};
  }
}
158
+
159
+ // ── Resolution ──────────────────────────────────────────────────────
160
+
161
/**
 * Resolve the model tier for a given phase/step in a given session.
 *
 * Order of evaluation:
 *  1. policy "inherit" → `undefined`, before any override file is read;
 *  2. workspace override file, then user-global override file;
 *  3. a forced session policy (any valid model tier);
 *  4. classification override (auto-kit mode only);
 *  5. step default, then phase default, then the hard default.
 *
 * A policy value that is neither "auto", "inherit" nor a valid model tier
 * (e.g. a hand-edited state file) is treated like "auto" instead of being
 * handed to the caller as if it were a tier.
 *
 * @returns The resolved tier, or `undefined` only when the session policy
 *          is "inherit". Callers treat `undefined` as "do not pass a model
 *          parameter to the Agent tool" — identical to pre-routing behavior.
 */
export function resolveModel(
  state: Pick<WorkKitState, "modelPolicy" | "classification" | "mode"> & { metadata: { worktreeRoot: string } },
  phase: PhaseName,
  step: string
): ModelTier | undefined {
  const key = `${phase}/${step}`;
  const policy = state.modelPolicy ?? "auto";

  // Policy "inherit" short-circuits everything: no model override at all.
  if (policy === "inherit") {
    return undefined;
  }

  // Layers 1 & 2: JSON overrides win over everything else (workspace first).
  const { workspace, userGlobal } = loadOverrides(state.metadata.worktreeRoot);
  const fileOverride = workspace[key] ?? userGlobal[key];
  if (fileOverride) return fileOverride;

  // Layer 3: Forced policy (opus/sonnet/haiku) beats all routing. The value
  // is validated so an unrecognised string falls through to auto routing.
  if (policy !== "auto" && isModelTier(policy)) {
    return policy;
  }

  // Layer 4: Classification override (auto-kit only).
  if (state.mode === "auto-kit" && state.classification) {
    const classOverride = BY_CLASSIFICATION[state.classification][key];
    if (classOverride) return classOverride;
  }

  // Layer 5: Step default.
  const stepDefault = BY_STEP[key];
  if (stepDefault) return stepDefault;

  // Layer 6: Phase default.
  const phaseDefault = BY_PHASE[phase];
  if (phaseDefault) return phaseDefault;

  // Layer 7: Hard default.
  return HARD_DEFAULT;
}