work-kit-cli 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/cli/src/commands/bootstrap.test.ts +40 -0
- package/cli/src/commands/bootstrap.ts +38 -0
- package/cli/src/commands/extract.ts +207 -0
- package/cli/src/commands/init.test.ts +50 -0
- package/cli/src/commands/init.ts +32 -5
- package/cli/src/commands/learn.test.ts +244 -0
- package/cli/src/commands/learn.ts +104 -0
- package/cli/src/commands/next.ts +30 -10
- package/cli/src/commands/observe.ts +16 -21
- package/cli/src/commands/pause-resume.test.ts +2 -2
- package/cli/src/commands/resume.ts +95 -7
- package/cli/src/commands/setup.ts +160 -0
- package/cli/src/commands/status.ts +2 -0
- package/cli/src/commands/workflow.ts +19 -9
- package/cli/src/config/constants.ts +10 -0
- package/cli/src/config/model-routing.test.ts +190 -0
- package/cli/src/config/model-routing.ts +208 -0
- package/cli/src/config/workflow.ts +5 -5
- package/cli/src/index.ts +70 -5
- package/cli/src/observer/data.ts +132 -9
- package/cli/src/observer/renderer.ts +34 -36
- package/cli/src/observer/watcher.ts +28 -16
- package/cli/src/state/schema.ts +50 -3
- package/cli/src/state/store.ts +39 -4
- package/cli/src/utils/fs.ts +13 -0
- package/cli/src/utils/knowledge.ts +471 -0
- package/package.json +1 -1
- package/skills/auto-kit/SKILL.md +27 -10
- package/skills/full-kit/SKILL.md +25 -8
- package/skills/resume-kit/SKILL.md +44 -8
- package/skills/wk-bootstrap/SKILL.md +6 -0
- package/skills/wk-build/SKILL.md +3 -2
- package/skills/wk-deploy/SKILL.md +1 -0
- package/skills/wk-plan/SKILL.md +3 -2
- package/skills/wk-review/SKILL.md +1 -0
- package/skills/wk-test/SKILL.md +1 -0
- package/skills/wk-test/steps/e2e.md +15 -12
- package/skills/wk-wrap-up/SKILL.md +15 -2
- package/skills/wk-wrap-up/steps/knowledge.md +76 -0
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import * as readline from "node:readline";
|
|
4
|
+
import { spawnSync } from "node:child_process";
|
|
4
5
|
import { doctorCommand } from "./doctor.js";
|
|
5
6
|
import { bold, dim, green, yellow, red, cyan } from "../utils/colors.js";
|
|
7
|
+
import { ensureKnowledgeDir, KNOWLEDGE_DIR, KNOWLEDGE_LOCK } from "../utils/knowledge.js";
|
|
8
|
+
import { ensureGitignored } from "./init.js";
|
|
6
9
|
|
|
7
10
|
const SKILLS_SOURCE = path.resolve(import.meta.dirname, "..", "..", "..", "skills");
|
|
8
11
|
|
|
@@ -195,6 +198,157 @@ function installHooks(projectDir: string): { added: number; file: string } {
|
|
|
195
198
|
return { added: WK_HOOKS.length, file: settingsFile };
|
|
196
199
|
}
|
|
197
200
|
|
|
201
|
+
// ── Playwright detection / install ─────────────────────────────────
|
|
202
|
+
//
|
|
203
|
+
// Work-kit's Test phase requires a real E2E framework. We standardize on
|
|
204
|
+
// Playwright. setup/upgrade detect whether the target project already has
|
|
205
|
+
// it; if not, we offer to install it (and optionally scaffold a config).
|
|
206
|
+
|
|
207
|
+
/** Package managers that setup knows how to drive when installing Playwright. */
type PackageManager = "pnpm" | "yarn" | "npm";

/**
 * Infer the project's package manager from the lockfile found in `projectDir`.
 *
 * pnpm's lockfile is probed before yarn's; when neither exists the result
 * is "npm".
 */
function detectPackageManager(projectDir: string): PackageManager {
  const lockfiles: ReadonlyArray<readonly [string, PackageManager]> = [
    ["pnpm-lock.yaml", "pnpm"],
    ["yarn.lock", "yarn"],
  ];
  for (const [lockfile, manager] of lockfiles) {
    if (fs.existsSync(path.join(projectDir, lockfile))) {
      return manager;
    }
  }
  return "npm";
}
|
|
214
|
+
|
|
215
|
+
/**
 * Report whether the project's package.json declares Playwright.
 *
 * Looks for "@playwright/test" or "playwright" in the merged
 * dependencies/devDependencies maps (devDependencies win on key clashes).
 * A missing, unreadable or malformed package.json yields `false`.
 */
function hasPlaywrightInstalled(projectDir: string): boolean {
  const manifestPath = path.join(projectDir, "package.json");
  if (!fs.existsSync(manifestPath)) {
    return false;
  }

  try {
    const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf-8")) as {
      dependencies?: Record<string, string>;
      devDependencies?: Record<string, string>;
    };
    // Destructuring throws on a non-object manifest (e.g. `null`); the catch
    // below turns that into "not installed".
    const { dependencies, devDependencies } = manifest;
    const declared: Record<string, string> = {
      ...(dependencies || {}),
      ...(devDependencies || {}),
    };
    return ["@playwright/test", "playwright"].some((name) => Boolean(declared[name]));
  } catch {
    return false;
  }
}
|
|
229
|
+
|
|
230
|
+
/**
 * Report whether `projectDir` contains a Playwright config file.
 *
 * Checks every extension Playwright itself looks for when resolving
 * `playwright.config.*`, including the ESM/CJS TypeScript variants
 * (`.mts` / `.cts`), so a project configured that way is not offered
 * scaffolding again.
 */
function hasPlaywrightConfig(projectDir: string): boolean {
  const extensions = ["ts", "js", "mts", "mjs", "cts", "cjs"];
  return extensions.some((ext) =>
    fs.existsSync(path.join(projectDir, `playwright.config.${ext}`))
  );
}
|
|
234
|
+
|
|
235
|
+
/**
 * Run `cmd args...` synchronously in `cwd`, streaming its stdio to the
 * current terminal.
 *
 * @returns `true` only when the process exits with status 0. A spawn failure
 *   (command not found, bad cwd) or death by signal leaves `status` null and
 *   therefore yields `false`.
 */
function runStreamed(cmd: string, args: string[], cwd: string): boolean {
  const result = spawnSync(cmd, args, {
    cwd,
    stdio: "inherit",
    // npm/npx/pnpm/yarn are `.cmd` shims on Windows and cannot be spawned
    // without a shell. Callers in this file only pass fixed literal
    // arguments, so no untrusted text reaches the shell.
    shell: process.platform === "win32",
  });
  return result.status === 0;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Add `@playwright/test` as a dev dependency using the given package manager.
 *
 * For npm only, a failed first attempt is retried once with
 * `--legacy-peer-deps`: the most common npm failure here is ERESOLVE caused
 * by a pre-existing peer-dep conflict in the user's project. The retry lets
 * Playwright install while leaving that conflict for the user to fix.
 *
 * @returns `true` when either attempt succeeds.
 */
function installPlaywrightPackage(pm: PackageManager, projectDir: string): boolean {
  const verb = pm === "pnpm" || pm === "yarn" ? "add" : "install";
  const args = [verb, "-D", "@playwright/test"];

  console.error(` ${dim(`$ ${pm} ${args.join(" ")}`)}`);
  if (runStreamed(pm, args, projectDir)) {
    return true;
  }

  // Only npm gets the legacy-peer-deps fallback.
  if (pm !== "npm") {
    return false;
  }

  console.error(` ${yellow("!")} npm install failed (likely peer-dep conflict). Retrying with --legacy-peer-deps...`);
  const fallbackArgs = args.concat("--legacy-peer-deps");
  console.error(` ${dim(`$ ${pm} ${fallbackArgs.join(" ")}`)}`);

  const retried = runStreamed(pm, fallbackArgs, projectDir);
  if (retried) {
    console.error(` ${dim("Note: installed with --legacy-peer-deps. Your project still has the original peer-dep conflict — fix it separately when convenient.")}`);
  }
  return retried;
}
|
|
264
|
+
|
|
265
|
+
/**
 * Download the Chromium browser build via `npx playwright install chromium`.
 *
 * Only Chromium is fetched — fastest install, covers most E2E needs.
 *
 * @returns `true` when the command exits successfully.
 */
function installPlaywrightBrowsers(projectDir: string): boolean {
  const npxArgs = ["playwright", "install", "chromium"];
  console.error(` ${dim("$ npx playwright install chromium")}`);
  const succeeded = runStreamed("npx", npxArgs, projectDir);
  return succeeded;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Scaffold a Playwright config by running the create-playwright initializer.
 *
 * Always goes through `npm init playwright@latest`, whatever `pm` is: yarn
 * and pnpm have their own equivalents, but `npm init` is universally
 * available. `pm` is accepted for signature parity and is not consulted.
 *
 * @returns `true` when the initializer exits successfully.
 */
function scaffoldPlaywrightConfig(pm: PackageManager, projectDir: string): boolean {
  const initArgs = [
    "init",
    "playwright@latest",
    "--",
    "--quiet",
    "--browser=chromium",
    "--no-examples",
  ];
  console.error(` ${dim("$ npm init playwright@latest -- --quiet --browser=chromium --no-examples")}`);
  return runStreamed("npm", initArgs, projectDir);
}
|
|
281
|
+
|
|
282
|
+
/**
 * Interactively make sure the target project has Playwright installed and
 * configured (work-kit's E2E test step needs it).
 *
 * Flow:
 *  - no package.json: report and stop;
 *  - package and config both present: report and stop;
 *  - package missing: ask, then install the package and Chromium;
 *  - config missing: ask, then scaffold one.
 * Declining a prompt or any failed step prints a message and stops; nothing
 * is thrown by this function itself.
 */
async function ensurePlaywright(projectDir: string): Promise<void> {
  // Ask a [y/N] question; only "y"/"yes" (any case) counts as consent.
  const confirmed = async (question: string): Promise<boolean> => {
    const reply = (await promptUser(question)).toLowerCase();
    return reply === "y" || reply === "yes";
  };

  console.error(`\nChecking Playwright (required for work-kit's E2E test step)...`);

  // Non-Node project — nothing to do.
  const manifestPath = path.join(projectDir, "package.json");
  if (!fs.existsSync(manifestPath)) {
    console.error(` ${dim("No package.json found — skipping Playwright setup.")}`);
    return;
  }

  const pm = detectPackageManager(projectDir);
  const installed = hasPlaywrightInstalled(projectDir);
  const configured = hasPlaywrightConfig(projectDir);

  if (installed && configured) {
    console.error(` ${green("\u2713")} Playwright already installed and configured.`);
    return;
  }

  if (!installed) {
    const wantsInstall = await confirmed(` Install Playwright (@playwright/test) via ${pm}? [y/N]: `);
    if (!wantsInstall) {
      console.error(` ${yellow("!")} Skipped. The wk-test E2E step will fail until Playwright is installed.`);
      return;
    }
    if (!installPlaywrightPackage(pm, projectDir)) {
      console.error(` ${red("\u2717")} Failed to install @playwright/test.`);
      return;
    }
    if (!installPlaywrightBrowsers(projectDir)) {
      console.error(` ${red("\u2717")} Failed to install Chromium browser.`);
      return;
    }
    console.error(` ${green("+")} Installed @playwright/test and Chromium.`);
  }

  // Look again rather than reusing `configured`, in case the install step
  // left a config behind.
  if (hasPlaywrightConfig(projectDir)) {
    return;
  }

  const wantsScaffold = await confirmed(` No playwright.config found. Scaffold one now? [y/N]: `);
  if (!wantsScaffold) {
    console.error(` ${yellow("!")} Skipped scaffolding. Create a playwright.config.ts before running wk-test.`);
    return;
  }
  if (!scaffoldPlaywrightConfig(pm, projectDir)) {
    console.error(` ${red("\u2717")} Scaffolding failed. Run \`npm init playwright@latest\` manually.`);
    return;
  }
  console.error(` ${green("+")} Playwright config scaffolded.`);
}
|
|
330
|
+
|
|
331
|
+
/**
 * Create the project knowledge directory (KNOWLEDGE_DIR) and report which
 * files were added.
 *
 * The knowledge files (lessons/conventions/risks/workflow) are meant to be
 * committed to the repo; only the lockfile inside the directory is added to
 * .gitignore. Any error is printed and swallowed.
 */
function setupKnowledgeDir(projectDir: string): void {
  console.error(`\nScaffolding ${KNOWLEDGE_DIR}/ (project knowledge files)...`);
  try {
    const { created } = ensureKnowledgeDir(projectDir);

    if (created.length === 0) {
      console.error(` ${dim("Already scaffolded.")}`);
    } else {
      for (const fileName of created) {
        console.error(` ${green("+")} ${KNOWLEDGE_DIR}/${fileName}`);
      }
      // Warn once, right after listing the new files.
      console.error(` ${yellow("!")} ${bold("These files are committed to your repo.")} Don't write secrets in them.`);
      console.error(` ${dim("work-kit redacts known secret shapes at write time, but the regex sweep is best-effort.")}`);
    }

    const lockPath = `${KNOWLEDGE_DIR}/${KNOWLEDGE_LOCK}`;
    ensureGitignored(projectDir, lockPath);
  } catch (err) {
    console.error(` ${red("\u2717")} ${(err as Error).message}`);
  }
}
|
|
351
|
+
|
|
198
352
|
async function promptUser(question: string): Promise<string> {
|
|
199
353
|
const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
|
|
200
354
|
return new Promise((resolve) => {
|
|
@@ -278,6 +432,12 @@ export async function setupCommand(targetPath?: string): Promise<void> {
|
|
|
278
432
|
console.error(` ${red("✗")} ${(err as Error).message}`);
|
|
279
433
|
}
|
|
280
434
|
|
|
435
|
+
// Ensure Playwright is available — wk-test's E2E step requires it.
|
|
436
|
+
await ensurePlaywright(projectDir);
|
|
437
|
+
|
|
438
|
+
// Scaffold the project-level knowledge directory.
|
|
439
|
+
setupKnowledgeDir(projectDir);
|
|
440
|
+
|
|
281
441
|
// Run doctor against the target project
|
|
282
442
|
console.error("\nRunning doctor...");
|
|
283
443
|
const result = doctorCommand(projectDir);
|
|
@@ -6,6 +6,7 @@ interface StatusOutput {
|
|
|
6
6
|
branch: string;
|
|
7
7
|
mode: string;
|
|
8
8
|
classification?: string;
|
|
9
|
+
modelPolicy: string;
|
|
9
10
|
status: string;
|
|
10
11
|
currentPhase: string | null;
|
|
11
12
|
currentStep: string | null;
|
|
@@ -40,6 +41,7 @@ export function statusCommand(worktreeRoot?: string): StatusOutput {
|
|
|
40
41
|
branch: state.branch,
|
|
41
42
|
mode: state.mode,
|
|
42
43
|
...(state.classification && { classification: state.classification }),
|
|
44
|
+
modelPolicy: state.modelPolicy ?? "auto",
|
|
43
45
|
status: state.status,
|
|
44
46
|
currentPhase: state.currentPhase,
|
|
45
47
|
currentStep: state.currentStep,
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import { readState, writeState, findWorktreeRoot } from "../state/store.js";
|
|
2
|
-
import { STEPS_BY_PHASE, PHASE_NAMES, MODE_AUTO } from "../state/schema.js";
|
|
2
|
+
import { STEPS_BY_PHASE, PHASE_NAMES, MODE_AUTO, PhaseName } from "../state/schema.js";
|
|
3
3
|
import { parseLocation } from "../state/helpers.js";
|
|
4
|
+
import { resolveModel } from "../config/model-routing.js";
|
|
4
5
|
import type { Action } from "../state/schema.js";
|
|
5
6
|
|
|
6
7
|
/** Result returned by the workflow command when no step is added or removed. */
interface WorkflowStatus {
  /** Discriminant that separates this result from other command results. */
  action: "workflow_status";
  /** Session model policy; reported as "auto" when the state has none. */
  modelPolicy: string;
  /**
   * One entry per included step: `step` is "<phase>/<step>", `status` is the
   * step's recorded status, and `model` is the resolved tier (left out when
   * no model was resolved).
   */
  workflow: Array<{ step: string; status: string; model?: string }>;
}
|
|
10
12
|
|
|
11
13
|
export type WorkflowResult = Action | WorkflowStatus;
|
|
@@ -113,13 +115,21 @@ export function workflowCommand(opts: {
|
|
|
113
115
|
return { action: "wait_for_user", message: `Removed ${opts.remove} from workflow.` };
|
|
114
116
|
}
|
|
115
117
|
|
|
116
|
-
// No add/remove — show current workflow
|
|
118
|
+
// No add/remove — show current workflow with resolved model per step
|
|
117
119
|
const workflow = state.workflow
|
|
118
120
|
.filter((s) => s.included)
|
|
119
|
-
.map((s) =>
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
.map((s) => {
|
|
122
|
+
const model = resolveModel(state, s.phase as PhaseName, s.step);
|
|
123
|
+
return {
|
|
124
|
+
step: `${s.phase}/${s.step}`,
|
|
125
|
+
status: state.phases[s.phase]?.steps[s.step]?.status || "unknown",
|
|
126
|
+
...(model && { model }),
|
|
127
|
+
};
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
return {
|
|
131
|
+
action: "workflow_status",
|
|
132
|
+
modelPolicy: state.modelPolicy ?? "auto",
|
|
133
|
+
workflow,
|
|
134
|
+
};
|
|
125
135
|
}
|
|
@@ -34,6 +34,16 @@ export const CLI_NPX_BINARY = "npx work-kit-cli";
|
|
|
34
34
|
*/
|
|
35
35
|
export const PROJECT_CONFIG_FILE = ".work-kit-config.json";
|
|
36
36
|
|
|
37
|
+
// ── Knowledge ───────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Project knowledge directory at the main repo root. Holds curated
|
|
41
|
+
* lessons/conventions/risks/workflow files. Committed to git; only
|
|
42
|
+
* KNOWLEDGE_LOCK is gitignored.
|
|
43
|
+
*/
|
|
44
|
+
export const KNOWLEDGE_DIR = ".work-kit-knowledge";
|
|
45
|
+
/** File name of the lockfile kept inside KNOWLEDGE_DIR; the one entry there that is gitignored. */
export const KNOWLEDGE_LOCK = ".lock";
|
|
46
|
+
|
|
37
47
|
// ── Limits ──────────────────────────────────────────────────────────
|
|
38
48
|
|
|
39
49
|
export const MAX_LOOPBACKS_PER_ROUTE = 2;
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { describe, it, afterEach } from "node:test";
|
|
2
|
+
import * as assert from "node:assert/strict";
|
|
3
|
+
import * as fs from "node:fs";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
import * as os from "node:os";
|
|
6
|
+
import { randomUUID } from "node:crypto";
|
|
7
|
+
import { resolveModel, BY_PHASE, BY_STEP } from "./model-routing.js";
|
|
8
|
+
import type { WorkKitState, ModelPolicy, Classification } from "../state/schema.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Create a uniquely named scratch worktree under the OS temp directory,
 * containing an empty `.work-kit` directory, and return the worktree path.
 */
function makeTmpDir(): string {
  const root = path.join(os.tmpdir(), "wk-model-routing-" + randomUUID());
  const stateDir = path.join(root, ".work-kit");
  fs.mkdirSync(stateDir, { recursive: true });
  return root;
}
|
|
15
|
+
|
|
16
|
+
// resolveModel() also reads a per-user override file under os.homedir()
// (~/.claude/work-kit/models.json). Point the home directory at a throwaway
// path that is never created, so a developer's real overrides cannot change
// the tiers asserted below. os.homedir() honours HOME on POSIX and
// USERPROFILE on Windows.
const isolatedHome = path.join(os.tmpdir(), `wk-model-routing-home-${randomUUID()}`);
process.env.HOME = isolatedHome;
process.env.USERPROFILE = isolatedHome;

// Scratch worktrees created by the current test; removed after each test.
let tmpDirs: string[] = [];

afterEach(() => {
  for (const dir of tmpDirs) fs.rmSync(dir, { recursive: true, force: true });
  tmpDirs = [];
});
|
|
22
|
+
|
|
23
|
+
/**
 * Build the minimal slice of session state that resolveModel() reads.
 *
 * `mode` falls back to "full-kit"; `policy` and `classification` are passed
 * through unchanged (possibly undefined).
 */
function fakeState(opts: {
  worktreeRoot: string;
  policy?: ModelPolicy;
  classification?: Classification;
  mode?: "auto-kit" | "full-kit";
}): Pick<WorkKitState, "modelPolicy" | "classification" | "mode"> & { metadata: { worktreeRoot: string } } {
  const mode = opts.mode ?? "full-kit";
  const metadata = { worktreeRoot: opts.worktreeRoot };
  return {
    modelPolicy: opts.policy,
    classification: opts.classification,
    mode: mode as any,
    metadata,
  };
}
|
|
36
|
+
|
|
37
|
+
describe("resolveModel — defaults", () => {
  // State with no policy/classification, rooted in a scratch worktree that
  // afterEach removes.
  const newState = () => {
    const root = makeTmpDir();
    tmpDirs.push(root);
    return fakeState({ worktreeRoot: root });
  };

  it("uses step default when no policy or override", () => {
    const state = newState();
    const expectations = [
      ["plan", "investigate", "opus"],
      ["build", "commit", "haiku"],
      ["review", "security", "opus"],
    ] as const;
    for (const [phase, step, tier] of expectations) {
      assert.equal(resolveModel(state, phase, step), tier);
    }
  });

  it("falls back to phase default for unknown step", () => {
    const state = newState();
    const unknownStep = "__nonexistent__";
    // Guard: the probe step must not have a BY_STEP entry.
    assert.ok(!(`plan/${unknownStep}` in BY_STEP));
    assert.equal(resolveModel(state, "plan", unknownStep), BY_PHASE.plan);
  });
});
|
|
55
|
+
|
|
56
|
+
describe("resolveModel — session policy", () => {
  // Allocate a scratch worktree and register it for afterEach cleanup.
  const newRoot = (): string => {
    const root = makeTmpDir();
    tmpDirs.push(root);
    return root;
  };

  it("policy=opus forces opus for every step, even mechanical ones", () => {
    const state = fakeState({ worktreeRoot: newRoot(), policy: "opus" });
    const steps = [
      ["build", "commit"],
      ["deploy", "monitor"],
      ["plan", "investigate"],
    ] as const;
    for (const [phase, step] of steps) {
      assert.equal(resolveModel(state, phase, step), "opus");
    }
  });

  it("policy=haiku forces haiku everywhere", () => {
    const state = fakeState({ worktreeRoot: newRoot(), policy: "haiku" });
    const steps = [
      ["plan", "investigate"],
      ["review", "security"],
    ] as const;
    for (const [phase, step] of steps) {
      assert.equal(resolveModel(state, phase, step), "haiku");
    }
  });

  it("policy=inherit returns undefined so no model is passed", () => {
    const state = fakeState({ worktreeRoot: newRoot(), policy: "inherit" });
    const steps = [
      ["plan", "investigate"],
      ["build", "core"],
      ["deploy", "merge"],
    ] as const;
    for (const [phase, step] of steps) {
      assert.equal(resolveModel(state, phase, step), undefined);
    }
  });

  it("policy=auto is equivalent to omitting it", () => {
    // Both states share one worktree so only the policy differs.
    const root = newRoot();
    const withAuto = resolveModel(fakeState({ worktreeRoot: root, policy: "auto" }), "plan", "investigate");
    const withoutPolicy = resolveModel(fakeState({ worktreeRoot: root }), "plan", "investigate");
    assert.equal(withAuto, withoutPolicy);
  });
});
|
|
90
|
+
|
|
91
|
+
describe("resolveModel — classification", () => {
  // Classified state in a fresh scratch worktree (removed by afterEach).
  const classified = (
    classification: Classification,
    mode: "auto-kit" | "full-kit",
    policy?: ModelPolicy
  ) => {
    const root = makeTmpDir();
    tmpDirs.push(root);
    return fakeState({ worktreeRoot: root, classification, mode, policy });
  };

  it("small-change knocks plan/investigate down to haiku in auto-kit mode", () => {
    const state = classified("small-change", "auto-kit");
    assert.equal(resolveModel(state, "plan", "investigate"), "haiku");
  });

  it("bug-fix keeps plan/investigate on opus (not in its override map)", () => {
    const state = classified("bug-fix", "auto-kit");
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
    assert.equal(resolveModel(state, "plan", "blueprint"), "sonnet");
  });

  it("refactor promotes review/performance to opus", () => {
    const state = classified("refactor", "auto-kit");
    assert.equal(resolveModel(state, "review", "performance"), "opus");
  });

  it("classification overrides are ignored in full-kit mode", () => {
    const state = classified("small-change", "full-kit");
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("session policy beats classification override", () => {
    const state = classified("small-change", "auto-kit", "opus");
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });
});
|
|
144
|
+
|
|
145
|
+
describe("resolveModel — workspace JSON override", () => {
  // Create a scratch worktree whose .work-kit/model-config.json holds `raw`
  // verbatim, and register it for afterEach cleanup.
  const rootWithConfig = (raw: string): string => {
    const root = makeTmpDir();
    tmpDirs.push(root);
    fs.writeFileSync(path.join(root, ".work-kit", "model-config.json"), raw);
    return root;
  };

  it("workspace model-config.json beats session policy", () => {
    const root = rootWithConfig(JSON.stringify({ "build/commit": "sonnet" }));
    const state = fakeState({ worktreeRoot: root, policy: "opus" });
    assert.equal(resolveModel(state, "build", "commit"), "sonnet");
    // Steps without an entry are still forced to opus by the policy.
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("workspace JSON beats step default", () => {
    const root = rootWithConfig(JSON.stringify({ "plan/investigate": "haiku" }));
    const state = fakeState({ worktreeRoot: root });
    assert.equal(resolveModel(state, "plan", "investigate"), "haiku");
  });

  it("malformed JSON falls back silently to defaults", () => {
    const root = rootWithConfig("{not json");
    const state = fakeState({ worktreeRoot: root });
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
  });

  it("invalid tier values in JSON are ignored", () => {
    const root = rootWithConfig(
      JSON.stringify({ "plan/investigate": "turbo", "build/core": "opus" })
    );
    const state = fakeState({ worktreeRoot: root });
    // The unknown tier is dropped, so the step default applies.
    assert.equal(resolveModel(state, "plan", "investigate"), "opus");
    // The valid entry in the same file is still honoured.
    assert.equal(resolveModel(state, "build", "core"), "opus");
  });
});
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import * as os from "node:os";
|
|
4
|
+
import {
|
|
5
|
+
ModelTier,
|
|
6
|
+
PhaseName,
|
|
7
|
+
WorkKitState,
|
|
8
|
+
Classification,
|
|
9
|
+
isModelTier,
|
|
10
|
+
} from "../state/schema.js";
|
|
11
|
+
import { STATE_DIR } from "../state/store.js";
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Per-phase/step model routing.
|
|
15
|
+
*
|
|
16
|
+
* Resolution order (highest precedence first):
|
|
17
|
+
* 1. Workspace override .work-kit/model-config.json (per-session, per-step map)
|
|
18
|
+
* 2. User global override ~/.claude/work-kit/models.json (per-user, all projects)
|
|
19
|
+
* 3. Session model policy state.modelPolicy (set at init via skill flag)
|
|
20
|
+
* 4. Classification BY_CLASSIFICATION (auto-kit only)
|
|
21
|
+
* 5. Step default BY_STEP
|
|
22
|
+
* 6. Phase default BY_PHASE
|
|
23
|
+
* 7. Hard default "sonnet"
|
|
24
|
+
*
|
|
25
|
+
* When state.modelPolicy is "inherit", resolveModel() returns `undefined`
* before any override file is consulted, so the orchestrator skill omits
* the `model` parameter on the Agent tool — identical to pre-routing behavior.
* For every other policy a concrete tier is always returned.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
/** Last-resort tier, used when neither a step nor a phase default applies. */
const HARD_DEFAULT: ModelTier = "sonnet";

// ── Phase defaults ──────────────────────────────────────────────────

/** Default tier per phase, used for steps that have no BY_STEP entry. */
export const BY_PHASE: Record<PhaseName, ModelTier> = {
  "plan": "sonnet",
  "build": "sonnet",
  "test": "sonnet",
  "review": "sonnet",
  "deploy": "haiku",
  "wrap-up": "sonnet",
};
|
|
42
|
+
|
|
43
|
+
// ── Step-level overrides (phase/step keys) ──────────────────────────
|
|
44
|
+
|
|
45
|
+
/**
 * Default tier per step, keyed by "<phase>/<step>". Steps not listed here
 * fall back to their phase's BY_PHASE entry.
 */
export const BY_STEP: Record<string, ModelTier> = {
  // plan: research- and design-heavy steps run on opus
  "plan/clarify":       "sonnet",
  "plan/investigate":   "opus",
  "plan/sketch":        "sonnet",
  "plan/scope":         "sonnet",
  "plan/ux-flow":       "sonnet",
  "plan/architecture":  "opus",
  "plan/blueprint":     "opus",
  "plan/audit":         "opus",

  // build: mechanical steps on haiku, implementation on sonnet
  "build/setup":        "haiku",
  "build/migration":    "sonnet",
  "build/red":          "sonnet",
  "build/core":         "sonnet",
  "build/ui":           "sonnet",
  "build/refactor":     "sonnet",
  "build/integration":  "sonnet",
  "build/commit":       "haiku",

  // test: verify is mechanical; e2e and validate need judgment
  "test/verify":        "haiku",
  "test/e2e":           "sonnet",
  "test/validate":      "sonnet",

  // review: security and compliance on opus, the rest on sonnet
  "review/self-review": "sonnet",
  "review/security":    "opus",
  "review/performance": "sonnet",
  "review/compliance":  "opus",
  "review/handoff":     "sonnet",

  // deploy: mostly mechanical
  "deploy/merge":       "haiku",
  "deploy/monitor":     "haiku",
  "deploy/remediate":   "sonnet",

  // wrap-up: synthesis
  "wrap-up/summary":    "sonnet",
};
|
|
86
|
+
|
|
87
|
+
// ── Classification overrides (auto-kit only) ────────────────────────
|
|
88
|
+
|
|
89
|
+
/**
 * Per-classification step overrides, keyed by "<phase>/<step>". resolveModel()
 * consults these only when the session mode is "auto-kit"; an empty map means
 * the classification changes nothing.
 */
export const BY_CLASSIFICATION: Record<Classification, Partial<Record<string, ModelTier>>> = {
  // Trivial work: plan steps and the opus-tier reviews drop a tier.
  "small-change": {
    "plan/clarify":      "haiku",
    "plan/investigate":  "haiku",
    "plan/sketch":       "haiku",
    "plan/scope":        "haiku",
    "plan/ux-flow":      "haiku",
    "plan/architecture": "haiku",
    "plan/blueprint":    "haiku",
    "plan/audit":        "haiku",
    "review/security":   "sonnet",
    "review/compliance": "sonnet",
  },

  // plan/investigate is deliberately absent so it keeps its opus default;
  // the other plan steps relax to sonnet.
  "bug-fix": {
    "plan/clarify":      "sonnet",
    "plan/sketch":       "sonnet",
    "plan/scope":        "sonnet",
    "plan/architecture": "sonnet",
    "plan/blueprint":    "sonnet",
    "plan/audit":        "sonnet",
  },

  // Performance review matters most for refactors, so it is promoted.
  "refactor": {
    "review/performance": "opus",
  },

  "feature": {},
  "large-feature": {},
};
|
|
119
|
+
|
|
120
|
+
// ── JSON override loading ───────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
/** Sparse "<phase>/<step>" → tier map, as read from an override file. */
type OverrideMap = Partial<Record<string, ModelTier>>;

/** The two optional override layers, already validated. */
interface LoadedOverrides {
  /** Entries from `<worktreeRoot>/<STATE_DIR>/model-config.json`. */
  workspace: OverrideMap;
  /** Entries from `~/.claude/work-kit/models.json`. */
  userGlobal: OverrideMap;
}

/**
 * Load both optional JSON override files for a worktree.
 *
 * Overrides are strictly opt-in and must never block the workflow, so a
 * file that is missing, unreadable, unparsable or of the wrong shape simply
 * contributes an empty map.
 */
export function loadOverrides(worktreeRoot: string): LoadedOverrides {
  const workspaceFile = path.join(worktreeRoot, STATE_DIR, "model-config.json");
  const workspace = readJsonMap(workspaceFile);

  const userGlobalFile = path.join(os.homedir(), ".claude", "work-kit", "models.json");
  const userGlobal = readJsonMap(userGlobalFile);

  return { workspace, userGlobal };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Read one override file and keep only the entries whose value is a valid
 * model tier.
 *
 * Returns an empty map when the file does not exist, cannot be read or
 * parsed, or when its top-level JSON value is not a plain object.
 */
function readJsonMap(filePath: string): OverrideMap {
  try {
    if (!fs.existsSync(filePath)) {
      return {};
    }

    const parsed = JSON.parse(fs.readFileSync(filePath, "utf-8"));
    const isPlainObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    if (!isPlainObject) {
      return {};
    }

    const accepted: OverrideMap = {};
    for (const [stepKey, tier] of Object.entries(parsed)) {
      // Drop anything that is not a recognised tier string.
      if (typeof tier !== "string" || !isModelTier(tier)) {
        continue;
      }
      accepted[stepKey] = tier;
    }
    return accepted;
  } catch {
    return {};
  }
}
|
|
158
|
+
|
|
159
|
+
// ── Resolution ──────────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
/**
 * Resolve the model tier for a given phase/step in a given session.
 *
 * Checks, in order, and returns the first that applies:
 *   0. policy "inherit"                       → `undefined` (nothing else is read)
 *   1. workspace override file entry for the step
 *   2. user-global override file entry for the step
 *   3. forced session policy (any policy other than "auto")
 *   4. classification override (only when mode is "auto-kit")
 *   5. step default (BY_STEP)
 *   6. phase default (BY_PHASE)
 *   7. hard default
 *
 * @param state - Session fields that drive routing: `modelPolicy` (treated
 *   as "auto" when unset), `classification`, `mode`, and
 *   `metadata.worktreeRoot` (where the workspace override file is looked up).
 * @param phase - Phase the step belongs to.
 * @param step - Step name within the phase; the lookup key is "<phase>/<step>".
 * @returns The tier to use, or `undefined` only when the policy is
 *   "inherit" — callers are expected to omit the model parameter then,
 *   which matches pre-routing behavior.
 */
export function resolveModel(
  state: Pick<WorkKitState, "modelPolicy" | "classification" | "mode"> & { metadata: { worktreeRoot: string } },
  phase: PhaseName,
  step: string
): ModelTier | undefined {
  const key = `${phase}/${step}`;
  const policy = state.modelPolicy ?? "auto";

  // Policy "inherit" short-circuits everything: no model override at all.
  if (policy === "inherit") {
    return undefined;
  }

  // Layers 1 & 2: JSON overrides win over everything else.
  const overrides = loadOverrides(state.metadata.worktreeRoot);
  if (overrides.workspace[key]) return overrides.workspace[key];
  if (overrides.userGlobal[key]) return overrides.userGlobal[key];

  // Layer 3: Forced policy (opus/sonnet/haiku) beats all routing.
  if (policy !== "auto") {
    return policy;
  }

  // Layer 4: Classification override (auto-kit only).
  if (state.mode === "auto-kit" && state.classification) {
    // State is loaded from disk, so the classification may be a value this
    // table does not know; treat that as "no override" instead of throwing
    // on the missing table entry.
    const classOverride = BY_CLASSIFICATION[state.classification]?.[key];
    if (classOverride) return classOverride;
  }

  // Layer 5: Step default.
  if (BY_STEP[key]) return BY_STEP[key];

  // Layer 6: Phase default.
  if (BY_PHASE[phase]) return BY_PHASE[phase];

  // Layer 7: Hard default.
  return HARD_DEFAULT;
}
|