@slowdini/slow-powers-opencode 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -72
- package/bootstrap.md +1 -7
- package/opencode/plugins/slow-powers.js +1 -1
- package/package.json +14 -17
- package/skills/evaluating-skills/SKILL.md +90 -338
- package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
- package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
- package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
- package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
- package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
- package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
- package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
- package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
- package/skills/evaluating-skills/harness-details/claude.md +0 -194
- package/skills/evaluating-skills/harness-parity.md +0 -155
- package/skills/evaluating-skills/runner/README.md +0 -163
- package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
- package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
- package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -485
- package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -242
- package/skills/evaluating-skills/runner/aggregate.test.ts +0 -484
- package/skills/evaluating-skills/runner/aggregate.ts +0 -269
- package/skills/evaluating-skills/runner/context.test.ts +0 -181
- package/skills/evaluating-skills/runner/context.ts +0 -90
- package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -396
- package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -288
- package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
- package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
- package/skills/evaluating-skills/runner/grade.test.ts +0 -347
- package/skills/evaluating-skills/runner/grade.ts +0 -603
- package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
- package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
- package/skills/evaluating-skills/runner/guard/install.ts +0 -147
- package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -128
- package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
- package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
- package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
- package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
- package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -281
- package/skills/evaluating-skills/runner/promote-baseline.ts +0 -204
- package/skills/evaluating-skills/runner/record-runs.test.ts +0 -314
- package/skills/evaluating-skills/runner/record-runs.ts +0 -209
- package/skills/evaluating-skills/runner/run.test.ts +0 -1703
- package/skills/evaluating-skills/runner/run.ts +0 -1388
- package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -94
- package/skills/evaluating-skills/runner/types.ts +0 -121
- package/skills/evaluating-skills/runner/validate-all.ts +0 -54
- package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
- package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
- package/skills/evaluating-skills/runner/validate.test.ts +0 -56
- package/skills/evaluating-skills/runner/validate.ts +0 -21
- package/skills/evaluating-skills/runner/workspace-teardown.test.ts +0 -227
- package/skills/evaluating-skills/runner/workspace-teardown.ts +0 -136
- package/skills/evaluating-skills/schema/evals.schema.json +0 -105
- package/skills/evaluating-skills/schema/grading.schema.json +0 -84
- package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
- package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -80
- package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -69
- package/skills/evaluating-skills/templates/evals.json.example +0 -17
- package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
- package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
|
@@ -1,1703 +0,0 @@
|
|
|
1
|
-
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
existsSync,
|
|
4
|
-
mkdirSync,
|
|
5
|
-
readdirSync,
|
|
6
|
-
readFileSync,
|
|
7
|
-
rmSync,
|
|
8
|
-
writeFileSync,
|
|
9
|
-
} from "node:fs";
|
|
10
|
-
import { tmpdir } from "node:os";
|
|
11
|
-
import { join } from "node:path";
|
|
12
|
-
import {
|
|
13
|
-
buildDispatchTask,
|
|
14
|
-
buildFinalizeCommands,
|
|
15
|
-
buildIngestCommands,
|
|
16
|
-
cleanupStagedSkills,
|
|
17
|
-
redactSkillFromBootstrap,
|
|
18
|
-
registerStagedSkillForCleanup,
|
|
19
|
-
runSteps,
|
|
20
|
-
STAGED_SIBLING_MANIFEST,
|
|
21
|
-
STAGED_SKILL_PREFIX,
|
|
22
|
-
selectEvals,
|
|
23
|
-
stageSiblingSkills,
|
|
24
|
-
stageSkillForCC,
|
|
25
|
-
} from "./run";
|
|
26
|
-
import type { Eval } from "./types";
|
|
27
|
-
import { SNAPSHOT_META } from "./workspace-teardown";
|
|
28
|
-
|
|
29
|
-
/**
 * Scratch directory used by the suites below. The process id is part of the
 * directory name, and the directory lives under the OS temp dir.
 */
const FIXTURE_ROOT = join(tmpdir(), `slow-powers-run-test-${process.pid}`);

// Create the scratch root once, before any test runs.
beforeAll(function createFixtureRoot() {
  mkdirSync(FIXTURE_ROOT, { recursive: true });
});

// Remove the scratch root and everything the tests wrote beneath it.
afterAll(function removeFixtureRoot() {
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
});
|
|
38
|
-
|
|
39
|
-
/**
 * Suite for `selectEvals`: filtering a list of evals with the `only` / `skip`
 * options, plus the error cases the function is expected to reject.
 */
describe("selectEvals", () => {
  /** Builds one minimal eval per id; prompt and expected output are derived from the id. */
  function mkEvals(...ids: string[]): Eval[] {
    return ids.map((id) => ({
      id,
      prompt: `p-${id}`,
      expected_output: `o-${id}`,
    }));
  }

  /** Projects a selection down to its ids so order can be asserted directly. */
  const idsOf = (selected: ReadonlyArray<{ id: string }>): string[] =>
    selected.map((entry) => entry.id);

  test("returns the full list unchanged when neither flag is set", () => {
    const all = mkEvals("a", "b", "c");
    const selected = selectEvals(all, {});
    expect(selected).toEqual(all);
  });

  test("--only keeps just the named ids, preserving config order", () => {
    const all = mkEvals("a", "b", "c");
    // Ids are requested as c, a; the result is expected in list order (a, c).
    const selected = selectEvals(all, { only: ["c", "a"] });
    expect(idsOf(selected)).toEqual(["a", "c"]);
  });

  test("--skip drops the named ids", () => {
    const all = mkEvals("a", "b", "c");
    const selected = selectEvals(all, { skip: ["b"] });
    expect(idsOf(selected)).toEqual(["a", "c"]);
  });

  test("throws on an unknown id, listing the unknown and the available ids", () => {
    const all = mkEvals("a", "b");
    const attempt = () => selectEvals(all, { only: ["a", "nope"] });
    expect(attempt).toThrow(/unknown eval id\(s\): nope\. Available ids: a, b/);
  });

  test("throws when both --only and --skip are given", () => {
    const all = mkEvals("a", "b");
    const attempt = () => selectEvals(all, { only: ["a"], skip: ["b"] });
    expect(attempt).toThrow(/only one of --only \/ --skip/);
  });

  test("throws when a flag resolves to an empty id list", () => {
    const all = mkEvals("a", "b");
    const attempt = () => selectEvals(all, { only: [] });
    expect(attempt).toThrow(/at least one eval id/);
  });
});
|
|
81
|
-
|
|
82
|
-
/**
 * Suite for `stageSkillForCC`: where the staged SKILL.md lands, how the slug is
 * formed, and which files from `assetsDir` are copied next to it.
 */
describe("stageSkillForCC", () => {
  // Front-matter sample used by the tests that check the staged content verbatim.
  const EXAMPLE_SKILL_MD =
    "---\nname: example\ndescription: example skill\n---\n\nbody\n";

  /** Directory in which a skill with the given slug is expected to be staged. */
  const stagedDirFor = (repoRoot: string, slug: string): string =>
    join(repoRoot, ".claude", "skills", slug);

  /** Reads a file as UTF-8 text. */
  const readText = (path: string): string => readFileSync(path, "utf8");

  test("writes SKILL.md to <repoRoot>/.claude/skills/<slug>/SKILL.md and returns the slug", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-basic");
    mkdirSync(repoRoot, { recursive: true });

    const slug = stageSkillForCC({
      content: EXAMPLE_SKILL_MD,
      iteration: 3,
      condition: "with_skill",
      skillName: "verification-before-completion",
      repoRoot,
    });

    const expectedSlug = `${STAGED_SKILL_PREFIX}3-with_skill__verification-before-completion`;
    expect(slug).toBe(expectedSlug);

    const stagedPath = join(stagedDirFor(repoRoot, slug), "SKILL.md");
    expect(existsSync(stagedPath)).toBe(true);
    expect(readText(stagedPath)).toBe(EXAMPLE_SKILL_MD);
  });

  test("overwrites an existing staged skill at the same slug", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-overwrite");
    mkdirSync(repoRoot, { recursive: true });

    // Stage twice with identical coordinates; only the content differs.
    stageSkillForCC({
      content: "first",
      iteration: 1,
      condition: "with_skill",
      skillName: "s",
      repoRoot,
    });
    const slug = stageSkillForCC({
      content: "second",
      iteration: 1,
      condition: "with_skill",
      skillName: "s",
      repoRoot,
    });

    const stagedPath = join(stagedDirFor(repoRoot, slug), "SKILL.md");
    expect(readText(stagedPath)).toBe("second");
  });

  test("copies sibling assets from assetsDir alongside the staged SKILL.md", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-assets");
    const assetsDir = join(FIXTURE_ROOT, "stage-assets-src");
    mkdirSync(repoRoot, { recursive: true });
    mkdirSync(join(assetsDir, "scripts"), { recursive: true });

    // Source layout: a SKILL.md, a top-level asset, and a nested asset.
    writeFileSync(join(assetsDir, "SKILL.md"), "the source skill md");
    writeFileSync(join(assetsDir, "code-review.md"), "review guidance");
    writeFileSync(join(assetsDir, "scripts", "helper.ts"), "export const x = 1");

    const slug = stageSkillForCC({
      content: "staged content",
      iteration: 1,
      condition: "new_skill",
      skillName: "s",
      repoRoot,
      assetsDir,
    });

    const stagedDir = stagedDirFor(repoRoot, slug);
    // The staged SKILL.md must carry `content`, not the copy sitting in assetsDir.
    expect(readText(join(stagedDir, "SKILL.md"))).toBe("staged content");
    expect(readText(join(stagedDir, "code-review.md"))).toBe("review guidance");
    expect(readText(join(stagedDir, "scripts", "helper.ts"))).toBe(
      "export const x = 1",
    );
  });

  test("excludes SKILL.md, evals/, and the snapshot meta file from the asset copy", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-assets-excludes");
    const assetsDir = join(FIXTURE_ROOT, "stage-assets-excludes-src");
    mkdirSync(join(assetsDir, "evals", "fixtures"), { recursive: true });
    mkdirSync(repoRoot, { recursive: true });

    writeFileSync(join(assetsDir, "SKILL.md"), "src skill md");
    writeFileSync(join(assetsDir, "code-review.md"), "keep me");
    writeFileSync(join(assetsDir, SNAPSHOT_META), '{"source":"ref"}');
    writeFileSync(join(assetsDir, "evals", "evals.json"), "{}");

    const slug = stageSkillForCC({
      content: "staged",
      iteration: 1,
      condition: "old_skill",
      skillName: "s",
      repoRoot,
      assetsDir,
    });

    const stagedDir = stagedDirFor(repoRoot, slug);
    expect(existsSync(join(stagedDir, "code-review.md"))).toBe(true);
    expect(existsSync(join(stagedDir, "evals"))).toBe(false);
    expect(existsSync(join(stagedDir, SNAPSHOT_META))).toBe(false);
    // SKILL.md is present (written from `content`) and was not replaced by the assetsDir copy.
    expect(readText(join(stagedDir, "SKILL.md"))).toBe("staged");
  });

  test("stages SKILL.md alone when assetsDir is omitted", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-no-assets");
    mkdirSync(repoRoot, { recursive: true });

    const slug = stageSkillForCC({
      content: "solo",
      iteration: 1,
      condition: "with_skill",
      skillName: "s",
      repoRoot,
    });

    const stagedEntries = readdirSync(stagedDirFor(repoRoot, slug));
    expect(stagedEntries).toEqual(["SKILL.md"]);
  });

  test("stageNameOverride stages under the verbatim name instead of the eval slug", () => {
    const repoRoot = join(FIXTURE_ROOT, "stage-override");
    mkdirSync(repoRoot, { recursive: true });

    const slug = stageSkillForCC({
      content: EXAMPLE_SKILL_MD,
      iteration: 2,
      condition: "with_skill",
      skillName: "verification-before-completion",
      repoRoot,
      stageNameOverride: "verification-before-completion",
    });

    expect(slug).toBe("verification-before-completion");
    const stagedPath = join(stagedDirFor(repoRoot, slug), "SKILL.md");
    expect(existsSync(stagedPath)).toBe(true);
    expect(readText(stagedPath)).toBe(EXAMPLE_SKILL_MD);
  });
});
|
|
226
|
-
|
|
227
|
-
/**
 * Suite for `registerStagedSkillForCleanup`: a registered directory name is
 * added to the sibling manifest (once), and `cleanupStagedSkills` then removes it.
 */
describe("registerStagedSkillForCleanup", () => {
  /** Writes a manifest file in the same pretty-printed, newline-terminated form the tests rely on. */
  function seedManifest(
    skillsDir: string,
    manifest: {
      created_at: string;
      staged_under_test: string;
      created_entries: Array<{ name: string; preexisting: boolean }>;
    },
  ): void {
    const serialized = `${JSON.stringify(manifest, null, 2)}\n`;
    writeFileSync(join(skillsDir, STAGED_SIBLING_MANIFEST), serialized);
  }

  /** Returns the `name` of every entry currently recorded in the manifest. */
  function manifestNames(skillsDir: string): string[] {
    const raw = readFileSync(join(skillsDir, STAGED_SIBLING_MANIFEST), "utf8");
    const parsed = JSON.parse(raw) as {
      created_entries: Array<{ name: string }>;
    };
    return parsed.created_entries.map((entry) => entry.name);
  }

  test("appends the custom dir to the manifest so cleanup removes it", () => {
    const root = join(FIXTURE_ROOT, "register-cleanup");
    const skillsDir = join(root, ".claude", "skills");
    mkdirSync(skillsDir, { recursive: true });

    // Start from a manifest that already lists one sibling entry.
    seedManifest(skillsDir, {
      created_at: "x",
      staged_under_test: "verification-before-completion",
      created_entries: [{ name: "sibling-a", preexisting: false }],
    });

    const customDir = join(skillsDir, "verification-before-completion");
    mkdirSync(customDir, { recursive: true });
    writeFileSync(join(customDir, "SKILL.md"), "staged");

    registerStagedSkillForCleanup(root, "verification-before-completion");

    expect(manifestNames(skillsDir).sort()).toEqual([
      "sibling-a",
      "verification-before-completion",
    ]);

    cleanupStagedSkills(root);
    expect(existsSync(customDir)).toBe(false);
  });

  test("is idempotent — registering the same name twice does not duplicate it", () => {
    const root = join(FIXTURE_ROOT, "register-idempotent");
    const skillsDir = join(root, ".claude", "skills");
    mkdirSync(skillsDir, { recursive: true });

    seedManifest(skillsDir, {
      created_at: "x",
      staged_under_test: "foo",
      created_entries: [],
    });

    // Register the same name twice in a row.
    for (let attempt = 0; attempt < 2; attempt += 1) {
      registerStagedSkillForCleanup(root, "foo-staged");
    }

    const occurrences = manifestNames(skillsDir).filter(
      (name) => name === "foo-staged",
    );
    expect(occurrences.length).toBe(1);
  });
});
|
|
291
|
-
|
|
292
|
-
/**
 * Suite for `cleanupStagedSkills` without a manifest: only prefix-named
 * directories are swept, and a missing `.claude/skills` is tolerated.
 */
describe("cleanupStagedSkills", () => {
  test("removes only directories with the staged-skill prefix under .claude/skills", () => {
    const repoRoot = join(FIXTURE_ROOT, "cleanup");
    const skillsDir = join(repoRoot, ".claude", "skills");
    mkdirSync(skillsDir, { recursive: true });

    // Two directories carrying the staged prefix, one that looks user-owned.
    const stagedDirs = [
      join(skillsDir, `${STAGED_SKILL_PREFIX}1-with_skill__foo`),
      join(skillsDir, `${STAGED_SKILL_PREFIX}1-new_skill__bar`),
    ];
    const userDir = join(skillsDir, "user-custom-skill");
    for (const dir of [...stagedDirs, userDir]) {
      mkdirSync(dir, { recursive: true });
    }

    cleanupStagedSkills(repoRoot);

    for (const dir of stagedDirs) {
      expect(existsSync(dir)).toBe(false);
    }
    expect(existsSync(userDir)).toBe(true);
  });

  test("is a no-op when .claude/skills does not exist", () => {
    const repoRoot = join(FIXTURE_ROOT, "cleanup-empty");
    mkdirSync(repoRoot, { recursive: true });

    const runCleanup = () => cleanupStagedSkills(repoRoot);
    expect(runCleanup).not.toThrow();
  });
});
|
|
318
|
-
|
|
319
|
-
/**
 * Suite for `stageSiblingSkills`: every skill in the source dir except the one
 * under test is staged into `.claude/skills/<name>/`, collisions are backed up,
 * and a manifest records what was created.
 */
describe("stageSiblingSkills", () => {
  /**
   * Creates `<root>/src-skills` holding three skills: `alpha` (SKILL.md,
   * helper.md and an `evals/` dir), `beta` and `gamma` (SKILL.md only).
   * Returns the path of the source directory.
   */
  function buildSourceSkills(root: string): string {
    const src = join(root, "src-skills");

    const alphaDir = join(src, "alpha");
    mkdirSync(join(alphaDir, "evals"), { recursive: true });
    writeFileSync(join(alphaDir, "SKILL.md"), "alpha content");
    writeFileSync(join(alphaDir, "helper.md"), "alpha helper");
    writeFileSync(join(alphaDir, "evals", "evals.json"), "{}");

    for (const name of ["beta", "gamma"]) {
      const skillDir = join(src, name);
      mkdirSync(skillDir, { recursive: true });
      writeFileSync(join(skillDir, "SKILL.md"), `${name} content`);
    }

    return src;
  }

  /** Reads a file as UTF-8 text. */
  const readText = (path: string): string => readFileSync(path, "utf8");

  test("stages each sibling at .claude/skills/<name>/ with full content minus evals/", () => {
    const root = join(FIXTURE_ROOT, "sibling-basic");
    mkdirSync(root, { recursive: true });
    const src = buildSourceSkills(root);

    stageSiblingSkills({
      skillUnderTest: "gamma",
      skillsSourceDir: src,
      repoRoot: root,
    });

    const skillsDir = join(root, ".claude", "skills");
    const stagedAlpha = join(skillsDir, "alpha");
    expect(readText(join(stagedAlpha, "SKILL.md"))).toBe("alpha content");
    expect(readText(join(stagedAlpha, "helper.md"))).toBe("alpha helper");
    expect(existsSync(join(stagedAlpha, "evals"))).toBe(false);
    expect(readText(join(skillsDir, "beta", "SKILL.md"))).toBe("beta content");
    expect(existsSync(join(skillsDir, "gamma"))).toBe(false);

    const manifestPath = join(skillsDir, STAGED_SIBLING_MANIFEST);
    expect(existsSync(manifestPath)).toBe(true);
    const { created_entries: entries } = JSON.parse(readText(manifestPath)) as {
      created_entries: Array<{ name: string; preexisting: boolean }>;
    };
    expect(entries.map((entry) => entry.name).sort()).toEqual(["alpha", "beta"]);
    // Nothing existed beforehand, so no entry may be flagged as pre-existing.
    entries.forEach((entry) => {
      expect(entry.preexisting).toBe(false);
    });
  });

  test("backs up colliding pre-existing entries and records them in the manifest", () => {
    const root = join(FIXTURE_ROOT, "sibling-collide");
    mkdirSync(root, { recursive: true });
    const src = buildSourceSkills(root);

    // Plant a user-owned `alpha` where the sibling would be staged.
    const skillsDir = join(root, ".claude", "skills");
    const collidingSkillMd = join(skillsDir, "alpha", "SKILL.md");
    mkdirSync(join(skillsDir, "alpha"), { recursive: true });
    writeFileSync(collidingSkillMd, "USER OWNED");

    stageSiblingSkills({
      skillUnderTest: "gamma",
      skillsSourceDir: src,
      repoRoot: root,
    });

    expect(readText(collidingSkillMd)).toBe("alpha content");

    const manifest = JSON.parse(
      readText(join(skillsDir, STAGED_SIBLING_MANIFEST)),
    ) as {
      created_entries: Array<{
        name: string;
        preexisting: boolean;
        backup_path?: string;
      }>;
    };
    const alphaEntry = manifest.created_entries.find(
      (entry) => entry.name === "alpha",
    );
    expect(alphaEntry).toBeDefined();
    expect(alphaEntry?.preexisting).toBe(true);
    expect(alphaEntry?.backup_path).toBeDefined();

    // The recorded backup must exist and still hold the user's original file.
    const backupPath = alphaEntry?.backup_path as string;
    expect(existsSync(backupPath)).toBe(true);
    expect(readText(join(backupPath, "SKILL.md"))).toBe("USER OWNED");
  });

  test("skips the skill-under-test even if it appears in the source skills dir", () => {
    const root = join(FIXTURE_ROOT, "sibling-skip-under-test");
    mkdirSync(root, { recursive: true });
    const src = buildSourceSkills(root);

    stageSiblingSkills({
      skillUnderTest: "alpha",
      skillsSourceDir: src,
      repoRoot: root,
    });

    const skillsDir = join(root, ".claude", "skills");
    const isStaged = (name: string): boolean => existsSync(join(skillsDir, name));
    expect(isStaged("alpha")).toBe(false);
    expect(isStaged("beta")).toBe(true);
    expect(isStaged("gamma")).toBe(true);
  });
});
|
|
426
|
-
|
|
427
|
-
/**
 * Suite for `cleanupStagedSkills` when a sibling manifest may be present:
 * staged siblings are removed, backed-up user content is restored, and the
 * prefix sweep still runs when there is no manifest.
 */
describe("cleanupStagedSkills (manifest-aware)", () => {
  test("removes manifest-listed sibling entries and restores backed-up pre-existing content", () => {
    const root = join(FIXTURE_ROOT, "cleanup-restore");
    mkdirSync(root, { recursive: true });

    // Source skills that will be staged as siblings.
    const src = join(root, "src-skills");
    for (const name of ["alpha", "beta"]) {
      mkdirSync(join(src, name), { recursive: true });
      writeFileSync(join(src, name, "SKILL.md"), `new ${name}`);
    }

    // A user-owned `alpha` that collides with the staged sibling.
    const skillsDir = join(root, ".claude", "skills");
    const alphaSkillMd = join(skillsDir, "alpha", "SKILL.md");
    const betaSkillMd = join(skillsDir, "beta", "SKILL.md");
    mkdirSync(join(skillsDir, "alpha"), { recursive: true });
    writeFileSync(alphaSkillMd, "USER ALPHA");

    stageSiblingSkills({
      skillUnderTest: "x",
      skillsSourceDir: src,
      repoRoot: root,
    });
    expect(readFileSync(alphaSkillMd, "utf8")).toBe("new alpha");
    expect(readFileSync(betaSkillMd, "utf8")).toBe("new beta");

    cleanupStagedSkills(root);

    // The user's file is back, the staged-only sibling and the manifest are gone.
    expect(readFileSync(alphaSkillMd, "utf8")).toBe("USER ALPHA");
    expect(existsSync(join(skillsDir, "beta"))).toBe(false);
    expect(existsSync(join(skillsDir, STAGED_SIBLING_MANIFEST))).toBe(false);
  });

  test("still sweeps prefix-staged entries when no manifest is present", () => {
    const root = join(FIXTURE_ROOT, "cleanup-legacy");
    const skillsDir = join(root, ".claude", "skills");
    mkdirSync(skillsDir, { recursive: true });

    const prefixedDir = join(skillsDir, `${STAGED_SKILL_PREFIX}1-with_skill__foo`);
    const userDir = join(skillsDir, "user-custom");
    mkdirSync(prefixedDir, { recursive: true });
    mkdirSync(userDir, { recursive: true });

    cleanupStagedSkills(root);

    expect(existsSync(prefixedDir)).toBe(false);
    expect(existsSync(userDir)).toBe(true);
  });
});
|
|
479
|
-
|
|
480
|
-
/**
 * Suite for `cleanupStagedSkills` with respect to who created `.claude/skills`:
 * a tree created during staging is removed wholesale, while a directory that
 * existed beforehand is kept and only the staged entries are taken out.
 */
describe("cleanupStagedSkills (runner-created .claude/skills)", () => {
  /** Creates `<root>/src-skills/alpha/SKILL.md` and returns the source directory. */
  function seedAlphaSource(root: string): string {
    const src = join(root, "src-skills");
    mkdirSync(join(src, "alpha"), { recursive: true });
    writeFileSync(join(src, "alpha", "SKILL.md"), "alpha");
    return src;
  }

  test("removes the whole .claude/skills tree when the runner created it, and prunes an empty .claude", () => {
    const root = join(FIXTURE_ROOT, "cleanup-created");
    mkdirSync(root, { recursive: true });
    const src = seedAlphaSource(root);

    // No .claude/skills exists yet, so staging is what creates it.
    stageSiblingSkills({
      skillUnderTest: "x",
      skillsSourceDir: src,
      repoRoot: root,
    });
    // Simulate an un-prefixed leftover directory appearing after staging.
    const strayDir = join(root, ".claude", "skills", "stray-leftover");
    mkdirSync(strayDir, { recursive: true });

    cleanupStagedSkills(root);

    expect(existsSync(join(root, ".claude", "skills"))).toBe(false);
    // With skills/ gone .claude is empty, so it is expected to be removed as well.
    expect(existsSync(join(root, ".claude"))).toBe(false);
  });

  test("keeps .claude (and settings.json) when the runner created only skills/", () => {
    const root = join(FIXTURE_ROOT, "cleanup-keep-settings");
    const claudeDir = join(root, ".claude");
    const settingsPath = join(claudeDir, "settings.json");
    mkdirSync(claudeDir, { recursive: true });
    writeFileSync(settingsPath, "{}");
    const src = seedAlphaSource(root);

    // .claude is already there, but skills/ inside it is created by staging.
    stageSiblingSkills({
      skillUnderTest: "x",
      skillsSourceDir: src,
      repoRoot: root,
    });

    cleanupStagedSkills(root);

    expect(existsSync(join(claudeDir, "skills"))).toBe(false);
    expect(existsSync(claudeDir)).toBe(true);
    expect(existsSync(settingsPath)).toBe(true);
  });

  test("leaves a pre-existing .claude/skills dir in place (surgical restore only)", () => {
    const root = join(FIXTURE_ROOT, "cleanup-preexisting-skillsdir");
    const skillsDir = join(root, ".claude", "skills");
    // .claude/skills exists up front and contains a skill owned by the user.
    const userSkillMd = join(skillsDir, "user-owned", "SKILL.md");
    mkdirSync(join(skillsDir, "user-owned"), { recursive: true });
    writeFileSync(userSkillMd, "USER");
    const src = seedAlphaSource(root);

    stageSiblingSkills({
      skillUnderTest: "x",
      skillsSourceDir: src,
      repoRoot: root,
    });

    cleanupStagedSkills(root);

    expect(existsSync(skillsDir)).toBe(true);
    expect(readFileSync(userSkillMd, "utf8")).toBe("USER");
    expect(existsSync(join(skillsDir, "alpha"))).toBe(false);
  });
});
|
|
554
|
-
|
|
555
|
-
describe("buildDispatchTask bootstrap injection", () => {
|
|
556
|
-
const baseOpts = {
|
|
557
|
-
evalId: "e1",
|
|
558
|
-
condition: "with_skill",
|
|
559
|
-
skillPath: null,
|
|
560
|
-
stagedSkillSlug: "slow-powers-eval-1-with_skill__foo" as string | null,
|
|
561
|
-
userPrompt: "do the thing",
|
|
562
|
-
fixtures: [] as string[],
|
|
563
|
-
outputsDir: "/tmp/out",
|
|
564
|
-
condDir: "/tmp/cond",
|
|
565
|
-
skillName: "foo",
|
|
566
|
-
availableSkills: [] as {
|
|
567
|
-
name: string;
|
|
568
|
-
path: string;
|
|
569
|
-
description: string;
|
|
570
|
-
}[],
|
|
571
|
-
};
|
|
572
|
-
|
|
573
|
-
test("prepends <session-start-context> for claude-code when bootstrapContent is provided", () => {
|
|
574
|
-
const task = buildDispatchTask({
|
|
575
|
-
...baseOpts,
|
|
576
|
-
bootstrapContent: "BOOT-LOADED",
|
|
577
|
-
});
|
|
578
|
-
expect(task.dispatch_prompt.startsWith("<session-start-context>")).toBe(
|
|
579
|
-
true,
|
|
580
|
-
);
|
|
581
|
-
expect(task.dispatch_prompt).toContain("BOOT-LOADED");
|
|
582
|
-
expect(task.dispatch_prompt).toContain("</session-start-context>");
|
|
583
|
-
});
|
|
584
|
-
|
|
585
|
-
test("omits <session-start-context> when bootstrapContent is null and nothing is staged", () => {
|
|
586
|
-
const task = buildDispatchTask({
|
|
587
|
-
...baseOpts,
|
|
588
|
-
bootstrapContent: null,
|
|
589
|
-
});
|
|
590
|
-
expect(task.dispatch_prompt).not.toContain("<session-start-context>");
|
|
591
|
-
});
|
|
592
|
-
|
|
593
|
-
test("emits a harness-native available-skills block (no <session-start-context>) when bootstrapContent is null", () => {
|
|
594
|
-
const task = buildDispatchTask({
|
|
595
|
-
...baseOpts,
|
|
596
|
-
bootstrapContent: null,
|
|
597
|
-
availableSkills: [
|
|
598
|
-
{ name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
|
|
599
|
-
],
|
|
600
|
-
});
|
|
601
|
-
// Without a bootstrap, there is no SessionStart block — only the skills list.
|
|
602
|
-
expect(task.dispatch_prompt).not.toContain("<session-start-context>");
|
|
603
|
-
expect(task.dispatch_prompt).toContain(
|
|
604
|
-
"The following skills are available for use with the Skill tool:",
|
|
605
|
-
);
|
|
606
|
-
expect(task.dispatch_prompt).toContain("- foo: the foo skill");
|
|
607
|
-
// The eval-flavored wording and custom format are gone.
|
|
608
|
-
expect(task.dispatch_prompt).not.toContain("staged and discoverable");
|
|
609
|
-
expect(task.dispatch_prompt).not.toContain("*Trigger:*");
|
|
610
|
-
// No product framing should appear without a bootstrap file.
|
|
611
|
-
expect(task.dispatch_prompt).not.toContain("loaded at session start");
|
|
612
|
-
});
|
|
613
|
-
|
|
614
|
-
test("renders the available-skills block as its own section, outside <session-start-context>, after the verbatim bootstrap", () => {
|
|
615
|
-
const task = buildDispatchTask({
|
|
616
|
-
...baseOpts,
|
|
617
|
-
bootstrapContent: "BOOT-LOADED",
|
|
618
|
-
availableSkills: [
|
|
619
|
-
{ name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
|
|
620
|
-
],
|
|
621
|
-
});
|
|
622
|
-
const prompt = task.dispatch_prompt;
|
|
623
|
-
// The skills list is a separate block, not bundled inside the SessionStart
|
|
624
|
-
// context (which carries bootstrap content only).
|
|
625
|
-
const sscEnd = prompt.indexOf("</session-start-context>");
|
|
626
|
-
const listIdx = prompt.indexOf(
|
|
627
|
-
"The following skills are available for use with the Skill tool:",
|
|
628
|
-
);
|
|
629
|
-
const bootIdx = prompt.indexOf("BOOT-LOADED");
|
|
630
|
-
expect(sscEnd).toBeGreaterThan(-1);
|
|
631
|
-
expect(bootIdx).toBeGreaterThan(-1);
|
|
632
|
-
expect(bootIdx).toBeLessThan(sscEnd);
|
|
633
|
-
expect(listIdx).toBeGreaterThan(sscEnd);
|
|
634
|
-
});
|
|
635
|
-
|
|
636
|
-
test("sets dispatch_prompt_path to dispatch-prompt.txt under the condition dir", () => {
|
|
637
|
-
const task = buildDispatchTask({
|
|
638
|
-
...baseOpts,
|
|
639
|
-
bootstrapContent: null,
|
|
640
|
-
});
|
|
641
|
-
expect(task.dispatch_prompt_path).toBe("/tmp/cond/dispatch-prompt.txt");
|
|
642
|
-
});
|
|
643
|
-
|
|
644
|
-
const SAMPLE_DIRECTORY = [
|
|
645
|
-
"## Active Skills Directory",
|
|
646
|
-
"",
|
|
647
|
-
"* **`test-driven-development`**",
|
|
648
|
-
" * *Trigger:* Use whenever implementing code.",
|
|
649
|
-
"* **`systematic-debugging`**",
|
|
650
|
-
" * *Trigger:* Use when debugging.",
|
|
651
|
-
].join("\n");
|
|
652
|
-
|
|
653
|
-
test("redactSkillFromBootstrap removes the skill-under-test's directory entry", () => {
|
|
654
|
-
const redacted = redactSkillFromBootstrap(
|
|
655
|
-
SAMPLE_DIRECTORY,
|
|
656
|
-
"test-driven-development",
|
|
657
|
-
);
|
|
658
|
-
expect(redacted).not.toContain("test-driven-development");
|
|
659
|
-
expect(redacted).not.toContain("Use whenever implementing code.");
|
|
660
|
-
// Sibling entries and the heading survive.
|
|
661
|
-
expect(redacted).toContain("systematic-debugging");
|
|
662
|
-
expect(redacted).toContain("Use when debugging.");
|
|
663
|
-
expect(redacted).toContain("## Active Skills Directory");
|
|
664
|
-
});
|
|
665
|
-
|
|
666
|
-
test("redacts the skill-under-test from bootstrap in the skill-absent condition", () => {
|
|
667
|
-
const withoutSkill = buildDispatchTask({
|
|
668
|
-
...baseOpts,
|
|
669
|
-
condition: "without_skill",
|
|
670
|
-
skillPath: null,
|
|
671
|
-
stagedSkillSlug: null,
|
|
672
|
-
skillName: "test-driven-development",
|
|
673
|
-
bootstrapContent: SAMPLE_DIRECTORY,
|
|
674
|
-
});
|
|
675
|
-
expect(withoutSkill.dispatch_prompt).not.toContain(
|
|
676
|
-
"test-driven-development",
|
|
677
|
-
);
|
|
678
|
-
// A sibling skill named in the same bootstrap is untouched.
|
|
679
|
-
expect(withoutSkill.dispatch_prompt).toContain("systematic-debugging");
|
|
680
|
-
|
|
681
|
-
const withSkill = buildDispatchTask({
|
|
682
|
-
...baseOpts,
|
|
683
|
-
condition: "with_skill",
|
|
684
|
-
skillPath: null,
|
|
685
|
-
stagedSkillSlug: "slow-powers-eval-1-with_skill__test-driven-development",
|
|
686
|
-
skillName: "test-driven-development",
|
|
687
|
-
bootstrapContent: SAMPLE_DIRECTORY,
|
|
688
|
-
});
|
|
689
|
-
expect(withSkill.dispatch_prompt).toContain("test-driven-development");
|
|
690
|
-
});
|
|
691
|
-
|
|
692
|
-
test("names the staged slug for disambiguation without instructing invocation", () => {
|
|
693
|
-
const task = buildDispatchTask({
|
|
694
|
-
...baseOpts,
|
|
695
|
-
bootstrapContent: "BOOT-LOADED",
|
|
696
|
-
});
|
|
697
|
-
// The slug is still surfaced so a deliberate invocation targets the staged
|
|
698
|
-
// version and the meta-check can find it — but we no longer assert a plugin
|
|
699
|
-
// is "loaded" or tell the agent to prefer the slug over the bare name, which
|
|
700
|
-
// invited it to hunt for a global copy (issue #144 global-plugin leakage).
|
|
701
|
-
expect(task.dispatch_prompt).toContain(
|
|
702
|
-
"slow-powers-eval-1-with_skill__foo",
|
|
703
|
-
);
|
|
704
|
-
// ...but the over-promoting invoke imperative (issue #119) is gone, so
|
|
705
|
-
// invocation reflects the skill's own triggering rather than an order.
|
|
706
|
-
expect(task.dispatch_prompt).not.toContain("invoke that slug");
|
|
707
|
-
expect(task.dispatch_prompt).not.toContain("if the skill applies");
|
|
708
|
-
expect(task.dispatch_prompt).not.toContain("under evaluation");
|
|
709
|
-
// ...and the leakage-inviting framing is gone (issue #144): no claim that a
|
|
710
|
-
// plugin is loaded, no "use the slug rather than the bare name" contrast.
|
|
711
|
-
expect(task.dispatch_prompt).not.toContain("plugin loaded");
|
|
712
|
-
expect(task.dispatch_prompt).not.toContain("rather than the bare name");
|
|
713
|
-
});
|
|
714
|
-
|
|
715
|
-
test("without-skill condition under realistic env carries no eval-announcing skill commentary", () => {
|
|
716
|
-
const task = buildDispatchTask({
|
|
717
|
-
...baseOpts,
|
|
718
|
-
skillPath: null,
|
|
719
|
-
stagedSkillSlug: null,
|
|
720
|
-
bootstrapContent: "BOOT-LOADED",
|
|
721
|
-
});
|
|
722
|
-
// The arm stays silent about the absent skill: the available-skills block
|
|
723
|
-
// already omits it, so nothing announces that this is an eval control arm.
|
|
724
|
-
expect(task.dispatch_prompt).not.toContain("No skill is loaded");
|
|
725
|
-
expect(task.dispatch_prompt.toLowerCase()).not.toContain("not available");
|
|
726
|
-
expect(task.dispatch_prompt).not.toContain("under evaluation");
|
|
727
|
-
});
|
|
728
|
-
|
|
729
|
-
test("without-skill condition without bootstrap (e.g. --no-stage) keeps the legacy 'No skill is loaded' wording", () => {
|
|
730
|
-
const task = buildDispatchTask({
|
|
731
|
-
...baseOpts,
|
|
732
|
-
skillPath: null,
|
|
733
|
-
stagedSkillSlug: null,
|
|
734
|
-
bootstrapContent: null,
|
|
735
|
-
});
|
|
736
|
-
expect(task.dispatch_prompt).toContain("No skill is loaded");
|
|
737
|
-
});
|
|
738
|
-
});
|
|
739
|
-
|
|
740
|
-
describe("buildDispatchTask plan-mode injection", () => {
|
|
741
|
-
const baseOpts = {
|
|
742
|
-
evalId: "e1",
|
|
743
|
-
condition: "with_skill",
|
|
744
|
-
skillPath: null,
|
|
745
|
-
stagedSkillSlug: "slow-powers-eval-1-with_skill__foo" as string | null,
|
|
746
|
-
userPrompt: "BUILD-THE-TODO-APP",
|
|
747
|
-
fixtures: [] as string[],
|
|
748
|
-
outputsDir: "/tmp/out",
|
|
749
|
-
condDir: "/tmp/cond",
|
|
750
|
-
skillName: "foo",
|
|
751
|
-
bootstrapContent: null as string | null,
|
|
752
|
-
availableSkills: [
|
|
753
|
-
{ name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
|
|
754
|
-
] as { name: string; path: string; description: string }[],
|
|
755
|
-
};
|
|
756
|
-
|
|
757
|
-
test("omits the plan-mode block when planModeContent is null/absent", () => {
|
|
758
|
-
const task = buildDispatchTask({ ...baseOpts });
|
|
759
|
-
expect(task.dispatch_prompt).not.toContain("<system-reminder>");
|
|
760
|
-
const withNull = buildDispatchTask({ ...baseOpts, planModeContent: null });
|
|
761
|
-
expect(withNull.dispatch_prompt).not.toContain("<system-reminder>");
|
|
762
|
-
});
|
|
763
|
-
|
|
764
|
-
test("injects the rendered plan-mode block when planModeContent is provided", () => {
|
|
765
|
-
const task = buildDispatchTask({
|
|
766
|
-
...baseOpts,
|
|
767
|
-
planModeContent: "Plan mode is active. PLAN-RAIL-MARKER.",
|
|
768
|
-
});
|
|
769
|
-
expect(task.dispatch_prompt).toContain("<system-reminder>");
|
|
770
|
-
expect(task.dispatch_prompt).toContain("PLAN-RAIL-MARKER.");
|
|
771
|
-
expect(task.dispatch_prompt).toContain("</system-reminder>");
|
|
772
|
-
});
|
|
773
|
-
|
|
774
|
-
test("places the plan-mode block after the available-skills block and before the user request", () => {
|
|
775
|
-
const prompt = buildDispatchTask({
|
|
776
|
-
...baseOpts,
|
|
777
|
-
planModeContent: "PLAN-RAIL-MARKER",
|
|
778
|
-
}).dispatch_prompt;
|
|
779
|
-
const skillsIdx = prompt.indexOf(
|
|
780
|
-
"The following skills are available for use with the Skill tool:",
|
|
781
|
-
);
|
|
782
|
-
const planIdx = prompt.indexOf("<system-reminder>");
|
|
783
|
-
const promptIdx = prompt.indexOf("BUILD-THE-TODO-APP");
|
|
784
|
-
expect(skillsIdx).toBeGreaterThan(-1);
|
|
785
|
-
expect(planIdx).toBeGreaterThan(skillsIdx);
|
|
786
|
-
expect(promptIdx).toBeGreaterThan(planIdx);
|
|
787
|
-
});
|
|
788
|
-
|
|
789
|
-
test("injects an identical plan-mode block in the with- and without-skill arms", () => {
|
|
790
|
-
const planModeContent = "Plan mode is active. PLAN-RAIL-MARKER.";
|
|
791
|
-
const rendered =
|
|
792
|
-
"<system-reminder>\nPlan mode is active. PLAN-RAIL-MARKER.\n</system-reminder>";
|
|
793
|
-
const withSkill = buildDispatchTask({
|
|
794
|
-
...baseOpts,
|
|
795
|
-
condition: "with_skill",
|
|
796
|
-
stagedSkillSlug: "slow-powers-eval-1-with_skill__foo",
|
|
797
|
-
planModeContent,
|
|
798
|
-
});
|
|
799
|
-
const withoutSkill = buildDispatchTask({
|
|
800
|
-
...baseOpts,
|
|
801
|
-
condition: "without_skill",
|
|
802
|
-
skillPath: null,
|
|
803
|
-
stagedSkillSlug: null,
|
|
804
|
-
availableSkills: [],
|
|
805
|
-
planModeContent,
|
|
806
|
-
});
|
|
807
|
-
expect(withSkill.dispatch_prompt).toContain(rendered);
|
|
808
|
-
expect(withoutSkill.dispatch_prompt).toContain(rendered);
|
|
809
|
-
});
|
|
810
|
-
});
|
|
811
|
-
|
|
812
|
-
describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
|
|
813
|
-
const RUN_TS = join(import.meta.dir, "run.ts");
|
|
814
|
-
|
|
815
|
-
function setup(
|
|
816
|
-
name: string,
|
|
817
|
-
evals: Eval[] = [
|
|
818
|
-
{ id: "e1", prompt: "review this MR", expected_output: "a review" },
|
|
819
|
-
],
|
|
820
|
-
): { skillDir: string; cwd: string } {
|
|
821
|
-
const root = join(FIXTURE_ROOT, name);
|
|
822
|
-
const skillDir = join(root, "skill-dir");
|
|
823
|
-
const skillSub = join(skillDir, "mr-review");
|
|
824
|
-
mkdirSync(join(skillSub, "evals"), { recursive: true });
|
|
825
|
-
writeFileSync(
|
|
826
|
-
join(skillSub, "SKILL.md"),
|
|
827
|
-
"---\nname: mr-review\ndescription: review merge requests\n---\n\nbody\n",
|
|
828
|
-
);
|
|
829
|
-
writeFileSync(
|
|
830
|
-
join(skillSub, "evals", "evals.json"),
|
|
831
|
-
JSON.stringify({ skill_name: "mr-review", evals }),
|
|
832
|
-
);
|
|
833
|
-
const cwd = join(root, "work");
|
|
834
|
-
mkdirSync(cwd, { recursive: true });
|
|
835
|
-
return { skillDir, cwd };
|
|
836
|
-
}
|
|
837
|
-
|
|
838
|
-
function runCli(args: string[], cwd: string) {
|
|
839
|
-
return Bun.spawnSync(["bun", "run", RUN_TS, ...args], {
|
|
840
|
-
cwd,
|
|
841
|
-
stdout: "pipe",
|
|
842
|
-
stderr: "pipe",
|
|
843
|
-
});
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
test("stages only the skill-under-test and writes workspace under CWD", () => {
|
|
847
|
-
const { skillDir, cwd } = setup("usermode-basic");
|
|
848
|
-
const res = runCli(
|
|
849
|
-
[
|
|
850
|
-
"--skill-dir",
|
|
851
|
-
skillDir,
|
|
852
|
-
"--skill",
|
|
853
|
-
"mr-review",
|
|
854
|
-
"--mode",
|
|
855
|
-
"new-skill",
|
|
856
|
-
"--dry-run",
|
|
857
|
-
],
|
|
858
|
-
cwd,
|
|
859
|
-
);
|
|
860
|
-
expect(res.exitCode).toBe(0);
|
|
861
|
-
|
|
862
|
-
const dispatchJson = join(
|
|
863
|
-
cwd,
|
|
864
|
-
"skills-workspace",
|
|
865
|
-
"mr-review",
|
|
866
|
-
"iteration-1",
|
|
867
|
-
"dispatch.json",
|
|
868
|
-
);
|
|
869
|
-
expect(existsSync(dispatchJson)).toBe(true);
|
|
870
|
-
|
|
871
|
-
const stagedSkillsDir = join(cwd, ".claude", "skills");
|
|
872
|
-
const entries = readdirSync(stagedSkillsDir).filter(
|
|
873
|
-
(e) => e !== STAGED_SIBLING_MANIFEST,
|
|
874
|
-
);
|
|
875
|
-
expect(entries).toEqual(["slow-powers-eval-1-with_skill__mr-review"]);
|
|
876
|
-
});
|
|
877
|
-
|
|
878
|
-
test("--plan-mode injects the resolved profile into every dispatch and records plan_mode in dispatch.json", () => {
|
|
879
|
-
const { skillDir, cwd } = setup("usermode-plan-mode");
|
|
880
|
-
const res = runCli(
|
|
881
|
-
[
|
|
882
|
-
"--skill-dir",
|
|
883
|
-
skillDir,
|
|
884
|
-
"--skill",
|
|
885
|
-
"mr-review",
|
|
886
|
-
"--mode",
|
|
887
|
-
"new-skill",
|
|
888
|
-
"--plan-mode",
|
|
889
|
-
"--dry-run",
|
|
890
|
-
],
|
|
891
|
-
cwd,
|
|
892
|
-
);
|
|
893
|
-
expect(res.exitCode).toBe(0);
|
|
894
|
-
|
|
895
|
-
const iterationDir = join(
|
|
896
|
-
cwd,
|
|
897
|
-
"skills-workspace",
|
|
898
|
-
"mr-review",
|
|
899
|
-
"iteration-1",
|
|
900
|
-
);
|
|
901
|
-
const dispatch = JSON.parse(
|
|
902
|
-
readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
|
|
903
|
-
) as {
|
|
904
|
-
plan_mode: boolean;
|
|
905
|
-
tasks: Array<{ condition: string; dispatch_prompt_path: string }>;
|
|
906
|
-
};
|
|
907
|
-
expect(dispatch.plan_mode).toBe(true);
|
|
908
|
-
|
|
909
|
-
// Both arms carry the same harness-injected plan-mode operating context.
|
|
910
|
-
for (const t of dispatch.tasks) {
|
|
911
|
-
const prompt = readFileSync(t.dispatch_prompt_path, "utf8");
|
|
912
|
-
expect(prompt).toContain("<system-reminder>");
|
|
913
|
-
expect(prompt).toContain("Plan mode is active");
|
|
914
|
-
expect(prompt).toContain("ExitPlanMode");
|
|
915
|
-
}
|
|
916
|
-
});
|
|
917
|
-
|
|
918
|
-
test("without --plan-mode, dispatch.json records plan_mode:false and no plan-mode block is injected", () => {
|
|
919
|
-
const { skillDir, cwd } = setup("usermode-no-plan-mode");
|
|
920
|
-
const res = runCli(
|
|
921
|
-
[
|
|
922
|
-
"--skill-dir",
|
|
923
|
-
skillDir,
|
|
924
|
-
"--skill",
|
|
925
|
-
"mr-review",
|
|
926
|
-
"--mode",
|
|
927
|
-
"new-skill",
|
|
928
|
-
"--dry-run",
|
|
929
|
-
],
|
|
930
|
-
cwd,
|
|
931
|
-
);
|
|
932
|
-
expect(res.exitCode).toBe(0);
|
|
933
|
-
|
|
934
|
-
const iterationDir = join(
|
|
935
|
-
cwd,
|
|
936
|
-
"skills-workspace",
|
|
937
|
-
"mr-review",
|
|
938
|
-
"iteration-1",
|
|
939
|
-
);
|
|
940
|
-
const dispatch = JSON.parse(
|
|
941
|
-
readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
|
|
942
|
-
) as {
|
|
943
|
-
plan_mode: boolean;
|
|
944
|
-
tasks: Array<{ dispatch_prompt_path: string }>;
|
|
945
|
-
};
|
|
946
|
-
expect(dispatch.plan_mode).toBe(false);
|
|
947
|
-
for (const t of dispatch.tasks) {
|
|
948
|
-
const prompt = readFileSync(t.dispatch_prompt_path, "utf8");
|
|
949
|
-
expect(prompt).not.toContain("<system-reminder>");
|
|
950
|
-
}
|
|
951
|
-
});
|
|
952
|
-
|
|
953
|
-
test("--stage-name stages the SUT under the verbatim name, threads it everywhere, and registers it for cleanup", () => {
|
|
954
|
-
const { skillDir, cwd } = setup("usermode-stage-name");
|
|
955
|
-
const res = runCli(
|
|
956
|
-
[
|
|
957
|
-
"--skill-dir",
|
|
958
|
-
skillDir,
|
|
959
|
-
"--skill",
|
|
960
|
-
"mr-review",
|
|
961
|
-
"--mode",
|
|
962
|
-
"new-skill",
|
|
963
|
-
"--stage-name",
|
|
964
|
-
"mr-review",
|
|
965
|
-
"--dry-run",
|
|
966
|
-
],
|
|
967
|
-
cwd,
|
|
968
|
-
);
|
|
969
|
-
expect(res.exitCode).toBe(0);
|
|
970
|
-
|
|
971
|
-
// Staged dir is the natural name, not the conspicuous eval slug.
|
|
972
|
-
const stagedSkillsDir = join(cwd, ".claude", "skills");
|
|
973
|
-
const entries = readdirSync(stagedSkillsDir).filter(
|
|
974
|
-
(e) => e !== STAGED_SIBLING_MANIFEST,
|
|
975
|
-
);
|
|
976
|
-
expect(entries).toEqual(["mr-review"]);
|
|
977
|
-
|
|
978
|
-
const iterationDir = join(
|
|
979
|
-
cwd,
|
|
980
|
-
"skills-workspace",
|
|
981
|
-
"mr-review",
|
|
982
|
-
"iteration-1",
|
|
983
|
-
);
|
|
984
|
-
|
|
985
|
-
// conditions.json carries the natural slug — the grader meta-check reads it.
|
|
986
|
-
const conditions = JSON.parse(
|
|
987
|
-
readFileSync(join(iterationDir, "conditions.json"), "utf8"),
|
|
988
|
-
) as {
|
|
989
|
-
conditions: Array<{ name: string; staged_skill_slug: string | null }>;
|
|
990
|
-
};
|
|
991
|
-
const withSkill = conditions.conditions.find(
|
|
992
|
-
(c) => c.name === "with_skill",
|
|
993
|
-
);
|
|
994
|
-
expect(withSkill?.staged_skill_slug).toBe("mr-review");
|
|
995
|
-
|
|
996
|
-
// The custom dir is registered for cleanup (prefix scan won't catch it).
|
|
997
|
-
const manifest = JSON.parse(
|
|
998
|
-
readFileSync(join(stagedSkillsDir, STAGED_SIBLING_MANIFEST), "utf8"),
|
|
999
|
-
) as { created_entries: Array<{ name: string }> };
|
|
1000
|
-
expect(manifest.created_entries.map((e) => e.name)).toContain("mr-review");
|
|
1001
|
-
|
|
1002
|
-
// The dispatch prompt disambiguates to the natural identifier, not the slug.
|
|
1003
|
-
const dispatch = JSON.parse(
|
|
1004
|
-
readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
|
|
1005
|
-
) as {
|
|
1006
|
-
tasks: Array<{ condition: string; dispatch_prompt_path: string }>;
|
|
1007
|
-
};
|
|
1008
|
-
const task = dispatch.tasks.find((t) => t.condition === "with_skill");
|
|
1009
|
-
const prompt = readFileSync(task?.dispatch_prompt_path ?? "", "utf8");
|
|
1010
|
-
expect(prompt).toContain("registered under the identifier `mr-review`");
|
|
1011
|
-
expect(prompt).not.toContain("slow-powers-eval-");
|
|
1012
|
-
});
|
|
1013
|
-
|
|
1014
|
-
test("--stage-name refuses to clobber a pre-existing same-named dir", () => {
|
|
1015
|
-
const { skillDir, cwd } = setup("usermode-stage-name-clobber");
|
|
1016
|
-
const preexisting = join(cwd, ".claude", "skills", "my-real-skill");
|
|
1017
|
-
mkdirSync(preexisting, { recursive: true });
|
|
1018
|
-
writeFileSync(join(preexisting, "SKILL.md"), "USER OWNED");
|
|
1019
|
-
|
|
1020
|
-
const res = runCli(
|
|
1021
|
-
[
|
|
1022
|
-
"--skill-dir",
|
|
1023
|
-
skillDir,
|
|
1024
|
-
"--skill",
|
|
1025
|
-
"mr-review",
|
|
1026
|
-
"--mode",
|
|
1027
|
-
"new-skill",
|
|
1028
|
-
"--stage-name",
|
|
1029
|
-
"my-real-skill",
|
|
1030
|
-
"--dry-run",
|
|
1031
|
-
],
|
|
1032
|
-
cwd,
|
|
1033
|
-
);
|
|
1034
|
-
expect(res.exitCode).not.toBe(0);
|
|
1035
|
-
expect(readFileSync(join(preexisting, "SKILL.md"), "utf8")).toBe(
|
|
1036
|
-
"USER OWNED",
|
|
1037
|
-
);
|
|
1038
|
-
});
|
|
1039
|
-
|
|
1040
|
-
test("dispatch prompt lists only the skill-under-test, no other skills, and no product framing without --bootstrap", () => {
|
|
1041
|
-
const { skillDir, cwd } = setup("usermode-prompt");
|
|
1042
|
-
const res = runCli(
|
|
1043
|
-
[
|
|
1044
|
-
"--skill-dir",
|
|
1045
|
-
skillDir,
|
|
1046
|
-
"--skill",
|
|
1047
|
-
"mr-review",
|
|
1048
|
-
"--mode",
|
|
1049
|
-
"new-skill",
|
|
1050
|
-
"--dry-run",
|
|
1051
|
-
],
|
|
1052
|
-
cwd,
|
|
1053
|
-
);
|
|
1054
|
-
expect(res.exitCode).toBe(0);
|
|
1055
|
-
|
|
1056
|
-
const dispatch = JSON.parse(
|
|
1057
|
-
readFileSync(
|
|
1058
|
-
join(
|
|
1059
|
-
cwd,
|
|
1060
|
-
"skills-workspace",
|
|
1061
|
-
"mr-review",
|
|
1062
|
-
"iteration-1",
|
|
1063
|
-
"dispatch.json",
|
|
1064
|
-
),
|
|
1065
|
-
"utf8",
|
|
1066
|
-
),
|
|
1067
|
-
) as {
|
|
1068
|
-
tasks: Array<{
|
|
1069
|
-
condition: string;
|
|
1070
|
-
dispatch_prompt?: string;
|
|
1071
|
-
dispatch_prompt_path: string;
|
|
1072
|
-
}>;
|
|
1073
|
-
};
|
|
1074
|
-
|
|
1075
|
-
const withSkill = dispatch.tasks.find((t) => t.condition === "with_skill");
|
|
1076
|
-
expect(withSkill).toBeDefined();
|
|
1077
|
-
// The full prompt is no longer inlined in dispatch.json — it lives in a file.
|
|
1078
|
-
expect(withSkill?.dispatch_prompt).toBeUndefined();
|
|
1079
|
-
const prompt = readFileSync(withSkill?.dispatch_prompt_path ?? "", "utf8");
|
|
1080
|
-
expect(prompt).toContain(
|
|
1081
|
-
"The following skills are available for use with the Skill tool:",
|
|
1082
|
-
);
|
|
1083
|
-
expect(prompt).toContain("- mr-review:");
|
|
1084
|
-
expect(prompt).not.toContain("test-driven-development");
|
|
1085
|
-
expect(prompt).not.toContain("writing-skills");
|
|
1086
|
-
// No product framing (EXTREMELY-IMPORTANT etc.) without a --bootstrap file.
|
|
1087
|
-
expect(prompt).not.toContain("EXTREMELY-IMPORTANT");
|
|
1088
|
-
expect(prompt).not.toContain("loaded at session start");
|
|
1089
|
-
});
|
|
1090
|
-
|
|
1091
|
-
test("writes each dispatch prompt to a file and drops the inline prompt from dispatch.json", () => {
|
|
1092
|
-
const { skillDir, cwd } = setup("usermode-prompt-file");
|
|
1093
|
-
const res = runCli(
|
|
1094
|
-
[
|
|
1095
|
-
"--skill-dir",
|
|
1096
|
-
skillDir,
|
|
1097
|
-
"--skill",
|
|
1098
|
-
"mr-review",
|
|
1099
|
-
"--mode",
|
|
1100
|
-
"new-skill",
|
|
1101
|
-
"--dry-run",
|
|
1102
|
-
],
|
|
1103
|
-
cwd,
|
|
1104
|
-
);
|
|
1105
|
-
expect(res.exitCode).toBe(0);
|
|
1106
|
-
|
|
1107
|
-
const dispatch = JSON.parse(
|
|
1108
|
-
readFileSync(
|
|
1109
|
-
join(
|
|
1110
|
-
cwd,
|
|
1111
|
-
"skills-workspace",
|
|
1112
|
-
"mr-review",
|
|
1113
|
-
"iteration-1",
|
|
1114
|
-
"dispatch.json",
|
|
1115
|
-
),
|
|
1116
|
-
"utf8",
|
|
1117
|
-
),
|
|
1118
|
-
) as {
|
|
1119
|
-
tasks: Array<{ dispatch_prompt?: string; dispatch_prompt_path: string }>;
|
|
1120
|
-
};
|
|
1121
|
-
|
|
1122
|
-
expect(dispatch.tasks.length).toBeGreaterThan(0);
|
|
1123
|
-
for (const t of dispatch.tasks) {
|
|
1124
|
-
// Nothing inlined; everything goes through the file pointer.
|
|
1125
|
-
expect(t.dispatch_prompt).toBeUndefined();
|
|
1126
|
-
expect(t.dispatch_prompt_path.endsWith("dispatch-prompt.txt")).toBe(true);
|
|
1127
|
-
expect(existsSync(t.dispatch_prompt_path)).toBe(true);
|
|
1128
|
-
const contents = readFileSync(t.dispatch_prompt_path, "utf8");
|
|
1129
|
-
expect(contents.length).toBeGreaterThan(0);
|
|
1130
|
-
expect(contents).toContain("User request:");
|
|
1131
|
-
}
|
|
1132
|
-
});
|
|
1133
|
-
|
|
1134
|
-
test("--guard installs a PreToolUse hook; teardown-guard removes it", () => {
|
|
1135
|
-
const { skillDir, cwd } = setup("usermode-guard");
|
|
1136
|
-
const settingsPath = join(cwd, ".claude", "settings.local.json");
|
|
1137
|
-
|
|
1138
|
-
const res = runCli(
|
|
1139
|
-
[
|
|
1140
|
-
"--skill-dir",
|
|
1141
|
-
skillDir,
|
|
1142
|
-
"--skill",
|
|
1143
|
-
"mr-review",
|
|
1144
|
-
"--mode",
|
|
1145
|
-
"new-skill",
|
|
1146
|
-
"--guard",
|
|
1147
|
-
],
|
|
1148
|
-
cwd,
|
|
1149
|
-
);
|
|
1150
|
-
expect(res.exitCode).toBe(0);
|
|
1151
|
-
expect(existsSync(settingsPath)).toBe(true);
|
|
1152
|
-
const settings = JSON.parse(readFileSync(settingsPath, "utf8"));
|
|
1153
|
-
expect(settings.hooks.PreToolUse[0].matcher).toContain("Write");
|
|
1154
|
-
|
|
1155
|
-
const down = runCli(
|
|
1156
|
-
["teardown-guard", "--skill-dir", skillDir, "--skill", "mr-review"],
|
|
1157
|
-
cwd,
|
|
1158
|
-
);
|
|
1159
|
-
expect(down.exitCode).toBe(0);
|
|
1160
|
-
expect(existsSync(settingsPath)).toBe(false);
|
|
1161
|
-
});
|
|
1162
|
-
|
|
1163
|
-
test("teardown removes the guard AND the staged skill set the runner created", () => {
|
|
1164
|
-
const { skillDir, cwd } = setup("usermode-teardown");
|
|
1165
|
-
const settingsPath = join(cwd, ".claude", "settings.local.json");
|
|
1166
|
-
const stagedSkillsDir = join(cwd, ".claude", "skills");
|
|
1167
|
-
|
|
1168
|
-
const res = runCli(
|
|
1169
|
-
[
|
|
1170
|
-
"--skill-dir",
|
|
1171
|
-
skillDir,
|
|
1172
|
-
"--skill",
|
|
1173
|
-
"mr-review",
|
|
1174
|
-
"--mode",
|
|
1175
|
-
"new-skill",
|
|
1176
|
-
"--guard",
|
|
1177
|
-
],
|
|
1178
|
-
cwd,
|
|
1179
|
-
);
|
|
1180
|
-
expect(res.exitCode).toBe(0);
|
|
1181
|
-
expect(existsSync(settingsPath)).toBe(true);
|
|
1182
|
-
expect(existsSync(stagedSkillsDir)).toBe(true);
|
|
1183
|
-
|
|
1184
|
-
const down = runCli(
|
|
1185
|
-
["teardown", "--skill-dir", skillDir, "--skill", "mr-review"],
|
|
1186
|
-
cwd,
|
|
1187
|
-
);
|
|
1188
|
-
expect(down.exitCode).toBe(0);
|
|
1189
|
-
// Guard gone, staged skills gone, and the .claude scaffolding the runner
|
|
1190
|
-
// created in this throwaway cwd (no settings.json) is pruned entirely.
|
|
1191
|
-
expect(existsSync(settingsPath)).toBe(false);
|
|
1192
|
-
expect(existsSync(stagedSkillsDir)).toBe(false);
|
|
1193
|
-
expect(existsSync(join(cwd, ".claude"))).toBe(false);
|
|
1194
|
-
// The run only produced scaffolding (no results), so teardown reclaims the
|
|
1195
|
-
// workspace too — a completed run leaves nothing uncommitted behind.
|
|
1196
|
-
expect(existsSync(join(cwd, "skills-workspace"))).toBe(false);
|
|
1197
|
-
});
|
|
1198
|
-
|
|
1199
|
-
test("teardown preserves an iteration with uncommitted results and warns", () => {
|
|
1200
|
-
const { skillDir, cwd } = setup("usermode-teardown-keep");
|
|
1201
|
-
|
|
1202
|
-
const res = runCli(
|
|
1203
|
-
["--skill-dir", skillDir, "--skill", "mr-review", "--mode", "new-skill"],
|
|
1204
|
-
cwd,
|
|
1205
|
-
);
|
|
1206
|
-
expect(res.exitCode).toBe(0);
|
|
1207
|
-
|
|
1208
|
-
// Simulate a graded-but-not-promoted run: drop an aggregate into the
|
|
1209
|
-
// iteration the runner just created.
|
|
1210
|
-
const iterationDir = join(
|
|
1211
|
-
cwd,
|
|
1212
|
-
"skills-workspace",
|
|
1213
|
-
"mr-review",
|
|
1214
|
-
"iteration-1",
|
|
1215
|
-
);
|
|
1216
|
-
writeFileSync(
|
|
1217
|
-
join(iterationDir, "benchmark.json"),
|
|
1218
|
-
`${JSON.stringify({ delta: { pass_rate: 0.4 } })}\n`,
|
|
1219
|
-
);
|
|
1220
|
-
|
|
1221
|
-
const down = runCli(
|
|
1222
|
-
["teardown", "--skill-dir", skillDir, "--skill", "mr-review"],
|
|
1223
|
-
cwd,
|
|
1224
|
-
);
|
|
1225
|
-
expect(down.exitCode).toBe(0);
|
|
1226
|
-
|
|
1227
|
-
// Uncommitted results are preserved, and the user is told how to commit.
|
|
1228
|
-
expect(existsSync(iterationDir)).toBe(true);
|
|
1229
|
-
const out =
|
|
1230
|
-
new TextDecoder().decode(down.stdout) +
|
|
1231
|
-
new TextDecoder().decode(down.stderr);
|
|
1232
|
-
expect(out).toContain("iteration-1");
|
|
1233
|
-
expect(out).toContain("promote-baseline");
|
|
1234
|
-
});
|
|
1235
|
-
|
|
1236
|
-
test("a normal run does not install a guard", () => {
|
|
1237
|
-
const { skillDir, cwd } = setup("usermode-noguard");
|
|
1238
|
-
const res = runCli(
|
|
1239
|
-
[
|
|
1240
|
-
"--skill-dir",
|
|
1241
|
-
skillDir,
|
|
1242
|
-
"--skill",
|
|
1243
|
-
"mr-review",
|
|
1244
|
-
"--mode",
|
|
1245
|
-
"new-skill",
|
|
1246
|
-
"--dry-run",
|
|
1247
|
-
],
|
|
1248
|
-
cwd,
|
|
1249
|
-
);
|
|
1250
|
-
expect(res.exitCode).toBe(0);
|
|
1251
|
-
expect(existsSync(join(cwd, ".claude", "settings.local.json"))).toBe(false);
|
|
1252
|
-
});
|
|
1253
|
-
|
|
1254
|
-
test("namespaces agent_description per iteration+run and records run_nonce", () => {
|
|
1255
|
-
const { skillDir, cwd } = setup("usermode-nonce");
|
|
1256
|
-
const res = runCli(
|
|
1257
|
-
[
|
|
1258
|
-
"--skill-dir",
|
|
1259
|
-
skillDir,
|
|
1260
|
-
"--skill",
|
|
1261
|
-
"mr-review",
|
|
1262
|
-
"--mode",
|
|
1263
|
-
"new-skill",
|
|
1264
|
-
"--dry-run",
|
|
1265
|
-
],
|
|
1266
|
-
cwd,
|
|
1267
|
-
);
|
|
1268
|
-
expect(res.exitCode).toBe(0);
|
|
1269
|
-
|
|
1270
|
-
const iterationDir = join(
|
|
1271
|
-
cwd,
|
|
1272
|
-
"skills-workspace",
|
|
1273
|
-
"mr-review",
|
|
1274
|
-
"iteration-1",
|
|
1275
|
-
);
|
|
1276
|
-
const dispatch = JSON.parse(
|
|
1277
|
-
readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
|
|
1278
|
-
) as {
|
|
1279
|
-
run_nonce: string;
|
|
1280
|
-
tasks: Array<{ condition: string; agent_description: string }>;
|
|
1281
|
-
};
|
|
1282
|
-
expect(typeof dispatch.run_nonce).toBe("string");
|
|
1283
|
-
expect(dispatch.run_nonce.length).toBeGreaterThan(0);
|
|
1284
|
-
|
|
1285
|
-
for (const t of dispatch.tasks) {
|
|
1286
|
-
// <eval_id>:<condition>:i<iteration>-<nonce> — unique across iterations
|
|
1287
|
-
// and re-runs so fill-transcripts can't cross-match a colliding agent.
|
|
1288
|
-
expect(t.agent_description).toMatch(
|
|
1289
|
-
new RegExp(`:${t.condition}:i1-${dispatch.run_nonce}$`),
|
|
1290
|
-
);
|
|
1291
|
-
}
|
|
1292
|
-
|
|
1293
|
-
const conditions = JSON.parse(
|
|
1294
|
-
readFileSync(join(iterationDir, "conditions.json"), "utf8"),
|
|
1295
|
-
) as { run_nonce?: string };
|
|
1296
|
-
expect(conditions.run_nonce).toBe(dispatch.run_nonce);
|
|
1297
|
-
});
|
|
1298
|
-
|
|
1299
|
-
test("--bootstrap content is prepended verbatim before the available-skills block", () => {
|
|
1300
|
-
const { skillDir, cwd } = setup("usermode-bootstrap");
|
|
1301
|
-
const bootstrapPath = join(cwd, "my-bootstrap.md");
|
|
1302
|
-
writeFileSync(bootstrapPath, "MY CUSTOM EVAL FRAMING");
|
|
1303
|
-
const res = runCli(
|
|
1304
|
-
[
|
|
1305
|
-
"--skill-dir",
|
|
1306
|
-
skillDir,
|
|
1307
|
-
"--skill",
|
|
1308
|
-
"mr-review",
|
|
1309
|
-
"--mode",
|
|
1310
|
-
"new-skill",
|
|
1311
|
-
"--bootstrap",
|
|
1312
|
-
bootstrapPath,
|
|
1313
|
-
"--dry-run",
|
|
1314
|
-
],
|
|
1315
|
-
cwd,
|
|
1316
|
-
);
|
|
1317
|
-
expect(res.exitCode).toBe(0);
|
|
1318
|
-
|
|
1319
|
-
const dispatch = JSON.parse(
|
|
1320
|
-
readFileSync(
|
|
1321
|
-
join(
|
|
1322
|
-
cwd,
|
|
1323
|
-
"skills-workspace",
|
|
1324
|
-
"mr-review",
|
|
1325
|
-
"iteration-1",
|
|
1326
|
-
"dispatch.json",
|
|
1327
|
-
),
|
|
1328
|
-
"utf8",
|
|
1329
|
-
),
|
|
1330
|
-
) as {
|
|
1331
|
-
tasks: Array<{ condition: string; dispatch_prompt_path: string }>;
|
|
1332
|
-
};
|
|
1333
|
-
const withSkill = dispatch.tasks.find((t) => t.condition === "with_skill");
|
|
1334
|
-
const prompt = withSkill
|
|
1335
|
-
? readFileSync(withSkill.dispatch_prompt_path, "utf8")
|
|
1336
|
-
: "";
|
|
1337
|
-
const bootIdx = prompt.indexOf("MY CUSTOM EVAL FRAMING");
|
|
1338
|
-
const listIdx = prompt.indexOf(
|
|
1339
|
-
"The following skills are available for use with the Skill tool:",
|
|
1340
|
-
);
|
|
1341
|
-
expect(bootIdx).toBeGreaterThan(-1);
|
|
1342
|
-
expect(listIdx).toBeGreaterThan(bootIdx);
|
|
1343
|
-
});
|
|
1344
|
-
|
|
1345
|
-
test("--only restricts dispatches to the named eval ids", () => {
|
|
1346
|
-
const { skillDir, cwd } = setup("usermode-only", [
|
|
1347
|
-
{ id: "e1", prompt: "review MR 1", expected_output: "a review" },
|
|
1348
|
-
{ id: "e2", prompt: "review MR 2", expected_output: "a review" },
|
|
1349
|
-
]);
|
|
1350
|
-
const res = runCli(
|
|
1351
|
-
[
|
|
1352
|
-
"--skill-dir",
|
|
1353
|
-
skillDir,
|
|
1354
|
-
"--skill",
|
|
1355
|
-
"mr-review",
|
|
1356
|
-
"--mode",
|
|
1357
|
-
"new-skill",
|
|
1358
|
-
"--only",
|
|
1359
|
-
"e1",
|
|
1360
|
-
"--dry-run",
|
|
1361
|
-
],
|
|
1362
|
-
cwd,
|
|
1363
|
-
);
|
|
1364
|
-
expect(res.exitCode).toBe(0);
|
|
1365
|
-
|
|
1366
|
-
const dispatch = JSON.parse(
|
|
1367
|
-
readFileSync(
|
|
1368
|
-
join(
|
|
1369
|
-
cwd,
|
|
1370
|
-
"skills-workspace",
|
|
1371
|
-
"mr-review",
|
|
1372
|
-
"iteration-1",
|
|
1373
|
-
"dispatch.json",
|
|
1374
|
-
),
|
|
1375
|
-
"utf8",
|
|
1376
|
-
),
|
|
1377
|
-
) as { tasks: Array<{ eval_id: string }> };
|
|
1378
|
-
|
|
1379
|
-
expect(dispatch.tasks.map((t) => t.eval_id).sort()).toEqual(["e1", "e1"]);
|
|
1380
|
-
// The "N evals × 2 conditions" line reflects the filtered set.
|
|
1381
|
-
expect(new TextDecoder().decode(res.stdout)).toContain(
|
|
1382
|
-
"1 evals × 2 conditions",
|
|
1383
|
-
);
|
|
1384
|
-
});
|
|
1385
|
-
|
|
1386
|
-
test("--only with an unknown id exits non-zero and names the unknown id", () => {
|
|
1387
|
-
const { skillDir, cwd } = setup("usermode-only-unknown", [
|
|
1388
|
-
{ id: "e1", prompt: "review MR 1", expected_output: "a review" },
|
|
1389
|
-
]);
|
|
1390
|
-
const res = runCli(
|
|
1391
|
-
[
|
|
1392
|
-
"--skill-dir",
|
|
1393
|
-
skillDir,
|
|
1394
|
-
"--skill",
|
|
1395
|
-
"mr-review",
|
|
1396
|
-
"--mode",
|
|
1397
|
-
"new-skill",
|
|
1398
|
-
"--only",
|
|
1399
|
-
"nope",
|
|
1400
|
-
"--dry-run",
|
|
1401
|
-
],
|
|
1402
|
-
cwd,
|
|
1403
|
-
);
|
|
1404
|
-
expect(res.exitCode).not.toBe(0);
|
|
1405
|
-
expect(new TextDecoder().decode(res.stderr)).toContain(
|
|
1406
|
-
"unknown eval id(s): nope",
|
|
1407
|
-
);
|
|
1408
|
-
});
|
|
1409
|
-
});
|
|
1410
|
-
|
|
1411
|
-
describe("snapshot --ref (read baseline from a git ref, issue #122)", () => {
  const RUN_TS = join(import.meta.dir, "run.ts");

  /** `-c` overrides prepended to every git call made by these fixtures. */
  const GIT_CONFIG_FLAGS = [
    "-c",
    "user.email=eval@test",
    "-c",
    "user.name=eval",
    "-c",
    "commit.gpgsign=false",
  ];

  /**
   * Runs `git <args>` synchronously in `cwd` with the overrides above and
   * returns the spawn result; throws with git's stderr on a non-zero exit.
   */
  const git = (args: string[], cwd: string) => {
    const proc = Bun.spawnSync(["git", ...GIT_CONFIG_FLAGS, ...args], {
      cwd,
      stdout: "pipe",
      stderr: "pipe",
    });
    if (proc.exitCode === 0) return proc;
    throw new Error(`git ${args.join(" ")} failed: ${proc.stderr.toString()}`);
  };

  /** Spawns `bun run <RUN_TS> <args>` in `cwd`, piping stdout and stderr. */
  const runCli = (args: string[], cwd: string) =>
    Bun.spawnSync(["bun", "run", RUN_TS, ...args], {
      cwd,
      stdout: "pipe",
      stderr: "pipe",
    });

  /**
   * Builds the argv for `snapshot` against the `mr-review` skill. `--ref` is
   * appended only when a ref is supplied.
   */
  const snapshotArgs = (
    skillDir: string,
    label: string,
    ref?: string,
  ): string[] => {
    const argv = [
      "snapshot",
      "--skill-dir",
      skillDir,
      "--skill",
      "mr-review",
      "--label",
      label,
    ];
    return ref === undefined ? argv : [...argv, "--ref", ref];
  };

  /**
   * Creates a git repo under FIXTURE_ROOT/<name> whose single commit ("v1")
   * contains the `mr-review` skill plus any `extraCommitted` files (paths are
   * relative to the skill folder). After committing, SKILL.md is rewritten to
   * v2 and left uncommitted, so HEAD and the working tree disagree. Also
   * creates an empty `work` directory to run the CLI from.
   */
  const setupRepo = (
    name: string,
    opts: { extraCommitted?: Record<string, string> } = {},
  ): { root: string; skillDir: string; skillSub: string; cwd: string } => {
    const root = join(FIXTURE_ROOT, name);
    const skillDir = join(root, "skill-dir");
    const skillSub = join(skillDir, "mr-review");
    const skillFile = join(skillSub, "SKILL.md");

    mkdirSync(skillSub, { recursive: true });
    writeFileSync(skillFile, "v1 baseline\n");
    Object.entries(opts.extraCommitted ?? {}).forEach(([rel, content]) => {
      const target = join(skillSub, rel);
      // Make sure the file's parent directory exists before writing it.
      mkdirSync(join(target, ".."), { recursive: true });
      writeFileSync(target, content);
    });

    // Commit everything written so far as v1.
    for (const gitArgs of [
      ["init", "-q"],
      ["add", "-A"],
      ["commit", "-q", "-m", "v1"],
    ]) {
      git(gitArgs, root);
    }

    // Uncommitted edit: the working tree now says v2 while HEAD still says v1.
    writeFileSync(skillFile, "v2 working tree\n");

    const cwd = join(root, "work");
    mkdirSync(cwd, { recursive: true });
    return { root, skillDir, skillSub, cwd };
  };

  /** Path of `rel` inside the snapshot named `label`, under the workspace in `cwd`. */
  const snapshotPath = (cwd: string, label: string, rel: string): string =>
    join(cwd, "skills-workspace", "mr-review", "snapshots", label, rel);

  test("snapshots the SKILL.md committed at the ref, leaving the working tree untouched", () => {
    const repo = setupRepo("ref-old-content");
    const run = runCli(snapshotArgs(repo.skillDir, "old", "HEAD"), repo.cwd);
    expect(run.exitCode).toBe(0);

    // The snapshot carries the committed v1 content...
    const snapshotted = readFileSync(
      snapshotPath(repo.cwd, "old", "SKILL.md"),
      "utf8",
    );
    expect(snapshotted).toBe("v1 baseline\n");
    // ...while the edited v2 in the working tree has not been overwritten.
    const onDisk = readFileSync(join(repo.skillSub, "SKILL.md"), "utf8");
    expect(onDisk).toBe("v2 working tree\n");
  });

  test("captures sibling assets at the ref but excludes evals/", () => {
    const repo = setupRepo("ref-assets", {
      extraCommitted: {
        "assets/notes.md": "asset body\n",
        "evals/evals.json": '{"skill_name":"mr-review","evals":[]}',
      },
    });
    const run = runCli(snapshotArgs(repo.skillDir, "old", "HEAD"), repo.cwd);
    expect(run.exitCode).toBe(0);

    const notesPath = snapshotPath(repo.cwd, "old", "assets/notes.md");
    expect(existsSync(notesPath)).toBe(true);
    expect(readFileSync(notesPath, "utf8")).toBe("asset body\n");
    expect(existsSync(snapshotPath(repo.cwd, "old", "evals"))).toBe(false);
  });

  test("records ref provenance so teardown can reclaim the snapshot", () => {
    const repo = setupRepo("ref-meta");
    const run = runCli(snapshotArgs(repo.skillDir, "old", "HEAD"), repo.cwd);
    expect(run.exitCode).toBe(0);

    const rawMeta = readFileSync(
      snapshotPath(repo.cwd, "old", SNAPSHOT_META),
      "utf8",
    );
    const meta = JSON.parse(rawMeta) as { source: string; ref: string };
    expect(meta.source).toBe("ref");
    expect(meta.ref).toBe("HEAD");
  });

  test("a ref that does not exist fails with a clear message", () => {
    const repo = setupRepo("ref-bad");
    const run = runCli(
      snapshotArgs(repo.skillDir, "old", "does-not-exist"),
      repo.cwd,
    );
    expect(run.exitCode).not.toBe(0);
    const stderrText = new TextDecoder().decode(run.stderr);
    expect(stderrText).toContain("does-not-exist");
  });

  test("without --ref, snapshot still reads the working tree (v2)", () => {
    const repo = setupRepo("ref-default-path");
    const run = runCli(snapshotArgs(repo.skillDir, "wt"), repo.cwd);
    expect(run.exitCode).toBe(0);

    const snapshotted = readFileSync(
      snapshotPath(repo.cwd, "wt", "SKILL.md"),
      "utf8",
    );
    expect(snapshotted).toBe("v2 working tree\n");
  });

  test("records working-tree provenance so teardown preserves the snapshot", () => {
    const repo = setupRepo("wt-meta");
    const run = runCli(snapshotArgs(repo.skillDir, "wt"), repo.cwd);
    expect(run.exitCode).toBe(0);

    const rawMeta = readFileSync(
      snapshotPath(repo.cwd, "wt", SNAPSHOT_META),
      "utf8",
    );
    const meta = JSON.parse(rawMeta) as { source: string };
    expect(meta.source).toBe("working-tree");
  });
});
|
|
1623
|
-
|
|
1624
|
-
describe("ingest / finalize step plans", () => {
  // Options handed to buildIngestCommands; the tests below assert how each
  // value shows up in the generated argv.
  const ingestOpts = {
    runnerDir: "/runner",
    skillDir: "/skills",
    skill: "mr-review",
    iteration: 2,
    subagentsDir: "/subagents",
  };

  test("buildIngestCommands runs record → fill → stray-writes → grade, in order", () => {
    const plan = buildIngestCommands(ingestOpts);

    const labels = plan.map(({ label }) => label);
    expect(labels).toEqual([
      "record-runs",
      "fill-transcripts",
      "detect-stray-writes",
      "grade",
    ]);

    // Each step must be `bun run /runner/<label>.ts` carrying the shared flags.
    const sharedTokens = [
      "--skill-dir",
      "/skills",
      "--skill",
      "mr-review",
      "--iteration",
      "2",
    ];
    for (const { label, argv } of plan) {
      expect(argv.slice(0, 2)).toEqual(["bun", "run"]);
      expect(argv[2]).toBe(`/runner/${label}.ts`);
      for (const token of sharedTokens) {
        expect(argv).toContain(token);
      }
    }

    // Only the transcript-reading steps receive --subagents-dir.
    const argvByLabel = Object.fromEntries(
      plan.map(({ label, argv }) => [label, argv]),
    );
    for (const label of ["record-runs", "fill-transcripts"]) {
      expect(argvByLabel[label]).toContain("--subagents-dir");
    }
    for (const label of ["detect-stray-writes", "grade"]) {
      expect(argvByLabel[label]).not.toContain("--subagents-dir");
    }
  });

  test("buildFinalizeCommands runs grade --finalize then aggregate", () => {
    const plan = buildFinalizeCommands({
      runnerDir: "/runner",
      skillDir: "/skills",
      skill: "mr-review",
      iteration: 2,
    });

    const labels = plan.map(({ label }) => label);
    expect(labels).toEqual(["grade --finalize", "aggregate"]);

    const [gradeStep, aggregateStep] = plan;
    expect(gradeStep.argv[2]).toBe("/runner/grade.ts");
    expect(gradeStep.argv).toContain("--finalize");
    expect(aggregateStep.argv[2]).toBe("/runner/aggregate.ts");
  });

  test("runSteps stops at the first failing step and reports it", () => {
    const visited: string[] = [];
    const pipeline = [
      { label: "a", argv: ["x"] },
      { label: "b", argv: ["y"] },
      { label: "c", argv: ["z"] },
    ];

    const outcome = runSteps(pipeline, (step) => {
      visited.push(step.label);
      // Only step "b" reports a non-zero exit code.
      if (step.label === "b") return 1;
      return 0;
    });

    expect(visited).toEqual(["a", "b"]); // c never runs after b fails
    expect(outcome.failedAt).toBe("b");
  });

  test("runSteps runs everything and reports no failure on success", () => {
    const pipeline = [
      { label: "a", argv: ["x"] },
      { label: "b", argv: ["y"] },
    ];

    const outcome = runSteps(pipeline, () => 0);

    expect(outcome.failedAt).toBeNull();
  });
});
|