@slowdini/slow-powers-opencode 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +37 -65
  2. package/bootstrap.md +1 -7
  3. package/opencode/plugins/slow-powers.js +1 -1
  4. package/package.json +14 -13
  5. package/skills/evaluating-skills/SKILL.md +91 -337
  6. package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
  7. package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
  8. package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
  9. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
  10. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
  11. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
  12. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
  13. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
  14. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
  15. package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
  16. package/skills/verifying-development-work/SKILL.md +17 -6
  17. package/skills/verifying-development-work/code-review.md +68 -0
  18. package/skills/verifying-development-work/comment-review.md +85 -0
  19. package/skills/verifying-development-work/evals/baseline/BASELINE.md +7 -6
  20. package/skills/verifying-development-work/evals/baseline/NOTES.md +83 -149
  21. package/skills/verifying-development-work/evals/baseline/benchmark.json +32 -31
  22. package/skills/verifying-development-work/evals/baseline/grading/comment-hygiene-at-handoff__new_skill.json +53 -0
  23. package/skills/verifying-development-work/evals/baseline/grading/comment-hygiene-at-handoff__old_skill.json +53 -0
  24. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__new_skill.json +53 -0
  25. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__old_skill.json +53 -0
  26. package/skills/verifying-development-work/evals/evals.json +34 -2
  27. package/skills/verifying-development-work/evals/fixtures/comment-hygiene-at-handoff/slugify.test.ts +14 -0
  28. package/skills/verifying-development-work/evals/fixtures/comment-hygiene-at-handoff/slugify.ts +25 -0
  29. package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
  30. package/skills/evaluating-skills/harness-details/claude.md +0 -158
  31. package/skills/evaluating-skills/runner/README.md +0 -154
  32. package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
  33. package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
  34. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -263
  35. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -146
  36. package/skills/evaluating-skills/runner/aggregate.test.ts +0 -264
  37. package/skills/evaluating-skills/runner/aggregate.ts +0 -248
  38. package/skills/evaluating-skills/runner/context.test.ts +0 -181
  39. package/skills/evaluating-skills/runner/context.ts +0 -90
  40. package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -103
  41. package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -192
  42. package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
  43. package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
  44. package/skills/evaluating-skills/runner/grade.test.ts +0 -347
  45. package/skills/evaluating-skills/runner/grade.ts +0 -603
  46. package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
  47. package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
  48. package/skills/evaluating-skills/runner/guard/install.ts +0 -147
  49. package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -71
  50. package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
  51. package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
  52. package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
  53. package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
  54. package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -230
  55. package/skills/evaluating-skills/runner/promote-baseline.ts +0 -186
  56. package/skills/evaluating-skills/runner/run.test.ts +0 -1180
  57. package/skills/evaluating-skills/runner/run.ts +0 -1029
  58. package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -74
  59. package/skills/evaluating-skills/runner/types.ts +0 -112
  60. package/skills/evaluating-skills/runner/validate-all.ts +0 -54
  61. package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
  62. package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
  63. package/skills/evaluating-skills/runner/validate.test.ts +0 -56
  64. package/skills/evaluating-skills/runner/validate.ts +0 -21
  65. package/skills/evaluating-skills/schema/evals.schema.json +0 -105
  66. package/skills/evaluating-skills/schema/grading.schema.json +0 -84
  67. package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
  68. package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -68
  69. package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -67
  70. package/skills/evaluating-skills/templates/evals.json.example +0 -17
  71. package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
  72. package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
  73. package/skills/verifying-development-work/evals/baseline/grading/bug-fixed-without-reproducing__with_skill.json +0 -39
  74. package/skills/verifying-development-work/evals/baseline/grading/bug-fixed-without-reproducing__without_skill.json +0 -24
  75. package/skills/verifying-development-work/evals/baseline/grading/build-implied-by-edit__with_skill.json +0 -46
  76. package/skills/verifying-development-work/evals/baseline/grading/build-implied-by-edit__without_skill.json +0 -31
  77. package/skills/verifying-development-work/evals/baseline/grading/claim-without-running__with_skill.json +0 -46
  78. package/skills/verifying-development-work/evals/baseline/grading/claim-without-running__without_skill.json +0 -31
  79. package/skills/verifying-development-work/evals/baseline/grading/seeded-done-tests-pass-ship-it__with_skill.json +0 -46
  80. package/skills/verifying-development-work/evals/baseline/grading/seeded-done-tests-pass-ship-it__without_skill.json +0 -31
  81. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__with_skill.json +0 -53
  82. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__without_skill.json +0 -38
@@ -1,248 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { detectRunContext } from "./context";
5
- import {
6
- type PluginShadowReport,
7
- shadowValidityWarnings,
8
- } from "./plugin-shadow";
9
- import type { ConditionsRecord, GradingResult, TimingRecord } from "./types";
10
-
11
/**
 * Report a fatal CLI error and terminate.
 *
 * Writes `error: <msg>` to stderr and exits the process with status 1, so
 * callers can use it as a guard that never returns.
 */
function die(msg: string): never {
  console.error(`error: ${msg}`);
  process.exit(1);
}

/**
 * Extract the options this script reads directly from the command line.
 *
 * Only `--iteration <value>` is parsed here; the value is returned verbatim as
 * a string. The process is terminated via {@link die} when the flag is absent
 * or is not followed by a value.
 *
 * @param argv - Command-line tokens with the runtime and script path removed.
 * @returns The raw `--iteration` value.
 */
function parseArgs(argv: string[]): { iteration: string } {
  const flag = (name: string): string | undefined => {
    const i = argv.indexOf(`--${name}`);
    if (i === -1) return undefined;
    const value = argv[i + 1];
    // A following `--other` token is the next flag, not this flag's value.
    // Without this check `--iteration --skill foo` would yield "--skill".
    if (value === undefined || value.startsWith("--")) {
      die(`flag --${name} requires a value`);
    }
    return value;
  };
  const iteration = flag("iteration");
  if (!iteration) die("missing --iteration");
  return { iteration };
}
26
-
27
- type Series = number[];
28
-
29
/**
 * Arithmetic mean of a numeric series.
 *
 * @param values - Samples to average.
 * @returns The average of `values`, or `0` when the series is empty.
 */
function mean(values: Series): number {
  const count = values.length;
  if (count === 0) return 0;
  let total = 0;
  for (const value of values) {
    total += value;
  }
  return total / count;
}
33
-
34
/**
 * Population standard deviation of a numeric series (divides by `n`, not
 * `n - 1`).
 *
 * @param values - Samples to measure.
 * @param m - Mean to measure deviation from; computed from `values` when
 *   omitted.
 * @returns The standard deviation, or `0` when there are fewer than two
 *   samples.
 */
function stddev(values: Series, m = mean(values)): number {
  const count = values.length;
  if (count < 2) return 0;
  let squaredDeviation = 0;
  for (const sample of values) {
    squaredDeviation += (sample - m) ** 2;
  }
  const variance = squaredDeviation / count;
  return Math.sqrt(variance);
}
39
-
40
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param n - Value to round.
 * @param dp - Number of decimal places to keep.
 * @returns `n` scaled by `10 ** dp`, rounded with `Math.round`, then scaled
 *   back.
 */
function round(n: number, dp: number): number {
  const scale = 10 ** dp;
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
44
-
45
/**
 * Summarise a numeric series as rounded mean, rounded standard deviation and
 * sample count.
 *
 * @param values - Samples to summarise.
 * @param dp - Decimal places applied to both `mean` and `stddev`.
 * @returns `{ mean, stddev, n }`, where `n` is the number of samples.
 */
function stats(values: Series, dp: number) {
  const average = mean(values);
  // Reuse the unrounded mean so the deviation is not computed from a rounded value.
  const spread = stddev(values, average);
  return {
    mean: round(average, dp),
    stddev: round(spread, dp),
    n: values.length,
  };
}
53
-
54
- const aggArgv = Bun.argv.slice(2);
55
- const { iteration } = parseArgs(aggArgv);
56
- const aggCtx = detectRunContext(aggArgv);
57
- const iterationDir = join(
58
- aggCtx.workspaceRoot,
59
- aggCtx.skillName,
60
- `iteration-${iteration}`,
61
- );
62
- if (!existsSync(iterationDir)) die(`not found: ${iterationDir}`);
63
-
64
- const conditionsPath = join(iterationDir, "conditions.json");
65
- if (!existsSync(conditionsPath)) die(`missing: ${conditionsPath}`);
66
- const conditions: ConditionsRecord = JSON.parse(
67
- readFileSync(conditionsPath, "utf8"),
68
- );
69
- const conditionNames = conditions.conditions.map((c) => c.name);
70
- if (conditionNames.length !== 2)
71
- die(`expected exactly 2 conditions, got ${conditionNames.length}`);
72
-
73
- const evalDirs = readdirSync(iterationDir).filter((d) => d.startsWith("eval-"));
74
- if (evalDirs.length === 0) die("no eval directories found");
75
-
76
- type Bucket = {
77
- passRates: Series;
78
- durations: Series;
79
- tokens: Series;
80
- skillInvoked: boolean[];
81
- hadSkillLoaded: boolean;
82
- };
83
- const byCondition: Record<string, Bucket> = {};
84
- const conditionSkillPaths = new Map<string, string | null>();
85
- for (const c of conditions.conditions) {
86
- conditionSkillPaths.set(c.name, c.skill_path);
87
- byCondition[c.name] = {
88
- passRates: [],
89
- durations: [],
90
- tokens: [],
91
- skillInvoked: [],
92
- hadSkillLoaded: !!c.skill_path,
93
- };
94
- }
95
-
96
- let missingGradings = 0;
97
- for (const evalDir of evalDirs) {
98
- for (const cond of conditionNames) {
99
- const condDir = join(iterationDir, evalDir, cond);
100
- const gradingPath = join(condDir, "grading.json");
101
- const timingPath = join(condDir, "timing.json");
102
- if (!existsSync(gradingPath)) {
103
- console.warn(`warn: missing grading for ${evalDir}/${cond}`);
104
- missingGradings++;
105
- continue;
106
- }
107
- const grading: GradingResult = JSON.parse(
108
- readFileSync(gradingPath, "utf8"),
109
- );
110
- byCondition[cond].passRates.push(grading.summary.pass_rate);
111
- if (grading.meta_summary?.skill_invoked != null)
112
- byCondition[cond].skillInvoked.push(grading.meta_summary.skill_invoked);
113
- if (existsSync(timingPath)) {
114
- const timing: TimingRecord = JSON.parse(readFileSync(timingPath, "utf8"));
115
- if (typeof timing.total_tokens === "number")
116
- byCondition[cond].tokens.push(timing.total_tokens);
117
- if (typeof timing.duration_ms === "number")
118
- byCondition[cond].durations.push(timing.duration_ms);
119
- }
120
- }
121
- }
122
-
123
- type ConditionSummary = {
124
- pass_rate: ReturnType<typeof stats>;
125
- duration_ms: ReturnType<typeof stats>;
126
- total_tokens: ReturnType<typeof stats>;
127
- skill_invocation_rate?: number | null;
128
- skill_invocation_n?: number;
129
- };
130
-
131
- const runSummary: Record<string, ConditionSummary> = {};
132
- for (const cond of conditionNames) {
133
- const bucket = byCondition[cond];
134
- const summary: ConditionSummary = {
135
- pass_rate: stats(bucket.passRates, 3),
136
- duration_ms: stats(bucket.durations, 0),
137
- total_tokens: stats(bucket.tokens, 0),
138
- };
139
- if (bucket.hadSkillLoaded) {
140
- summary.skill_invocation_n = bucket.skillInvoked.length;
141
- summary.skill_invocation_rate =
142
- bucket.skillInvoked.length === 0
143
- ? null
144
- : round(
145
- bucket.skillInvoked.filter(Boolean).length /
146
- bucket.skillInvoked.length,
147
- 3,
148
- );
149
- }
150
- runSummary[cond] = summary;
151
- }
152
-
153
- const [a, b] = conditionNames;
154
- const delta = {
155
- direction: `${a} - ${b}`,
156
- pass_rate: round(
157
- runSummary[a].pass_rate.mean - runSummary[b].pass_rate.mean,
158
- 3,
159
- ),
160
- duration_ms: round(
161
- runSummary[a].duration_ms.mean - runSummary[b].duration_ms.mean,
162
- 0,
163
- ),
164
- total_tokens: round(
165
- runSummary[a].total_tokens.mean - runSummary[b].total_tokens.mean,
166
- 0,
167
- ),
168
- };
169
-
170
- const validityWarnings: string[] = [];
171
- for (const cond of conditionNames) {
172
- const s = runSummary[cond];
173
- if (s.skill_invocation_rate != null && s.skill_invocation_rate < 1) {
174
- validityWarnings.push(
175
- `condition '${cond}' had skill loaded but invocation rate ${(s.skill_invocation_rate * 100).toFixed(0)}% (${s.skill_invocation_n} runs checked) — substantive results may not reflect skill effectiveness.`,
176
- );
177
- }
178
- }
179
-
180
// Stray-write findings (from `evals:detect-stray-writes`, if it ran) taint a
// run the same way a missed skill invocation does: a subagent that edited the
// real repo or installed packages is no longer a clean data point.
const strayPath = join(iterationDir, "stray-writes.json");
if (existsSync(strayPath)) {
  try {
    const stray = JSON.parse(readFileSync(strayPath, "utf8")) as {
      runs?: Array<{
        eval_id: string;
        condition: string;
        violations?: unknown[];
      }>;
    };
    for (const r of stray.runs ?? []) {
      const n = r.violations?.length ?? 0;
      if (n > 0)
        validityWarnings.push(
          `${r.eval_id}/${r.condition} wrote ${n} file(s) outside its outputs dir — data point may be tainted (see stray-writes.json).`,
        );
    }
  } catch (err: unknown) {
    // A malformed report must not fail aggregation, but dropping it silently
    // would make a possibly tainted run look clean, so say the check was skipped.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `warn: could not read ${strayPath} (${reason}) — stray-write check skipped.`,
    );
  }
}
204
-
205
// Plugin-shadow findings (from the runner's build-time preflight, Claude Code)
// taint a run the same way a missed invocation does: a staged skill also served
// by an enabled plugin means subagents could discover both copies, so the
// with/without comparison may not reflect the staged skill alone.
const shadowPath = join(iterationDir, "plugin-shadow.json");
if (existsSync(shadowPath)) {
  try {
    const report = JSON.parse(
      readFileSync(shadowPath, "utf8"),
    ) as PluginShadowReport;
    for (const w of shadowValidityWarnings(report)) validityWarnings.push(w);
  } catch (err: unknown) {
    // A malformed report must not fail aggregation, but dropping it silently
    // would hide a possible plugin shadow, so say the check was skipped.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `warn: could not read ${shadowPath} (${reason}) — plugin-shadow check skipped.`,
    );
  }
}
220
-
221
- const benchmark = {
222
- generated: new Date().toISOString(),
223
- mode: conditions.mode,
224
- baseline: conditions.baseline,
225
- conditions_compared: [a, b],
226
- missing_gradings: missingGradings,
227
- validity_warnings: validityWarnings,
228
- run_summary: runSummary,
229
- delta,
230
- };
231
-
232
- const outPath = join(iterationDir, "benchmark.json");
233
- writeFileSync(outPath, `${JSON.stringify(benchmark, null, 2)}\n`);
234
- console.log(`Wrote ${outPath}`);
235
- if (missingGradings > 0)
236
- console.warn(
237
- `note: ${missingGradings} grading.json file(s) were missing — benchmark is incomplete.`,
238
- );
239
- for (const warning of validityWarnings) console.warn(`⚠ ${warning}`);
240
- if (validityWarnings.length === 0) {
241
- for (const cond of conditionNames) {
242
- const s = runSummary[cond];
243
- if (s.skill_invocation_rate === 1)
244
- console.log(
245
- `✓ ${cond}: skill invocation rate 100% (${s.skill_invocation_n} runs) — substantive results are valid.`,
246
- );
247
- }
248
- }
@@ -1,181 +0,0 @@
1
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
- import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join, resolve } from "node:path";
5
- import { detectRunContext } from "./context";
6
-
7
- const FIXTURE_ROOT = join(tmpdir(), `slow-powers-context-test-${process.pid}`);
8
-
9
/**
 * Build the path of a named fixture directory under `FIXTURE_ROOT`.
 *
 * @param name - Fixture directory name, one per test case.
 * @returns `FIXTURE_ROOT` joined with `name`; nothing is created on disk.
 */
function fixturePath(name: string): string {
  const location = join(FIXTURE_ROOT, name);
  return location;
}
12
-
13
/**
 * Create a `skill-dir` directory under `root` containing one sub-directory per
 * skill name, each holding a minimal `SKILL.md` with front matter.
 *
 * @param root - Parent directory for the fixture; created if missing.
 * @param skills - Skill names to materialise as sub-directories.
 * @returns Path of the created `skill-dir` directory.
 */
function makeSkillDir(root: string, skills: string[]): string {
  const skillDir = join(root, "skill-dir");
  mkdirSync(skillDir, { recursive: true });
  skills.forEach((skill) => {
    const skillPath = join(skillDir, skill);
    mkdirSync(skillPath, { recursive: true });
    const manifest = `---\nname: ${skill}\ndescription: ${skill} skill\n---\n\nbody\n`;
    writeFileSync(join(skillPath, "SKILL.md"), manifest);
  });
  return skillDir;
}
26
-
27
- beforeAll(() => {
28
- mkdirSync(FIXTURE_ROOT, { recursive: true });
29
- });
30
-
31
- afterAll(() => {
32
- rmSync(FIXTURE_ROOT, { recursive: true, force: true });
33
- });
34
-
35
- describe("detectRunContext", () => {
36
- test("dies when --skill-dir is missing", () => {
37
- expect(() => detectRunContext(["--skill", "foo"])).toThrow(/--skill-dir/);
38
- });
39
-
40
- test("dies when --skill is missing", () => {
41
- const root = fixturePath("missing-skill");
42
- const skillDir = makeSkillDir(root, ["foo"]);
43
- expect(() => detectRunContext(["--skill-dir", skillDir])).toThrow(
44
- /--skill/,
45
- );
46
- });
47
-
48
- test("dies when --skill-dir is not a directory", () => {
49
- expect(() =>
50
- detectRunContext([
51
- "--skill-dir",
52
- "/nonexistent/does-not-exist-12345",
53
- "--skill",
54
- "foo",
55
- ]),
56
- ).toThrow(/--skill-dir/);
57
- });
58
-
59
- test("dies when skill subdir does not exist", () => {
60
- const root = fixturePath("missing-subdir");
61
- const skillDir = makeSkillDir(root, ["foo"]);
62
- expect(() =>
63
- detectRunContext(["--skill-dir", skillDir, "--skill", "bar"]),
64
- ).toThrow(/skill not found/);
65
- });
66
-
67
- test("dies when --bootstrap path is passed but file does not exist", () => {
68
- const root = fixturePath("bad-bootstrap");
69
- const skillDir = makeSkillDir(root, ["foo"]);
70
- expect(() =>
71
- detectRunContext([
72
- "--skill-dir",
73
- skillDir,
74
- "--skill",
75
- "foo",
76
- "--bootstrap",
77
- "/nonexistent/no-bootstrap-12345.md",
78
- ]),
79
- ).toThrow(/--bootstrap/);
80
- });
81
-
82
- test("returns RunContext with absolute paths when --skill-dir and --skill are valid", () => {
83
- const root = fixturePath("happy-path");
84
- const skillDir = makeSkillDir(root, ["mr-review"]);
85
- const ctx = detectRunContext([
86
- "--skill-dir",
87
- skillDir,
88
- "--skill",
89
- "mr-review",
90
- ]);
91
- expect(ctx.skillDir).toBe(resolve(skillDir));
92
- expect(ctx.skillName).toBe("mr-review");
93
- expect(ctx.skillSubdir).toBe(resolve(skillDir, "mr-review"));
94
- expect(ctx.siblingSkillNames).toEqual([]);
95
- expect(ctx.bootstrapPath).toBeNull();
96
- expect(ctx.harness).toBe("claude-code");
97
- });
98
-
99
- test("enumerates siblings excluding the skill-under-test", () => {
100
- const root = fixturePath("siblings");
101
- const skillDir = makeSkillDir(root, ["alpha", "beta", "gamma"]);
102
- const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "beta"]);
103
- expect(ctx.siblingSkillNames.sort()).toEqual(["alpha", "gamma"]);
104
- });
105
-
106
- test("ignores entries in --skill-dir that do not have a SKILL.md", () => {
107
- const root = fixturePath("not-skills");
108
- const skillDir = makeSkillDir(root, ["real"]);
109
- mkdirSync(join(skillDir, "node_modules"), { recursive: true });
110
- mkdirSync(join(skillDir, "no-skill-md-here"), { recursive: true });
111
- writeFileSync(join(skillDir, "loose-file.txt"), "hello");
112
- const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "real"]);
113
- expect(ctx.siblingSkillNames).toEqual([]);
114
- });
115
-
116
- test("workspaceRoot defaults to <CWD>/skills-workspace when --workspace-dir is omitted", () => {
117
- const root = fixturePath("workspace-default");
118
- const skillDir = makeSkillDir(root, ["foo"]);
119
- const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
120
- expect(ctx.workspaceRoot).toBe(resolve(process.cwd(), "skills-workspace"));
121
- });
122
-
123
- test("workspaceRoot honors --workspace-dir override (resolved absolute)", () => {
124
- const root = fixturePath("workspace-override");
125
- const skillDir = makeSkillDir(root, ["foo"]);
126
- const customWs = join(root, "custom-ws");
127
- mkdirSync(customWs, { recursive: true });
128
- const ctx = detectRunContext([
129
- "--skill-dir",
130
- skillDir,
131
- "--skill",
132
- "foo",
133
- "--workspace-dir",
134
- customWs,
135
- ]);
136
- expect(ctx.workspaceRoot).toBe(resolve(customWs));
137
- });
138
-
139
- test("stageRoot defaults to CWD", () => {
140
- const root = fixturePath("stage-default");
141
- const skillDir = makeSkillDir(root, ["foo"]);
142
- const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
143
- expect(ctx.stageRoot).toBe(resolve(process.cwd()));
144
- });
145
-
146
- test("--bootstrap path is resolved absolute when file exists", () => {
147
- const root = fixturePath("bootstrap-ok");
148
- const skillDir = makeSkillDir(root, ["foo"]);
149
- const bootstrapPath = join(root, "my-bootstrap.md");
150
- writeFileSync(bootstrapPath, "BOOT");
151
- const ctx = detectRunContext([
152
- "--skill-dir",
153
- skillDir,
154
- "--skill",
155
- "foo",
156
- "--bootstrap",
157
- bootstrapPath,
158
- ]);
159
- expect(ctx.bootstrapPath).toBe(resolve(bootstrapPath));
160
- });
161
-
162
- test("unknown --harness value is rejected", () => {
163
- const root = fixturePath("harness-bad");
164
- const skillDir = makeSkillDir(root, ["foo"]);
165
- expect(() =>
166
- detectRunContext([
167
- "--skill-dir",
168
- skillDir,
169
- "--skill",
170
- "foo",
171
- "--harness",
172
- "vscode",
173
- ]),
174
- ).toThrow(/harness/);
175
- });
176
- });
177
-
178
- // Sanity: ensure existsSync helper from node:fs is what we expect
179
- test.skip("smoke: existsSync points at node:fs", () => {
180
- expect(typeof existsSync).toBe("function");
181
- });
@@ -1,90 +0,0 @@
1
- import { existsSync, readdirSync, statSync } from "node:fs";
2
- import { resolve } from "node:path";
3
-
4
/** Harness identifiers accepted by `--harness`. */
export type Harness = "claude-code";

/** Resolved settings for one runner invocation, derived from CLI flags. */
export type RunContext = {
  /** Absolute path of the directory given by `--skill-dir`. */
  skillDir: string;
  /** Name of the skill under test, exactly as given by `--skill`. */
  skillName: string;
  /** Absolute path of `<skillDir>/<skillName>`. */
  skillSubdir: string;
  /** Other entries of `skillDir` that contain a `SKILL.md`. */
  siblingSkillNames: string[];
  /** `--workspace-dir` resolved absolute, or `<cwd>/skills-workspace`. */
  workspaceRoot: string;
  /** The current working directory, resolved absolute. */
  stageRoot: string;
  /** `--bootstrap` resolved absolute, or `null` when the flag is omitted. */
  bootstrapPath: string | null;
  /** Selected harness; defaults to `"claude-code"`. */
  harness: Harness;
};

/**
 * Abort with an error.
 *
 * Throws rather than exiting so callers (and tests) can catch the failure.
 *
 * @throws Error always, carrying `msg`.
 */
function die(msg: string): never {
  throw new Error(msg);
}

/**
 * Look up the value following `--<name>` in `argv`.
 *
 * @param argv - Command-line tokens.
 * @param name - Flag name without the leading dashes.
 * @returns The token after the first occurrence of the flag, or `undefined`
 *   when the flag is absent.
 * @throws Error when the flag is present but the next token is missing or is
 *   itself a `--flag`.
 */
function flag(argv: string[], name: string): string | undefined {
  const i = argv.indexOf(`--${name}`);
  if (i === -1) return undefined;
  const v = argv[i + 1];
  if (v === undefined || v.startsWith("--")) {
    die(`flag --${name} requires a value`);
  }
  return v;
}

/**
 * Validate the runner's CLI flags and resolve them into a {@link RunContext}.
 *
 * Reads `--skill-dir` and `--skill` (both required) plus the optional
 * `--bootstrap`, `--workspace-dir` and `--harness`. Every returned path is
 * absolute.
 *
 * @param argv - Command-line tokens.
 * @returns The resolved run context.
 * @throws Error when a required flag is missing, `--skill-dir` is not a
 *   directory, `<skill-dir>/<skill>/SKILL.md` does not exist, the
 *   `--bootstrap` path does not exist, or `--harness` is not a supported value.
 */
export function detectRunContext(argv: string[]): RunContext {
  const skillDirRaw = flag(argv, "skill-dir");
  if (!skillDirRaw) die("missing required flag --skill-dir <path>");
  const skillDir = resolve(skillDirRaw);
  if (!existsSync(skillDir) || !statSync(skillDir).isDirectory()) {
    die(`--skill-dir is not a directory: ${skillDir}`);
  }

  const skillName = flag(argv, "skill");
  if (!skillName) die("missing required flag --skill <name>");

  const skillSubdir = resolve(skillDir, skillName);
  const skillMd = resolve(skillSubdir, "SKILL.md");
  if (!existsSync(skillMd)) {
    die(`skill not found: ${skillMd}`);
  }

  const bootstrapRaw = flag(argv, "bootstrap");
  let bootstrapPath: string | null = null;
  if (bootstrapRaw) {
    const resolved = resolve(bootstrapRaw);
    if (!existsSync(resolved)) {
      die(`--bootstrap file not found: ${resolved}`);
    }
    bootstrapPath = resolved;
  }

  const workspaceRaw = flag(argv, "workspace-dir");
  const workspaceRoot = workspaceRaw
    ? resolve(workspaceRaw)
    : resolve(process.cwd(), "skills-workspace");

  const stageRoot = resolve(process.cwd());

  const harnessRaw = flag(argv, "harness") ?? "claude-code";
  if (harnessRaw !== "claude-code") {
    die(`unknown --harness: ${harnessRaw}. Supported: claude-code`);
  }
  // The guard above narrows `harnessRaw` to the literal, so no assertion is needed.
  const harness: Harness = harnessRaw;

  // A sibling is any other entry holding a SKILL.md. That file can only exist
  // beneath a directory, so no separate stat of the entry is required — and
  // skipping it keeps a dangling symlink in the skill dir from throwing ENOENT.
  const siblingSkillNames = readdirSync(skillDir).filter(
    (entry) =>
      entry !== skillName && existsSync(resolve(skillDir, entry, "SKILL.md")),
  );

  return {
    skillDir,
    skillName,
    skillSubdir,
    siblingSkillNames,
    workspaceRoot,
    stageRoot,
    bootstrapPath,
    harness,
  };
}
@@ -1,103 +0,0 @@
1
- import { describe, expect, test } from "bun:test";
2
- import { join } from "node:path";
3
- import { detectStrayWrites } from "./detect-stray-writes";
4
-
5
- const OUTPUTS = "/work/iteration-1/eval-x/with_skill/outputs";
6
- const REPO = "/work/repo";
7
-
8
- describe("detectStrayWrites", () => {
9
- test("a Write inside the outputs dir is clean", () => {
10
- const findings = detectStrayWrites(
11
- [
12
- {
13
- name: "Write",
14
- args: { file_path: join(OUTPUTS, "answer.md") },
15
- ordinal: 0,
16
- },
17
- ],
18
- OUTPUTS,
19
- REPO,
20
- );
21
- expect(findings.violations).toHaveLength(0);
22
- expect(findings.warnings).toHaveLength(0);
23
- });
24
-
25
- test("a Write outside the outputs dir is a violation", () => {
26
- const findings = detectStrayWrites(
27
- [
28
- {
29
- name: "Write",
30
- args: { file_path: join(REPO, "runner/run.ts") },
31
- ordinal: 2,
32
- },
33
- ],
34
- OUTPUTS,
35
- REPO,
36
- );
37
- expect(findings.violations).toHaveLength(1);
38
- expect(findings.violations[0]).toMatchObject({
39
- tool: "Write",
40
- path: join(REPO, "runner/run.ts"),
41
- ordinal: 2,
42
- });
43
- });
44
-
45
- test("an Edit/MultiEdit/NotebookEdit outside outputs is a violation", () => {
46
- const findings = detectStrayWrites(
47
- [
48
- { name: "Edit", args: { file_path: "/etc/hosts" }, ordinal: 0 },
49
- {
50
- name: "NotebookEdit",
51
- args: { notebook_path: "/tmp/x.ipynb" },
52
- ordinal: 1,
53
- },
54
- ],
55
- OUTPUTS,
56
- REPO,
57
- );
58
- expect(findings.violations.map((v) => v.tool).sort()).toEqual([
59
- "Edit",
60
- "NotebookEdit",
61
- ]);
62
- });
63
-
64
- test("an install command is a warning", () => {
65
- const findings = detectStrayWrites(
66
- [{ name: "Bash", args: { command: "npm install left-pad" }, ordinal: 0 }],
67
- OUTPUTS,
68
- REPO,
69
- );
70
- expect(findings.warnings).toHaveLength(1);
71
- expect(findings.warnings[0].tool).toBe("Bash");
72
- expect(findings.warnings[0].reason).toMatch(/install/i);
73
- });
74
-
75
- test("a mutating Bash command scoped to the outputs dir is not flagged", () => {
76
- const findings = detectStrayWrites(
77
- [
78
- {
79
- name: "Bash",
80
- args: { command: `echo hi > ${join(OUTPUTS, "log.txt")}` },
81
- ordinal: 0,
82
- },
83
- ],
84
- OUTPUTS,
85
- REPO,
86
- );
87
- expect(findings.warnings).toHaveLength(0);
88
- });
89
-
90
- test("read-only tools are never flagged", () => {
91
- const findings = detectStrayWrites(
92
- [
93
- { name: "Read", args: { file_path: "/anywhere" }, ordinal: 0 },
94
- { name: "Grep", args: { pattern: "x" }, ordinal: 1 },
95
- { name: "Bash", args: { command: "ls -la /" }, ordinal: 2 },
96
- ],
97
- OUTPUTS,
98
- REPO,
99
- );
100
- expect(findings.violations).toHaveLength(0);
101
- expect(findings.warnings).toHaveLength(0);
102
- });
103
- });