@slowdini/slow-powers-opencode 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85):
  1. package/README.md +34 -72
  2. package/bootstrap.md +1 -7
  3. package/opencode/plugins/slow-powers.js +69 -5
  4. package/package.json +14 -17
  5. package/skills/evaluating-skills/SKILL.md +90 -338
  6. package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
  7. package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
  8. package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
  9. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
  10. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
  11. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
  12. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
  13. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
  14. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
  15. package/skills/hardening-plans/SKILL.md +29 -7
  16. package/skills/hardening-plans/evals/baseline/BASELINE.md +11 -6
  17. package/skills/hardening-plans/evals/baseline/NOTES.md +72 -58
  18. package/skills/hardening-plans/evals/baseline/benchmark.json +25 -25
  19. package/skills/hardening-plans/evals/baseline/grading/concrete-todo-app-plan__new_skill.json +2 -2
  20. package/skills/hardening-plans/evals/baseline/grading/concrete-todo-app-plan__old_skill.json +2 -2
  21. package/skills/hardening-plans/evals/baseline/grading/docs-refactor-plan-mode__new_skill.json +39 -0
  22. package/skills/hardening-plans/evals/baseline/grading/docs-refactor-plan-mode__old_skill.json +39 -0
  23. package/skills/hardening-plans/evals/baseline/grading/oauth-task-breakdown-cold__new_skill.json +39 -0
  24. package/skills/hardening-plans/evals/baseline/grading/oauth-task-breakdown-cold__old_skill.json +39 -0
  25. package/skills/hardening-plans/evals/baseline/grading/research-plan-no-required-skill__new_skill.json +32 -0
  26. package/skills/hardening-plans/evals/baseline/grading/research-plan-no-required-skill__old_skill.json +32 -0
  27. package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app-adversarial__new_skill.json +39 -0
  28. package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app-adversarial__old_skill.json +39 -0
  29. package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app__new_skill.json +39 -0
  30. package/skills/hardening-plans/evals/baseline/grading/seeded-plan-mode-todo-app__old_skill.json +39 -0
  31. package/skills/hardening-plans/evals/baseline/grading/seeded-review-catches-defects__new_skill.json +3 -3
  32. package/skills/hardening-plans/evals/baseline/grading/seeded-review-catches-defects__old_skill.json +8 -8
  33. package/skills/hardening-plans/evals/baseline/grading/structural-refactor-cold__new_skill.json +39 -0
  34. package/skills/hardening-plans/evals/baseline/grading/structural-refactor-cold__old_skill.json +39 -0
  35. package/skills/hardening-plans/evals/evals.json +46 -0
  36. package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
  37. package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
  38. package/skills/evaluating-skills/harness-details/claude.md +0 -194
  39. package/skills/evaluating-skills/harness-parity.md +0 -155
  40. package/skills/evaluating-skills/runner/README.md +0 -163
  41. package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
  42. package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
  43. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -485
  44. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -242
  45. package/skills/evaluating-skills/runner/aggregate.test.ts +0 -484
  46. package/skills/evaluating-skills/runner/aggregate.ts +0 -269
  47. package/skills/evaluating-skills/runner/context.test.ts +0 -181
  48. package/skills/evaluating-skills/runner/context.ts +0 -90
  49. package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -396
  50. package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -288
  51. package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
  52. package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
  53. package/skills/evaluating-skills/runner/grade.test.ts +0 -347
  54. package/skills/evaluating-skills/runner/grade.ts +0 -603
  55. package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
  56. package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
  57. package/skills/evaluating-skills/runner/guard/install.ts +0 -147
  58. package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -128
  59. package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
  60. package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
  61. package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
  62. package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
  63. package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -281
  64. package/skills/evaluating-skills/runner/promote-baseline.ts +0 -204
  65. package/skills/evaluating-skills/runner/record-runs.test.ts +0 -314
  66. package/skills/evaluating-skills/runner/record-runs.ts +0 -209
  67. package/skills/evaluating-skills/runner/run.test.ts +0 -1703
  68. package/skills/evaluating-skills/runner/run.ts +0 -1388
  69. package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -94
  70. package/skills/evaluating-skills/runner/types.ts +0 -121
  71. package/skills/evaluating-skills/runner/validate-all.ts +0 -54
  72. package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
  73. package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
  74. package/skills/evaluating-skills/runner/validate.test.ts +0 -56
  75. package/skills/evaluating-skills/runner/validate.ts +0 -21
  76. package/skills/evaluating-skills/runner/workspace-teardown.test.ts +0 -227
  77. package/skills/evaluating-skills/runner/workspace-teardown.ts +0 -136
  78. package/skills/evaluating-skills/schema/evals.schema.json +0 -105
  79. package/skills/evaluating-skills/schema/grading.schema.json +0 -84
  80. package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
  81. package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -80
  82. package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -69
  83. package/skills/evaluating-skills/templates/evals.json.example +0 -17
  84. package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
  85. package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
@@ -1,269 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { detectRunContext } from "./context";
5
- import {
6
- type PluginShadowReport,
7
- shadowValidityWarnings,
8
- } from "./plugin-shadow";
9
- import type { ConditionsRecord, GradingResult, TimingRecord } from "./types";
10
-
11
/**
 * Reports a fatal CLI error and stops the script.
 *
 * Writes `error: <msg>` to stderr, then exits the process with status 1.
 *
 * @param msg Human-readable description of what went wrong.
 */
function die(msg: string): never {
  const line = `error: ${msg}`;
  console.error(line);
  process.exit(1);
}
15
-
16
/**
 * Extracts the flags this script consumes itself from the CLI arguments.
 *
 * Only `--iteration <value>` is read here. A flag that is present but has no
 * value — because it is the last token, or because the next token is another
 * `--flag` — is treated as missing, so `--iteration --skill foo` is rejected
 * instead of silently using "--skill" as the iteration.
 *
 * @param argv CLI arguments without the runtime/script prefix.
 * @returns The iteration identifier as given on the command line.
 */
function parseArgs(argv: string[]) {
  const flag = (name: string): string | undefined => {
    const i = argv.indexOf(`--${name}`);
    if (i === -1) return undefined;
    const value = argv[i + 1];
    // The next token being a flag means the value was omitted.
    if (value === undefined || value.startsWith("--")) return undefined;
    return value;
  };
  const iteration = flag("iteration");
  if (!iteration) die("missing --iteration");
  return { iteration };
}
26
-
27
/** A list of numeric samples for one metric. */
type Series = number[];

/**
 * Arithmetic mean of a series.
 *
 * @param values Samples to average.
 * @returns The mean, or 0 for an empty series (never NaN).
 */
function mean(values: Series): number {
  if (values.length === 0) return 0;
  let total = 0;
  for (const sample of values) total += sample;
  return total / values.length;
}
33
-
34
/**
 * Population standard deviation (divides by the sample count, not count - 1).
 *
 * @param values Samples to measure.
 * @param m Precomputed mean of `values`; defaults to `mean(values)`.
 * @returns The standard deviation, or 0 when there are fewer than two samples.
 */
function stddev(values: Series, m = mean(values)): number {
  if (values.length < 2) return 0;
  let squaredDeviations = 0;
  for (const sample of values) squaredDeviations += (sample - m) ** 2;
  return Math.sqrt(squaredDeviations / values.length);
}
39
-
40
/**
 * Rounds a number to a fixed count of decimal places.
 *
 * @param n Value to round.
 * @param dp Number of decimal places to keep.
 * @returns `n` scaled by 10^dp, rounded with `Math.round`, and scaled back.
 */
function round(n: number, dp: number): number {
  const scale = Math.pow(10, dp);
  return Math.round(n * scale) / scale;
}
44
-
45
/**
 * Summarizes a series as rounded mean, rounded standard deviation and count.
 *
 * @param values Samples to summarize.
 * @param dp Decimal places applied to both `mean` and `stddev`.
 * @returns `{ mean, stddev, n }`, where `n` is the number of samples.
 */
function stats(values: Series, dp: number) {
  const average = mean(values);
  const spread = stddev(values, average);
  return {
    mean: round(average, dp),
    stddev: round(spread, dp),
    n: values.length,
  };
}
53
-
54
// --- CLI arguments and iteration layout -------------------------------------
const aggArgv = Bun.argv.slice(2);
const { iteration } = parseArgs(aggArgv);
const aggCtx = detectRunContext(aggArgv);
// An iteration lives at <workspaceRoot>/<skillName>/iteration-<iteration>.
const iterationDir = join(
  aggCtx.workspaceRoot,
  aggCtx.skillName,
  `iteration-${iteration}`,
);
if (!existsSync(iterationDir)) die(`not found: ${iterationDir}`);

// --- Conditions under comparison --------------------------------------------
const conditionsPath = join(iterationDir, "conditions.json");
if (!existsSync(conditionsPath)) die(`missing: ${conditionsPath}`);
const conditions: ConditionsRecord = JSON.parse(
  readFileSync(conditionsPath, "utf8"),
);
// JSON.parse is unchecked; fail with a readable message instead of a
// TypeError when the file does not carry a `conditions` array.
if (!Array.isArray(conditions?.conditions))
  die(`malformed: ${conditionsPath} has no "conditions" array`);
const conditionNames = conditions.conditions.map((c) => c.name);
if (conditionNames.length !== 2)
  die(`expected exactly 2 conditions, got ${conditionNames.length}`);

// --- Eval directories --------------------------------------------------------
// Only directories (or symlinks, which may point at one) count as evals. A
// stray file such as `eval-notes.md` must not be reported as a missing grading.
const evalDirs = readdirSync(iterationDir, { withFileTypes: true })
  .filter(
    (entry) =>
      entry.name.startsWith("eval-") &&
      (entry.isDirectory() || entry.isSymbolicLink()),
  )
  .map((entry) => entry.name);
if (evalDirs.length === 0) die("no eval directories found");
75
-
76
/** Raw per-condition samples collected across all eval directories. */
type Bucket = {
  passRates: Series;
  durations: Series;
  tokens: Series;
  skillInvoked: boolean[];
  hadSkillLoaded: boolean;
};

// One empty bucket per declared condition. `hadSkillLoaded` records whether
// the condition was configured with a skill path at all.
const byCondition: Record<string, Bucket> = {};
const conditionSkillPaths = new Map<string, string | null>();
for (const { name, skill_path } of conditions.conditions) {
  conditionSkillPaths.set(name, skill_path);
  const emptyBucket: Bucket = {
    passRates: [],
    durations: [],
    tokens: [],
    skillInvoked: [],
    hadSkillLoaded: Boolean(skill_path),
  };
  byCondition[name] = emptyBucket;
}
95
-
96
// Number of eval/condition pairs that contributed no usable grading: either
// grading.json is absent or it carries no numeric summary.pass_rate.
let missingGradings = 0;
// Timing provenance across all runs in the comparison. "completion-event"
// (the agent-captured default, also assumed when `source` is absent) and
// "transcript" (record-runs backfill, includes cache accounting) measure
// different things — a delta mixing them is comparing two metrics.
const timingSources = new Set<string>();
for (const evalDir of evalDirs) {
  for (const cond of conditionNames) {
    const condDir = join(iterationDir, evalDir, cond);
    const gradingPath = join(condDir, "grading.json");
    const timingPath = join(condDir, "timing.json");
    if (!existsSync(gradingPath)) {
      console.warn(`warn: missing grading for ${evalDir}/${cond}`);
      missingGradings++;
      continue;
    }
    const grading: GradingResult = JSON.parse(
      readFileSync(gradingPath, "utf8"),
    );
    // A non-numeric pass rate would turn the condition mean into NaN, which
    // is serialized as null. Count the run as missing instead.
    const passRate = grading?.summary?.pass_rate;
    if (typeof passRate !== "number" || !Number.isFinite(passRate)) {
      console.warn(
        `warn: grading for ${evalDir}/${cond} has no numeric summary.pass_rate`,
      );
      missingGradings++;
      continue;
    }
    const bucket = byCondition[cond];
    bucket.passRates.push(passRate);
    if (grading.meta_summary?.skill_invoked != null)
      bucket.skillInvoked.push(grading.meta_summary.skill_invoked);

    // Timing is optional. An absent or unreadable timing.json only removes
    // this run from the duration/token series; it does not abort the run.
    if (!existsSync(timingPath)) continue;
    let parsedTiming: unknown;
    try {
      parsedTiming = JSON.parse(readFileSync(timingPath, "utf8"));
    } catch {
      console.warn(`warn: unreadable timing for ${evalDir}/${cond} — skipped`);
      continue;
    }
    if (parsedTiming === null || typeof parsedTiming !== "object") continue;
    const timing = parsedTiming as TimingRecord;
    if (typeof timing.total_tokens === "number")
      bucket.tokens.push(timing.total_tokens);
    if (typeof timing.duration_ms === "number")
      bucket.durations.push(timing.duration_ms);
    if (
      typeof timing.total_tokens === "number" ||
      typeof timing.duration_ms === "number"
    )
      timingSources.add(timing.source ?? "completion-event");
  }
}
132
-
133
/** Aggregated statistics for one condition, as written to benchmark.json. */
type ConditionSummary = {
  pass_rate: ReturnType<typeof stats>;
  duration_ms: ReturnType<typeof stats>;
  total_tokens: ReturnType<typeof stats>;
  skill_invocation_rate?: number | null;
  skill_invocation_n?: number;
};

// Collapse each condition's raw samples into summary statistics. Invocation
// fields are only emitted for conditions that had a skill loaded. The rate is
// null when no run reported whether the skill was invoked.
const runSummary: Record<string, ConditionSummary> = {};
for (const cond of conditionNames) {
  const { passRates, durations, tokens, skillInvoked, hadSkillLoaded } =
    byCondition[cond];
  const summary: ConditionSummary = {
    pass_rate: stats(passRates, 3),
    duration_ms: stats(durations, 0),
    total_tokens: stats(tokens, 0),
  };
  if (hadSkillLoaded) {
    const checked = skillInvoked.length;
    const invoked = skillInvoked.filter(Boolean).length;
    summary.skill_invocation_n = checked;
    summary.skill_invocation_rate =
      checked === 0 ? null : round(invoked / checked, 3);
  }
  runSummary[cond] = summary;
}

// Difference of rounded means, first condition minus second.
const [a, b] = conditionNames;
const meanDifference = (
  metric: "pass_rate" | "duration_ms" | "total_tokens",
  dp: number,
): number => round(runSummary[a][metric].mean - runSummary[b][metric].mean, dp);
const delta = {
  direction: `${a} - ${b}`,
  pass_rate: meanDifference("pass_rate", 3),
  duration_ms: meanDifference("duration_ms", 0),
  total_tokens: meanDifference("total_tokens", 0),
};
179
-
180
// Human-readable reasons why the comparison may not mean what it appears to.
const validityWarnings: string[] = [];
if (timingSources.size > 1) {
  validityWarnings.push(
    `runs mix timing sources (${[...timingSources].sort().join(", ")}) — transcript-derived totals include cache accounting, so the token/duration delta compares two different metrics. Re-record one side or read the delta as a rough signal only.`,
  );
}
for (const cond of conditionNames) {
  const s = runSummary[cond];
  // A loaded skill that was not invoked on every checked run weakens the
  // link between the results and the skill itself.
  if (s.skill_invocation_rate != null && s.skill_invocation_rate < 1) {
    validityWarnings.push(
      `condition '${cond}' had skill loaded but invocation rate ${(s.skill_invocation_rate * 100).toFixed(0)}% (${s.skill_invocation_n} runs checked) — substantive results may not reflect skill effectiveness.`,
    );
  }
}
// A condition without samples has its mean reported as 0, so the delta would
// look like a real difference. Warn when only one side of a metric is empty.
for (const metric of ["pass_rate", "duration_ms", "total_tokens"] as const) {
  const emptyConditions = conditionNames.filter(
    (cond) => runSummary[cond][metric].n === 0,
  );
  if (
    emptyConditions.length === 0 ||
    emptyConditions.length === conditionNames.length
  )
    continue;
  for (const cond of emptyConditions) {
    validityWarnings.push(
      `condition '${cond}' has no ${metric} samples — its mean is reported as 0, so the ${metric} delta is not a real difference.`,
    );
  }
}
194
-
195
// Stray-write findings (from `evals:detect-stray-writes`, if it ran) taint a
// run the same way a missed skill invocation does: a subagent that edited the
// real repo or installed packages is no longer a clean data point.
const strayPath = join(iterationDir, "stray-writes.json");
if (existsSync(strayPath)) {
  try {
    const stray = JSON.parse(readFileSync(strayPath, "utf8")) as {
      runs?: Array<{
        eval_id: string;
        condition: string;
        violations?: unknown[];
        live_source_reads?: unknown[];
      }>;
    };
    for (const r of stray.runs ?? []) {
      const n = r.violations?.length ?? 0;
      if (n > 0)
        validityWarnings.push(
          `${r.eval_id}/${r.condition} wrote ${n} file(s) outside its outputs dir — data point may be tainted (see stray-writes.json).`,
        );
      const reads = r.live_source_reads?.length ?? 0;
      if (reads > 0)
        validityWarnings.push(
          `${r.eval_id}/${r.condition} read the live skill source ${reads} time(s) instead of its staged copy — the arm may be contaminated (staged-slug resolution race; see stray-writes.json).`,
        );
    }
  } catch {
    // A malformed report must not fail aggregation, but it must not pass
    // unnoticed either: without it the run cannot be declared clean.
    validityWarnings.push(
      "stray-writes.json could not be parsed — stray-write checks were skipped, so tainted runs may be undetected.",
    );
  }
}
225
-
226
// Plugin-shadow findings (from the runner's build-time preflight, Claude Code)
// taint a run the same way a missed invocation does: a staged skill also served
// by an enabled plugin means subagents could discover both copies, so the
// with/without comparison may not reflect the staged skill alone.
const shadowPath = join(iterationDir, "plugin-shadow.json");
if (existsSync(shadowPath)) {
  try {
    const report = JSON.parse(
      readFileSync(shadowPath, "utf8"),
    ) as PluginShadowReport;
    for (const w of shadowValidityWarnings(report)) validityWarnings.push(w);
  } catch {
    // A malformed report must not fail aggregation, but it must not pass
    // unnoticed either: without it shadowing cannot be ruled out.
    validityWarnings.push(
      "plugin-shadow.json could not be parsed — plugin-shadow checks were skipped, so a shadowed skill may be undetected.",
    );
  }
}
241
-
242
// Assemble the report in the key order consumers see in benchmark.json.
const benchmark = {
  generated: new Date().toISOString(),
  mode: conditions.mode,
  baseline: conditions.baseline,
  conditions_compared: [a, b],
  missing_gradings: missingGradings,
  validity_warnings: validityWarnings,
  run_summary: runSummary,
  delta,
};

// Persist as pretty-printed JSON with a trailing newline.
const outPath = join(iterationDir, "benchmark.json");
const serialized = JSON.stringify(benchmark, null, 2);
writeFileSync(outPath, `${serialized}\n`);
console.log(`Wrote ${outPath}`);

if (missingGradings > 0) {
  console.warn(
    `note: ${missingGradings} grading.json file(s) were missing — benchmark is incomplete.`,
  );
}
validityWarnings.forEach((warning) => {
  console.warn(`⚠ ${warning}`);
});
// The all-clear is only printed when nothing was flagged, and only for
// conditions whose skill was invoked on every checked run.
if (validityWarnings.length === 0) {
  conditionNames
    .filter((cond) => runSummary[cond].skill_invocation_rate === 1)
    .forEach((cond) => {
      const s = runSummary[cond];
      console.log(
        `✓ ${cond}: skill invocation rate 100% (${s.skill_invocation_n} runs) — substantive results are valid.`,
      );
    });
}
@@ -1,181 +0,0 @@
1
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
- import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join, resolve } from "node:path";
5
- import { detectRunContext } from "./context";
6
-
7
// Scratch root for this test file. The process id is part of the name, so
// separate test processes use separate directories.
const FIXTURE_ROOT = join(tmpdir(), `slow-powers-context-test-${process.pid}`);

/**
 * Path of a named fixture directory under the scratch root.
 *
 * @param name Directory name, conventionally one per test.
 */
function fixturePath(name: string): string {
  const segments = [FIXTURE_ROOT, name];
  return join(...segments);
}
12
-
13
/**
 * Builds `<root>/skill-dir` containing one sub-directory per skill name, each
 * holding a minimal `SKILL.md` with `name` and `description` frontmatter.
 *
 * @param root Parent directory; created if it does not exist.
 * @param skills Names of the skill sub-directories to create.
 * @returns Path of the created `skill-dir`.
 */
function makeSkillDir(root: string, skills: string[]): string {
  const dir = join(root, "skill-dir");
  mkdirSync(dir, { recursive: true });
  skills.forEach((name) => {
    const skillFolder = join(dir, name);
    mkdirSync(skillFolder, { recursive: true });
    const manifest = join(skillFolder, "SKILL.md");
    writeFileSync(
      manifest,
      `---\nname: ${name}\ndescription: ${name} skill\n---\n\nbody\n`,
    );
  });
  return dir;
}
26
-
27
// Create the scratch root once before the first test runs.
beforeAll(function createFixtureRoot() {
  mkdirSync(FIXTURE_ROOT, { recursive: true });
});

// Remove the scratch root and everything the tests created under it.
// `force` makes this a no-op if the directory is already gone.
afterAll(function removeFixtureRoot() {
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
});
34
-
35
// Behavior of `detectRunContext`: required flags, path resolution, defaults,
// and sibling-skill discovery. The error-message patterns are specific enough
// to tell a `--skill` failure apart from a `--skill-dir` failure.
describe("detectRunContext", () => {
  test("dies when --skill-dir is missing", () => {
    expect(() => detectRunContext(["--skill", "foo"])).toThrow(
      /missing required flag --skill-dir/,
    );
  });

  test("dies when --skill is missing", () => {
    const root = fixturePath("missing-skill");
    const skillDir = makeSkillDir(root, ["foo"]);
    // A bare /--skill/ would also match every "--skill-dir ..." error.
    expect(() => detectRunContext(["--skill-dir", skillDir])).toThrow(
      /missing required flag --skill </,
    );
  });

  test("dies when a flag is given without a value", () => {
    // "--skill" directly after "--skill-dir" is a flag, not a value.
    expect(() => detectRunContext(["--skill-dir", "--skill", "foo"])).toThrow(
      /--skill-dir requires a value/,
    );
  });

  test("dies when --skill-dir is not a directory", () => {
    expect(() =>
      detectRunContext([
        "--skill-dir",
        "/nonexistent/does-not-exist-12345",
        "--skill",
        "foo",
      ]),
    ).toThrow(/--skill-dir is not a directory/);
  });

  test("dies when skill subdir does not exist", () => {
    const root = fixturePath("missing-subdir");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext(["--skill-dir", skillDir, "--skill", "bar"]),
    ).toThrow(/skill not found/);
  });

  test("dies when --bootstrap path is passed but file does not exist", () => {
    const root = fixturePath("bad-bootstrap");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext([
        "--skill-dir",
        skillDir,
        "--skill",
        "foo",
        "--bootstrap",
        "/nonexistent/no-bootstrap-12345.md",
      ]),
    ).toThrow(/--bootstrap file not found/);
  });

  test("returns RunContext with absolute paths when --skill-dir and --skill are valid", () => {
    const root = fixturePath("happy-path");
    const skillDir = makeSkillDir(root, ["mr-review"]);
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "mr-review",
    ]);
    expect(ctx.skillDir).toBe(resolve(skillDir));
    expect(ctx.skillName).toBe("mr-review");
    expect(ctx.skillSubdir).toBe(resolve(skillDir, "mr-review"));
    expect(ctx.siblingSkillNames).toEqual([]);
    expect(ctx.bootstrapPath).toBeNull();
    expect(ctx.harness).toBe("claude-code");
  });

  test("enumerates siblings excluding the skill-under-test", () => {
    const root = fixturePath("siblings");
    const skillDir = makeSkillDir(root, ["alpha", "beta", "gamma"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "beta"]);
    // Sort a copy: directory listing order is not guaranteed, and the
    // returned context should not be mutated by the assertion.
    expect([...ctx.siblingSkillNames].sort()).toEqual(["alpha", "gamma"]);
  });

  test("ignores entries in --skill-dir that do not have a SKILL.md", () => {
    const root = fixturePath("not-skills");
    const skillDir = makeSkillDir(root, ["real"]);
    mkdirSync(join(skillDir, "node_modules"), { recursive: true });
    mkdirSync(join(skillDir, "no-skill-md-here"), { recursive: true });
    writeFileSync(join(skillDir, "loose-file.txt"), "hello");
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "real"]);
    expect(ctx.siblingSkillNames).toEqual([]);
  });

  test("workspaceRoot defaults to <CWD>/skills-workspace when --workspace-dir is omitted", () => {
    const root = fixturePath("workspace-default");
    const skillDir = makeSkillDir(root, ["foo"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
    expect(ctx.workspaceRoot).toBe(resolve(process.cwd(), "skills-workspace"));
  });

  test("workspaceRoot honors --workspace-dir override (resolved absolute)", () => {
    const root = fixturePath("workspace-override");
    const skillDir = makeSkillDir(root, ["foo"]);
    const customWs = join(root, "custom-ws");
    mkdirSync(customWs, { recursive: true });
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "foo",
      "--workspace-dir",
      customWs,
    ]);
    expect(ctx.workspaceRoot).toBe(resolve(customWs));
  });

  test("stageRoot defaults to CWD", () => {
    const root = fixturePath("stage-default");
    const skillDir = makeSkillDir(root, ["foo"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
    expect(ctx.stageRoot).toBe(resolve(process.cwd()));
  });

  test("--bootstrap path is resolved absolute when file exists", () => {
    const root = fixturePath("bootstrap-ok");
    const skillDir = makeSkillDir(root, ["foo"]);
    const bootstrapPath = join(root, "my-bootstrap.md");
    writeFileSync(bootstrapPath, "BOOT");
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "foo",
      "--bootstrap",
      bootstrapPath,
    ]);
    expect(ctx.bootstrapPath).toBe(resolve(bootstrapPath));
  });

  test("unknown --harness value is rejected", () => {
    const root = fixturePath("harness-bad");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext([
        "--skill-dir",
        skillDir,
        "--skill",
        "foo",
        "--harness",
        "vscode",
      ]),
    ).toThrow(/unknown --harness/);
  });
});
177
-
178
// Sanity check (skipped): `existsSync` imported from node:fs is a function.
test.skip("smoke: existsSync points at node:fs", () => {
  const kind = typeof existsSync;
  expect(kind).toBe("function");
});
@@ -1,90 +0,0 @@
1
- import { existsSync, readdirSync, statSync } from "node:fs";
2
- import { resolve } from "node:path";
3
-
4
/** Agent harnesses the runner can target; `--harness` accepts only these. */
export type Harness = "claude-code";

/** Resolved inputs for one runner invocation, derived from CLI flags. */
export type RunContext = {
  /** Absolute path given by `--skill-dir`. */
  skillDir: string;
  /** Value of `--skill`: the skill under test. */
  skillName: string;
  /** Absolute path `<skillDir>/<skillName>`. */
  skillSubdir: string;
  /** Other entries of `skillDir` that contain a `SKILL.md`. */
  siblingSkillNames: string[];
  /** `--workspace-dir` resolved, or `<cwd>/skills-workspace` when omitted. */
  workspaceRoot: string;
  /** The current working directory, resolved. */
  stageRoot: string;
  /** `--bootstrap` resolved, or null when the flag is omitted. */
  bootstrapPath: string | null;
  /** Value of `--harness`; defaults to "claude-code". */
  harness: Harness;
};
16
-
17
/**
 * Aborts with an `Error` carrying the given message.
 *
 * @param msg Description of the invalid input.
 * @throws Error always.
 */
function die(msg: string): never {
  const failure = new Error(msg);
  throw failure;
}
20
-
21
/**
 * Looks up the value following `--<name>` in an argument list.
 *
 * @param argv CLI arguments to search; the first occurrence of the flag wins.
 * @param name Flag name without the leading dashes.
 * @returns The token after the flag, or undefined when the flag is absent.
 * @throws Error when the flag is present but is the last token or is followed
 *   by a token that starts with `--`.
 */
function flag(argv: string[], name: string): string | undefined {
  const position = argv.indexOf(`--${name}`);
  if (position < 0) return undefined;
  const value = argv[position + 1];
  if (value !== undefined && !value.startsWith("--")) return value;
  return die(`flag --${name} requires a value`);
}
30
-
31
/**
 * Builds the run context from CLI arguments, validating every path it names.
 *
 * Flags read: `--skill-dir` (required), `--skill` (required), `--bootstrap`,
 * `--workspace-dir` and `--harness` (optional).
 *
 * @param argv CLI arguments without the runtime/script prefix.
 * @returns The resolved context; every path in it is absolute.
 * @throws Error when a required flag is missing, a flag has no value,
 *   `--skill-dir` is not a directory, `<skill-dir>/<skill>/SKILL.md` or the
 *   `--bootstrap` path does not exist, or `--harness` is not supported.
 */
export function detectRunContext(argv: string[]): RunContext {
  const skillDirRaw = flag(argv, "skill-dir");
  if (!skillDirRaw) die("missing required flag --skill-dir <path>");
  const skillDir = resolve(skillDirRaw);
  if (!existsSync(skillDir) || !statSync(skillDir).isDirectory()) {
    die(`--skill-dir is not a directory: ${skillDir}`);
  }

  const skillName = flag(argv, "skill");
  if (!skillName) die("missing required flag --skill <name>");

  // A skill is identified by the presence of SKILL.md in its sub-directory.
  const skillSubdir = resolve(skillDir, skillName);
  const skillMd = resolve(skillSubdir, "SKILL.md");
  if (!existsSync(skillMd)) {
    die(`skill not found: ${skillMd}`);
  }

  const bootstrapRaw = flag(argv, "bootstrap");
  let bootstrapPath: string | null = null;
  if (bootstrapRaw) {
    const resolved = resolve(bootstrapRaw);
    if (!existsSync(resolved)) {
      die(`--bootstrap file not found: ${resolved}`);
    }
    bootstrapPath = resolved;
  }

  const workspaceRaw = flag(argv, "workspace-dir");
  const workspaceRoot = workspaceRaw
    ? resolve(workspaceRaw)
    : resolve(process.cwd(), "skills-workspace");

  const stageRoot = resolve(process.cwd());

  const harnessRaw = flag(argv, "harness") ?? "claude-code";
  if (harnessRaw !== "claude-code") {
    die(`unknown --harness: ${harnessRaw}. Supported: claude-code`);
  }
  const harness = harnessRaw as Harness;

  // Siblings are the other entries that contain a SKILL.md. That file can
  // only exist beneath a directory, so no separate stat call is needed. This
  // also keeps a dangling symlink or unreadable entry from aborting the scan.
  const siblingSkillNames = readdirSync(skillDir).filter(
    (entry) =>
      entry !== skillName && existsSync(resolve(skillDir, entry, "SKILL.md")),
  );

  return {
    skillDir,
    skillName,
    skillSubdir,
    siblingSkillNames,
    workspaceRoot,
    stageRoot,
    bootstrapPath,
    harness,
  };
}