@slowdini/slow-powers-opencode 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +34 -72
  2. package/bootstrap.md +1 -7
  3. package/opencode/plugins/slow-powers.js +1 -1
  4. package/package.json +14 -17
  5. package/skills/evaluating-skills/SKILL.md +90 -338
  6. package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
  7. package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
  8. package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
  9. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
  10. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
  11. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
  12. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
  13. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
  14. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
  15. package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
  16. package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
  17. package/skills/evaluating-skills/harness-details/claude.md +0 -194
  18. package/skills/evaluating-skills/harness-parity.md +0 -155
  19. package/skills/evaluating-skills/runner/README.md +0 -163
  20. package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
  21. package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
  22. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -485
  23. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -242
  24. package/skills/evaluating-skills/runner/aggregate.test.ts +0 -484
  25. package/skills/evaluating-skills/runner/aggregate.ts +0 -269
  26. package/skills/evaluating-skills/runner/context.test.ts +0 -181
  27. package/skills/evaluating-skills/runner/context.ts +0 -90
  28. package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -396
  29. package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -288
  30. package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
  31. package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
  32. package/skills/evaluating-skills/runner/grade.test.ts +0 -347
  33. package/skills/evaluating-skills/runner/grade.ts +0 -603
  34. package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
  35. package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
  36. package/skills/evaluating-skills/runner/guard/install.ts +0 -147
  37. package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -128
  38. package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
  39. package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
  40. package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
  41. package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
  42. package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -281
  43. package/skills/evaluating-skills/runner/promote-baseline.ts +0 -204
  44. package/skills/evaluating-skills/runner/record-runs.test.ts +0 -314
  45. package/skills/evaluating-skills/runner/record-runs.ts +0 -209
  46. package/skills/evaluating-skills/runner/run.test.ts +0 -1703
  47. package/skills/evaluating-skills/runner/run.ts +0 -1388
  48. package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -94
  49. package/skills/evaluating-skills/runner/types.ts +0 -121
  50. package/skills/evaluating-skills/runner/validate-all.ts +0 -54
  51. package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
  52. package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
  53. package/skills/evaluating-skills/runner/validate.test.ts +0 -56
  54. package/skills/evaluating-skills/runner/validate.ts +0 -21
  55. package/skills/evaluating-skills/runner/workspace-teardown.test.ts +0 -227
  56. package/skills/evaluating-skills/runner/workspace-teardown.ts +0 -136
  57. package/skills/evaluating-skills/schema/evals.schema.json +0 -105
  58. package/skills/evaluating-skills/schema/grading.schema.json +0 -84
  59. package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
  60. package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -80
  61. package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -69
  62. package/skills/evaluating-skills/templates/evals.json.example +0 -17
  63. package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
  64. package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
@@ -1,269 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { detectRunContext } from "./context";
5
- import {
6
- type PluginShadowReport,
7
- shadowValidityWarnings,
8
- } from "./plugin-shadow";
9
- import type { ConditionsRecord, GradingResult, TimingRecord } from "./types";
10
-
11
/**
 * Report a fatal CLI error and stop the process.
 *
 * The message is written to stderr with an `error: ` prefix and the process
 * exits with status 1, so this function never returns.
 *
 * @param msg - Human-readable description of what went wrong.
 */
function die(msg: string): never {
  const line = `error: ${msg}`;
  console.error(line);
  process.exit(1);
}
15
-
16
/**
 * Read the flags that belong to the aggregate command itself.
 *
 * Only `--iteration <value>` is parsed here; the same argv is also handed to
 * `detectRunContext`, which reads the remaining flags.
 *
 * @param argv - CLI arguments with the runtime and script path already removed.
 * @returns The iteration identifier exactly as typed on the command line.
 *
 * Exits the process (via `die`) when `--iteration` is absent or has no value.
 */
function parseArgs(argv: string[]) {
  const flag = (name: string): string | undefined => {
    const i = argv.indexOf(`--${name}`);
    if (i === -1) return undefined;
    const value = argv[i + 1];
    // A following `--other-flag` is not a value. Without this check
    // `--iteration --skill-dir x` would silently select an iteration named
    // "--skill-dir" and fail later with a misleading "not found" error.
    if (value === undefined || value.startsWith("--"))
      die(`flag --${name} requires a value`);
    return value;
  };
  const iteration = flag("iteration");
  if (!iteration) die("missing --iteration");
  return { iteration };
}
26
-
27
/** A list of numeric samples collected for one metric. */
type Series = number[];

/**
 * Arithmetic mean of a series.
 *
 * @param values - Samples to average.
 * @returns The average, or 0 for an empty series (rather than NaN).
 */
function mean(values: Series): number {
  const count = values.length;
  if (count === 0) return 0;
  let total = 0;
  for (const sample of values) total += sample;
  return total / count;
}
33
-
34
/**
 * Population standard deviation of a series (the squared deviations are
 * divided by the sample count, not count - 1).
 *
 * @param values - Samples to measure.
 * @param m - Mean of `values`; computed with `mean` when omitted.
 * @returns The standard deviation, or 0 when there are fewer than two samples.
 */
function stddev(values: Series, m = mean(values)): number {
  const count = values.length;
  if (count < 2) return 0;
  let squaredDeviations = 0;
  for (const sample of values) squaredDeviations += (sample - m) ** 2;
  return Math.sqrt(squaredDeviations / count);
}
39
-
40
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param n - Value to round.
 * @param dp - Number of decimal places to keep.
 * @returns `n` scaled by 10^dp, rounded with `Math.round`, then scaled back.
 */
function round(n: number, dp: number): number {
  const scale = Math.pow(10, dp);
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
44
-
45
/**
 * Summarise a series as rounded mean, rounded standard deviation and count.
 *
 * @param values - Samples to summarise.
 * @param dp - Decimal places applied to both `mean` and `stddev`.
 * @returns `{ mean, stddev, n }` where `n` is the number of samples.
 */
function stats(values: Series, dp: number) {
  const average = mean(values);
  // Reuse the unrounded mean so the deviation is not computed from a rounded value.
  const spread = stddev(values, average);
  return {
    mean: round(average, dp),
    stddev: round(spread, dp),
    n: values.length,
  };
}
53
-
54
// --- Resolve inputs -------------------------------------------------------
// Drop the runtime and script path; what remains are the user's flags.
const aggArgv = Bun.argv.slice(2);
const { iteration } = parseArgs(aggArgv);
const aggCtx = detectRunContext(aggArgv);

// Results live under <workspaceRoot>/<skillName>/iteration-<iteration>.
const iterationDir = join(
  aggCtx.workspaceRoot,
  aggCtx.skillName,
  `iteration-${iteration}`,
);
if (existsSync(iterationDir) === false) {
  die(`not found: ${iterationDir}`);
}

// conditions.json names the conditions being compared; exactly two are required.
const conditionsPath = join(iterationDir, "conditions.json");
if (existsSync(conditionsPath) === false) {
  die(`missing: ${conditionsPath}`);
}
const conditions: ConditionsRecord = JSON.parse(
  readFileSync(conditionsPath, "utf8"),
);
const conditionNames = conditions.conditions.map((condition) => condition.name);
if (conditionNames.length !== 2) {
  die(`expected exactly 2 conditions, got ${conditionNames.length}`);
}

// Every entry whose name starts with "eval-" is treated as one eval's results.
const evalDirs = readdirSync(iterationDir).filter((entry) =>
  entry.startsWith("eval-"),
);
if (evalDirs.length === 0) {
  die("no eval directories found");
}
75
-
76
/** Raw per-condition samples gathered while walking the eval directories. */
type Bucket = {
  /** One `summary.pass_rate` per graded run. */
  passRates: Series;
  /** One `duration_ms` per run whose timing record has a numeric value. */
  durations: Series;
  /** One `total_tokens` per run whose timing record has a numeric value. */
  tokens: Series;
  /** One entry per run whose grading reports `meta_summary.skill_invoked`. */
  skillInvoked: boolean[];
  /** True when the condition declares a truthy `skill_path`. */
  hadSkillLoaded: boolean;
};

// Start every declared condition with an empty bucket, and remember the skill
// path each condition was configured with.
const byCondition: Record<string, Bucket> = {};
const conditionSkillPaths = new Map<string, string | null>();
for (const { name, skill_path } of conditions.conditions) {
  conditionSkillPaths.set(name, skill_path);
  byCondition[name] = {
    passRates: [],
    durations: [],
    tokens: [],
    skillInvoked: [],
    hadSkillLoaded: Boolean(skill_path),
  };
}
95
-
96
// Number of <eval>/<condition> pairs that had no grading.json.
let missingGradings = 0;
// Timing provenance across all runs in the comparison. "completion-event"
// (the agent-captured default, also assumed when `source` is absent) and
// "transcript" (record-runs backfill, includes cache accounting) measure
// different things — a delta mixing them is comparing two metrics.
const timingSources = new Set<string>();

for (const evalDir of evalDirs) {
  for (const cond of conditionNames) {
    const condDir = join(iterationDir, evalDir, cond);

    // A run without a grading contributes nothing, not even its timing.
    const gradingPath = join(condDir, "grading.json");
    if (!existsSync(gradingPath)) {
      console.warn(`warn: missing grading for ${evalDir}/${cond}`);
      missingGradings += 1;
      continue;
    }

    const bucket = byCondition[cond];
    const grading: GradingResult = JSON.parse(
      readFileSync(gradingPath, "utf8"),
    );
    bucket.passRates.push(grading.summary.pass_rate);
    if (grading.meta_summary?.skill_invoked != null) {
      bucket.skillInvoked.push(grading.meta_summary.skill_invoked);
    }

    // Timing is optional; each numeric field is sampled independently.
    const timingPath = join(condDir, "timing.json");
    if (!existsSync(timingPath)) continue;
    const timing: TimingRecord = JSON.parse(readFileSync(timingPath, "utf8"));
    if (typeof timing.total_tokens === "number") {
      bucket.tokens.push(timing.total_tokens);
    }
    if (typeof timing.duration_ms === "number") {
      bucket.durations.push(timing.duration_ms);
    }
    // Only records that actually contributed a sample count towards provenance.
    if (
      typeof timing.total_tokens === "number" ||
      typeof timing.duration_ms === "number"
    ) {
      timingSources.add(timing.source ?? "completion-event");
    }
  }
}
132
-
133
/** Aggregated statistics for one condition, as written to benchmark.json. */
type ConditionSummary = {
  pass_rate: ReturnType<typeof stats>;
  duration_ms: ReturnType<typeof stats>;
  total_tokens: ReturnType<typeof stats>;
  /** Share of checked runs that invoked the skill; null when none were checked. */
  skill_invocation_rate?: number | null;
  /** Number of runs that reported whether the skill was invoked. */
  skill_invocation_n?: number;
};

const runSummary: Record<string, ConditionSummary> = {};
for (const cond of conditionNames) {
  const { passRates, durations, tokens, skillInvoked, hadSkillLoaded } =
    byCondition[cond];

  const summary: ConditionSummary = {
    pass_rate: stats(passRates, 3),
    duration_ms: stats(durations, 0),
    total_tokens: stats(tokens, 0),
  };

  // Invocation figures are only reported for conditions that had a skill loaded.
  if (hadSkillLoaded) {
    const checked = skillInvoked.length;
    const invoked = skillInvoked.filter(Boolean).length;
    summary.skill_invocation_n = checked;
    summary.skill_invocation_rate =
      checked === 0 ? null : round(invoked / checked, 3);
  }

  runSummary[cond] = summary;
}
162
-
163
// The delta is always "first condition minus second", in conditions.json order.
const [a, b] = conditionNames;

/** Difference of the two conditions' rounded means for one metric. */
const meanGap = (
  metric: "pass_rate" | "duration_ms" | "total_tokens",
  dp: number,
): number => round(runSummary[a][metric].mean - runSummary[b][metric].mean, dp);

const delta = {
  direction: `${a} - ${b}`,
  pass_rate: meanGap("pass_rate", 3),
  duration_ms: meanGap("duration_ms", 0),
  total_tokens: meanGap("total_tokens", 0),
};
179
-
180
// Human-readable reasons the comparison may not be trustworthy.
const validityWarnings: string[] = [];

// More than one timing source means the cost numbers are not like-for-like.
if (timingSources.size > 1) {
  validityWarnings.push(
    `runs mix timing sources (${[...timingSources].sort().join(", ")}) — transcript-derived totals include cache accounting, so the token/duration delta compares two different metrics. Re-record one side or read the delta as a rough signal only.`,
  );
}

// A condition that had the skill loaded but did not always invoke it.
for (const cond of conditionNames) {
  const { skill_invocation_rate: rate, skill_invocation_n: checked } =
    runSummary[cond];
  if (rate == null || rate >= 1) continue;
  validityWarnings.push(
    `condition '${cond}' had skill loaded but invocation rate ${(rate * 100).toFixed(0)}% (${checked} runs checked) — substantive results may not reflect skill effectiveness.`,
  );
}
194
-
195
// Stray-write findings (from `evals:detect-stray-writes`, if it ran) taint a
// run the same way a missed skill invocation does: a subagent that edited the
// real repo or installed packages is no longer a clean data point.
const strayPath = join(iterationDir, "stray-writes.json");
if (existsSync(strayPath)) {
  try {
    const stray = JSON.parse(readFileSync(strayPath, "utf8")) as {
      runs?: Array<{
        eval_id: string;
        condition: string;
        violations?: unknown[];
        live_source_reads?: unknown[];
      }>;
    };
    for (const r of stray.runs ?? []) {
      const n = r.violations?.length ?? 0;
      if (n > 0)
        validityWarnings.push(
          `${r.eval_id}/${r.condition} wrote ${n} file(s) outside its outputs dir — data point may be tainted (see stray-writes.json).`,
        );
      const reads = r.live_source_reads?.length ?? 0;
      if (reads > 0)
        validityWarnings.push(
          `${r.eval_id}/${r.condition} read the live skill source ${reads} time(s) instead of its staged copy — the arm may be contaminated (staged-slug resolution race; see stray-writes.json).`,
        );
    }
  } catch (err: unknown) {
    // Still best-effort: a malformed report must not fail aggregation. But say
    // so, otherwise the taint findings it held disappear without a trace.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`warn: ignoring unreadable ${strayPath}: ${reason}`);
  }
}
225
-
226
// Plugin-shadow findings (from the runner's build-time preflight, Claude Code)
// taint a run the same way a missed invocation does: a staged skill also served
// by an enabled plugin means subagents could discover both copies, so the
// with/without comparison may not reflect the staged skill alone.
const shadowPath = join(iterationDir, "plugin-shadow.json");
if (existsSync(shadowPath)) {
  try {
    const report = JSON.parse(
      readFileSync(shadowPath, "utf8"),
    ) as PluginShadowReport;
    for (const w of shadowValidityWarnings(report)) validityWarnings.push(w);
  } catch (err: unknown) {
    // Still best-effort: a malformed report must not fail aggregation. But say
    // so, otherwise the shadow findings it held disappear without a trace.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`warn: ignoring unreadable ${shadowPath}: ${reason}`);
  }
}
241
-
242
// --- Emit benchmark.json and a console summary ----------------------------
const benchmark = {
  generated: new Date().toISOString(),
  mode: conditions.mode,
  baseline: conditions.baseline,
  conditions_compared: [a, b],
  missing_gradings: missingGradings,
  validity_warnings: validityWarnings,
  run_summary: runSummary,
  delta,
};

const outPath = join(iterationDir, "benchmark.json");
const serialized = JSON.stringify(benchmark, null, 2);
writeFileSync(outPath, `${serialized}\n`);
console.log(`Wrote ${outPath}`);

if (missingGradings > 0) {
  console.warn(
    `note: ${missingGradings} grading.json file(s) were missing — benchmark is incomplete.`,
  );
}

validityWarnings.forEach((warning) => {
  console.warn(`⚠ ${warning}`);
});

// Only confirm validity when nothing at all was flagged.
if (validityWarnings.length === 0) {
  for (const cond of conditionNames) {
    const { skill_invocation_rate, skill_invocation_n } = runSummary[cond];
    if (skill_invocation_rate !== 1) continue;
    console.log(
      `✓ ${cond}: skill invocation rate 100% (${skill_invocation_n} runs) — substantive results are valid.`,
    );
  }
}
@@ -1,181 +0,0 @@
1
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
- import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join, resolve } from "node:path";
5
- import { detectRunContext } from "./context";
6
-
7
// Per-process scratch directory under the OS temp dir; the pid keeps
// concurrent test processes from sharing fixtures.
const FIXTURE_ROOT = join(tmpdir(), `slow-powers-context-test-${process.pid}`);

/**
 * Build the path of a named fixture inside the scratch directory.
 *
 * @param name - Fixture folder name, unique per test.
 * @returns `<FIXTURE_ROOT>/<name>`; nothing is created on disk.
 */
function fixturePath(name: string): string {
  const location = join(FIXTURE_ROOT, name);
  return location;
}
12
-
13
/**
 * Create a `skill-dir` folder under `root` containing one sub-folder per skill,
 * each with a minimal `SKILL.md` (front matter with name and description).
 *
 * @param root - Parent directory; created if it does not exist.
 * @param skills - Skill names to materialise; may be empty.
 * @returns Path of the created `skill-dir` folder.
 */
function makeSkillDir(root: string, skills: string[]): string {
  const dir = join(root, "skill-dir");
  mkdirSync(dir, { recursive: true });
  skills.forEach((name) => {
    const skillFolder = join(dir, name);
    mkdirSync(skillFolder, { recursive: true });
    const contents = `---\nname: ${name}\ndescription: ${name} skill\n---\n\nbody\n`;
    writeFileSync(join(skillFolder, "SKILL.md"), contents);
  });
  return dir;
}
26
-
27
// Create the scratch directory once before any test runs...
beforeAll(function createFixtureRoot() {
  mkdirSync(FIXTURE_ROOT, { recursive: true });
});

// ...and remove it, with everything the tests wrote, once they have finished.
afterAll(function removeFixtureRoot() {
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
});
34
-
35
// Behavioural tests for detectRunContext: flag validation first, then the
// shape of the returned context. Each test builds its own fixture folder.
describe("detectRunContext", () => {
  test("dies when --skill-dir is missing", () => {
    expect(() => detectRunContext(["--skill", "foo"])).toThrow(/--skill-dir/);
  });

  test("dies when --skill is missing", () => {
    const root = fixturePath("missing-skill");
    const skillDir = makeSkillDir(root, ["foo"]);
    // Match the --skill message specifically: a bare /--skill/ would also
    // match any --skill-dir error and hide a failure for the wrong reason.
    expect(() => detectRunContext(["--skill-dir", skillDir])).toThrow(
      /--skill <name>/,
    );
  });

  test("dies when --skill-dir is not a directory", () => {
    expect(() =>
      detectRunContext([
        "--skill-dir",
        "/nonexistent/does-not-exist-12345",
        "--skill",
        "foo",
      ]),
    ).toThrow(/--skill-dir/);
  });

  test("dies when skill subdir does not exist", () => {
    const root = fixturePath("missing-subdir");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext(["--skill-dir", skillDir, "--skill", "bar"]),
    ).toThrow(/skill not found/);
  });

  test("dies when --bootstrap path is passed but file does not exist", () => {
    const root = fixturePath("bad-bootstrap");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext([
        "--skill-dir",
        skillDir,
        "--skill",
        "foo",
        "--bootstrap",
        "/nonexistent/no-bootstrap-12345.md",
      ]),
    ).toThrow(/--bootstrap/);
  });

  test("returns RunContext with absolute paths when --skill-dir and --skill are valid", () => {
    const root = fixturePath("happy-path");
    const skillDir = makeSkillDir(root, ["mr-review"]);
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "mr-review",
    ]);
    expect(ctx.skillDir).toBe(resolve(skillDir));
    expect(ctx.skillName).toBe("mr-review");
    expect(ctx.skillSubdir).toBe(resolve(skillDir, "mr-review"));
    expect(ctx.siblingSkillNames).toEqual([]);
    expect(ctx.bootstrapPath).toBeNull();
    expect(ctx.harness).toBe("claude-code");
  });

  test("enumerates siblings excluding the skill-under-test", () => {
    const root = fixturePath("siblings");
    const skillDir = makeSkillDir(root, ["alpha", "beta", "gamma"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "beta"]);
    // Sort a copy: directory order is unspecified, and the returned array
    // should not be mutated by the assertion.
    expect([...ctx.siblingSkillNames].sort()).toEqual(["alpha", "gamma"]);
  });

  test("ignores entries in --skill-dir that do not have a SKILL.md", () => {
    const root = fixturePath("not-skills");
    const skillDir = makeSkillDir(root, ["real"]);
    mkdirSync(join(skillDir, "node_modules"), { recursive: true });
    mkdirSync(join(skillDir, "no-skill-md-here"), { recursive: true });
    writeFileSync(join(skillDir, "loose-file.txt"), "hello");
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "real"]);
    expect(ctx.siblingSkillNames).toEqual([]);
  });

  test("workspaceRoot defaults to <CWD>/skills-workspace when --workspace-dir is omitted", () => {
    const root = fixturePath("workspace-default");
    const skillDir = makeSkillDir(root, ["foo"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
    expect(ctx.workspaceRoot).toBe(resolve(process.cwd(), "skills-workspace"));
  });

  test("workspaceRoot honors --workspace-dir override (resolved absolute)", () => {
    const root = fixturePath("workspace-override");
    const skillDir = makeSkillDir(root, ["foo"]);
    const customWs = join(root, "custom-ws");
    mkdirSync(customWs, { recursive: true });
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "foo",
      "--workspace-dir",
      customWs,
    ]);
    expect(ctx.workspaceRoot).toBe(resolve(customWs));
  });

  test("stageRoot defaults to CWD", () => {
    const root = fixturePath("stage-default");
    const skillDir = makeSkillDir(root, ["foo"]);
    const ctx = detectRunContext(["--skill-dir", skillDir, "--skill", "foo"]);
    expect(ctx.stageRoot).toBe(resolve(process.cwd()));
  });

  test("--bootstrap path is resolved absolute when file exists", () => {
    const root = fixturePath("bootstrap-ok");
    const skillDir = makeSkillDir(root, ["foo"]);
    const bootstrapPath = join(root, "my-bootstrap.md");
    writeFileSync(bootstrapPath, "BOOT");
    const ctx = detectRunContext([
      "--skill-dir",
      skillDir,
      "--skill",
      "foo",
      "--bootstrap",
      bootstrapPath,
    ]);
    expect(ctx.bootstrapPath).toBe(resolve(bootstrapPath));
  });

  test("unknown --harness value is rejected", () => {
    const root = fixturePath("harness-bad");
    const skillDir = makeSkillDir(root, ["foo"]);
    expect(() =>
      detectRunContext([
        "--skill-dir",
        skillDir,
        "--skill",
        "foo",
        "--harness",
        "vscode",
      ]),
    ).toThrow(/harness/);
  });
});
177
-
178
// Skipped sanity check: the `existsSync` imported from node:fs is a function.
test.skip("smoke: existsSync points at node:fs", () => {
  const kind = typeof existsSync;
  expect(kind).toBe("function");
});
@@ -1,90 +0,0 @@
1
- import { existsSync, readdirSync, statSync } from "node:fs";
2
- import { resolve } from "node:path";
3
-
4
/** Harness identifiers accepted by `--harness`; only one is supported. */
export type Harness = "claude-code";

/** Everything `detectRunContext` resolves from the command line. */
export type RunContext = {
  /** Absolute path given by `--skill-dir`. */
  skillDir: string;
  /** Value of `--skill`: the skill under test. */
  skillName: string;
  /** Absolute path `<skillDir>/<skillName>`; it contains a `SKILL.md`. */
  skillSubdir: string;
  /** Other entries of `skillDir` that contain a `SKILL.md`. */
  siblingSkillNames: string[];
  /** `--workspace-dir` resolved, or `<cwd>/skills-workspace` when omitted. */
  workspaceRoot: string;
  /** The current working directory, resolved. */
  stageRoot: string;
  /** Absolute path given by `--bootstrap`, or null when the flag is absent. */
  bootstrapPath: string | null;
  /** Harness selected with `--harness` (defaults to "claude-code"). */
  harness: Harness;
};
16
-
17
/**
 * Abort with an error the caller can catch (this module throws; it does not
 * exit the process).
 *
 * @param msg - Message carried by the thrown `Error`.
 * @throws Error always.
 */
function die(msg: string): never {
  const failure = new Error(msg);
  throw failure;
}
20
-
21
/**
 * Look up the value following `--<name>` in an argument list.
 *
 * @param argv - Arguments to search; the first occurrence of the flag wins.
 * @param name - Flag name without the leading dashes.
 * @returns The token after the flag, or `undefined` when the flag is absent.
 * @throws Error when the flag is present but is the last token or is
 *   followed by another `--flag`.
 */
function flag(argv: string[], name: string): string | undefined {
  const position = argv.indexOf(`--${name}`);
  if (position < 0) return undefined;

  const value = argv[position + 1];
  if (value === undefined || value.startsWith("--")) {
    die(`flag --${name} requires a value`);
  }
  return value;
}
30
-
31
/**
 * Resolve and validate the run configuration from CLI arguments.
 *
 * Recognised flags: `--skill-dir <path>` and `--skill <name>` (both required),
 * `--bootstrap <file>`, `--workspace-dir <path>` and `--harness <name>`
 * (optional). Paths are resolved against the current working directory.
 *
 * @param argv - CLI arguments to inspect.
 * @returns The resolved context; every path in it is absolute.
 * @throws Error when a required flag is missing or has no value, when
 *   `--skill-dir` is not a directory, when `<skill-dir>/<skill>/SKILL.md` does
 *   not exist, when `--bootstrap` points at a missing file, or when
 *   `--harness` is anything other than "claude-code".
 */
export function detectRunContext(argv: string[]): RunContext {
  const skillDirRaw = flag(argv, "skill-dir");
  if (!skillDirRaw) die("missing required flag --skill-dir <path>");
  const skillDir = resolve(skillDirRaw);
  if (!existsSync(skillDir) || !statSync(skillDir).isDirectory()) {
    die(`--skill-dir is not a directory: ${skillDir}`);
  }

  const skillName = flag(argv, "skill");
  if (!skillName) die("missing required flag --skill <name>");

  // A skill is identified by the SKILL.md inside its folder.
  const skillSubdir = resolve(skillDir, skillName);
  const skillMd = resolve(skillSubdir, "SKILL.md");
  if (!existsSync(skillMd)) {
    die(`skill not found: ${skillMd}`);
  }

  const bootstrapRaw = flag(argv, "bootstrap");
  let bootstrapPath: string | null = null;
  if (bootstrapRaw) {
    const resolved = resolve(bootstrapRaw);
    if (!existsSync(resolved)) {
      die(`--bootstrap file not found: ${resolved}`);
    }
    bootstrapPath = resolved;
  }

  const workspaceRaw = flag(argv, "workspace-dir");
  const workspaceRoot = workspaceRaw
    ? resolve(workspaceRaw)
    : resolve(process.cwd(), "skills-workspace");

  const stageRoot = resolve(process.cwd());

  const harnessRaw = flag(argv, "harness") ?? "claude-code";
  if (harnessRaw !== "claude-code") {
    die(`unknown --harness: ${harnessRaw}. Supported: claude-code`);
  }
  const harness = harnessRaw as Harness;

  // Siblings are the other entries that hold a SKILL.md. Probing only with
  // existsSync is deliberate: `<entry>/SKILL.md` can exist only if `<entry>`
  // resolves to a directory, and unlike statSync it does not throw on a
  // dangling symlink or an entry removed mid-scan, which used to abort the run.
  const siblingSkillNames: string[] = [];
  for (const entry of readdirSync(skillDir)) {
    if (entry === skillName) continue;
    if (!existsSync(resolve(skillDir, entry, "SKILL.md"))) continue;
    siblingSkillNames.push(entry);
  }

  return {
    skillDir,
    skillName,
    skillSubdir,
    siblingSkillNames,
    workspaceRoot,
    stageRoot,
    bootstrapPath,
    harness,
  };
}