@slowdini/slow-powers-opencode 0.1.1 → 0.1.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -13,11 +13,14 @@ import {
13
13
  buildDispatchTask,
14
14
  cleanupStagedSkills,
15
15
  redactSkillFromBootstrap,
16
+ registerStagedSkillForCleanup,
16
17
  STAGED_SIBLING_MANIFEST,
17
18
  STAGED_SKILL_PREFIX,
19
+ selectEvals,
18
20
  stageSiblingSkills,
19
21
  stageSkillForCC,
20
22
  } from "./run";
23
+ import type { Eval } from "./types";
21
24
 
22
25
  const FIXTURE_ROOT = join(tmpdir(), `slow-powers-run-test-${process.pid}`);
23
26
 
@@ -29,6 +32,49 @@ afterAll(() => {
29
32
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
30
33
  });
31
34
 
35
+ describe("selectEvals", () => {
36
+ const mkEvals = (...ids: string[]): Eval[] =>
37
+ ids.map((id) => ({ id, prompt: `p-${id}`, expected_output: `o-${id}` }));
38
+
39
+ test("returns the full list unchanged when neither flag is set", () => {
40
+ const evals = mkEvals("a", "b", "c");
41
+ expect(selectEvals(evals, {})).toEqual(evals);
42
+ });
43
+
44
+ test("--only keeps just the named ids, preserving config order", () => {
45
+ const evals = mkEvals("a", "b", "c");
46
+ const got = selectEvals(evals, { only: ["c", "a"] });
47
+ expect(got.map((e) => e.id)).toEqual(["a", "c"]);
48
+ });
49
+
50
+ test("--skip drops the named ids", () => {
51
+ const evals = mkEvals("a", "b", "c");
52
+ const got = selectEvals(evals, { skip: ["b"] });
53
+ expect(got.map((e) => e.id)).toEqual(["a", "c"]);
54
+ });
55
+
56
+ test("throws on an unknown id, listing the unknown and the available ids", () => {
57
+ const evals = mkEvals("a", "b");
58
+ expect(() => selectEvals(evals, { only: ["a", "nope"] })).toThrow(
59
+ /unknown eval id\(s\): nope\. Available ids: a, b/,
60
+ );
61
+ });
62
+
63
+ test("throws when both --only and --skip are given", () => {
64
+ const evals = mkEvals("a", "b");
65
+ expect(() => selectEvals(evals, { only: ["a"], skip: ["b"] })).toThrow(
66
+ /only one of --only \/ --skip/,
67
+ );
68
+ });
69
+
70
+ test("throws when a flag resolves to an empty id list", () => {
71
+ const evals = mkEvals("a", "b");
72
+ expect(() => selectEvals(evals, { only: [] })).toThrow(
73
+ /at least one eval id/,
74
+ );
75
+ });
76
+ });
77
+
32
78
  describe("stageSkillForCC", () => {
33
79
  test("writes SKILL.md to <repoRoot>/.claude/skills/<slug>/SKILL.md and returns the slug", () => {
34
80
  const repoRoot = join(FIXTURE_ROOT, "stage-basic");
@@ -74,6 +120,92 @@ describe("stageSkillForCC", () => {
74
120
  const stagedPath = join(repoRoot, ".claude", "skills", slug, "SKILL.md");
75
121
  expect(readFileSync(stagedPath, "utf8")).toBe("second");
76
122
  });
123
+
124
+ test("stageNameOverride stages under the verbatim name instead of the eval slug", () => {
125
+ const repoRoot = join(FIXTURE_ROOT, "stage-override");
126
+ mkdirSync(repoRoot, { recursive: true });
127
+ const content =
128
+ "---\nname: example\ndescription: example skill\n---\n\nbody\n";
129
+
130
+ const slug = stageSkillForCC({
131
+ content,
132
+ iteration: 2,
133
+ condition: "with_skill",
134
+ skillName: "verification-before-completion",
135
+ repoRoot,
136
+ stageNameOverride: "verification-before-completion",
137
+ });
138
+
139
+ expect(slug).toBe("verification-before-completion");
140
+ const stagedPath = join(repoRoot, ".claude", "skills", slug, "SKILL.md");
141
+ expect(existsSync(stagedPath)).toBe(true);
142
+ expect(readFileSync(stagedPath, "utf8")).toBe(content);
143
+ });
144
+ });
145
+
146
+ describe("registerStagedSkillForCleanup", () => {
147
+ test("appends the custom dir to the manifest so cleanup removes it", () => {
148
+ const root = join(FIXTURE_ROOT, "register-cleanup");
149
+ const skillsDir = join(root, ".claude", "skills");
150
+ mkdirSync(skillsDir, { recursive: true });
151
+ // A sibling manifest already exists (written by stageSiblingSkills).
152
+ writeFileSync(
153
+ join(skillsDir, STAGED_SIBLING_MANIFEST),
154
+ `${JSON.stringify(
155
+ {
156
+ created_at: "x",
157
+ staged_under_test: "verification-before-completion",
158
+ created_entries: [{ name: "sibling-a", preexisting: false }],
159
+ },
160
+ null,
161
+ 2,
162
+ )}\n`,
163
+ );
164
+ const customDir = join(skillsDir, "verification-before-completion");
165
+ mkdirSync(customDir, { recursive: true });
166
+ writeFileSync(join(customDir, "SKILL.md"), "staged");
167
+
168
+ registerStagedSkillForCleanup(root, "verification-before-completion");
169
+
170
+ const manifest = JSON.parse(
171
+ readFileSync(join(skillsDir, STAGED_SIBLING_MANIFEST), "utf8"),
172
+ ) as { created_entries: Array<{ name: string }> };
173
+ expect(manifest.created_entries.map((e) => e.name).sort()).toEqual([
174
+ "sibling-a",
175
+ "verification-before-completion",
176
+ ]);
177
+
178
+ cleanupStagedSkills(root);
179
+ expect(existsSync(customDir)).toBe(false);
180
+ });
181
+
182
+ test("is idempotent — registering the same name twice does not duplicate it", () => {
183
+ const root = join(FIXTURE_ROOT, "register-idempotent");
184
+ const skillsDir = join(root, ".claude", "skills");
185
+ mkdirSync(skillsDir, { recursive: true });
186
+ writeFileSync(
187
+ join(skillsDir, STAGED_SIBLING_MANIFEST),
188
+ `${JSON.stringify(
189
+ {
190
+ created_at: "x",
191
+ staged_under_test: "foo",
192
+ created_entries: [],
193
+ },
194
+ null,
195
+ 2,
196
+ )}\n`,
197
+ );
198
+
199
+ registerStagedSkillForCleanup(root, "foo-staged");
200
+ registerStagedSkillForCleanup(root, "foo-staged");
201
+
202
+ const manifest = JSON.parse(
203
+ readFileSync(join(skillsDir, STAGED_SIBLING_MANIFEST), "utf8"),
204
+ ) as { created_entries: Array<{ name: string }> };
205
+ expect(
206
+ manifest.created_entries.filter((e) => e.name === "foo-staged").length,
207
+ ).toBe(1);
208
+ });
77
209
  });
78
210
 
79
211
  describe("cleanupStagedSkills", () => {
@@ -302,7 +434,7 @@ describe("buildDispatchTask bootstrap injection", () => {
302
434
  expect(task.dispatch_prompt).not.toContain("<session-start-context>");
303
435
  });
304
436
 
305
- test("emits <session-start-context> with a staged-skills inventory even when bootstrapContent is null", () => {
437
+ test("emits a harness-native available-skills block (no <session-start-context>) when bootstrapContent is null", () => {
306
438
  const task = buildDispatchTask({
307
439
  ...baseOpts,
308
440
  bootstrapContent: null,
@@ -310,15 +442,20 @@ describe("buildDispatchTask bootstrap injection", () => {
310
442
  { name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
311
443
  ],
312
444
  });
313
- expect(task.dispatch_prompt).toContain("<session-start-context>");
314
- expect(task.dispatch_prompt).toContain("staged and discoverable");
315
- expect(task.dispatch_prompt).toContain("* `foo`");
316
- expect(task.dispatch_prompt).toContain("*Trigger:* the foo skill");
445
+ // Without a bootstrap, there is no SessionStart block — only the skills list.
446
+ expect(task.dispatch_prompt).not.toContain("<session-start-context>");
447
+ expect(task.dispatch_prompt).toContain(
448
+ "The following skills are available for use with the Skill tool:",
449
+ );
450
+ expect(task.dispatch_prompt).toContain("- foo: the foo skill");
451
+ // The eval-flavored wording and custom format are gone.
452
+ expect(task.dispatch_prompt).not.toContain("staged and discoverable");
453
+ expect(task.dispatch_prompt).not.toContain("*Trigger:*");
317
454
  // No product framing should appear without a bootstrap file.
318
455
  expect(task.dispatch_prompt).not.toContain("loaded at session start");
319
456
  });
320
457
 
321
- test("staged-skills inventory follows the verbatim bootstrap content when both are present", () => {
458
+ test("renders the available-skills block as its own section, outside <session-start-context>, after the verbatim bootstrap", () => {
322
459
  const task = buildDispatchTask({
323
460
  ...baseOpts,
324
461
  bootstrapContent: "BOOT-LOADED",
@@ -326,10 +463,18 @@ describe("buildDispatchTask bootstrap injection", () => {
326
463
  { name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
327
464
  ],
328
465
  });
329
- const bootIdx = task.dispatch_prompt.indexOf("BOOT-LOADED");
330
- const invIdx = task.dispatch_prompt.indexOf("staged and discoverable");
466
+ const prompt = task.dispatch_prompt;
467
+ // The skills list is a separate block, not bundled inside the SessionStart
468
+ // context (which carries bootstrap content only).
469
+ const sscEnd = prompt.indexOf("</session-start-context>");
470
+ const listIdx = prompt.indexOf(
471
+ "The following skills are available for use with the Skill tool:",
472
+ );
473
+ const bootIdx = prompt.indexOf("BOOT-LOADED");
474
+ expect(sscEnd).toBeGreaterThan(-1);
331
475
  expect(bootIdx).toBeGreaterThan(-1);
332
- expect(invIdx).toBeGreaterThan(bootIdx);
476
+ expect(bootIdx).toBeLessThan(sscEnd);
477
+ expect(listIdx).toBeGreaterThan(sscEnd);
333
478
  });
334
479
 
335
480
  test("sets dispatch_prompt_path to dispatch-prompt.txt under the condition dir", () => {
@@ -388,25 +533,41 @@ describe("buildDispatchTask bootstrap injection", () => {
388
533
  expect(withSkill.dispatch_prompt).toContain("test-driven-development");
389
534
  });
390
535
 
391
- test("references staged slug in skill block for claude-code", () => {
536
+ test("names the staged slug for disambiguation without instructing invocation", () => {
392
537
  const task = buildDispatchTask({
393
538
  ...baseOpts,
394
539
  bootstrapContent: "BOOT-LOADED",
395
540
  });
541
+ // The slug is still surfaced so a deliberate invocation targets the staged
542
+ // version and the meta-check can find it — but we no longer assert a plugin
543
+ // is "loaded" or tell the agent to prefer the slug over the bare name, which
544
+ // invited it to hunt for a global copy (issue #144 global-plugin leakage).
396
545
  expect(task.dispatch_prompt).toContain(
397
546
  "slow-powers-eval-1-with_skill__foo",
398
547
  );
548
+ // ...but the over-promoting invoke imperative (issue #119) is gone, so
549
+ // invocation reflects the skill's own triggering rather than an order.
550
+ expect(task.dispatch_prompt).not.toContain("invoke that slug");
551
+ expect(task.dispatch_prompt).not.toContain("if the skill applies");
552
+ expect(task.dispatch_prompt).not.toContain("under evaluation");
553
+ // ...and the leakage-inviting framing is gone (issue #144): no claim that a
554
+ // plugin is loaded, no "use the slug rather than the bare name" contrast.
555
+ expect(task.dispatch_prompt).not.toContain("plugin loaded");
556
+ expect(task.dispatch_prompt).not.toContain("rather than the bare name");
399
557
  });
400
558
 
401
- test("without-skill condition under realistic env reflects 'this skill removed, others available' rather than 'no skill loaded'", () => {
559
+ test("without-skill condition under realistic env carries no eval-announcing skill commentary", () => {
402
560
  const task = buildDispatchTask({
403
561
  ...baseOpts,
404
562
  skillPath: null,
405
563
  stagedSkillSlug: null,
406
564
  bootstrapContent: "BOOT-LOADED",
407
565
  });
566
+ // The arm stays silent about the absent skill: the available-skills block
567
+ // already omits it, so nothing announces that this is an eval control arm.
408
568
  expect(task.dispatch_prompt).not.toContain("No skill is loaded");
409
- expect(task.dispatch_prompt.toLowerCase()).toContain("not available");
569
+ expect(task.dispatch_prompt.toLowerCase()).not.toContain("not available");
570
+ expect(task.dispatch_prompt).not.toContain("under evaluation");
410
571
  });
411
572
 
412
573
  test("without-skill condition without bootstrap (e.g. --no-stage) keeps the legacy 'No skill is loaded' wording", () => {
@@ -420,10 +581,87 @@ describe("buildDispatchTask bootstrap injection", () => {
420
581
  });
421
582
  });
422
583
 
584
+ describe("buildDispatchTask plan-mode injection", () => {
585
+ const baseOpts = {
586
+ evalId: "e1",
587
+ condition: "with_skill",
588
+ skillPath: null,
589
+ stagedSkillSlug: "slow-powers-eval-1-with_skill__foo" as string | null,
590
+ userPrompt: "BUILD-THE-TODO-APP",
591
+ fixtures: [] as string[],
592
+ outputsDir: "/tmp/out",
593
+ condDir: "/tmp/cond",
594
+ skillName: "foo",
595
+ bootstrapContent: null as string | null,
596
+ availableSkills: [
597
+ { name: "foo", path: "/x/foo/SKILL.md", description: "the foo skill" },
598
+ ] as { name: string; path: string; description: string }[],
599
+ };
600
+
601
+ test("omits the plan-mode block when planModeContent is null/absent", () => {
602
+ const task = buildDispatchTask({ ...baseOpts });
603
+ expect(task.dispatch_prompt).not.toContain("<system-reminder>");
604
+ const withNull = buildDispatchTask({ ...baseOpts, planModeContent: null });
605
+ expect(withNull.dispatch_prompt).not.toContain("<system-reminder>");
606
+ });
607
+
608
+ test("injects the rendered plan-mode block when planModeContent is provided", () => {
609
+ const task = buildDispatchTask({
610
+ ...baseOpts,
611
+ planModeContent: "Plan mode is active. PLAN-RAIL-MARKER.",
612
+ });
613
+ expect(task.dispatch_prompt).toContain("<system-reminder>");
614
+ expect(task.dispatch_prompt).toContain("PLAN-RAIL-MARKER.");
615
+ expect(task.dispatch_prompt).toContain("</system-reminder>");
616
+ });
617
+
618
+ test("places the plan-mode block after the available-skills block and before the user request", () => {
619
+ const prompt = buildDispatchTask({
620
+ ...baseOpts,
621
+ planModeContent: "PLAN-RAIL-MARKER",
622
+ }).dispatch_prompt;
623
+ const skillsIdx = prompt.indexOf(
624
+ "The following skills are available for use with the Skill tool:",
625
+ );
626
+ const planIdx = prompt.indexOf("<system-reminder>");
627
+ const promptIdx = prompt.indexOf("BUILD-THE-TODO-APP");
628
+ expect(skillsIdx).toBeGreaterThan(-1);
629
+ expect(planIdx).toBeGreaterThan(skillsIdx);
630
+ expect(promptIdx).toBeGreaterThan(planIdx);
631
+ });
632
+
633
+ test("injects an identical plan-mode block in the with- and without-skill arms", () => {
634
+ const planModeContent = "Plan mode is active. PLAN-RAIL-MARKER.";
635
+ const rendered =
636
+ "<system-reminder>\nPlan mode is active. PLAN-RAIL-MARKER.\n</system-reminder>";
637
+ const withSkill = buildDispatchTask({
638
+ ...baseOpts,
639
+ condition: "with_skill",
640
+ stagedSkillSlug: "slow-powers-eval-1-with_skill__foo",
641
+ planModeContent,
642
+ });
643
+ const withoutSkill = buildDispatchTask({
644
+ ...baseOpts,
645
+ condition: "without_skill",
646
+ skillPath: null,
647
+ stagedSkillSlug: null,
648
+ availableSkills: [],
649
+ planModeContent,
650
+ });
651
+ expect(withSkill.dispatch_prompt).toContain(rendered);
652
+ expect(withoutSkill.dispatch_prompt).toContain(rendered);
653
+ });
654
+ });
655
+
423
656
  describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
424
657
  const RUN_TS = join(import.meta.dir, "run.ts");
425
658
 
426
- function setup(name: string): { skillDir: string; cwd: string } {
659
+ function setup(
660
+ name: string,
661
+ evals: Eval[] = [
662
+ { id: "e1", prompt: "review this MR", expected_output: "a review" },
663
+ ],
664
+ ): { skillDir: string; cwd: string } {
427
665
  const root = join(FIXTURE_ROOT, name);
428
666
  const skillDir = join(root, "skill-dir");
429
667
  const skillSub = join(skillDir, "mr-review");
@@ -434,12 +672,7 @@ describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
434
672
  );
435
673
  writeFileSync(
436
674
  join(skillSub, "evals", "evals.json"),
437
- JSON.stringify({
438
- skill_name: "mr-review",
439
- evals: [
440
- { id: "e1", prompt: "review this MR", expected_output: "a review" },
441
- ],
442
- }),
675
+ JSON.stringify({ skill_name: "mr-review", evals }),
443
676
  );
444
677
  const cwd = join(root, "work");
445
678
  mkdirSync(cwd, { recursive: true });
@@ -486,6 +719,168 @@ describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
486
719
  expect(entries).toEqual(["slow-powers-eval-1-with_skill__mr-review"]);
487
720
  });
488
721
 
722
+ test("--plan-mode injects the resolved profile into every dispatch and records plan_mode in dispatch.json", () => {
723
+ const { skillDir, cwd } = setup("usermode-plan-mode");
724
+ const res = runCli(
725
+ [
726
+ "--skill-dir",
727
+ skillDir,
728
+ "--skill",
729
+ "mr-review",
730
+ "--mode",
731
+ "new-skill",
732
+ "--plan-mode",
733
+ "--dry-run",
734
+ ],
735
+ cwd,
736
+ );
737
+ expect(res.exitCode).toBe(0);
738
+
739
+ const iterationDir = join(
740
+ cwd,
741
+ "skills-workspace",
742
+ "mr-review",
743
+ "iteration-1",
744
+ );
745
+ const dispatch = JSON.parse(
746
+ readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
747
+ ) as {
748
+ plan_mode: boolean;
749
+ tasks: Array<{ condition: string; dispatch_prompt_path: string }>;
750
+ };
751
+ expect(dispatch.plan_mode).toBe(true);
752
+
753
+ // Both arms carry the same harness-injected plan-mode operating context.
754
+ for (const t of dispatch.tasks) {
755
+ const prompt = readFileSync(t.dispatch_prompt_path, "utf8");
756
+ expect(prompt).toContain("<system-reminder>");
757
+ expect(prompt).toContain("Plan mode is active");
758
+ expect(prompt).toContain("ExitPlanMode");
759
+ }
760
+ });
761
+
762
+ test("without --plan-mode, dispatch.json records plan_mode:false and no plan-mode block is injected", () => {
763
+ const { skillDir, cwd } = setup("usermode-no-plan-mode");
764
+ const res = runCli(
765
+ [
766
+ "--skill-dir",
767
+ skillDir,
768
+ "--skill",
769
+ "mr-review",
770
+ "--mode",
771
+ "new-skill",
772
+ "--dry-run",
773
+ ],
774
+ cwd,
775
+ );
776
+ expect(res.exitCode).toBe(0);
777
+
778
+ const iterationDir = join(
779
+ cwd,
780
+ "skills-workspace",
781
+ "mr-review",
782
+ "iteration-1",
783
+ );
784
+ const dispatch = JSON.parse(
785
+ readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
786
+ ) as {
787
+ plan_mode: boolean;
788
+ tasks: Array<{ dispatch_prompt_path: string }>;
789
+ };
790
+ expect(dispatch.plan_mode).toBe(false);
791
+ for (const t of dispatch.tasks) {
792
+ const prompt = readFileSync(t.dispatch_prompt_path, "utf8");
793
+ expect(prompt).not.toContain("<system-reminder>");
794
+ }
795
+ });
796
+
797
+ test("--stage-name stages the SUT under the verbatim name, threads it everywhere, and registers it for cleanup", () => {
798
+ const { skillDir, cwd } = setup("usermode-stage-name");
799
+ const res = runCli(
800
+ [
801
+ "--skill-dir",
802
+ skillDir,
803
+ "--skill",
804
+ "mr-review",
805
+ "--mode",
806
+ "new-skill",
807
+ "--stage-name",
808
+ "mr-review",
809
+ "--dry-run",
810
+ ],
811
+ cwd,
812
+ );
813
+ expect(res.exitCode).toBe(0);
814
+
815
+ // Staged dir is the natural name, not the conspicuous eval slug.
816
+ const stagedSkillsDir = join(cwd, ".claude", "skills");
817
+ const entries = readdirSync(stagedSkillsDir).filter(
818
+ (e) => e !== STAGED_SIBLING_MANIFEST,
819
+ );
820
+ expect(entries).toEqual(["mr-review"]);
821
+
822
+ const iterationDir = join(
823
+ cwd,
824
+ "skills-workspace",
825
+ "mr-review",
826
+ "iteration-1",
827
+ );
828
+
829
+ // conditions.json carries the natural slug — the grader meta-check reads it.
830
+ const conditions = JSON.parse(
831
+ readFileSync(join(iterationDir, "conditions.json"), "utf8"),
832
+ ) as {
833
+ conditions: Array<{ name: string; staged_skill_slug: string | null }>;
834
+ };
835
+ const withSkill = conditions.conditions.find(
836
+ (c) => c.name === "with_skill",
837
+ );
838
+ expect(withSkill?.staged_skill_slug).toBe("mr-review");
839
+
840
+ // The custom dir is registered for cleanup (prefix scan won't catch it).
841
+ const manifest = JSON.parse(
842
+ readFileSync(join(stagedSkillsDir, STAGED_SIBLING_MANIFEST), "utf8"),
843
+ ) as { created_entries: Array<{ name: string }> };
844
+ expect(manifest.created_entries.map((e) => e.name)).toContain("mr-review");
845
+
846
+ // The dispatch prompt disambiguates to the natural identifier, not the slug.
847
+ const dispatch = JSON.parse(
848
+ readFileSync(join(iterationDir, "dispatch.json"), "utf8"),
849
+ ) as {
850
+ tasks: Array<{ condition: string; dispatch_prompt_path: string }>;
851
+ };
852
+ const task = dispatch.tasks.find((t) => t.condition === "with_skill");
853
+ const prompt = readFileSync(task?.dispatch_prompt_path ?? "", "utf8");
854
+ expect(prompt).toContain("registered under the identifier `mr-review`");
855
+ expect(prompt).not.toContain("slow-powers-eval-");
856
+ });
857
+
858
+ test("--stage-name refuses to clobber a pre-existing same-named dir", () => {
859
+ const { skillDir, cwd } = setup("usermode-stage-name-clobber");
860
+ const preexisting = join(cwd, ".claude", "skills", "my-real-skill");
861
+ mkdirSync(preexisting, { recursive: true });
862
+ writeFileSync(join(preexisting, "SKILL.md"), "USER OWNED");
863
+
864
+ const res = runCli(
865
+ [
866
+ "--skill-dir",
867
+ skillDir,
868
+ "--skill",
869
+ "mr-review",
870
+ "--mode",
871
+ "new-skill",
872
+ "--stage-name",
873
+ "my-real-skill",
874
+ "--dry-run",
875
+ ],
876
+ cwd,
877
+ );
878
+ expect(res.exitCode).not.toBe(0);
879
+ expect(readFileSync(join(preexisting, "SKILL.md"), "utf8")).toBe(
880
+ "USER OWNED",
881
+ );
882
+ });
883
+
489
884
  test("dispatch prompt lists only the skill-under-test, no other skills, and no product framing without --bootstrap", () => {
490
885
  const { skillDir, cwd } = setup("usermode-prompt");
491
886
  const res = runCli(
@@ -526,8 +921,10 @@ describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
526
921
  // The full prompt is no longer inlined in dispatch.json — it lives in a file.
527
922
  expect(withSkill?.dispatch_prompt).toBeUndefined();
528
923
  const prompt = readFileSync(withSkill?.dispatch_prompt_path ?? "", "utf8");
529
- expect(prompt).toContain("<session-start-context>");
530
- expect(prompt).toContain("* `mr-review`");
924
+ expect(prompt).toContain(
925
+ "The following skills are available for use with the Skill tool:",
926
+ );
927
+ expect(prompt).toContain("- mr-review:");
531
928
  expect(prompt).not.toContain("test-driven-development");
532
929
  expect(prompt).not.toContain("writing-skills");
533
930
  // No product framing (EXTREMELY-IMPORTANT etc.) without a --bootstrap file.
@@ -670,7 +1067,7 @@ describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
670
1067
  expect(conditions.run_nonce).toBe(dispatch.run_nonce);
671
1068
  });
672
1069
 
673
- test("--bootstrap content is prepended verbatim before the staged-skills inventory", () => {
1070
+ test("--bootstrap content is prepended verbatim before the available-skills block", () => {
674
1071
  const { skillDir, cwd } = setup("usermode-bootstrap");
675
1072
  const bootstrapPath = join(cwd, "my-bootstrap.md");
676
1073
  writeFileSync(bootstrapPath, "MY CUSTOM EVAL FRAMING");
@@ -709,8 +1106,75 @@ describe("run.ts user-mode end-to-end (--skill-dir, isolated CWD)", () => {
709
1106
  ? readFileSync(withSkill.dispatch_prompt_path, "utf8")
710
1107
  : "";
711
1108
  const bootIdx = prompt.indexOf("MY CUSTOM EVAL FRAMING");
712
- const invIdx = prompt.indexOf("staged and discoverable");
1109
+ const listIdx = prompt.indexOf(
1110
+ "The following skills are available for use with the Skill tool:",
1111
+ );
713
1112
  expect(bootIdx).toBeGreaterThan(-1);
714
- expect(invIdx).toBeGreaterThan(bootIdx);
1113
+ expect(listIdx).toBeGreaterThan(bootIdx);
1114
+ });
1115
+
1116
+ test("--only restricts dispatches to the named eval ids", () => {
1117
+ const { skillDir, cwd } = setup("usermode-only", [
1118
+ { id: "e1", prompt: "review MR 1", expected_output: "a review" },
1119
+ { id: "e2", prompt: "review MR 2", expected_output: "a review" },
1120
+ ]);
1121
+ const res = runCli(
1122
+ [
1123
+ "--skill-dir",
1124
+ skillDir,
1125
+ "--skill",
1126
+ "mr-review",
1127
+ "--mode",
1128
+ "new-skill",
1129
+ "--only",
1130
+ "e1",
1131
+ "--dry-run",
1132
+ ],
1133
+ cwd,
1134
+ );
1135
+ expect(res.exitCode).toBe(0);
1136
+
1137
+ const dispatch = JSON.parse(
1138
+ readFileSync(
1139
+ join(
1140
+ cwd,
1141
+ "skills-workspace",
1142
+ "mr-review",
1143
+ "iteration-1",
1144
+ "dispatch.json",
1145
+ ),
1146
+ "utf8",
1147
+ ),
1148
+ ) as { tasks: Array<{ eval_id: string }> };
1149
+
1150
+ expect(dispatch.tasks.map((t) => t.eval_id).sort()).toEqual(["e1", "e1"]);
1151
+ // The "N evals × 2 conditions" line reflects the filtered set.
1152
+ expect(new TextDecoder().decode(res.stdout)).toContain(
1153
+ "1 evals × 2 conditions",
1154
+ );
1155
+ });
1156
+
1157
+ test("--only with an unknown id exits non-zero and names the unknown id", () => {
1158
+ const { skillDir, cwd } = setup("usermode-only-unknown", [
1159
+ { id: "e1", prompt: "review MR 1", expected_output: "a review" },
1160
+ ]);
1161
+ const res = runCli(
1162
+ [
1163
+ "--skill-dir",
1164
+ skillDir,
1165
+ "--skill",
1166
+ "mr-review",
1167
+ "--mode",
1168
+ "new-skill",
1169
+ "--only",
1170
+ "nope",
1171
+ "--dry-run",
1172
+ ],
1173
+ cwd,
1174
+ );
1175
+ expect(res.exitCode).not.toBe(0);
1176
+ expect(new TextDecoder().decode(res.stderr)).toContain(
1177
+ "unknown eval id(s): nope",
1178
+ );
715
1179
  });
716
1180
  });