@slowdini/slow-powers-opencode 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +37 -65
  2. package/bootstrap.md +1 -7
  3. package/opencode/plugins/slow-powers.js +1 -1
  4. package/package.json +14 -13
  5. package/skills/evaluating-skills/SKILL.md +91 -337
  6. package/skills/evaluating-skills/evals/baseline/BASELINE.md +23 -0
  7. package/skills/evaluating-skills/evals/baseline/NOTES.md +40 -0
  8. package/skills/evaluating-skills/evals/baseline/benchmark.json +54 -0
  9. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__new_skill.json +39 -0
  10. package/skills/evaluating-skills/evals/baseline/grading/deterministic-edit-skip__old_skill.json +39 -0
  11. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__new_skill.json +39 -0
  12. package/skills/evaluating-skills/evals/baseline/grading/did-my-revision-help__old_skill.json +39 -0
  13. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__new_skill.json +32 -0
  14. package/skills/evaluating-skills/evals/baseline/grading/is-new-skill-ready-to-ship__old_skill.json +32 -0
  15. package/skills/test-driven-development/evals/baseline/NOTES.md +2 -2
  16. package/skills/verifying-development-work/SKILL.md +17 -6
  17. package/skills/verifying-development-work/code-review.md +68 -0
  18. package/skills/verifying-development-work/comment-review.md +85 -0
  19. package/skills/verifying-development-work/evals/baseline/BASELINE.md +7 -6
  20. package/skills/verifying-development-work/evals/baseline/NOTES.md +83 -149
  21. package/skills/verifying-development-work/evals/baseline/benchmark.json +32 -31
  22. package/skills/verifying-development-work/evals/baseline/grading/comment-hygiene-at-handoff__new_skill.json +53 -0
  23. package/skills/verifying-development-work/evals/baseline/grading/comment-hygiene-at-handoff__old_skill.json +53 -0
  24. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__new_skill.json +53 -0
  25. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__old_skill.json +53 -0
  26. package/skills/verifying-development-work/evals/evals.json +34 -2
  27. package/skills/verifying-development-work/evals/fixtures/comment-hygiene-at-handoff/slugify.test.ts +14 -0
  28. package/skills/verifying-development-work/evals/fixtures/comment-hygiene-at-handoff/slugify.ts +25 -0
  29. package/skills/evaluating-skills/examples/verifying-development-work-evals.json +0 -30
  30. package/skills/evaluating-skills/harness-details/claude.md +0 -158
  31. package/skills/evaluating-skills/runner/README.md +0 -154
  32. package/skills/evaluating-skills/runner/adapters/claude-code-session.test.ts +0 -56
  33. package/skills/evaluating-skills/runner/adapters/claude-code-session.ts +0 -43
  34. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts +0 -263
  35. package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts +0 -146
  36. package/skills/evaluating-skills/runner/aggregate.test.ts +0 -264
  37. package/skills/evaluating-skills/runner/aggregate.ts +0 -248
  38. package/skills/evaluating-skills/runner/context.test.ts +0 -181
  39. package/skills/evaluating-skills/runner/context.ts +0 -90
  40. package/skills/evaluating-skills/runner/detect-stray-writes.test.ts +0 -103
  41. package/skills/evaluating-skills/runner/detect-stray-writes.ts +0 -192
  42. package/skills/evaluating-skills/runner/fill-transcripts.test.ts +0 -73
  43. package/skills/evaluating-skills/runner/fill-transcripts.ts +0 -154
  44. package/skills/evaluating-skills/runner/grade.test.ts +0 -347
  45. package/skills/evaluating-skills/runner/grade.ts +0 -603
  46. package/skills/evaluating-skills/runner/guard/guard.ts +0 -49
  47. package/skills/evaluating-skills/runner/guard/install.test.ts +0 -92
  48. package/skills/evaluating-skills/runner/guard/install.ts +0 -147
  49. package/skills/evaluating-skills/runner/guard/policy.test.ts +0 -71
  50. package/skills/evaluating-skills/runner/guard/policy.ts +0 -74
  51. package/skills/evaluating-skills/runner/plugin-shadow.test.ts +0 -228
  52. package/skills/evaluating-skills/runner/plugin-shadow.ts +0 -201
  53. package/skills/evaluating-skills/runner/profiles/claude-code/plan-mode.md +0 -11
  54. package/skills/evaluating-skills/runner/promote-baseline.test.ts +0 -230
  55. package/skills/evaluating-skills/runner/promote-baseline.ts +0 -186
  56. package/skills/evaluating-skills/runner/run.test.ts +0 -1180
  57. package/skills/evaluating-skills/runner/run.ts +0 -1029
  58. package/skills/evaluating-skills/runner/sandbox-policy.ts +0 -74
  59. package/skills/evaluating-skills/runner/types.ts +0 -112
  60. package/skills/evaluating-skills/runner/validate-all.ts +0 -54
  61. package/skills/evaluating-skills/runner/validate-schema.test.ts +0 -99
  62. package/skills/evaluating-skills/runner/validate-schema.ts +0 -51
  63. package/skills/evaluating-skills/runner/validate.test.ts +0 -56
  64. package/skills/evaluating-skills/runner/validate.ts +0 -21
  65. package/skills/evaluating-skills/schema/evals.schema.json +0 -105
  66. package/skills/evaluating-skills/schema/grading.schema.json +0 -84
  67. package/skills/evaluating-skills/schema/run-record.schema.json +0 -80
  68. package/skills/evaluating-skills/schema/stray-writes.schema.json +0 -68
  69. package/skills/evaluating-skills/templates/eval-task-prompt.md +0 -67
  70. package/skills/evaluating-skills/templates/evals.json.example +0 -17
  71. package/skills/evaluating-skills/templates/judge-prompt.md +0 -56
  72. package/skills/evaluating-skills/templates/revise-skill-prompt.md +0 -56
  73. package/skills/verifying-development-work/evals/baseline/grading/bug-fixed-without-reproducing__with_skill.json +0 -39
  74. package/skills/verifying-development-work/evals/baseline/grading/bug-fixed-without-reproducing__without_skill.json +0 -24
  75. package/skills/verifying-development-work/evals/baseline/grading/build-implied-by-edit__with_skill.json +0 -46
  76. package/skills/verifying-development-work/evals/baseline/grading/build-implied-by-edit__without_skill.json +0 -31
  77. package/skills/verifying-development-work/evals/baseline/grading/claim-without-running__with_skill.json +0 -46
  78. package/skills/verifying-development-work/evals/baseline/grading/claim-without-running__without_skill.json +0 -31
  79. package/skills/verifying-development-work/evals/baseline/grading/seeded-done-tests-pass-ship-it__with_skill.json +0 -46
  80. package/skills/verifying-development-work/evals/baseline/grading/seeded-done-tests-pass-ship-it__without_skill.json +0 -31
  81. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__with_skill.json +0 -53
  82. package/skills/verifying-development-work/evals/baseline/grading/wrap-it-up-handoff__without_skill.json +0 -38
package/README.md CHANGED
@@ -1,58 +1,43 @@
1
1
  # Slow-powers
2
2
 
3
- Slow-powers gives your agent superpowers. It's a complete software development
4
- methodology for coding agents — a set of composable skills plus a bootstrap
5
- that ensures the agent reaches for them at the right moments.
3
+ Slow-powers is an agent skill set for professional software development. It enhances plan mode and debugging work, enforces best practices, and works _with_ the features of modern agents, instead of replacing them. It's a plugin for people who don't install plugins.
6
4
 
7
5
  ## About this fork
8
6
 
9
- Slow-powers is a fork of [obra/superpowers](https://github.com/obra/superpowers)
10
- at v5.1.0. We preserve the overall workflow of superpowers, while fixing bugs
11
- and clarifying skill content.
7
+ Slow-powers is a fork of [obra/superpowers](https://github.com/obra/superpowers). Much of the skill content is sourced from upstream, with rewrites focusing on clarity, token efficiency, and enhancing instead of replacing plan mode.
12
8
 
13
9
  ## Quickstart
14
10
 
15
- Give your agent superpowers with slow-powers: [Claude Code](#claude-code) · [Codex CLI](#codex-cli) · [OpenCode](#opencode). Support varies per harness — see the [feature support](#feature-support) table.
11
+ [Claude Code](#claude-code) · [Codex CLI](#codex-cli) · [OpenCode](#opencode)
16
12
 
17
- ## Feature support
13
+ ## How it works
18
14
 
19
- | Harness | Status | Notes |
20
- |-----------------|----------|----------------------------------------------------------------|
21
- | Claude Code | Full | Reference implementation |
22
- | Codex CLI | Partial | Plugin manifest + shared hooks; no eval transcript adapter |
23
- | OpenCode | Partial | JS plugin with bootstrap injection; no eval transcript adapter |
15
+ Slow-powers is designed to improve the actual day-to-day work of software developers working with agents. It instructs agents to check for skills first, and use the ones that apply. The shipped skills fill real gaps in agentic development, but all discoverable skills benefit from the skill-enforcing guidance.
24
16
 
25
- Contributors closing parity gaps should follow [`harness-parity-check.md`](./harness-parity-check.md): it audits which Slow-powers features are wired up for a given harness and preps an agent to close one gap.
17
+ ### Start in plan mode
26
18
 
27
- ## How it works
19
+ Even small features are developed better with a plan. Slow-powers hardens the plan to catch hallucinations and other mistakes before you review it. During implementation, skills guide the agent through best practices, working in isolation, following test-driven development, and reviewing and verifying its work before it hands it back to you.
28
20
 
29
- Slow-powers integrates directly into your agent's session, providing a highly disciplined set of technical execution utilities. It enforces strict test-driven development (TDD), systematic scientific debugging, rigorous verification checks, safe workspace isolation so new work doesn't collide with existing work, and clean branch-finishing hygiene. It also enhances native agent planning phases with strict rules: banning placeholders, enforcing atomic task granularity, and requiring TDD-first checklists.
21
+ ### Debugging
30
22
 
31
- ## Installation
23
+ Slow-powers guides agents through an evidence-backed, no-guess debugging approach. No "It works now!" without proof.
24
+
25
+ ### Writing skills
26
+
27
+ Skills for writing skills! Slow-powers skills are all written and evaluated following the same guidelines and processes it ships. Back up your own skills with real stats, and understand their cost in time and tokens.
32
28
 
33
- Installation differs by harness. If you use more than one, install
34
- Slow-powers separately for each.
29
+ Skill evaluations are powered by [@slowdini/eval-runner](https://github.com/slowdini/eval-runner).
30
+
31
+ ## Installation
35
32
 
36
33
  ### Install with your agent
37
34
 
38
- Don't want to look up the steps? Open the harness you want Slow-powers on and
39
- paste this prompt to its agent — it'll read the guide, work out which harness
40
- it's in, and do the install for you:
35
+ Open the harness you want Slow-powers on and paste this prompt:
41
36
 
42
37
  ```text
43
- Install the "slow-powers" plugin for the coding-agent harness you are currently
44
- running in. Read the installation guide at
45
- https://github.com/slowdini/slow-powers#installation, determine which harness
46
- this is (Claude Code, Codex CLI, or OpenCode), and follow the matching steps —
47
- run the documented marketplace/install commands for Claude Code or Codex, or add
48
- the package to the `plugin` array in opencode.json for OpenCode. Then tell me
49
- exactly what you changed and what I need to do to finish (e.g. restart the
50
- session so the skills load).
38
+ Install the slow-powers plugin from https://github.com/slowdini/slow-powers#installation for this harness.
51
39
  ```
52
40
 
53
- The per-harness instructions below are the source of truth the agent follows —
54
- and the reference for installing by hand.
55
-
56
41
  ### Claude Code
57
42
 
58
43
  ```
@@ -60,6 +45,9 @@ and the reference for installing by hand.
60
45
  /plugin install slow-powers@slow-powers
61
46
  ```
62
47
 
48
+ You can also browse and install it interactively: run `claude`, open
49
+ `/plugin`, choose the `slowdini` marketplace, and install `slow-powers`.
50
+
63
51
  ### Codex CLI
64
52
 
65
53
  ```bash
@@ -69,15 +57,10 @@ codex plugin add slow-powers@slowdini
69
57
 
70
58
  You can also browse and install it interactively: run `codex`, open
71
59
  `/plugins`, choose the `slowdini` marketplace, and install `slow-powers`.
72
- Start a new Codex thread after installing so the bundled skills are loaded.
73
-
74
- Slow-powers includes a plugin-bundled `SessionStart` hook for bootstrap
75
- context. Codex hooks are stable, but plugin hooks must be reviewed and trusted
76
- before Codex runs them.
77
60
 
78
61
  ### OpenCode
79
62
 
80
- Add Slow-powers to the `plugin` array in your `opencode.json` (global or project-level):
63
+ Add Slow-powers to the `plugin` array in `~/.config/opencode/opencode.json`:
81
64
 
82
65
  ```json
83
66
  {
@@ -85,44 +68,34 @@ Add Slow-powers to the `plugin` array in your `opencode.json` (global or project
85
68
  }
86
69
  ```
87
70
 
88
- This installs the latest published version from npm.
89
-
90
- ## The Core Execution Utilities
91
-
92
- Slow-powers provides a set of highly focused, execution-level skills that ensure your agent operates with maximum discipline:
93
-
94
- 1. **`working-in-isolation`** — Establishes an isolated workspace so new work doesn't collide with existing or in-progress work, keeping protected branches like `main` clean.
95
- 2. **`test-driven-development`** — Enforces a strict RED-GREEN-REFACTOR cycle, ensuring all production code is backed by failing test verification first.
96
- 3. **`systematic-debugging`** — Guides the agent to locate the root cause of failures via scientific hypothesis testing, avoiding "guess-and-check" thrashing.
97
- 4. **`verifying-development-work`** — Requires running actual test/build commands and presenting concrete evidence before any success claim, with a final review pass over the change before work is handed back.
98
- 5. **`writing-skills`** — Handles future custom skill authoring and updates.
99
-
100
- ## What's inside
71
+ ## The skills
101
72
 
102
- **Testing & Verification** `test-driven-development`, `verifying-development-work`
73
+ Slow-powers provides a set of highly focused skills that ensure your agent operates with maximum discipline:
103
74
 
104
- **Debugging** `systematic-debugging`
105
-
106
- **Workspace & Git Hygiene** `working-in-isolation`
107
-
108
- **Meta & Extension** `writing-skills`
75
+ 1. **`hardening-plans`** — Instructs the agent to re-review any plans before it hands them back to you, looking for hallucinations, logical inconsistencies, and other common plan mistakes.
76
+ 2. **`systematic-debugging`** — Guides the agent to locate the root cause of failures via scientific hypothesis testing, avoiding "guess-and-check" thrashing.
77
+ 3. **`working-in-isolation`** — Establishes an isolated workspace (worktree or branch) so new work doesn't collide with existing or in-progress work, keeping protected branches like `main` clean.
78
+ 4. **`test-driven-development`** — Enforces a strict RED-GREEN-REFACTOR cycle, ensuring all code is backed by failing test verification first.
79
+ 5. **`verifying-development-work`** — Requires running actual test/build commands and presenting concrete evidence before any success claim, with a final review pass over the change, code AND comments, before work is handed back.
80
+ 6. **`writing-skills`** — Helps write and edit skills, following the same best practices that guide slow-powers itself.
81
+ 7. **`evaluating-skills`** — Teaches the agent how to run skill evals, so the value of skills and prose changes can be objectively assessed.
109
82
 
110
83
  ## Intended Workflows
111
84
 
112
- The skills declare lightweight prerequisite / next-step gates so the agent knows the intended sequence. These gates **suggest** what comes before and after a skill once it is invoked; they do **not** restrict when any skill can be invoked. An agent may invoke `test-driven-development`, `verifying-development-work`, or any other skill at any point.
85
+ The skills declare prerequisite / next-step gates so the agent follows an intended skill sequence. These gates **suggest** what comes before and after a skill once it is invoked; they do **not** restrict when any skill can be invoked.
113
86
 
114
87
  **Plan mode:** plan mode → `hardening-plans` → `working-in-isolation` → `test-driven-development` → `verifying-development-work`
115
88
 
116
89
  **Debugging:** (`working-in-isolation`) → `systematic-debugging` → `verifying-development-work`
117
90
 
118
- `hardening-plans` points to `test-driven-development` as its next step, and `test-driven-development` requires `working-in-isolation` first — so isolation is reached as TDD's prerequisite, producing the plan-mode order above.
119
-
120
91
  ## Philosophy
121
92
 
93
+ Slow-powers skills follow a few opinionated principles:
94
+
122
95
  - Test-Driven Development — write tests first, always
123
- - Systematic over ad-hoc — process over guessing
124
- - Complexity reduction — simplicity as a primary goal
125
- - Evidence over claims — verify before declaring success
96
+ - Plan mode — even small features should start with a plan
97
+ - Prefer branches to worktrees — branches are easier for human review and testing, worktrees are better for agent isolation
98
+ - Skills need evals — evals prove a new skill is better than no skill, and an edit to an existing skill is valuable
126
99
 
127
100
  ## Repository structure
128
101
 
@@ -136,7 +109,6 @@ Flat layout — skills and assets live at root, harness-specific integration liv
136
109
  - `opencode/` — OpenCode plugin
137
110
  - `.claude-plugin/marketplace.json` — Claude Code marketplace registry
138
111
  - `package.json` — OpenCode plugin manifest + dev tooling
139
- - `harness-parity-check.md` — Instructions for an agent in any harness to audit feature gaps and prep to close one
140
112
 
141
113
  ## Releasing
142
114
 
package/bootstrap.md CHANGED
@@ -1,15 +1,9 @@
1
- # Instructions for using Slow-powers Skills
2
-
3
1
  <EXTREMELY-IMPORTANT>
4
2
  If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST invoke the skill.
5
3
  IF A SKILL APPLIES TO YOUR TASK, YOU DO NOT HAVE A CHOICE. YOU MUST USE IT.
6
4
  This is not negotiable. This is not optional. You cannot rationalize your way out of this.
7
5
  </EXTREMELY-IMPORTANT>
8
6
 
9
- These skills are quality gates on procedures you already run. They don't grant abilities — they enhance how you execute work you already know how to do.
10
-
11
- When you reach a gate moment — about to code, hand off a plan, debug, claim done, finish a branch — the matching skill's description surfaces it. Load it then, even if your procedure already feels complete. That "feels complete" is the gate's target.
12
-
13
7
  ## The Rule
14
8
 
15
9
  **Invoke relevant or requested skills BEFORE any response or action.** Even a 1% chance a skill might apply means that you should invoke the skill to check. If an invoked skill turns out to be wrong for the situation, you don't need to use it.
@@ -55,7 +49,7 @@ These thoughts mean STOP — you're rationalizing:
55
49
 
56
50
  ## Instruction Priority
57
51
 
58
- Slow-powers skills override default system behavior where they conflict, but user instructions always take precedence:
52
+ Skills override default system behavior where they conflict, but user instructions always take precedence:
59
53
  1. **User's explicit instructions** (CLAUDE.md, AGENTS.md, direct requests) — highest priority
60
54
  2. **Slow-powers skills / bootstrap guidelines** — override default system prompt behavior where they conflict
61
55
  3. **Default system prompt** — lowest priority
@@ -15,7 +15,7 @@ const bootstrapPath = path.resolve(__dirname, "../../bootstrap.md");
15
15
  // First line of bootstrap.md — used as an idempotency check so we don't
16
16
  // re-inject when OpenCode reruns the transform on an already-transformed
17
17
  // message array. Specific enough that user prompts won't accidentally match.
18
- const bootstrapLeadingPhrase = "# Instructions for using Slow-powers Skills";
18
+ const bootstrapLeadingPhrase = "<EXTREMELY-IMPORTANT>";
19
19
 
20
20
  // Module-level cache for bootstrap content.
21
21
  // The bootstrap.md file does not change during a session, so reading it
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@slowdini/slow-powers-opencode",
3
- "version": "0.2.0",
3
+ "version": "0.4.0",
4
4
  "description": "Slow-powers — structured development workflows for coding agents (TDD, debugging, verification, git hygiene)",
5
5
  "type": "module",
6
6
  "main": "./opencode/plugins/slow-powers.js",
@@ -36,15 +36,19 @@
36
36
  },
37
37
  "scripts": {
38
38
  "test": "bun test --path-ignore-patterns='skills-workspace/**'",
39
- "evals": "bun run skills/evaluating-skills/runner/run.ts --skill-dir ./skills --bootstrap ./bootstrap.md",
40
- "evals:snapshot": "bun run skills/evaluating-skills/runner/run.ts snapshot --skill-dir ./skills",
41
- "evals:validate": "bun run skills/evaluating-skills/runner/validate-all.ts --skill-dir ./skills",
42
- "evals:fill-transcripts": "bun run skills/evaluating-skills/runner/fill-transcripts.ts --skill-dir ./skills",
43
- "evals:detect-stray-writes": "bun run skills/evaluating-skills/runner/detect-stray-writes.ts --skill-dir ./skills",
44
- "evals:teardown-guard": "bun run skills/evaluating-skills/runner/run.ts teardown-guard --skill-dir ./skills",
45
- "evals:grade": "bun run skills/evaluating-skills/runner/grade.ts --skill-dir ./skills",
46
- "evals:aggregate": "bun run skills/evaluating-skills/runner/aggregate.ts --skill-dir ./skills",
47
- "evals:promote-baseline": "bun run skills/evaluating-skills/runner/promote-baseline.ts --skill-dir ./skills",
39
+ "evals": "bunx @slowdini/eval-runner run --skill-dir ./skills --bootstrap ./bootstrap.md",
40
+ "evals:snapshot": "bunx @slowdini/eval-runner snapshot --skill-dir ./skills",
41
+ "evals:validate": "bunx @slowdini/eval-runner validate --skill-dir ./skills",
42
+ "evals:ingest": "bunx @slowdini/eval-runner ingest --skill-dir ./skills",
43
+ "evals:finalize": "bunx @slowdini/eval-runner finalize --skill-dir ./skills",
44
+ "evals:record-runs": "bunx @slowdini/eval-runner record-runs --skill-dir ./skills",
45
+ "evals:fill-transcripts": "bunx @slowdini/eval-runner fill-transcripts --skill-dir ./skills",
46
+ "evals:detect-stray-writes": "bunx @slowdini/eval-runner detect-stray-writes --skill-dir ./skills",
47
+ "evals:teardown-guard": "bunx @slowdini/eval-runner teardown-guard --skill-dir ./skills",
48
+ "evals:teardown": "bunx @slowdini/eval-runner teardown --skill-dir ./skills",
49
+ "evals:grade": "bunx @slowdini/eval-runner grade --skill-dir ./skills",
50
+ "evals:aggregate": "bunx @slowdini/eval-runner aggregate --skill-dir ./skills",
51
+ "evals:promote-baseline": "bunx @slowdini/eval-runner promote-baseline --skill-dir ./skills",
48
52
  "check": "biome check --write .",
49
53
  "check:ci": "biome check --error-on-warnings .",
50
54
  "typecheck": "tsc --noEmit",
@@ -58,8 +62,5 @@
58
62
  "husky": "^9.1.7",
59
63
  "lint-staged": "^17.0.4",
60
64
  "typescript": "^6.0.3"
61
- },
62
- "dependencies": {
63
- "ajv": "^8.20.0"
64
65
  }
65
66
  }