openhermes 4.1.0 → 4.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/CONTEXT.md +9 -0
  2. package/ETHOS.md +6 -3
  3. package/LICENSE +21 -21
  4. package/README.md +120 -79
  5. package/bootstrap.ts +284 -41
  6. package/harness/agents/oh-browser.md +97 -0
  7. package/harness/agents/oh-builder.md +78 -0
  8. package/harness/agents/oh-facade.md +75 -0
  9. package/harness/agents/oh-fusion.md +45 -0
  10. package/harness/agents/oh-gauntlet.md +71 -0
  11. package/harness/agents/oh-grill.md +71 -0
  12. package/harness/agents/oh-investigate.md +60 -0
  13. package/harness/agents/oh-manifest.md +95 -0
  14. package/harness/agents/oh-plan-review.md +40 -0
  15. package/harness/agents/oh-planner.md +50 -0
  16. package/harness/agents/oh-refactor.md +37 -0
  17. package/harness/agents/oh-retro.md +46 -0
  18. package/harness/agents/oh-review.md +85 -0
  19. package/harness/agents/oh-security.md +83 -0
  20. package/harness/agents/oh-ship.md +76 -0
  21. package/harness/agents/oh-skill-craft.md +38 -0
  22. package/harness/agents/openhermes.md +106 -62
  23. package/harness/codex/AUTOPILOT.md +178 -0
  24. package/harness/codex/CHARTER.md +81 -0
  25. package/harness/commands/oh-doctor.md +193 -14
  26. package/harness/commands/oh-log.md +18 -0
  27. package/harness/instructions/SHELL.md +76 -0
  28. package/harness/skills/oh-ascii/DEEP.md +292 -0
  29. package/harness/skills/oh-ascii/SKILL.md +31 -0
  30. package/harness/skills/oh-ascii/scripts/check_ascii_alignment.py +596 -0
  31. package/harness/skills/oh-browser/DEEP.md +54 -0
  32. package/harness/skills/oh-browser/SKILL.md +30 -0
  33. package/harness/skills/oh-builder/DEEP.md +63 -0
  34. package/harness/skills/oh-builder/SKILL.md +16 -89
  35. package/harness/skills/oh-expert/DEEP.md +85 -0
  36. package/harness/skills/oh-expert/SKILL.md +19 -106
  37. package/harness/skills/oh-facade/DEEP.md +182 -0
  38. package/harness/skills/oh-facade/SKILL.md +34 -0
  39. package/harness/skills/oh-freeze/DEEP.md +18 -0
  40. package/harness/skills/oh-freeze/SKILL.md +15 -15
  41. package/harness/skills/oh-full-output/DEEP.md +25 -0
  42. package/harness/skills/oh-full-output/SKILL.md +28 -0
  43. package/harness/skills/oh-fusion/DEEP.md +120 -0
  44. package/harness/skills/oh-fusion/SKILL.md +36 -0
  45. package/harness/skills/oh-gauntlet/DEEP.md +77 -0
  46. package/harness/skills/oh-gauntlet/SKILL.md +17 -105
  47. package/harness/skills/oh-grill/DEEP.md +51 -0
  48. package/harness/skills/oh-grill/SKILL.md +16 -63
  49. package/harness/skills/oh-guard/DEEP.md +19 -0
  50. package/harness/skills/oh-guard/SKILL.md +15 -20
  51. package/harness/skills/oh-handoff/DEEP.md +48 -0
  52. package/harness/skills/oh-handoff/SKILL.md +18 -19
  53. package/harness/skills/oh-health/DEEP.md +74 -0
  54. package/harness/skills/oh-health/SKILL.md +17 -76
  55. package/harness/skills/oh-init/DEEP.md +85 -0
  56. package/harness/skills/oh-init/SKILL.md +17 -197
  57. package/harness/skills/oh-investigate/DEEP.md +171 -0
  58. package/harness/skills/oh-investigate/SKILL.md +18 -61
  59. package/harness/skills/oh-issue/DEEP.md +21 -0
  60. package/harness/skills/oh-issue/SKILL.md +16 -23
  61. package/harness/skills/oh-learn/DEEP.md +44 -0
  62. package/harness/skills/oh-learn/SKILL.md +17 -79
  63. package/harness/skills/oh-manifest/DEEP.md +92 -0
  64. package/harness/skills/oh-manifest/SKILL.md +15 -107
  65. package/harness/skills/oh-plan-review/DEEP.md +90 -0
  66. package/harness/skills/oh-plan-review/SKILL.md +19 -114
  67. package/harness/skills/oh-planner/DEEP.md +172 -0
  68. package/harness/skills/oh-planner/SKILL.md +16 -143
  69. package/harness/skills/oh-prd/DEEP.md +45 -0
  70. package/harness/skills/oh-prd/SKILL.md +15 -22
  71. package/harness/skills/oh-refactor/DEEP.md +122 -0
  72. package/harness/skills/oh-refactor/SKILL.md +33 -0
  73. package/harness/skills/oh-retro/DEEP.md +26 -0
  74. package/harness/skills/oh-retro/SKILL.md +17 -20
  75. package/harness/skills/oh-review/DEEP.md +87 -0
  76. package/harness/skills/oh-review/SKILL.md +17 -96
  77. package/harness/skills/oh-security/DEEP.md +83 -0
  78. package/harness/skills/oh-security/SKILL.md +18 -96
  79. package/harness/skills/oh-ship/DEEP.md +141 -0
  80. package/harness/skills/oh-ship/SKILL.md +18 -26
  81. package/harness/skills/oh-skill-craft/DEEP.md +369 -0
  82. package/harness/skills/oh-skill-craft/SKILL.md +20 -93
  83. package/harness/skills/oh-skills-link/DEEP.md +16 -0
  84. package/harness/skills/oh-skills-link/SKILL.md +15 -16
  85. package/harness/skills/oh-skills-list/DEEP.md +20 -0
  86. package/harness/skills/oh-skills-list/SKILL.md +14 -18
  87. package/harness/skills/oh-triage/DEEP.md +23 -0
  88. package/harness/skills/oh-triage/SKILL.md +15 -20
  89. package/harness/skills/oh-worktree/DEEP.md +169 -0
  90. package/harness/skills/oh-worktree/SKILL.md +32 -0
  91. package/lib/harness-resolver.ts +10 -12
  92. package/package.json +9 -4
  93. package/scripts/count-tokens.mjs +158 -0
  94. package/scripts/oh-doctor.ps1 +342 -0
  95. package/harness/codex/CONSTITUTION.md +0 -70
  96. package/harness/codex/ROUTING.md +0 -127
  97. package/harness/instructions/RUNTIME.md +0 -55
  98. package/harness/skills/oh-caveman/SKILL.md +0 -33
  99. package/lib/logger.ts +0 -69
@@ -0,0 +1,369 @@
1
+ # oh-skill-craft — Deep Reference
2
+
3
+ ## Skill Structure and Template
4
+
5
+ ### Directory Layout
6
+
7
+ Every skill lives in its own directory under the harness:
8
+
9
+ ```
10
+ harness/skills/<oh-name>/
11
+ ├── SKILL.md # Main instructions (required)
12
+ ├── DEEP.md # Extended docs (if SKILL.md > 100 lines)
13
+ └── scripts/ # For deterministic operations (validation, formatting)
14
+ ```
15
+
16
+ ### Template
17
+
18
+ Every SKILL.md follows this structure:
19
+
20
+ ```markdown
21
+ ---
22
+ name: oh-<name>
23
+ description: "Brief. Use when [triggers]."
24
+ tier: <2|3|4>
25
+ route:
26
+ pass: <next>
27
+ fail: <fallback>
28
+ blocker: surface
29
+ ---
30
+
31
+ # oh-<name>
32
+
33
+ <one-paragraph summary>
34
+
35
+ ## When to Use
36
+
37
+ ## Steps
38
+
39
+ 1. Step
40
+
41
+ ## Anti-patterns
42
+
43
+ - List
44
+ ```
45
+
46
+ ### Field Guide
47
+
48
+ | Field | Purpose |
49
+ |-------|---------|
50
+ | `name` | Must match `^[a-z0-9]+(-[a-z0-9]+)*$` and directory name |
51
+ | `description` | Max 200 chars. Lead with the trigger context ("Use when..."); keep any statement of function brief and never summarize the workflow. |
52
+ | `tier` | 2=tool (deterministic), 3=strategic (analysis/decisions), 4=autonomous (multi-step process) |
53
+ | `route.pass` | Next skill after successful completion |
54
+ | `route.fail` | Fallback skill on failure or edge case |
55
+ | `route.blocker` | Where to surface blockers (usually "surface") |
56
+
57
+ ## Output Location and Review Checklist
58
+
59
+ ### Output Location
60
+
61
+ Skills are stored in two locations with a precedence rule:
62
+
63
+ | Location | Path | Behavior |
64
+ |----------|------|----------|
65
+ | **User-written** | `~/.config/opencode/skills/` | Survives npm update. User edits persist across reinstalls. |
66
+ | **Built-in** | `harness/skills/` in the package | Gets replaced on package update. |
67
+
68
+ **Name conflict rule**: On name conflict, user version wins. If a user has `~/.config/opencode/skills/oh-expert/SKILL.md`, that takes precedence over the built-in version.
69
+
70
+ ### Review Checklist
71
+
72
+ Before marking a skill complete, verify every item:
73
+
74
+ - [ ] **Description includes triggers** — "Use when..." phrasing in description field
75
+ - [ ] **SKILL.md under 100 lines** — If longer, chunk into sections or move the detail into DEEP.md
76
+ - [ ] **No time-sensitive info** — No dates, version numbers, or ephemeral references
77
+ - [ ] **Consistent oh- prefix and terminology** — Follow naming conventions from existing skills
78
+ - [ ] **Concrete examples included** — Show real usage, not abstract descriptions
79
+ - [ ] **Anti-patterns documented** — What NOT to do, common mistakes
80
+ - [ ] **Tests still pass** — Run `npm test` (or project-equivalent) to verify no regressions
81
+
82
+ ## Eval-Driven Iteration
83
+
84
+ After drafting a skill, iterate with evidence — not guessing. Test prompts should be substantive multi-step tasks that mirror real usage. The model handles simple tasks without a skill — evals reveal whether the skill pulls its weight on hard cases.
85
+
86
+ ### 6-Step Loop
87
+
88
+ #### 1. Create Test Cases
89
+ Write 2-3 realistic multi-step prompts that mirror real usage. Save to `evals/evals.json`:
90
+
91
+ ```json
92
+ {
93
+ "skill_name": "oh-<name>",
94
+ "evals": [
95
+ {
96
+ "id": 1,
97
+ "prompt": "Realistic multi-step task the skill should handle",
98
+ "expected_output": "Concrete expected result description",
99
+ "files": []
100
+ }
101
+ ]
102
+ }
103
+ ```
104
+
105
+ #### 2. Spawn Runs
106
+ Launch parallel subagents: **with-skill** (load the skill, execute prompt) vs **baseline** (no skill loaded, or previous version). Use the same prompt for both.
107
+
108
+ Save outputs to:
109
+ - `iteration-N/eval-ID/with_skill/`
110
+ - `iteration-N/eval-ID/baseline/`
111
+
112
+ #### 3. Draft Assertions
113
+ While runs execute, draft objectively verifiable assertions for each test case. Good assertions:
114
+ - Have descriptive names
115
+ - Can be checked programmatically (output contains X, file Y was created, step Z was followed)
116
+ - Are recorded in `evals/evals.json` alongside the test case they verify
117
+
118
+ #### 4. Grade
119
+ Aggregate results per assertion:
120
+ - **Pass rates** — Which assertions pass/fail with vs without the skill
121
+ - **Timing** — How long each run takes
122
+ - **Token usage** — Cost comparison
123
+
124
+ Look for:
125
+ - **Non-discriminating** assertions — always pass regardless of skill → remove them (they add noise)
126
+ - **High-variance** results — possibly flaky tests → investigate
127
+ - **Time/token tradeoffs** — does the skill justify its cost in latency and tokens?
128
+
129
+ #### 5. Improve
130
+ Revise the skill based on failures. Important rules:
131
+ - **Generalize** from specific failure patterns — don't overfit to 2-3 test cases
132
+ - The goal is a skill that works across a million prompts, not just your test set
133
+ - Keep instructions lean — every word should earn its place
134
+
135
+ #### 6. Loop
136
+ Rerun all tests into a new iteration directory. Repeat until one of:
137
+ - User is satisfied
138
+ - All feedback is positive
139
+ - No meaningful progress between iterations
140
+
141
+ ## Description Optimization
142
+
143
+ After the skill body is solid, optimize its `description` field for triggering accuracy. The description is what the routing system uses to match queries to skills.
144
+
145
+ ### Process
146
+
147
+ #### 1. Create 20 Eval Queries
148
+ Construct a balanced eval set:
149
+
150
+ | Type | Count | Purpose |
151
+ |------|-------|---------|
152
+ | **Should-trigger** | 10 | Different phrasings and contexts where this skill is the correct answer |
153
+ | **Should-not-trigger** | 10 | Near-misses that share keywords but need a different skill |
154
+
155
+ #### 2. Quality Rules
156
+ - Queries must be **realistic** — phrases users actually type, not academic exercises
157
+ - Include **concrete details** — "create a skill for validating YAML configs" not "make a skill"
158
+ - Should-not-trigger queries should be **genuinely confusable** — if they're obviously unrelated, the test is useless
159
+
160
+ #### 3. Iterate Description
161
+ Write candidate descriptions. For each candidate:
162
+ - Score it against the eval set
163
+ - How many should-trigger queries does it catch?
164
+ - How many should-not-trigger does it correctly reject?
165
+ - Tune phrasing, keywords, and structure
166
+
167
+ #### 4. Select Winner
168
+ The description with the best precision/recall balance wins. Record it in the skill frontmatter.
169
+
170
+ ## Effectiveness and Testing
171
+
172
+ ### 1. TDD for Skills Methodology
173
+
174
+ **Writing skills IS Test-Driven Development applied to process documentation.**
175
+
176
+ You write test cases (pressure scenarios), watch them fail (baseline agent behavior), write the skill (the documentation), watch tests pass (agents comply), and refactor (close loopholes).
177
+
178
+ **Core principle:** If you didn't watch an agent fail without the skill, you don't know if the skill teaches the right thing.
179
+
180
+ #### TDD Mapping
181
+
182
+ | TDD Concept | Skill Creation |
183
+ |-------------|----------------|
184
+ | **Test case** | Pressure scenario with subagent |
185
+ | **Production code** | Skill document (`SKILL.md`) |
186
+ | **Test fails (RED)** | Agent violates rule without skill (baseline) |
187
+ | **Test passes (GREEN)** | Agent complies with skill present |
188
+ | **Refactor** | Close loopholes while maintaining compliance |
189
+ | **Write test first** | Run baseline scenario BEFORE writing skill |
190
+ | **Watch it fail** | Document exact rationalizations agent uses |
191
+ | **Minimal code** | Write skill addressing those specific violations |
192
+ | **Watch it pass** | Verify agent now complies |
193
+ | **Refactor cycle** | Find new rationalizations → plug → re-verify |
194
+
195
+ #### The Iron Law
196
+ ```
197
+ NO SKILL WITHOUT A FAILING TEST FIRST
198
+ ```
199
+ This applies to NEW skills AND EDITS to existing skills. No exceptions.
200
+
201
+ #### RED Phase — Write Failing Test
202
+ Run a pressure scenario with a subagent WITHOUT the skill. Document exact behavior:
203
+ - What choices did they make?
204
+ - What rationalizations did they use (verbatim)?
205
+ - Which pressures triggered violations?
206
+
207
+ #### GREEN Phase — Write Minimal Skill
208
+ Write a skill that addresses those specific rationalizations. Do not add extra content for hypothetical cases. Run the same scenario WITH the skill. The agent should now comply.
209
+
210
+ #### REFACTOR Phase — Close Loopholes
211
+ Agent found a new rationalization? Add an explicit counter. Re-test until bulletproof.
212
+
213
+ ### 2. Claude Search Optimization (CSO)
214
+
215
+ **Critical for discovery — and for correct behavior.** The description field determines both *whether* a skill is loaded and *how* the agent uses it.
216
+
217
+ #### Description = When to Use, NOT What the Skill Does
218
+ When a description summarizes the skill's workflow, the agent may follow the description instead of reading the full skill content. The skill body becomes documentation the agent skips if the description gives away the process.
219
+
220
+ #### Bad vs. Good Descriptions
221
+ ```yaml
222
+ # ❌ BAD: Summarizes workflow
223
+ description: Use when executing plans - dispatches subagent per task with code review between tasks
224
+
225
+ # ❌ BAD: Too much process detail
226
+ description: Use for TDD - write test first, watch it fail, write minimal code, refactor
227
+
228
+ # ❌ BAD: Too abstract, vague
229
+ description: For async testing
230
+
231
+ # ❌ BAD: First person
232
+ description: I can help you with async tests when they're flaky
233
+
234
+ # ✅ GOOD: Just triggering conditions, no workflow summary
235
+ description: Use when executing implementation plans with independent tasks in the current session
236
+
237
+ # ✅ GOOD: Describes the problem
238
+ description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently
239
+ ```
240
+
241
+ **Rules:**
242
+ - Start with "Use when..." to focus on triggering conditions
243
+ - Describe the *problem*, not *language-specific symptoms*
244
+ - Keep triggers technology-agnostic unless the skill itself is technology-specific
245
+ - Write in third person
246
+ - **NEVER summarize the skill's process or workflow**
247
+
248
+ #### Keyword Coverage
249
+ Use words the agent would search for:
250
+ - **Error messages:** "Hook timed out", "ENOTEMPTY", "race condition"
251
+ - **Symptoms:** "flaky", "hanging", "zombie", "pollution"
252
+ - **Synonyms:** "timeout/hang/freeze", "cleanup/teardown/afterEach"
253
+ - **Tools:** Actual commands, library names, file types
254
+
255
+ ### 3. Bulletproofing Techniques
256
+
257
+ Skills that enforce discipline need to resist rationalization. Agents are smart and will find loopholes when under pressure.
258
+
259
+ #### Close Every Loophole Explicitly
260
+ Don't just state the rule — forbid specific workarounds:
261
+ ```markdown
262
+ <!-- ✅ Good -->
263
+ Write code before test? Delete it. Start over.
264
+ No exceptions:
265
+ - Don't keep it as "reference"
266
+ - Don't "adapt" it while writing tests
267
+ - Don't look at it
268
+ - Delete means delete
269
+ ```
270
+
271
+ #### Address "Spirit vs Letter" Arguments
272
+ Add a foundational principle early:
273
+ ```markdown
274
+ **Violating the letter of the rules is violating the spirit of the rules.**
275
+ ```
276
+
277
+ #### Build Rationalization Table
278
+ | Excuse | Reality |
279
+ |--------|---------|
280
+ | "Skill is obviously clear" | Clear to you ≠ clear to other agents. Test it. |
281
+ | "It's just a reference" | References can have gaps, unclear sections. Test retrieval. |
282
+ | "Testing is overkill" | Untested skills have issues. Always. 15 min testing saves hours. |
283
+ | "I'll test if problems emerge" | Problems = agents can't use skill. Test BEFORE deploying. |
284
+ | "Too tedious to test" | Testing is less tedious than debugging bad skill in production. |
285
+ | "I'm confident it's good" | Overconfidence guarantees issues. Test anyway. |
286
+ | "Academic review is enough" | Reading ≠ using. Test application scenarios. |
287
+ | "No time to test" | Deploying untested skill wastes more time fixing it later. |
288
+
289
+ **All of these mean: Test before deploying. No exceptions.**
290
+
291
+ #### Create Red Flags List
292
+ ```markdown
293
+ ## Red Flags — STOP and Start Over
294
+ - Code before test
295
+ - "I already manually tested it"
296
+ - "Tests after achieve the same purpose"
297
+ - "It's about spirit not ritual"
298
+ - "This is different because..."
299
+ ```
300
+
301
+ ### 4. Pressure Testing Methodology
302
+
303
+ Different skill types need different test approaches.
304
+
305
+ | Skill Type | Test Approach | Success Criteria |
306
+ |------------|---------------|-----------------|
307
+ | Discipline-Enforcing | Academic questions, pressure scenarios, multiple pressures combined | Agent follows rule under maximum pressure |
308
+ | Technique | Application scenarios, variation, missing information tests | Agent applies technique to new scenario |
309
+ | Pattern | Recognition scenarios, application, counter-examples | Agent correctly identifies when/how to apply |
310
+ | Reference | Retrieval scenarios, application, gap testing | Agent finds and applies reference information |
311
+
312
+ #### Combine 3+ Pressures
313
+ For discipline-enforcing skills, combine multiple pressures to find breaking points:
314
+
315
+ | Pressure Type | Description |
316
+ |---------------|-------------|
317
+ | **Time** | "This is urgent, just this once skip the rule" |
318
+ | **Sunk cost** | "I already wrote the code, starting over wastes work" |
319
+ | **Authority** | "The user asked me to do it this way" |
320
+ | **Exhaustion** | "After 10 tests, one shortcut won't matter" |
321
+ | **Social** | "Other agents skip this step, it's fine" |
322
+ | **Economic** | "Testing takes too many tokens" |
323
+
324
+ #### Meta-Testing
325
+ After the agent chooses wrong, ask: "How could the skill be written differently to prevent this?" Use the answer to improve the skill.
326
+
327
+ ### 5. Token Efficiency Targets
328
+
329
+ Every word in a skill costs context.
330
+
331
+ | Skill Type | Target |
332
+ |------------|--------|
333
+ | Getting-started workflows | **<150 words** each |
334
+ | Frequently-loaded skills | **<200 words** total |
335
+ | Other skills | **<500 words** |
336
+
337
+ #### Techniques
338
+ - **Move details to tool help** — Reference `--help` instead of documenting all flags
339
+ - **Use cross-references** — Reference other skills instead of repeating workflow
340
+ - **Compress examples** — Keep examples minimal
341
+ - **Eliminate redundancy** — Don't repeat what's in cross-referenced skills
342
+
343
+ ### 6. Persuasion Principles
344
+
345
+ Discipline-enforcing skills benefit from systematic persuasion mapping (based on Cialdini's principles):
346
+
347
+ | Principle | Application in Skills |
348
+ |-----------|----------------------|
349
+ | **Authority** | State "Required", "Mandatory", "The Iron Law" |
350
+ | **Commitment** | "Once you start, follow through. No shortcuts." |
351
+ | **Social Proof** | "Every agent follows this rule. No exceptions." |
352
+ | **Scarcity** | "You only get one chance to do this right." |
353
+ | **Liking** | "Your human partner trusts you to follow this." |
354
+ | **Unity** | "We follow quality processes here." |
355
+
356
+ ### 7. Flowchart Usage Guidance
357
+
358
+ Flowcharts are a precision tool. Use them only where they add clarity.
359
+
360
+ #### Use Flowcharts ONLY For
361
+ - Non-obvious decision points
362
+ - Process loops (where an agent might stop too early)
363
+ - "When to use A vs B" decisions
364
+
365
+ #### Never Use Flowcharts For
366
+ - Reference material → Use tables or lists
367
+ - Code examples → Use markdown code blocks
368
+ - Linear instructions → Use numbered lists
369
+ - Labels without semantic meaning → Every node label must explain the decision or action
@@ -1,107 +1,34 @@
1
1
  ---
2
2
  name: oh-skill-craft
3
- description: "Create new agent skills with proper structure, frontmatter, progressive disclosure, and bundled resources. Meta-skill for growing the harness."
3
+ description: "Use when a new OH skill needs to be created, existing skill needs review against standards, or an external capability should be integrated as a skill. Meta-skill for growing the harness."
4
4
  tier: 2
5
- benefits-from: [oh-expert]
6
- triggers:
7
- - "create a skill"
8
- - "write a skill"
9
- - "new skill"
10
- - "skill-craft"
11
- - "meta-skill"
12
- - "add a capability"
5
+ route:
6
+ pass:
7
+ - oh-skills-link
8
+ - oh-learn
9
+ fail: oh-expert
10
+ blocker: surface
13
11
  ---
14
12
 
15
13
  # oh-skill-craft
16
14
 
17
- Create new agent skills for the OpenHermes harness. Skills are the unit of progressive disclosure — loaded on demand, not preloaded.
15
+ Create new agent skills for the OpenHermes harness.
18
16
 
19
- ## Skill Structure
17
+ ## Steps
20
18
 
21
- ```
22
- harness/skills/<oh-name>/
23
- ├── SKILL.md # Main instructions (required)
24
- ├── REFERENCE.md # Detailed docs (if SKILL.md exceeds 100 lines)
25
- └── scripts/ # Utility scripts (if deterministic operations needed)
26
- ```
27
-
28
- ## SKILL.md Template
29
-
30
- ```markdown
31
- ---
32
- name: oh-<name>
33
- description: "Brief description. Use when [specific triggers]."
34
- tier: <2|3|4>
35
- benefits-from: [<skill-dependencies>]
36
- triggers:
37
- - "<trigger phrase>"
38
- - "<another trigger>"
39
- ---
40
-
41
- # oh-<name>
42
-
43
- <one-paragraph summary>
44
-
45
- ## When to Use
46
-
47
- <when to invoke this skill>
48
-
49
- ## Workflow
50
-
51
- 1. <step>
52
- 2. <step>
53
- 3. <step>
54
-
55
- ## Anti-patterns
56
-
57
- - <anti-pattern 1>
58
- - <anti-pattern 2>
59
- ```
60
-
61
- ## Description Requirements
62
-
63
- The description is the only thing the agent sees when deciding which skill to load. Make it actionable:
64
-
65
- **Good:** "Create new agent skills with proper structure, frontmrmatter, and bundled resources. Use when user wants to create, write, or build a new skill."
66
-
67
- **Bad:** "Helps with skills."
68
-
69
- ## Field Guide
70
-
71
- | Frontmatter Field | Required | Purpose |
72
- |---|---|---|
73
- | `name` | yes | Must match `^[a-z0-9]+(-[a-z0-9]+)*$` and directory name |
74
- | `description` | yes | Max 200 chars. First sentence = what it does. Second = when to use. |
75
- | `tier` | no | 2=tool, 3=strategic, 4=autonomous. Controls preamble verbosity. |
76
- | `benefits-from` | no | Skill dependencies. Listed skills should be loaded first. |
77
- | `triggers` | no | Natural language patterns that should route to this skill. |
78
-
79
- ## When to Add Scripts
80
- - Operation is deterministic (validation, formatting)
81
- - Same code would be generated repeatedly
82
- - Errors need explicit handling
83
-
84
- Scripts save tokens and improve reliability vs generated code.
85
-
86
- ## When to Split Files
87
- - SKILL.md exceeds 100 lines
88
- - Content has distinct domains
89
- - Advanced features are rarely used (put in REFERENCE.md)
90
-
91
- ## Review Checklist
92
-
93
- - [ ] Description includes triggers ("Use when...")
94
- - [ ] SKILL.md under 100 lines
95
- - [ ] No time-sensitive info (dates, versions, deprecation warnings)
96
- - [ ] Consistent oh- prefix and terminology
97
- - [ ] Concrete examples included
98
- - [ ] Anti-patterns documented
99
- - [ ] Tests still pass after adding (`npm test`)
19
+ 1. Create skill directory — `harness/skills/<oh-name>/` with `SKILL.md`. Follow template structure (frontmatter, summary, When to Use, Steps, Anti-patterns).
20
+ 2. Write frontmatter — name (regex `^[a-z0-9]+(-[a-z0-9]+)*$`), description (max 200 chars, "Use when..." triggers), tier (2/3/4), route (pass/fail/blocker).
21
+ 3. Draft skill body — When to Use, Steps (numbered), Anti-patterns with concrete examples, Routing table.
22
+ 4. Review against checklist — description includes triggers, SKILL.md under 100 lines, no time-sensitive info, tests pass, consistent oh- prefix.
23
+ 5. Run eval-driven iteration — create test cases, spawn with-skill vs baseline sub-agents, grade pass rates/timing/tokens, improve and generalize.
24
+ 6. Optimize description — create 20 eval queries (10 should-trigger, 10 should-not), iterate description against eval set, select best precision/recall.
25
+ 7. Close loopholes — build rationalization table, create red flags, apply bulletproofing techniques (forbid specific workarounds).
100
26
 
101
27
  ## Routing
102
28
 
103
29
  | Outcome | Route |
104
30
  |---------|-------|
105
- | pass | → oh-skills-link (verify skill discovery) |
106
- | fail | → oh-expert (diagnose skill creation issues) |
107
- | blocker | → surface to user |
31
+ | pass | → oh-skills-link (verify discovery) |
32
+ | iteration data | → oh-learn (extract patterns) |
33
+ | fail | → oh-expert (diagnose) |
34
+ | blocker | → surface |
@@ -0,0 +1,16 @@
1
+ # oh-skills-link — Deep Reference
2
+
3
+ ## When to Use
4
+
5
+ Use after installing or updating skills to verify that OpenCode discovers the package-local skills directory.
6
+
7
+ ## Anti-patterns
8
+
9
+ - Linking without verifying files exist
10
+ - Copying to global config during normal operation
11
+ - Overwriting user-modified skills without intent
12
+ - Linking broken/incomplete skills
13
+
14
+ ## Reference
15
+
16
+ **Example:** After running `npm update`, run `oh-skills-link`. It reads `harness/skills/`, confirms the config paths, and reports any missing or new skills.
@@ -1,29 +1,28 @@
1
1
  ---
2
2
  name: oh-skills-link
3
- description: "Verify that OpenCode can discover the package-local skills directory"
3
+ description: "Use after installing or updating skills to verify OpenCode discovers the package-local skills directory."
4
+ tier: 2
5
+ route:
6
+ pass: surface
7
+ fail: oh-skill-craft
8
+ blocker: surface
4
9
  ---
5
10
 
6
11
  # oh-skills-link
7
12
 
8
- ## When to Use
9
- After installing new skills or updating existing ones. Verifies that OpenCode can discover the package-local skills directory.
13
+ Verify OpenCode discovers the package-local skills directory after install/update.
10
14
 
11
- ## Workflow
12
- 1. Read skills from `harness/skills/`
13
- 2. Confirm `config.skills.paths` points at the package-local harness path
14
- 3. Skip unchanged skills when checking manifests
15
- 4. Log missing, invalid, or newly added skills
15
+ ## Steps
16
16
 
17
- ## Anti-patterns
18
- - Linking skills without verifying they exist in harness
19
- - Copying skills into global config during normal operation
20
- - Overwriting user-modified skills without explicit intent
21
- - Linking broken or incomplete skills
17
+ 1. Read `harness/skills/` directory listing
18
+ 2. Confirm `config.skills.paths` points at harness path
19
+ 3. Skip skills that are unchanged
20
+ 4. Log missing, invalid, or newly added skills
22
21
 
23
22
  ## Routing
24
23
 
25
24
  | Outcome | Route |
26
25
  |---------|-------|
27
- | pass | → [report link status to user] |
28
- | fail | → oh-skill-craft (fix or rebuild broken skill) |
29
- | blocker | → surface to user |
26
+ | pass | → surface |
27
+ | fail | → oh-skill-craft |
28
+ | blocker | → surface |
@@ -0,0 +1,20 @@
1
+ # oh-skills-list — Deep Reference
2
+
3
+ ## When to Use
4
+
5
+ User wants to see available skills. Lists all oh-* skills with tier and description.
6
+
7
+ ## Anti-patterns
8
+
9
+ - Filtering skills (show everything — let user decide)
10
+ - Including non-OH skills in the output
11
+
12
+ ## Reference
13
+
14
+ **Example:** User asks "what skills do you have?" → outputs a table of all oh-* skills with tier and purpose.
15
+
16
+ ### Output Format
17
+
18
+ | Skill | Tier | Purpose |
19
+ |-------|------|---------|
20
+ | oh-<name> | 2/3/4 | <description> |
@@ -1,31 +1,27 @@
1
1
  ---
2
2
  name: oh-skills-list
3
- description: "List all available oh-* skills with descriptions"
3
+ description: "Use when the user wants to see available OH skills. Lists all oh-* skills with descriptions."
4
+ tier: 2
5
+ route:
6
+ pass: done
7
+ fail: surface
8
+ blocker: surface
4
9
  ---
5
10
 
6
11
  # oh-skills-list
7
12
 
8
- ## When to Use
9
- To discover what skills are available. Lists every skill with its name, description, and category.
13
+ List all available oh-* skills with tier and description.
10
14
 
11
- ## Output
12
- Markdown table of skills:
15
+ ## Steps
13
16
 
14
- | Skill | Description | Category |
15
- |-------|-------------|----------|
16
- | oh-plan | Strategy + architecture review | Orchestration |
17
- | oh-qa | Full QA workflow | Quality |
18
- | ... | ... | ... |
19
-
20
- ## Anti-patterns
21
- - Listing "all available" but missing recently installed skills
22
- - Showing skill file paths instead of human-readable descriptions
23
- - Not categorising skills (flat list is hard to scan)
17
+ 1. Gather all oh-* skills from the harness
18
+ 2. Format as a table: Skill | Tier | Purpose
19
+ 3. Output the table to the user
24
20
 
25
21
  ## Routing
26
22
 
27
23
  | Outcome | Route |
28
24
  |---------|-------|
29
- | pass | → [done — read-only report] |
30
- | fail | → [surface issue to user] |
31
- | blocker | → surface to user |
25
+ | pass | → done |
26
+ | fail | → surface |
27
+ | blocker | → surface |
@@ -0,0 +1,23 @@
1
+ # oh-triage — Deep Reference
2
+
3
+ ## When to Use
4
+
5
+ Use for new issues or a backlog review. Drives each issue through the triage state machine below.
6
+
7
+ ## States
8
+
9
+ 1. Needs triage (new, unclassified)
10
+ 2. Needs info (waiting on reporter)
11
+ 3. Ready for agent (well-specified)
12
+ 4. Ready for human (needs judgment/access)
13
+ 5. Wontfix (declined with reason)
14
+
15
+ ## Anti-patterns
16
+
17
+ - Issues stuck in "needs triage"
18
+ - Triaging without reading full issue
19
+ - Wontfix without explanation
20
+
21
+ ## Reference
22
+
23
+ **Example:** New issues appear with the needs-triage label. Read the full issue, classify it as bug/feature/enhancement, assess severity, and assign a state (needs-info / ready-for-agent / ready-for-human / wontfix with a reason).