openhermes 4.1.0 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ETHOS.md +6 -3
- package/LICENSE +21 -21
- package/README.md +109 -79
- package/bootstrap.ts +214 -8
- package/harness/agents/openhermes.md +45 -55
- package/harness/codex/AUTOPILOT.md +126 -0
- package/harness/codex/CONSTITUTION.md +14 -11
- package/harness/codex/ROUTING.md +35 -70
- package/harness/commands/oh-log.md +18 -0
- package/harness/instructions/RUNTIME.md +27 -52
- package/harness/skills/oh-builder/SKILL.md +13 -8
- package/harness/skills/oh-caveman/SKILL.md +9 -0
- package/harness/skills/oh-expert/SKILL.md +6 -0
- package/harness/skills/oh-facade/SKILL.md +298 -0
- package/harness/skills/oh-freeze/SKILL.md +9 -0
- package/harness/skills/oh-full-output/SKILL.md +81 -0
- package/harness/skills/oh-fusion/SKILL.md +314 -0
- package/harness/skills/oh-gauntlet/SKILL.md +9 -5
- package/harness/skills/oh-grill/SKILL.md +9 -5
- package/harness/skills/oh-guard/SKILL.md +9 -0
- package/harness/skills/oh-handoff/SKILL.md +9 -0
- package/harness/skills/oh-health/SKILL.md +8 -4
- package/harness/skills/oh-init/SKILL.md +28 -94
- package/harness/skills/oh-investigate/SKILL.md +10 -0
- package/harness/skills/oh-issue/SKILL.md +9 -0
- package/harness/skills/oh-learn/SKILL.md +13 -4
- package/harness/skills/oh-manifest/SKILL.md +15 -10
- package/harness/skills/oh-plan-review/SKILL.md +15 -8
- package/harness/skills/oh-planner/SKILL.md +18 -8
- package/harness/skills/oh-prd/SKILL.md +9 -0
- package/harness/skills/oh-refactor/SKILL.md +426 -0
- package/harness/skills/oh-retro/SKILL.md +9 -0
- package/harness/skills/oh-review/SKILL.md +11 -4
- package/harness/skills/oh-security/SKILL.md +4 -0
- package/harness/skills/oh-ship/SKILL.md +10 -0
- package/harness/skills/oh-skill-craft/SKILL.md +88 -0
- package/harness/skills/oh-skills-link/SKILL.md +9 -0
- package/harness/skills/oh-skills-list/SKILL.md +9 -0
- package/harness/skills/oh-triage/SKILL.md +11 -0
- package/lib/harness-resolver.ts +2 -2
- package/lib/logger.ts +7 -1
- package/package.json +6 -3
|
@@ -4,12 +4,19 @@ description: "Two-axis code and design review: Standards (conformance) + Spec (f
|
|
|
4
4
|
tier: 3
|
|
5
5
|
benefits-from: [oh-expert]
|
|
6
6
|
triggers:
|
|
7
|
-
- "review"
|
|
8
|
-
- "code
|
|
9
|
-
- "review
|
|
10
|
-
- "review changes"
|
|
7
|
+
- "code review please"
|
|
8
|
+
- "review the code"
|
|
9
|
+
- "review the PR"
|
|
10
|
+
- "review changes since"
|
|
11
11
|
- "pr review"
|
|
12
12
|
- "design review"
|
|
13
|
+
- "review this code"
|
|
14
|
+
route:
|
|
15
|
+
pass:
|
|
16
|
+
- oh-gauntlet
|
|
17
|
+
- oh-ship
|
|
18
|
+
fail: oh-builder
|
|
19
|
+
blocker: surface
|
|
13
20
|
---
|
|
14
21
|
|
|
15
22
|
# oh-review
|
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: oh-ship
|
|
3
3
|
description: "Deploy and PR pipeline — test, bump, changelog, PR, deploy, verify"
|
|
4
|
+
tier: 4
|
|
5
|
+
triggers:
|
|
6
|
+
- "ship this"
|
|
7
|
+
- "create a PR"
|
|
8
|
+
- "version bump"
|
|
9
|
+
- "publish"
|
|
10
|
+
route:
|
|
11
|
+
pass: oh-retro
|
|
12
|
+
fail: oh-expert
|
|
13
|
+
blocker: surface
|
|
4
14
|
---
|
|
5
15
|
|
|
6
16
|
# oh-ship
|
|
@@ -10,6 +10,10 @@ triggers:
|
|
|
10
10
|
- "skill-craft"
|
|
11
11
|
- "meta-skill"
|
|
12
12
|
- "add a capability"
|
|
13
|
+
route:
|
|
14
|
+
pass: oh-skills-link
|
|
15
|
+
fail: oh-expert
|
|
16
|
+
blocker: surface
|
|
13
17
|
---
|
|
14
18
|
|
|
15
19
|
# oh-skill-craft
|
|
@@ -83,6 +87,10 @@ The description is the only thing the agent sees when deciding which skill to lo
|
|
|
83
87
|
|
|
84
88
|
Scripts save tokens and improve reliability vs generated code.
|
|
85
89
|
|
|
90
|
+
## Output Location
|
|
91
|
+
|
|
92
|
+
Skills created with oh-skill-craft should be written to `~/.config/opencode/skills/` (or `~/.agents/skills/` if the user prefers). Built-in skills live in the package's `harness/skills/` directory and are replaced on every npm update. User-written skills in `~/.config/opencode/skills/` survive updates and are auto-discovered on every session. On a name conflict with a built-in skill, the user's version wins.
|
|
93
|
+
|
|
86
94
|
## When to Split Files
|
|
87
95
|
- SKILL.md exceeds 100 lines
|
|
88
96
|
- Content has distinct domains
|
|
@@ -98,10 +106,90 @@ Scripts save tokens and improve reliability vs generated code.
|
|
|
98
106
|
- [ ] Anti-patterns documented
|
|
99
107
|
- [ ] Tests still pass after adding (`npm test`)
|
|
100
108
|
|
|
109
|
+
## Eval-Driven Iteration
|
|
110
|
+
|
|
111
|
+
After writing the initial skill draft, iterate using test cases and evidence rather than guessing.
|
|
112
|
+
|
|
113
|
+
### 1. Create Test Cases
|
|
114
|
+
|
|
115
|
+
Come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Save to `evals/evals.json`:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"skill_name": "oh-<name>",
|
|
120
|
+
"evals": [
|
|
121
|
+
{
|
|
122
|
+
"id": 1,
|
|
123
|
+
"prompt": "User's realistic task prompt",
|
|
124
|
+
"expected_output": "Description of expected result",
|
|
125
|
+
"files": []
|
|
126
|
+
}
|
|
127
|
+
]
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Good test prompts are substantive multi-step tasks — not simple queries like "read this file." The model can handle simple tasks without a skill. Complex, multi-step, or specialized queries reveal whether the skill is pulling its weight.
|
|
132
|
+
|
|
133
|
+
### 2. Spawn Runs
|
|
134
|
+
|
|
135
|
+
For each test case, spawn two subagents in parallel:
|
|
136
|
+
- **With-skill run** — load the skill, execute the task
|
|
137
|
+
- **Baseline run** — same prompt without the skill (for new skills) or with the previous version (for improvements)
|
|
138
|
+
|
|
139
|
+
Save outputs to `iteration-<N>/eval-<ID>/with_skill/outputs/` and `iteration-<N>/eval-<ID>/without_skill/outputs/`.
|
|
140
|
+
|
|
141
|
+
### 3. Draft Assertions
|
|
142
|
+
|
|
143
|
+
While runs execute, draft objectively verifiable assertions for each test case. Good assertions have descriptive names and can be checked programmatically where possible. Update `evals/evals.json` with the assertions.
|
|
144
|
+
|
|
145
|
+
### 4. Grade and Compare
|
|
146
|
+
|
|
147
|
+
Grade runs against assertions. Aggregate results into pass rates, timing, and token usage. Look for:
|
|
148
|
+
- Assertions that always pass regardless of skill (non-discriminating — remove them)
|
|
149
|
+
- High-variance evals (possibly flaky tests)
|
|
150
|
+
- Time/token tradeoffs between skill and baseline
|
|
151
|
+
|
|
152
|
+
### 5. Improve
|
|
153
|
+
|
|
154
|
+
Based on results, revise the skill. Generalize from specific failures rather than overfitting to the test cases. The goal is a skill that works across a million different prompts, not just 2-3 examples. Keep instructions lean — remove anything not pulling its weight.
|
|
155
|
+
|
|
156
|
+
### 6. Loop
|
|
157
|
+
|
|
158
|
+
Rerun all test cases, saving the outputs under a new `iteration-<N+1>/` directory. Repeat until:
|
|
159
|
+
- User says they're happy
|
|
160
|
+
- All feedback is positive
|
|
161
|
+
- No meaningful progress between iterations
|
|
162
|
+
|
|
163
|
+
## Description Optimization
|
|
164
|
+
|
|
165
|
+
The description field in frontmatter is the primary mechanism for skill triggering. After the skill is solid, optimize the description for accuracy.
|
|
166
|
+
|
|
167
|
+
### Trigger Eval Queries
|
|
168
|
+
|
|
169
|
+
Create about 20 eval queries (16-20 in total) — a mix of should-trigger and should-not-trigger cases:
|
|
170
|
+
|
|
171
|
+
```json
|
|
172
|
+
[
|
|
173
|
+
{"query": "realistic user prompt that should trigger", "should_trigger": true},
|
|
174
|
+
{"query": "near-miss prompt that should NOT trigger", "should_trigger": false}
|
|
175
|
+
]
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Key principles:
|
|
179
|
+
- **Should-trigger** (8-10): different phrasings of the same intent — formal, casual. Include edge cases and contexts where this skill competes with another but should win.
|
|
180
|
+
- **Should-not-trigger** (8-10): near-misses that share keywords but need a different skill. Avoid obviously irrelevant queries — the hard cases are the adjacent ones.
|
|
181
|
+
|
|
182
|
+
Queries must be realistic — what a user would actually type, with concrete details, not abstract descriptions.
|
|
183
|
+
|
|
184
|
+
### Run Optimization
|
|
185
|
+
|
|
186
|
+
Iterate on the description: test the current version, propose improvements based on the failures, and re-test. Select the description that scores best on held-out test data (eval queries that were not used while iterating). Apply the winner to the skill's frontmatter.
|
|
187
|
+
|
|
101
188
|
## Routing
|
|
102
189
|
|
|
103
190
|
| Outcome | Route |
|
|
104
191
|
|---------|-------|
|
|
105
192
|
| pass | → oh-skills-link (verify skill discovery) |
|
|
193
|
+
| iteration data available | → oh-learn (extract patterns from eval results) |
|
|
106
194
|
| fail | → oh-expert (diagnose skill creation issues) |
|
|
107
195
|
| blocker | → surface to user |
|
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: oh-skills-link
|
|
3
3
|
description: "Verify that OpenCode can discover the package-local skills directory"
|
|
4
|
+
tier: 2
|
|
5
|
+
triggers:
|
|
6
|
+
- "verify skills"
|
|
7
|
+
- "check skill discovery"
|
|
8
|
+
- "link skills"
|
|
9
|
+
route:
|
|
10
|
+
pass: surface
|
|
11
|
+
fail: oh-skill-craft
|
|
12
|
+
blocker: surface
|
|
4
13
|
---
|
|
5
14
|
|
|
6
15
|
# oh-skills-link
|
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: oh-skills-list
|
|
3
3
|
description: "List all available oh-* skills with descriptions"
|
|
4
|
+
tier: 2
|
|
5
|
+
triggers:
|
|
6
|
+
- "list skills"
|
|
7
|
+
- "show skills"
|
|
8
|
+
- "what skills"
|
|
9
|
+
route:
|
|
10
|
+
pass: done
|
|
11
|
+
fail: surface
|
|
12
|
+
blocker: surface
|
|
4
13
|
---
|
|
5
14
|
|
|
6
15
|
# oh-skills-list
|
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: oh-triage
|
|
3
3
|
description: "Issue triage state machine — classify, prioritise, assign"
|
|
4
|
+
tier: 2
|
|
5
|
+
triggers:
|
|
6
|
+
- "triage this issue"
|
|
7
|
+
- "classify this issue"
|
|
8
|
+
- "triage the backlog"
|
|
9
|
+
route:
|
|
10
|
+
pass:
|
|
11
|
+
- oh-issue
|
|
12
|
+
- oh-handoff
|
|
13
|
+
fail: oh-expert
|
|
14
|
+
blocker: surface
|
|
4
15
|
---
|
|
5
16
|
|
|
6
17
|
# oh-triage
|
package/lib/harness-resolver.ts
CHANGED
|
@@ -8,10 +8,10 @@ import { fileURLToPath } from "node:url"
|
|
|
8
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
|
9
9
|
const PKG_DIR = path.resolve(__dirname, "..")
|
|
10
10
|
|
|
11
|
-
const REQUIRED_HARNESS_FILES: ReadonlyArray<
|
|
11
|
+
const REQUIRED_HARNESS_FILES: ReadonlyArray<readonly string[]> = [
|
|
12
12
|
["codex", "CONSTITUTION.md"],
|
|
13
13
|
["instructions", "RUNTIME.md"],
|
|
14
|
-
["skills", "oh-
|
|
14
|
+
["skills", "oh-planner", "SKILL.md"],
|
|
15
15
|
]
|
|
16
16
|
|
|
17
17
|
function ancestorDirs(start: string, limit = 6): string[] {
|
package/lib/logger.ts
CHANGED
|
@@ -10,7 +10,13 @@ export interface Logger {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
const LEVELS: Record<string, number> = { debug: 0, info: 1, warn: 2, error: 3 }
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
function resolveLevel(levelName: string | undefined): number | undefined {
|
|
15
|
+
if (!levelName) return undefined
|
|
16
|
+
return LEVELS[levelName as keyof typeof LEVELS]
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const CURRENT_LEVEL = resolveLevel(process.env.OPENCODE_LOG_LEVEL?.trim().toLowerCase()) ?? (process.env.OPENHERMES_LOG_LEVEL?.trim().toLowerCase() === "debug" ? LEVELS.debug : LEVELS.warn)
|
|
14
20
|
|
|
15
21
|
const LOG_DIR = path.join(os.homedir(), ".local", "share", "opencode", "log")
|
|
16
22
|
const LOG_FILE = path.join(LOG_DIR, "openhermes.log")
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openhermes",
|
|
3
|
-
"version": "4.1.0",
|
|
4
|
-
"description": "OpenCode-native skills, commands, and
|
|
3
|
+
"version": "4.3.0",
|
|
4
|
+
"description": "OpenCode-native orchestration for packaged skills, commands, and agents.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"engines": {
|
|
@@ -48,5 +48,8 @@
|
|
|
48
48
|
"url": "https://github.com/nathwn12/openhermes/issues"
|
|
49
49
|
},
|
|
50
50
|
"homepage": "https://github.com/nathwn12/openhermes#readme",
|
|
51
|
-
"author": "nathwn12"
|
|
51
|
+
"author": "nathwn12",
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@types/node": "^25.8.0"
|
|
54
|
+
}
|
|
52
55
|
}
|