openhermes 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/ETHOS.md +6 -3
  2. package/LICENSE +21 -21
  3. package/README.md +109 -79
  4. package/bootstrap.ts +214 -8
  5. package/harness/agents/openhermes.md +45 -55
  6. package/harness/codex/AUTOPILOT.md +126 -0
  7. package/harness/codex/CONSTITUTION.md +14 -11
  8. package/harness/codex/ROUTING.md +35 -70
  9. package/harness/commands/oh-log.md +18 -0
  10. package/harness/instructions/RUNTIME.md +27 -52
  11. package/harness/skills/oh-builder/SKILL.md +13 -8
  12. package/harness/skills/oh-caveman/SKILL.md +9 -0
  13. package/harness/skills/oh-expert/SKILL.md +6 -0
  14. package/harness/skills/oh-facade/SKILL.md +298 -0
  15. package/harness/skills/oh-freeze/SKILL.md +9 -0
  16. package/harness/skills/oh-full-output/SKILL.md +81 -0
  17. package/harness/skills/oh-fusion/SKILL.md +314 -0
  18. package/harness/skills/oh-gauntlet/SKILL.md +9 -5
  19. package/harness/skills/oh-grill/SKILL.md +9 -5
  20. package/harness/skills/oh-guard/SKILL.md +9 -0
  21. package/harness/skills/oh-handoff/SKILL.md +9 -0
  22. package/harness/skills/oh-health/SKILL.md +8 -4
  23. package/harness/skills/oh-init/SKILL.md +28 -94
  24. package/harness/skills/oh-investigate/SKILL.md +10 -0
  25. package/harness/skills/oh-issue/SKILL.md +9 -0
  26. package/harness/skills/oh-learn/SKILL.md +13 -4
  27. package/harness/skills/oh-manifest/SKILL.md +15 -10
  28. package/harness/skills/oh-plan-review/SKILL.md +15 -8
  29. package/harness/skills/oh-planner/SKILL.md +18 -8
  30. package/harness/skills/oh-prd/SKILL.md +9 -0
  31. package/harness/skills/oh-refactor/SKILL.md +426 -0
  32. package/harness/skills/oh-retro/SKILL.md +9 -0
  33. package/harness/skills/oh-review/SKILL.md +11 -4
  34. package/harness/skills/oh-security/SKILL.md +4 -0
  35. package/harness/skills/oh-ship/SKILL.md +10 -0
  36. package/harness/skills/oh-skill-craft/SKILL.md +88 -0
  37. package/harness/skills/oh-skills-link/SKILL.md +9 -0
  38. package/harness/skills/oh-skills-list/SKILL.md +9 -0
  39. package/harness/skills/oh-triage/SKILL.md +11 -0
  40. package/lib/harness-resolver.ts +2 -2
  41. package/lib/logger.ts +7 -1
  42. package/package.json +6 -3
@@ -4,12 +4,19 @@ description: "Two-axis code and design review: Standards (conformance) + Spec (f
4
4
  tier: 3
5
5
  benefits-from: [oh-expert]
6
6
  triggers:
7
- - "review"
8
- - "code review"
9
- - "review since"
10
- - "review changes"
7
+ - "code review please"
8
+ - "review the code"
9
+ - "review the PR"
10
+ - "review changes since"
11
11
  - "pr review"
12
12
  - "design review"
13
+ - "review this code"
14
+ route:
15
+ pass:
16
+ - oh-gauntlet
17
+ - oh-ship
18
+ fail: oh-builder
19
+ blocker: surface
13
20
  ---
14
21
 
15
22
  # oh-review
@@ -11,6 +11,10 @@ triggers:
11
11
  - "pentest"
12
12
  - "security review"
13
13
  - "cso"
14
+ route:
15
+ pass: surface
16
+ fail: oh-investigate
17
+ blocker: surface
14
18
  ---
15
19
 
16
20
  # oh-security
@@ -1,6 +1,16 @@
1
1
  ---
2
2
  name: oh-ship
3
3
  description: "Deploy and PR pipeline — test, bump, changelog, PR, deploy, verify"
4
+ tier: 4
5
+ triggers:
6
+ - "ship this"
7
+ - "create a PR"
8
+ - "version bump"
9
+ - "publish"
10
+ route:
11
+ pass: oh-retro
12
+ fail: oh-expert
13
+ blocker: surface
4
14
  ---
5
15
 
6
16
  # oh-ship
@@ -10,6 +10,10 @@ triggers:
10
10
  - "skill-craft"
11
11
  - "meta-skill"
12
12
  - "add a capability"
13
+ route:
14
+ pass: oh-skills-link
15
+ fail: oh-expert
16
+ blocker: surface
13
17
  ---
14
18
 
15
19
  # oh-skill-craft
@@ -83,6 +87,10 @@ The description is the only thing the agent sees when deciding which skill to lo
83
87
 
84
88
  Scripts save tokens and improve reliability vs generated code.
85
89
 
90
+ ## Output Location
91
+
92
+ Skills created with oh-skill-craft should be written to `~/.config/opencode/skills/` (or `~/.agents/skills/` if the user prefers). Built-in skills live in the package `harness/skills/` and get replaced on npm update. User-written skills in `~/.config/opencode/skills/` survive updates and are auto-discovered on every session. On name conflict with a built-in skill, the user version wins.
93
+
86
94
  ## When to Split Files
87
95
  - SKILL.md exceeds 100 lines
88
96
  - Content has distinct domains
@@ -98,10 +106,90 @@ Scripts save tokens and improve reliability vs generated code.
98
106
  - [ ] Anti-patterns documented
99
107
  - [ ] Tests still pass after adding (`npm test`)
100
108
 
109
+ ## Eval-Driven Iteration
110
+
111
+ After writing the initial skill draft, iterate using test cases and evidence rather than guessing.
112
+
113
+ ### 1. Create Test Cases
114
+
115
+ Come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Save to `evals/evals.json`:
116
+
117
+ ```json
118
+ {
119
+ "skill_name": "oh-<name>",
120
+ "evals": [
121
+ {
122
+ "id": 1,
123
+ "prompt": "User's realistic task prompt",
124
+ "expected_output": "Description of expected result",
125
+ "files": []
126
+ }
127
+ ]
128
+ }
129
+ ```
130
+
131
+ Good test prompts are substantive multi-step tasks — not simple queries like "read this file." The model can handle simple tasks without a skill. Complex, multi-step, or specialized queries reveal whether the skill is pulling its weight.
132
+
133
+ ### 2. Spawn Runs
134
+
135
+ For each test case, spawn two subagents in parallel:
136
+ - **With-skill run** — load the skill, execute the task
137
+ - **Baseline run** — same prompt without the skill (for new skills) or with the previous version (for improvements)
138
+
139
+ Save outputs to `iteration-<N>/eval-<ID>/with_skill/outputs/` and `iteration-<N>/eval-<ID>/without_skill/outputs/`.
140
+
141
+ ### 3. Draft Assertions
142
+
143
+ While runs execute, draft objectively verifiable assertions for each test case. Good assertions have descriptive names and can be checked programmatically where possible. Update `evals/evals.json` with the assertions.
144
+
145
+ ### 4. Grade and Compare
146
+
147
+ Grade runs against assertions. Aggregate results into pass rates, timing, and token usage. Look for:
148
+ - Assertions that always pass regardless of skill (non-discriminating — remove them)
149
+ - High-variance evals (possibly flaky tests)
150
+ - Time/token tradeoffs between skill and baseline
151
+
152
+ ### 5. Improve
153
+
154
+ Based on results, revise the skill. Generalize from specific failures rather than overfitting to the test cases. The goal is a skill that works across a million different prompts, not just 2-3 examples. Keep instructions lean — remove anything not pulling its weight.
155
+
156
+ ### 6. Loop
157
+
158
+ Rerun all test cases into a new iteration directory. Repeat until:
159
+ - User says they're happy
160
+ - All feedback is positive
161
+ - No meaningful progress between iterations
162
+
163
+ ## Description Optimization
164
+
165
+ The description field in frontmatter is the primary mechanism for skill triggering. After the skill is solid, optimize the description for accuracy.
166
+
167
+ ### Trigger Eval Queries
168
+
169
+ Create 20 eval queries — a mix of should-trigger and should-not-trigger cases:
170
+
171
+ ```json
172
+ [
173
+ {"query": "realistic user prompt that should trigger", "should_trigger": true},
174
+ {"query": "near-miss prompt that should NOT trigger", "should_trigger": false}
175
+ ]
176
+ ```
177
+
178
+ Key principles:
179
+ - **Should-trigger** (8-10): different phrasings of the same intent — formal, casual. Include edge cases and contexts where this skill competes with another but should win.
180
+ - **Should-not-trigger** (8-10): near-misses that share keywords but need a different skill. Avoid obviously irrelevant queries — the hard cases are the adjacent ones.
181
+
182
+ Queries must be realistic — what a user would actually type, with concrete details, not abstract descriptions.
183
+
184
+ ### Run Optimization
185
+
186
+ Iterate the description: test current, propose improvements based on failures, re-test. Select the description that scores best on held-out test data. Apply the winner to the skill's frontmatter.
187
+
101
188
  ## Routing
102
189
 
103
190
  | Outcome | Route |
104
191
  |---------|-------|
105
192
  | pass | → oh-skills-link (verify skill discovery) |
193
+ | iteration data available | → oh-learn (extract patterns from eval results) |
106
194
  | fail | → oh-expert (diagnose skill creation issues) |
107
195
  | blocker | → surface to user |
@@ -1,6 +1,15 @@
1
1
  ---
2
2
  name: oh-skills-link
3
3
  description: "Verify that OpenCode can discover the package-local skills directory"
4
+ tier: 2
5
+ triggers:
6
+ - "verify skills"
7
+ - "check skill discovery"
8
+ - "link skills"
9
+ route:
10
+ pass: surface
11
+ fail: oh-skill-craft
12
+ blocker: surface
4
13
  ---
5
14
 
6
15
  # oh-skills-link
@@ -1,6 +1,15 @@
1
1
  ---
2
2
  name: oh-skills-list
3
3
  description: "List all available oh-* skills with descriptions"
4
+ tier: 2
5
+ triggers:
6
+ - "list skills"
7
+ - "show skills"
8
+ - "what skills"
9
+ route:
10
+ pass: done
11
+ fail: surface
12
+ blocker: surface
4
13
  ---
5
14
 
6
15
  # oh-skills-list
@@ -1,6 +1,17 @@
1
1
  ---
2
2
  name: oh-triage
3
3
  description: "Issue triage state machine — classify, prioritise, assign"
4
+ tier: 2
5
+ triggers:
6
+ - "triage this issue"
7
+ - "classify this issue"
8
+ - "triage the backlog"
9
+ route:
10
+ pass:
11
+ - oh-issue
12
+ - oh-handoff
13
+ fail: oh-expert
14
+ blocker: surface
4
15
  ---
5
16
 
6
17
  # oh-triage
@@ -8,10 +8,10 @@ import { fileURLToPath } from "node:url"
8
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url))
9
9
  const PKG_DIR = path.resolve(__dirname, "..")
10
10
 
11
- const REQUIRED_HARNESS_FILES: ReadonlyArray<[string, string, string]> = [
11
+ const REQUIRED_HARNESS_FILES: ReadonlyArray<readonly string[]> = [
12
12
  ["codex", "CONSTITUTION.md"],
13
13
  ["instructions", "RUNTIME.md"],
14
- ["skills", "oh-plan", "SKILL.md"],
14
+ ["skills", "oh-planner", "SKILL.md"],
15
15
  ]
16
16
 
17
17
  function ancestorDirs(start: string, limit = 6): string[] {
package/lib/logger.ts CHANGED
@@ -10,7 +10,13 @@ export interface Logger {
10
10
  }
11
11
 
12
12
  const LEVELS: Record<string, number> = { debug: 0, info: 1, warn: 2, error: 3 }
13
- const CURRENT_LEVEL = LEVELS[process.env.OPENCODE_LOG_LEVEL?.trim().toLowerCase()] ?? (process.env.OPENHERMES_LOG_LEVEL?.trim().toLowerCase() === "debug" ? LEVELS.debug : LEVELS.warn)
13
+
14
+ function resolveLevel(levelName: string | undefined): number | undefined {
15
+ if (!levelName) return undefined
16
+ return LEVELS[levelName as keyof typeof LEVELS]
17
+ }
18
+
19
+ const CURRENT_LEVEL = resolveLevel(process.env.OPENCODE_LOG_LEVEL?.trim().toLowerCase()) ?? (process.env.OPENHERMES_LOG_LEVEL?.trim().toLowerCase() === "debug" ? LEVELS.debug : LEVELS.warn)
14
20
 
15
21
  const LOG_DIR = path.join(os.homedir(), ".local", "share", "opencode", "log")
16
22
  const LOG_FILE = path.join(LOG_DIR, "openhermes.log")
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "openhermes",
3
- "version": "4.1.0",
4
- "description": "OpenCode-native skills, commands, and rules orchestration for OpenHermes.",
3
+ "version": "4.3.0",
4
+ "description": "OpenCode-native orchestration for packaged skills, commands, and agents.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "engines": {
@@ -48,5 +48,8 @@
48
48
  "url": "https://github.com/nathwn12/openhermes/issues"
49
49
  },
50
50
  "homepage": "https://github.com/nathwn12/openhermes#readme",
51
- "author": "nathwn12"
51
+ "author": "nathwn12",
52
+ "devDependencies": {
53
+ "@types/node": "^25.8.0"
54
+ }
52
55
  }