openhermes 2.8.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/CONTEXT.md +18 -0
  2. package/ETHOS.md +15 -0
  3. package/README.md +135 -292
  4. package/bootstrap.mjs +174 -512
  5. package/harness/agents/openhermes.md +87 -0
  6. package/harness/codex/CONSTITUTION.md +70 -148
  7. package/harness/codex/ROUTING.md +126 -0
  8. package/harness/commands/oh-doctor.md +26 -0
  9. package/harness/instructions/CONVENTIONS.md +206 -206
  10. package/harness/instructions/RUNTIME.md +54 -31
  11. package/harness/skills/oh-builder/SKILL.md +98 -0
  12. package/harness/skills/oh-caveman/SKILL.md +33 -0
  13. package/harness/skills/oh-expert/SKILL.md +121 -0
  14. package/harness/skills/oh-freeze/SKILL.md +28 -0
  15. package/harness/skills/oh-gauntlet/SKILL.md +119 -0
  16. package/harness/skills/oh-grill/SKILL.md +77 -0
  17. package/harness/skills/oh-guard/SKILL.md +33 -0
  18. package/harness/skills/oh-handoff/SKILL.md +33 -0
  19. package/harness/skills/oh-health/SKILL.md +90 -0
  20. package/harness/skills/oh-init/SKILL.md +78 -0
  21. package/harness/skills/oh-investigate/SKILL.md +35 -0
  22. package/harness/skills/oh-issue/SKILL.md +36 -0
  23. package/harness/skills/oh-learn/SKILL.md +28 -0
  24. package/harness/skills/oh-manifest/SKILL.md +84 -0
  25. package/harness/skills/oh-plan-review/SKILL.md +128 -0
  26. package/harness/skills/oh-planner/SKILL.md +159 -0
  27. package/harness/skills/oh-prd/SKILL.md +35 -0
  28. package/harness/skills/oh-retro/SKILL.md +33 -0
  29. package/harness/skills/oh-review/SKILL.md +110 -0
  30. package/harness/skills/oh-security/SKILL.md +110 -0
  31. package/harness/skills/oh-ship/SKILL.md +39 -0
  32. package/harness/skills/oh-skill-craft/SKILL.md +107 -0
  33. package/harness/skills/oh-skills-link/SKILL.md +29 -0
  34. package/harness/skills/oh-skills-list/SKILL.md +31 -0
  35. package/harness/skills/oh-triage/SKILL.md +36 -0
  36. package/index.mjs +3 -60
  37. package/lib/harness-resolver.mjs +77 -0
  38. package/lib/logger.mjs +62 -0
  39. package/package.json +49 -53
  40. package/test/plugins-behavioral.test.mjs +64 -0
  41. package/test/plugins.test.mjs +62 -0
  42. package/autorecall.mjs +0 -237
  43. package/curator.mjs +0 -482
  44. package/harness/commands/build-fix.md +0 -60
  45. package/harness/commands/checkpoint.md +0 -68
  46. package/harness/commands/code-review.md +0 -71
  47. package/harness/commands/doctor.md +0 -42
  48. package/harness/commands/eval.md +0 -89
  49. package/harness/commands/go-build.md +0 -87
  50. package/harness/commands/go-review.md +0 -71
  51. package/harness/commands/harness-audit.md +0 -90
  52. package/harness/commands/learn.md +0 -37
  53. package/harness/commands/loop-start.md +0 -38
  54. package/harness/commands/loop-status.md +0 -30
  55. package/harness/commands/memory-search.md +0 -37
  56. package/harness/commands/model-route.md +0 -32
  57. package/harness/commands/ohc.md +0 -13
  58. package/harness/commands/orchestrate.md +0 -88
  59. package/harness/commands/plan.md +0 -53
  60. package/harness/commands/quality-gate.md +0 -35
  61. package/harness/commands/refactor-clean.md +0 -102
  62. package/harness/commands/rust-build.md +0 -78
  63. package/harness/commands/rust-review.md +0 -65
  64. package/harness/commands/security.md +0 -93
  65. package/harness/commands/setup-pm.md +0 -65
  66. package/harness/commands/skill-create.md +0 -99
  67. package/harness/commands/test-coverage.md +0 -80
  68. package/harness/commands/update-codemaps.md +0 -81
  69. package/harness/commands/update-docs.md +0 -67
  70. package/harness/commands/verify.md +0 -68
  71. package/harness/prompts/architect.txt +0 -189
  72. package/harness/prompts/build-cpp.md +0 -98
  73. package/harness/prompts/build-error-resolver.md +0 -44
  74. package/harness/prompts/build-go.md +0 -340
  75. package/harness/prompts/build-java.md +0 -140
  76. package/harness/prompts/build-kotlin.md +0 -137
  77. package/harness/prompts/build-rust.md +0 -108
  78. package/harness/prompts/code-reviewer.md +0 -40
  79. package/harness/prompts/doc-updater.md +0 -206
  80. package/harness/prompts/docs-lookup.md +0 -71
  81. package/harness/prompts/e2e-runner.txt +0 -317
  82. package/harness/prompts/explore.md +0 -42
  83. package/harness/prompts/harness-optimizer.md +0 -42
  84. package/harness/prompts/loop-operator.md +0 -53
  85. package/harness/prompts/planner.md +0 -37
  86. package/harness/prompts/refactor-cleaner.md +0 -256
  87. package/harness/prompts/review-cpp.md +0 -81
  88. package/harness/prompts/review-database.md +0 -261
  89. package/harness/prompts/review-go.md +0 -257
  90. package/harness/prompts/review-java.md +0 -113
  91. package/harness/prompts/review-kotlin.md +0 -143
  92. package/harness/prompts/review-python.md +0 -101
  93. package/harness/prompts/review-rust.md +0 -77
  94. package/harness/prompts/security-reviewer.md +0 -42
  95. package/harness/prompts/tdd-guide.md +0 -228
  96. package/harness/rules/audit.md +0 -84
  97. package/harness/rules/checkpointing.md +0 -75
  98. package/harness/rules/context-loading.md +0 -33
  99. package/harness/rules/credential-exposure.md +0 -0
  100. package/harness/rules/delegation.md +0 -80
  101. package/harness/rules/handoff.md +0 -267
  102. package/harness/rules/memory-management.md +0 -28
  103. package/harness/rules/precedence.md +0 -52
  104. package/harness/rules/promotion.md +0 -46
  105. package/harness/rules/ranking.md +0 -64
  106. package/harness/rules/retrieval.md +0 -94
  107. package/harness/rules/runtime-guards.md +0 -196
  108. package/harness/rules/self-heal.md +0 -79
  109. package/harness/rules/session-start.md +0 -34
  110. package/harness/rules/skills-management.md +0 -165
  111. package/harness/rules/state-drift.md +0 -192
  112. package/harness/rules/verification.md +0 -88
  113. package/harness/scripts/sync-commands.mjs +0 -259
  114. package/harness/skills/.bundled_manifest +0 -17
  115. package/harness/skills/.usage.json +0 -6
  116. package/harness/skills/api-design/SKILL.md +0 -523
  117. package/harness/skills/backend-patterns/SKILL.md +0 -598
  118. package/harness/skills/coding-standards/SKILL.md +0 -549
  119. package/harness/skills/e2e-testing/SKILL.md +0 -326
  120. package/harness/skills/frontend-patterns/SKILL.md +0 -642
  121. package/harness/skills/frontend-slides/SKILL.md +0 -184
  122. package/harness/skills/security-review/SKILL.md +0 -495
  123. package/harness/skills/strategic-compact/SKILL.md +0 -131
  124. package/harness/skills/tdd-workflow/SKILL.md +0 -463
  125. package/harness/skills/verification-loop/SKILL.md +0 -126
  126. package/lib/ambient-memory.mjs +0 -167
  127. package/lib/handoff.mjs +0 -171
  128. package/lib/hardening.mjs +0 -146
  129. package/lib/memory-tools-plugin.mjs +0 -368
  130. package/lib/ohc/block-sync.mjs +0 -69
  131. package/lib/ohc/compress/search.mjs +0 -152
  132. package/lib/ohc/compress/state.mjs +0 -76
  133. package/lib/ohc/config.mjs +0 -185
  134. package/lib/ohc/message-ids.mjs +0 -178
  135. package/lib/ohc/notify.mjs +0 -135
  136. package/lib/ohc/protected-patterns.mjs +0 -55
  137. package/lib/ohc/prune-apply.mjs +0 -134
  138. package/lib/ohc/pruner.mjs +0 -608
  139. package/lib/ohc/reaper.mjs +0 -70
  140. package/lib/ohc/state.mjs +0 -265
  141. package/lib/ohc/strategies/deduplication.mjs +0 -72
  142. package/lib/ohc/strategies/index.mjs +0 -2
  143. package/lib/ohc/strategies/purge-errors.mjs +0 -43
  144. package/lib/ohc/token-utils.mjs +0 -26
  145. package/lib/ohc/updater.mjs +0 -132
  146. package/lib/paths.mjs +0 -49
  147. package/lib/schema-validator.mjs +0 -79
  148. package/lib/search.mjs +0 -48
  149. package/schemas/audit.schema.json +0 -82
  150. package/schemas/backlog.schema.json +0 -63
  151. package/schemas/checkpoint.schema.json +0 -65
  152. package/schemas/constraint.schema.json +0 -62
  153. package/schemas/decision.schema.json +0 -63
  154. package/schemas/instinct.schema.json +0 -63
  155. package/schemas/loop-state.schema.json +0 -33
  156. package/schemas/mistake.schema.json +0 -64
  157. package/schemas/verification_receipt.schema.json +0 -88
  158. package/skill-builder.mjs +0 -88
@@ -0,0 +1,121 @@
1
+ ---
2
+ name: oh-expert
3
+ description: "AI-expert built-in: shared vocabulary for self-diagnosis, failure modes, attention dynamics, and working patterns"
4
+ tier: 2
5
+ triggers:
6
+ - "why did you get that wrong"
7
+ - "diagnose yourself"
8
+ - "are you sure"
9
+ - "stop agreeing with me"
10
+ - "sycophancy"
11
+ - "hallucination"
12
+ - "attention"
13
+ - "smart zone"
14
+ ---
15
+
16
+ # oh-expert
17
+
18
+ Shared AI-coding vocabulary for agent self-diagnosis. Every failure mode maps to a specific cause and fix. Use this vocabulary precisely — vague terms ("hallucination" alone, "wrong") have no diagnostic value.
19
+
20
+ ## Failure Modes
21
+
22
+ ### Sycophancy
23
+ Confidently agreeable output. The model was trained to favor answers humans liked — agreement is rewarded even when wrong.
24
+
25
+ **Surfaces as:**
26
+ - Caving under pushback — reverses a correct answer when you say "are you sure?"
27
+ - Praising bad input — agrees a broken plan is brilliant before analyzing it
28
+ - Biased framing — review skews positive when you signal authorship
29
+ - Mimicry — repeats your mistakes back as confirmation
30
+
31
+ **Diagnostic test:** Would I have said this without the user's steer? If only tone/framing changed, it is sycophancy.
32
+
33
+ **Fix:** Hide your preferences. Re-ask neutrally — "review this code" not "is this code good?"
34
+
35
+ ### Hallucination (two flavors)
36
+ Confidently-wrong output. Two flavors with different causes and fixes:
37
+
38
+ - **Factuality hallucination** — invented/wrong facts (fake function, wrong API, fake citation). Caused by parametric knowledge gaps. Fix: load contextual knowledge (read docs, read the file).
39
+
40
+ - **Faithfulness hallucination** — output drifts from loaded context, user instructions, or own prior reasoning. Symptom of attention degradation. Fix: clear or compact.
41
+
42
+ **Avoid:** "Hallucination" as bare synonym for "wrong" — without naming the flavor the term has no diagnostic value.
43
+
44
+ ### Attention Degradation
45
+ As a session grows, each token's attention budget spreads across more competitors. Signal on meaningful relationships shrinks; noise from irrelevant context crowds in.
46
+
47
+ **Surfaces as:** The smart zone → dumb zone drift. Inventing generics not in the type file. Ignoring schema pasted at the top.
48
+
49
+ **Fix:** Clear and reload. Do NOT add more docs — the problem is not missing information, it is buried signal.
50
+
51
+ ### Smart Zone / Dumb Zone
52
+ Early in a session the agent is sharp and focused (smart zone). As the session grows, it drifts into a dumb zone: sloppier, forgetful, more faithfulness hallucinations.
53
+
54
+ **Threshold:** On frontier models, the dumb zone commonly begins around 100k tokens.
55
+
56
+ **Self-diagnosis cue:** "It nailed the first three components and butchered the fourth" = out of smart zone.
57
+
58
+ **Fix:** Clear or compact. Do not push through.
59
+
60
+ ### Non-determinism
61
+ Same input can produce different output. A property of how models generate text. No setting to disable it.
62
+
63
+ **Self-diagnosis cue:** "The model has been awful today" → probably not a worse version, just the distribution. Try again tomorrow.
64
+
65
+ **Avoid:** Over-narrativizing. A string of bad runs is not proof something changed.
66
+
67
+ ### Knowledge Cutoff
68
+ The date past which a model has no parametric knowledge. Post-cutoff libraries/APIs are fabrication traps.
69
+
70
+ **Self-diagnosis cue:** "It keeps writing v3 SDK syntax — we are on v5." → v5 shipped after cutoff. Load current docs.
71
+
72
+ ## Working Patterns
73
+
74
+ ### Progressive Disclosure
75
+ Everything in AGENTS.md costs tokens on every turn. Put infrequently used instructions behind context pointers (skills).
76
+
77
+ ### Handoff
78
+ Transferring context from one session to another with no return path. Use when: planning session is getting heavy, role switching, kicking off AFK runs. Always write a structured artifact.
79
+
80
+ ### Compaction
81
+ A handoff done in-memory: previous session is summarized and seeds a fresh session. Lossy — detail traded for headroom. Compact manually to control what is kept.
82
+
83
+ ### Subagent
84
+ An agent spawned by another agent via tool call. Runs in own session with own context window. Reports a single result back. Cannot spawn further subagents (one level deep). Use to isolate context.
85
+
86
+ ### Skill vs Tool
87
+ - **Skill**: instructions the agent reads (loaded on demand)
88
+ - **Tool**: function the agent calls (always available)
89
+ Do not confuse them.
90
+
91
+ ## Diagnostic Map
92
+
93
+ | Symptom | Likely Cause | First Move |
94
+ |---|---|---|
95
+ | Reverses answer under pushback | Sycophancy | Re-ask neutrally, hide preference |
96
+ | Invents things that contradict the loaded doc | Faithfulness hallucination / attention degradation | Clear or compact |
97
+ | Invents things not in any doc | Factuality hallucination / parametric gap | Load relevant docs |
98
+ | Sharp early, sloppy late | Smart zone → dumb zone drift | Compact, do not push through |
99
+ | Different results same input | Non-determinism (normal) | Try again |
100
+ | Writes old API syntax | Knowledge cutoff | Load current docs |
101
+ | Agrees with bad ideas | Sycophancy | Phrase prompts neutrally |
102
+ | Ignores context at top of window | Attention budget exhausted | Clear or move critical context closer |
103
+
104
+ ## Avoid These Terms (imprecise or wrong)
105
+
106
+ | Instead of | Use |
107
+ |---|---|
108
+ | "Hallucination" alone | Factuality hallucination or faithfulness hallucination |
109
+ | "Sycophancy" for any pleasing wrong answer | Only when diagnostic test confirms |
110
+ | "Tool" for a skill | Skill = instructions read; Tool = function called |
111
+ | "Memory" (for context window) | Context window |
112
+ | "Working memory" | Contextual knowledge |
113
+ | "Background agent" | AFK |
114
+
115
+ ## Routing
116
+
117
+ | Outcome | Route |
118
+ |---------|-------|
119
+ | pass | → oh-builder (implement fix) or oh-gauntlet (re-test) |
120
+ | fail | → oh-expert (re-diagnose — load fresh context) |
121
+ | blocker | → surface to user |
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: oh-freeze
3
+ description: "Restrict file edits to a specific directory for the session"
4
+ ---
5
+
6
+ # oh-freeze
7
+
8
+ ## When to Use
9
+ When debugging a specific module and you want to prevent accidentally "fixing" unrelated code. Scopes all Edit/Write operations to one directory.
10
+
11
+ ## Workflow
12
+ 1. Specify the one directory where edits stay allowed (everything outside it is frozen)
13
+ 2. All Edit/Write operations outside that directory are blocked
14
+ 3. User can explicitly approve cross-boundary edits
15
+ 4. Unfreeze to release the boundary
16
+
17
+ ## Anti-patterns
18
+ - Freezing too broadly (defeats the purpose)
19
+ - Forgetting to unfreeze when task scope expands
20
+ - Using freeze as a substitute for git discipline
21
+
22
+ ## Routing
23
+
24
+ | Outcome | Route |
25
+ |---------|-------|
26
+ | pass | → [return to prior skill — scope lock active] |
27
+ | fail | → [surface issue — freeze not applied] |
28
+ | blocker | → surface to user |
@@ -0,0 +1,119 @@
1
+ ---
2
+ name: oh-gauntlet
3
+ description: "Rigorous multi-axis testing gauntlet: unit, integration, edge cases, dual-axis review. Loops until done or blocker."
4
+ tier: 4
5
+ benefits-from: [oh-expert, oh-builder]
6
+ triggers:
7
+ - "gauntlet"
8
+ - "test everything"
9
+ - "rigorous testing"
10
+ - "review all angles"
11
+ - "qa"
12
+ - "full review"
13
+ - "run the gauntlet"
14
+ - "validate"
15
+ ---
16
+
17
+ # oh-gauntlet
18
+
19
+ Runs the current build through a multi-axis gauntlet: tests, edge cases, standards review, spec review. Spawns parallel sub-agents for independent axes. Loops until everything passes or a blocker is surfaced.
20
+
21
+ ## Gauntlet Stages
22
+
23
+ Each stage runs independently (parallel where possible). A stage that fails loops: fix → re-run → verify → pass or blocker.
24
+
25
+ ### Stage 1: Test Suite
26
+ Run all existing tests. Check both that they pass and that they actually test the right things:
27
+ - **Unit tests** — do they pass? Are they testing behavior or implementation?
28
+ - **Integration tests** — do the real code paths work end-to-end?
29
+ - **Edge case coverage** — empty states, error states, boundary conditions, concurrency
30
+
31
+ If tests are missing or weak, flag what should be added. Do not add them here — surface as finding.
32
+
33
+ ### Stage 2: Dual-Axis Review (parallel sub-agents)
34
+
35
+ Spawn two sub-agents simultaneously:
36
+
37
+ **Standards sub-agent:** Read the repo's documented standards (CONTEXT.md, AGENTS.md, eslint config, ADRs, STYLE.md, CONVENTIONS.md). Then read the diff. Report every place the diff violates a documented standard. Cite the standard source. Distinguish hard violations from judgement calls.
38
+
39
+ **Spec sub-agent:** Read the spec source (plan.md, issue, PRD, or user's description). Then read the diff. Report: (a) requirements that are missing or partial, (b) scope creep (behavior not asked for), (c) requirements that look implemented but wrong. Quote the spec.
40
+
41
+ Report both axes independently — do not merge or rank. A change can pass one and fail the other.
42
+
43
+ ### Stage 3: Edge Case Sweep
44
+ Systematic edge case analysis for the changed code:
45
+ - Error states — what happens when inputs are invalid, files are missing, network fails?
46
+ - Concurrency — race conditions, deadlocks, stale state
47
+ - Security — injection, auth bypass, data leakage, permission escalation
48
+ - Performance — N+1 queries, unbounded loops, memory leaks, unnecessary allocations
49
+ - State transitions — invalid state transitions, partial updates, rollback gaps
50
+
51
+ For each finding: severity (critical/major/minor), location, reproduction path.
52
+
53
+ ### Stage 4: QA Sweep (tiered)
54
+ Systematic testing with iterative fix-verify cycles. Choose tier based on risk:
55
+
56
+ - **Quick** — critical/high severity flows only
57
+ - **Standard** — critical, high, and medium severity, full edge case sweep
58
+ - **Exhaustive** — all of the above + cosmetic, edge cases, cross-browser
59
+
60
+ 1. Execute tests against each user flow, edge case, error state
61
+ 2. Log findings with severity, reproduction steps, evidence
62
+ 3. Fix highest-severity first, commit each fix atomically
63
+ 4. Re-verify after each fix — confirm fix, check for regressions
64
+ 5. Produce health scores (before/after)
65
+
66
+ ### Stage 5: Canary (post-deploy)
67
+ If deploying to production:
68
+
69
+ 1. **Set baseline** — capture pre-deploy screenshots and metrics
70
+ 2. **Deploy check** — verify deploy completed successfully
71
+ 3. **Canary run** — navigate key user flows, capture screenshots, log console errors
72
+ 4. **Compare** — diff against pre-deploy baselines
73
+ 5. **Alert** — surface anomalies, performance regressions, new errors
74
+ 6. **Recovery** — if critical issues found, suggest rollback
75
+
76
+ Output: health status, screenshots (before/after), error log, performance diff, ship/no-go verdict.
77
+
78
+ ### Stage 6: Manual Verification Checklist
79
+ Based on the plan's verification criteria or spec:
80
+ - [ ] Happy path works end-to-end
81
+ - [ ] Error path degrades gracefully
82
+ - [ ] No regression in adjacent areas
83
+ - [ ] Logging/monitoring covers failure modes
84
+ - [ ] Documentation matches behavior (if applicable)
85
+
86
+ ## Loop Protocol
87
+
88
+ 1. Run all 6 stages (skip Stage 5 if not deploying)
89
+ 2. Collect findings by severity
90
+ 3. If 0 criticals and 0 majors → DONE
91
+ 4. If criticals or majors exist → fix highest severity first
92
+ 5. After fix → re-run affected stages only
93
+ 6. If fix is impossible within scope → surface BLOCKER
94
+
95
+ ## Blocker Protocol
96
+
97
+ ```
98
+ BLOCKER: <what failed>
99
+ Context: <what was attempted, why it cannot proceed>
100
+ Options:
101
+ A: <scope reduction>
102
+ B: <alternative approach>
103
+ C: <dependency change>
104
+ ```
105
+
106
+ ## Anti-patterns
107
+ - Running stages sequentially when they can be parallel (Standards and Spec reviews are independent)
108
+ - Mixing Standards and Spec findings (keep axes separate — one can pass while the other fails)
109
+ - Skipping edge case sweep because tests pass (tests confirm behavior, not absence of edge cases)
110
+ - Ignoring minors because no criticals exist (accumulated minors signal design debt)
111
+ - Pushing through critical failures without surfacing blocker
112
+
113
+ ## Routing
114
+
115
+ | Outcome | Route |
116
+ |---------|-------|
117
+ | pass | → oh-ship (all checks pass) |
118
+ | fail | → oh-builder (fix issues found) |
119
+ | blocker | → surface to user |
@@ -0,0 +1,77 @@
1
+ ---
2
+ name: oh-grill
3
+ description: "Stress-test plans and designs through relentless Socratic questioning. Sharpens assumptions, flags blind spots, updates domain docs."
4
+ tier: 3
5
+ benefits-from: [oh-expert, oh-planner]
6
+ triggers:
7
+ - "grill"
8
+ - "stress test this plan"
9
+ - "challenge this"
10
+ - "grill me"
11
+ - "poke holes"
12
+ - "interrogate"
13
+ ---
14
+
15
+ # oh-grill
16
+
17
+ Stress-tests plans and designs through relentless Socratic questioning. Two modes: plain interrogation (quick) or interrogation plus domain-doc updates (thorough).
18
+
19
+ ## When to Use
20
+ Before committing to a plan or design. When the user says "it is writing exactly what I asked for and it is still wrong" — the design concept is not shared yet. Cheaper to resolve in conversation than in code.
21
+
22
+ ## Modes
23
+
24
+ ### Mode A: Grill (quick)
25
+ Challenge the plan without touching files.
26
+
27
+ 1. Read the plan or design doc
28
+ 2. Interview the user one decision at a time — each answer reveals new branches
29
+ 3. Resolve each branch before moving on
30
+ 4. Surface: contradictions, blind spots, unstated assumptions, ambiguous terms
31
+ 5. Propose a recommended answer for each decision
32
+ 6. Output: verified, stress-tested plan with flagged ambiguities
33
+
34
+ ### Mode B: Grill with Docs (thorough)
35
+ Same as Mode A, but persists decisions to CONTEXT.md and ADRs, and extracts a DDD ubiquitous-language glossary.
36
+
37
+ 1. Load existing CONTEXT.md and ADRs
38
+ 2. Grill through the decision tree — each resolved decision may:
39
+ - Update CONTEXT.md domain terms (sharpen fuzzy language)
40
+ - Create a new ADR for architectural decisions
41
+ - Flag an ambiguity in the domain glossary
42
+ 3. **Ubiquitous Language extraction** — after the decision tree resolves, scan the conversation for domain-relevant nouns, verbs, and concepts:
43
+ - Identify problems: same word for different concepts (ambiguity), different words for same concept (synonyms), vague or overloaded terms
44
+ - Propose a canonical glossary with grouped tables (by subdomain, lifecycle, or actor)
45
+ - Write an example dialogue (3-5 exchanges) between dev and domain expert showing natural term usage
46
+ - Write flagged ambiguities section
47
+ 4. Persist changes to CONTEXT.md immediately as language firms up
48
+ 5. Output: updated CONTEXT.md + new ADRs + UBIQUITOUS_LANGUAGE.md (if significant terms emerged) + verified plan with resolution trail
49
+
50
+ ## Technique
51
+
52
+ - Ask one question at a time
53
+ - Propose a recommended answer for each decision
54
+ - Walk the full decision tree before accepting the design
55
+ - Reference domain glossary from CONTEXT.md when terms are ambiguous
56
+ - Cross-reference with existing ADRs when architecture is at stake
57
+
58
+ ## When NOT to Use
59
+ - When you already have a clear, vetted plan and need execution
60
+ - When the user needs a builder, not a critic
61
+ - For trivial decisions that don't change the design's shape
62
+
63
+ ## Anti-patterns
64
+ - Grilling for the sake of grilling (redundant with existing reviews)
65
+ - Asking questions you could answer by reading the plan or codebase
66
+ - Creating ADRs for trivial decisions (not every choice is architecture)
67
+ - Polishing CONTEXT.md prose before concepts are settled
68
+ - Rewriting domain terms while a question is still open — update CONTEXT.md only once a term has firmed up
69
+ - Not distinguishing between "must resolve now" vs "figure out later"
70
+
71
+ ## Routing
72
+
73
+ | Outcome | Route |
74
+ |---------|-------|
75
+ | pass | → oh-planner (revise plan based on feedback) |
76
+ | fail | → oh-expert (resolve confusion or blind spot) |
77
+ | blocker | → surface to user |
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: oh-guard
3
+ description: "Safety confirmation mode — warn before destructive operations"
4
+ ---
5
+
6
+ # oh-guard
7
+
8
+ ## When to Use
9
+ When touching production, running destructive commands, or working in shared environments. Combines warning prompts with directory-scoped edit locks.
10
+
11
+ ## Workflow
12
+ 1. **Enable guard mode** — set safety level (careful / freeze / full guard)
13
+ 2. **Destructive command warnings** — intercept rm -rf, DROP TABLE, force-push, git reset --hard, kubectl delete
14
+ 3. **Directory scope lock** — restrict file edits to specified directory (freeze)
15
+ 4. **User override** — user can approve or deny each operation
16
+
17
+ ## Modes
18
+ - **Careful** — warn before destructive commands
19
+ - **Freeze** — restrict edits to one directory
20
+ - **Guard** — both careful + freeze
21
+
22
+ ## Anti-patterns
23
+ - Disabling guard because "I know what I'm doing" (narrator: they didn't)
24
+ - Running prod commands outside guard mode
25
+ - Ignoring warnings about irreversible operations
26
+
27
+ ## Routing
28
+
29
+ | Outcome | Route |
30
+ |---------|-------|
31
+ | pass | → [return to prior skill — guard mode active] |
32
+ | fail | → [surface warning — operation denied] |
33
+ | blocker | → surface to user |
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: oh-handoff
3
+ description: "Compact session state into a structured handoff document"
4
+ ---
5
+
6
+ # oh-handoff
7
+
8
+ ## When to Use
9
+ When switching contexts, ending a session, or passing work to another agent or developer. Produces a compact summary that captures everything needed to resume.
10
+
11
+ ## Handoff Document Structure
12
+ - **Context** — what were we doing?
13
+ - **State** — what's done, what's pending, what's blocked?
14
+ - **Decisions** — key decisions made this session
15
+ - **Artifacts** — files changed, created, or referenced
16
+ - **Next steps** — ordered list of what to do next
17
+ - **Risks** — things to be aware of
18
+
19
+ ## Output
20
+ A `HANDOFF.md` or structured text block with all resume-relevant information.
21
+
22
+ ## Anti-patterns
23
+ - Writing a novel (handoff should be scannable in 30 seconds)
24
+ - Omitting decisions (why we chose X over Y is critical context)
25
+ - No next steps ("figure it out" is not a handoff)
26
+
27
+ ## Routing
28
+
29
+ | Outcome | Route |
30
+ |---------|-------|
31
+ | pass | → [end of session — intentional terminal] |
32
+ | fail | → [surface blocker — handoff incomplete] |
33
+ | blocker | → surface to user |
@@ -0,0 +1,90 @@
1
+ ---
2
+ name: oh-health
3
+ description: "Code quality dashboard: runs project tools (typecheck, lint, test, dead code detection), computes weighted composite 0-10 score, persists history, shows trend. Read-only — no fixes."
4
+ tier: 2
5
+ triggers:
6
+ - "health check"
7
+ - "code quality"
8
+ - "quality dashboard"
9
+ - "how healthy is the codebase"
10
+ - "run all checks"
11
+ - "health"
12
+ ---
13
+
14
+ # oh-health
15
+
16
+ Staff Engineer who owns the CI dashboard. Runs every available project tool, scores results 0-10, computes weighted composite, persists history for trend tracking. Read-only — the user decides what to act on.
17
+
18
+ ## Process
19
+
20
+ ### Step 1: Detect Health Stack
21
+ Auto-detect available tools:
22
+ - **Type checker** — `tsc --noEmit` (tsconfig.json present), `mypy` (pyproject.toml), or none
23
+ - **Linter** — biome, eslint, ruff/pylint, or none
24
+ - **Test runner** — from package.json scripts, pytest, cargo test, go test
25
+ - **Dead code** — knip, or none
26
+ - **Shell lint** — shellcheck for .sh files
27
+
28
+ Present detected tools. Optionally persist to `AGENTS.md` (or `CLAUDE.md` if it exists) as a `## Health Stack` section for future runs.
29
+
30
+ ### Step 2: Run Tools
31
+ Run each tool sequentially (some share resources). Capture exit code + output summary for each.
32
+
33
+ ### Step 3: Score Each Category
34
+
35
+ | Category | Weight | 10 | 7 | 4 | 0 |
36
+ |---|---|---|---|---|---|
37
+ | Type check | 22% | Clean | <10 errors | <50 errors | 50+ |
38
+ | Lint | 18% | Clean | <5 warnings | <20 warnings | 20+ |
39
+ | Tests | 28% | All pass | >95% pass | >80% pass | <=80% |
40
+ | Dead code | 13% | Clean | <5 unused | <20 unused | 20+ |
41
+ | Shell lint | 9% | Clean | <5 issues | 5+ issues | N/A |
42
+ | Framework | 10% | Native default | Config override | Manual | Unmanaged |
43
+
44
+ Skip unavailable categories and redistribute weight proportionally among remaining.
45
+
46
+ ### Step 4: Present Dashboard
47
+
48
+ ```
49
+ CODE HEALTH DASHBOARD
50
+ ═════════════════════
51
+ Project: <name>
52
+ Branch: <branch>
53
+ Date: <date>
54
+
55
+ Category Score Status Details
56
+ ────────── ───── ──────── ───────
57
+ Type check 10/10 CLEAN 0 errors
58
+ Lint 8/10 WARNING 3 warnings
59
+ Tests 10/10 CLEAN 47/47 passed
60
+ Dead code 7/10 WARNING 4 unused exports
61
+
62
+ COMPOSITE: 9.1 / 10
63
+ ```
64
+
65
+ Status labels: 10=CLEAN, 7-9=WARNING, 4-6=NEEDS WORK, 0-3=CRITICAL.
66
+
67
+ ### Step 5: Persist History
68
+ Append one JSONL line to `.opencode/health-history.jsonl`:
69
+ ```json
70
+ {"ts":"2026-05-14T14:30:00Z","branch":"main","score":9.1,"typecheck":10,"lint":8,"test":10,"deadcode":7,"duration_s":23}
71
+ ```
72
+
73
+ ### Step 6: Trend Analysis + Recommendations
74
+ Read last 10 history entries. Show trend table. For regressions, identify declining categories and specific errors. Rank improvement suggestions by impact (weight × score deficit).
75
+
76
+ ## Rules
77
+
78
+ - **Read-only.** No fixes. Dashboard and recommendations only.
79
+ - **Wrap, don't replace.** Run the project's own tools. Never substitute your own analysis.
80
+ - **Skipped is not failed.** Tool not installed → skip gracefully, redistribute weight.
81
+ - **Show raw output for failures.** Include tool output so user can act without re-running.
82
+ - **Trends require history.** First run: "No trend data yet. Run again after changes to track progress."
83
+
84
+ ## Routing
85
+
86
+ | Outcome | Route |
87
+ |---------|-------|
88
+ | pass | → [report score to user] |
89
+ | fail | → oh-investigate (deepen on degraded metrics) |
90
+ | blocker | → surface to user |
@@ -0,0 +1,78 @@
1
+ ---
2
+ name: oh-init
3
+ description: "Initialize project for agent-assisted development: scaffold CONTEXT.md, AGENTS.md, docs/adr/, configure issue tracker and triage labels."
4
+ tier: 2
5
+ triggers:
6
+ - "init project"
7
+ - "setup project"
8
+ - "initialize"
9
+ - "onboard"
10
+ - "scaffold"
11
+ ---
12
+
13
+ # oh-init
14
+
15
+ Per-repo setup for agent-assisted development. Run once per repo. Walks through configuration decisions one at a time.
16
+
17
+ ## Process
18
+
19
+ ### 1. Issue Tracker
20
+ Detect the git hosting platform and select the matching issue tracker:
21
+ - **GitHub** — `gh` CLI
22
+ - **GitLab** — `glab` CLI
23
+ - **Local markdown** — files under `.scratch/<feature>/`
24
+ - **Other** — freeform workflow description
25
+
26
+ Confirm with the user. Write the result to `docs/agents/issue-tracker.md`.
27
+
28
+ ### 2. Triage Labels
29
+ The `triage` skill uses these label strings to move issues through a state machine:
30
+ - `needs-triage` — maintainer needs to evaluate
31
+ - `needs-info` — waiting on reporter
32
+ - `ready-for-agent` — fully specified, AFK-ready
33
+ - `ready-for-human` — needs human implementation
34
+ - `wontfix` — will not be actioned
35
+
36
+ If the repo already uses different label names, map them to these states. Write the mapping to `docs/agents/triage-labels.md`.
37
+
38
+ ### 3. Domain Docs
39
+ Configure how the project organizes domain language:
40
+ - **Single-context** — one `CONTEXT.md` + `docs/adr/` at repo root
41
+ - **Multi-context** — `CONTEXT-MAP.md` pointing to per-context files
42
+
43
+ Scaffold `CONTEXT.md` with project name, domain description, and placeholder glossary terms. Create `docs/adr/` directory with ADR template.
44
+
45
+ Write the chosen layout to `docs/agents/domain.md`.
46
+
47
+ ### 4. Agent Skills Block
48
+ Add a `## Agent skills` section to `AGENTS.md` (or `CLAUDE.md` if it exists):
49
+
50
+ ```markdown
51
+ ## Agent skills
52
+
53
+ ### Issue tracker
54
+ <summary>. See docs/agents/issue-tracker.md.
55
+
56
+ ### Triage labels
57
+ <summary>. See docs/agents/triage-labels.md.
58
+
59
+ ### Domain docs
60
+ <summary>. See docs/agents/domain.md.
61
+ ```
62
+
63
+ ### 5. Decision Record
64
+ Record: "oh-init completed for project \<name\> on \<date\>."
65
+
66
+ ## Anti-patterns
67
+ - Running init without understanding the project domain
68
+ - Scaffolding CONTEXT.md without populating any terms
69
+ - Creating ADR directory but never writing ADRs
70
+ - Creating both AGENTS.md and CLAUDE.md — edit the one that exists
71
+
72
+ ## Routing
73
+
74
+ | Outcome | Route |
75
+ |---------|-------|
76
+ | pass | → [done — one-time project setup] |
77
+ | fail | → [retry with user corrections] |
78
+ | blocker | → surface to user |
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: oh-investigate
3
+ description: "Systematic bug diagnosis with root cause investigation"
4
+ ---
5
+
6
+ # oh-investigate
7
+
8
+ ## When to Use
9
+ When a bug is reported, a test fails, or unexpected behavior occurs. Use this before attempting any fix.
10
+
11
+ ## Workflow
12
+ 1. **Reproduce** — get a reliable reproduction case (script, test, or steps)
13
+ 2. **Minimise** — strip away unrelated code until the minimal reproduction remains
14
+ 3. **Hypothesise** — list possible root causes, rank by likelihood
15
+ 4. **Instrument** — add logging, assertions, or debug output to test each hypothesis
16
+ 5. **Fix** — implement the smallest correct change addressing root cause
17
+ 6. **Regression test** — verify the fix doesn't break existing behavior
18
+ 7. **Document** — log the root cause and fix in the handoff, issue, or docs that are actually in scope
19
+
20
+ ## Iron Law
21
+ No fixes without root cause. Surface-level fixes compound into technical debt.
22
+
23
+ ## Anti-patterns
24
+ - Fixing symptoms instead of causes (the same bug reappears next week)
25
+ - Changing code without reproducing the bug first
26
+ - "Shotgun" debugging — changing multiple things hoping one sticks
27
+ - Not documenting root cause for future reference
28
+
29
+ ## Routing
30
+
31
+ | Outcome | Route |
32
+ |---------|-------|
33
+ | pass | → oh-builder (implement the fix) |
34
+ | fail | → oh-expert (deepen diagnosis) |
35
+ | blocker | → surface to user |
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: oh-issue
3
+ description: "Break a plan, spec, or PRD into independently-grabbable GitHub issues"
4
+ ---
5
+
6
+ # oh-issue
7
+
8
+ ## When to Use
9
+ When a plan exists and needs to be broken into actionable issues. Uses tracer-bullet vertical slices for independent work items.
10
+
11
+ ## Workflow
12
+ 1. Read the plan or PRD
13
+ 2. Identify vertical slices — self-contained features that ship independently
14
+ 3. Write each issue with: clear title, acceptance criteria, implementation notes, dependencies
15
+ 4. Publish each issue to the project's issue tracker (e.g. `gh issue create` on GitHub)
16
+ 5. Assign appropriate labels (type, priority, area) and a milestone to each issue
17
+
18
+ ## Issue Structure
19
+ - **Title**: action-oriented ("Add user authentication API")
20
+ - **Acceptance criteria**: concrete, testable ("User can sign up with email + password")
21
+ - **Implementation notes**: pointers for the implementer
22
+ - **Dependencies**: what must be done first
23
+ - **Labels**: type, priority, area
24
+
25
+ ## Anti-patterns
26
+ - Horizontal slicing (DB layer / API layer / UI layer — no one ships a layer)
27
+ - Issues too large (3+ days) or too small (< 1 hour)
28
+ - Writing issues without acceptance criteria
29
+
30
+ ## Routing
31
+
32
+ | Outcome | Route |
33
+ |---------|-------|
34
+ | pass | → [done — issues published to tracker] |
35
+ | fail | → oh-planner (re-spec unclear slices) |
36
+ | blocker | → surface to user |