openhermes 4.1.0 → 4.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/CONTEXT.md +9 -0
  2. package/ETHOS.md +6 -3
  3. package/LICENSE +21 -21
  4. package/README.md +120 -79
  5. package/bootstrap.ts +284 -41
  6. package/harness/agents/oh-browser.md +97 -0
  7. package/harness/agents/oh-builder.md +78 -0
  8. package/harness/agents/oh-facade.md +75 -0
  9. package/harness/agents/oh-fusion.md +45 -0
  10. package/harness/agents/oh-gauntlet.md +71 -0
  11. package/harness/agents/oh-grill.md +71 -0
  12. package/harness/agents/oh-investigate.md +60 -0
  13. package/harness/agents/oh-manifest.md +95 -0
  14. package/harness/agents/oh-plan-review.md +40 -0
  15. package/harness/agents/oh-planner.md +50 -0
  16. package/harness/agents/oh-refactor.md +37 -0
  17. package/harness/agents/oh-retro.md +46 -0
  18. package/harness/agents/oh-review.md +85 -0
  19. package/harness/agents/oh-security.md +83 -0
  20. package/harness/agents/oh-ship.md +76 -0
  21. package/harness/agents/oh-skill-craft.md +38 -0
  22. package/harness/agents/openhermes.md +106 -62
  23. package/harness/codex/AUTOPILOT.md +178 -0
  24. package/harness/codex/CHARTER.md +81 -0
  25. package/harness/commands/oh-doctor.md +193 -14
  26. package/harness/commands/oh-log.md +18 -0
  27. package/harness/instructions/SHELL.md +76 -0
  28. package/harness/skills/oh-ascii/DEEP.md +292 -0
  29. package/harness/skills/oh-ascii/SKILL.md +31 -0
  30. package/harness/skills/oh-ascii/scripts/check_ascii_alignment.py +596 -0
  31. package/harness/skills/oh-browser/DEEP.md +54 -0
  32. package/harness/skills/oh-browser/SKILL.md +30 -0
  33. package/harness/skills/oh-builder/DEEP.md +63 -0
  34. package/harness/skills/oh-builder/SKILL.md +16 -89
  35. package/harness/skills/oh-expert/DEEP.md +85 -0
  36. package/harness/skills/oh-expert/SKILL.md +19 -106
  37. package/harness/skills/oh-facade/DEEP.md +182 -0
  38. package/harness/skills/oh-facade/SKILL.md +34 -0
  39. package/harness/skills/oh-freeze/DEEP.md +18 -0
  40. package/harness/skills/oh-freeze/SKILL.md +15 -15
  41. package/harness/skills/oh-full-output/DEEP.md +25 -0
  42. package/harness/skills/oh-full-output/SKILL.md +28 -0
  43. package/harness/skills/oh-fusion/DEEP.md +120 -0
  44. package/harness/skills/oh-fusion/SKILL.md +36 -0
  45. package/harness/skills/oh-gauntlet/DEEP.md +77 -0
  46. package/harness/skills/oh-gauntlet/SKILL.md +17 -105
  47. package/harness/skills/oh-grill/DEEP.md +51 -0
  48. package/harness/skills/oh-grill/SKILL.md +16 -63
  49. package/harness/skills/oh-guard/DEEP.md +19 -0
  50. package/harness/skills/oh-guard/SKILL.md +15 -20
  51. package/harness/skills/oh-handoff/DEEP.md +48 -0
  52. package/harness/skills/oh-handoff/SKILL.md +18 -19
  53. package/harness/skills/oh-health/DEEP.md +74 -0
  54. package/harness/skills/oh-health/SKILL.md +17 -76
  55. package/harness/skills/oh-init/DEEP.md +85 -0
  56. package/harness/skills/oh-init/SKILL.md +17 -197
  57. package/harness/skills/oh-investigate/DEEP.md +171 -0
  58. package/harness/skills/oh-investigate/SKILL.md +18 -61
  59. package/harness/skills/oh-issue/DEEP.md +21 -0
  60. package/harness/skills/oh-issue/SKILL.md +16 -23
  61. package/harness/skills/oh-learn/DEEP.md +44 -0
  62. package/harness/skills/oh-learn/SKILL.md +17 -79
  63. package/harness/skills/oh-manifest/DEEP.md +92 -0
  64. package/harness/skills/oh-manifest/SKILL.md +15 -107
  65. package/harness/skills/oh-plan-review/DEEP.md +90 -0
  66. package/harness/skills/oh-plan-review/SKILL.md +19 -114
  67. package/harness/skills/oh-planner/DEEP.md +172 -0
  68. package/harness/skills/oh-planner/SKILL.md +16 -143
  69. package/harness/skills/oh-prd/DEEP.md +45 -0
  70. package/harness/skills/oh-prd/SKILL.md +15 -22
  71. package/harness/skills/oh-refactor/DEEP.md +122 -0
  72. package/harness/skills/oh-refactor/SKILL.md +33 -0
  73. package/harness/skills/oh-retro/DEEP.md +26 -0
  74. package/harness/skills/oh-retro/SKILL.md +17 -20
  75. package/harness/skills/oh-review/DEEP.md +87 -0
  76. package/harness/skills/oh-review/SKILL.md +17 -96
  77. package/harness/skills/oh-security/DEEP.md +83 -0
  78. package/harness/skills/oh-security/SKILL.md +18 -96
  79. package/harness/skills/oh-ship/DEEP.md +141 -0
  80. package/harness/skills/oh-ship/SKILL.md +18 -26
  81. package/harness/skills/oh-skill-craft/DEEP.md +369 -0
  82. package/harness/skills/oh-skill-craft/SKILL.md +20 -93
  83. package/harness/skills/oh-skills-link/DEEP.md +16 -0
  84. package/harness/skills/oh-skills-link/SKILL.md +15 -16
  85. package/harness/skills/oh-skills-list/DEEP.md +20 -0
  86. package/harness/skills/oh-skills-list/SKILL.md +14 -18
  87. package/harness/skills/oh-triage/DEEP.md +23 -0
  88. package/harness/skills/oh-triage/SKILL.md +15 -20
  89. package/harness/skills/oh-worktree/DEEP.md +169 -0
  90. package/harness/skills/oh-worktree/SKILL.md +32 -0
  91. package/lib/harness-resolver.ts +10 -12
  92. package/package.json +9 -4
  93. package/scripts/count-tokens.mjs +158 -0
  94. package/scripts/oh-doctor.ps1 +342 -0
  95. package/harness/codex/CONSTITUTION.md +0 -70
  96. package/harness/codex/ROUTING.md +0 -127
  97. package/harness/instructions/RUNTIME.md +0 -55
  98. package/harness/skills/oh-caveman/SKILL.md +0 -33
  99. package/lib/logger.ts +0 -69
@@ -1,118 +1,26 @@
1
1
  ---
2
2
  name: oh-manifest
3
- description: "Full build loop: plan → build → verify → loop until done or blocker. Orchestrates oh-planner + oh-builder with auto-decisions."
3
+ description: "Use when running a complete implementation pipeline from plan through verification. Orchestrates oh-planner + oh-builder with auto-decisions."
4
4
  tier: 4
5
- benefits-from: [oh-planner, oh-builder, oh-expert]
6
- triggers:
7
- - "manifest"
8
- - "full build"
9
- - "build loop"
10
- - "build until done"
11
- - "orchestrate"
12
- - "pipeline"
13
- - "run the plan"
5
+ route:
6
+ pass: oh-planner
7
+ fail: oh-expert
8
+ blocker: surface
14
9
  ---
15
10
 
16
11
  # oh-manifest
17
12
 
18
- Full build orchestration loop. Runs pre-flight checks → planner → builder → verify → repeat until done or a blocker is surfaced. Uses decision principles to auto-resolve intermediate questions. Only interrupts the user for genuine blockers.
13
+ Full build orchestration loop: pre-flight → plan → build → verify → loop.
19
14
 
20
- ## Pipeline
15
+ ## Steps
21
16
 
22
- ### Phase 0: Pre-Flight
23
-
24
- Before any work begins, ALL of these MUST pass:
25
-
26
- ☐ **Quality baseline** — existing tests pass (if any). Capture output for before/after comparison.
27
- ☐ **Rollback path** — clean `git stash` or a committed state you can return to.
28
- ☐ **Branch isolation** — confirm you are on a working branch, not main/master.
29
- - ☐ **Scope documented** — plan or task description exists and is unambiguous.
30
-
31
- If any check fails → **STOP**. Report which check failed and why. Do not proceed to Phase 1 until the blocker is resolved.
32
-
33
- ### Step 1: Plan
34
- - If `.opencode/plan.md` exists, load and verify it is current
35
- - If not, run `oh-planner` (Mode A, B, or C depending on context)
36
- - Auto-decide minor scope decisions using decision principles
37
- - Surface only: premises that need human judgment, or plan/alternative conflicts
38
-
39
- ### Step 2: Build
40
- - For each phase in plan.md, run `oh-builder` (Mode D: From Plan)
41
- - Implements phases in dependency order
42
- - Parallelizable phases may be delegated to sub-agents
43
- - Auto-decide implementation choices using decision principles
44
-
45
- ### Step 3: Verify
46
- - Check each phase against its verification criteria in plan.md
47
- - Run tests if they exist
48
- - If phase passes: mark complete in plan.md, proceed to next
49
- - If phase fails: diagnose (use oh-expert self-diagnosis), fix, re-verify
50
- - If fix is impossible within scope: surface blocker
51
-
52
- ### Step 4: Loop or Done
53
- - All phases complete and verified → DONE
54
- - Phase failed and cannot be fixed → BLOCKER (surface to user with context)
55
- - Phase passed but new work discovered → add to plan, continue loop
56
-
57
- ## Loop Patterns
58
-
59
- Select a pattern based on the nature of the work:
60
-
61
- | Pattern | Use When | Behavior |
62
- |---------|----------|----------|
63
- | **sequential** | Normal feature work | One phase at a time, verify each before next |
64
- | **continuous-pr** | Multi-step refactors | Each phase is its own PR — commit, push, PR per phase |
65
- | **infinite** | Watch mode, CI repair | Continue until external stop signal or budget exhausted |
66
- | **rfc-dag** | Complex dependency chains | Resolve phase ordering by DAG; parallelize independent branches |
67
-
68
- Default is **sequential**. Switch patterns only when the work structure demands it.
69
-
70
- ## Escalation Triggers
71
-
72
- These conditions cause the loop to **pause** and surface to the user:
73
-
74
- | Trigger | Condition | Action |
75
- |---------|-----------|--------|
76
- | **Stall** | 2 consecutive checkpoints with zero measurable progress | Pause. Report what was attempted, what blocked. |
77
- | **Retry storm** | Same error message 3+ times in the loop | Stop retrying. Surface error with attempted fixes. |
78
- | **Cost drift** | Cumulative changes exceed scope documented in pre-flight | Pause. Show diff between planned and actual scope. |
79
- | **Quality regression** | Verify phase scores lower than pre-flight baseline | Pause. Report degraded metrics. Do not push through. |
80
-
81
- These are not optional suggestions. When a trigger fires, the loop **must** pause and report.
82
-
83
- ## Decision Principles
84
-
85
- Auto-resolve these without asking the user:
86
-
87
- 1. **Completeness over cleverness** — cover more cases
88
- 2. **Boil the lake** — fix blast radius, not symptom
89
- 3. **Pragmatic over perfect** — cleaner option that ships today
90
- 4. **DRY but not premature** — third instance is the time to abstract
91
- 5. **Explicit over implicit** — clear code over magic
92
- 6. **Bias toward action** — when in doubt, make progress
93
-
94
- Surface to user only:
95
- - **Premises** — fundamental assumptions that change the nature of the build
96
- - **Dead end** — all viable paths have significant trade-offs
97
- - **Cross-model disagreement** — two approaches both have strong arguments
98
-
99
- ## Blocker Protocol
100
-
101
- When a blocker is encountered:
102
-
103
- 1. **Describe the blocker** — what was attempted, what failed, why it cannot proceed
104
- 2. **Propose alternatives** — scope reduction, dependency change, architectural shift
105
- 3. **Surface to user** with: `BLOCKER: <description> | Options: <A, B, C>`
106
- 4. **Wait for user decision** before continuing
107
-
108
- ## Anti-patterns
109
- - Skipping pre-flight (every loop needs a baseline and a rollback plan)
110
- - Auto-deciding premises (fundamental assumptions need user input)
111
- - Pushing through blockers (surface immediately, don't try 5 workarounds silently)
112
- - Skipping verification (verify every phase, not just the final result)
113
- - Parallelizing dependent phases (respect the dependency order in plan.md)
114
- - Forgetting to update plan.md with completion status
115
- - Ignoring escalation triggers (stall means pause, not try harder)
17
+ 1. Run pre-flight — verify quality baseline (tests pass), rollback path (clean stash or committed state), branch isolation, scope documented.
18
+ 2. Load or create plan — load existing plan or dispatch oh-planner. Auto-decide minor scope via decision principles.
19
+ 3. Dispatch build — run oh-builder for each phase in dependency order. Parallelize independent phases via sub-agents.
20
+ 4. Review output — spec compliance first, then code quality. Never reverse the order.
21
+ 5. Verify — check each phase against verification criteria. Tests pass → mark complete. Fail → diagnose, fix, re-verify.
22
+ 6. Handle implementer status — DONE (proceed), DONE_WITH_CONCERNS (read before proceeding), NEEDS_CONTEXT (provide and re-dispatch), BLOCKED (assess type).
23
+ 7. Loop — all done → DONE. Phase fails → BLOCKER (surface with options). New work → add to plan, continue.
116
24
 
117
25
  ## Routing
118
26
 
@@ -120,4 +28,4 @@ When a blocker is encountered:
120
28
  |---------|-------|
121
29
  | pass | → pipeline continues (planner→builder→gauntlet→ship) |
122
30
  | fail | → oh-expert (diagnose loop failure) |
123
- | blocker | → surface to user with context and options |
31
+ | blocker | → surface with context and options |
@@ -0,0 +1,90 @@
1
+ # oh-plan-review — Deep Reference
2
+
3
+ ## Lens Selection
4
+
5
+ | Keywords | Lens |
6
+ |----------|------|
7
+ | architecture, data model, API, types, modules | Engineering |
8
+ | UI, layout, colors, components, screens | Design |
9
+ | CLI, SDK, dev tool, API, npm package, docs | DX |
10
+ | product, strategy, scope, roadmap, business | Strategy |
11
+
12
+ ## Engineering Lens
13
+
14
+ ### Scope Challenge (before reviewing)
15
+ 1. Does existing code already solve any sub-problem?
16
+ 2. Minimum changes to achieve goal?
17
+ 3. 8+ files or 2+ new classes/services → smell. Challenge.
18
+ 4. Does framework have built-in for each pattern?
19
+ 5. AI completeness is cheap — recommend full over shortcuts.
20
+ 6. New artifact types need build/publish pipelines.
21
+
22
+ ### Architecture Review
23
+ One section at a time: Architecture → Code Quality → Tests → Performance. Max 8 issues per section. Discuss each via AskUserQuestion. Anti-skip: evaluate every section; say "No issues found" if clean.
24
+
25
+ ### Cognitive patterns (internalize)
26
+ - State diagnosis (Larson) — falling behind, treading, repaying debt, innovating?
27
+ - Blast radius — worst case = how many systems?
28
+ - Boring by default (McKinley) — proven tech unless you have innovation tokens
29
+ - Reversibility — make wrong answers cheap. Feature flags, incremental rollouts.
30
+ - Essential vs accidental complexity (Brooks) — real problem or self-inflicted?
31
+
32
+ ## Design Lens
33
+
34
+ - Empty states — warmth, action, context when no data
35
+ - Visual hierarchy — what's seen first, second, third?
36
+ - Edge cases — long names, zero results, error, first-time vs power
37
+ - AI slop — generic card grids, 3-column features, hero sections? Flag.
38
+ - Responsive — every viewport intentional, not just stack-on-mobile
39
+ - A11y — keyboard, screen readers, contrast, touch targets
40
+
41
+ **Rule:** Specificity over vibes. "Clean, modern" is not a decision. Name the font, spacing, interaction, motion.
42
+
43
+ ## DX Lens
44
+
45
+ **Evaluate:** Time to Hello World (< 2 min target). Error quality (problem + cause + fix). First 5 min friction. Progressive disclosure — simple case is prod-ready. Pit of Success — right thing is easy, wrong thing is hard.
46
+
47
+ **Modes:**
48
+ - **Expansion** — competitive advantage. Benchmark competitors.
49
+ - **Polish** — bulletproof every touchpoint.
50
+ - **Triage** — critical gaps only. Minimum viable DX.
51
+
52
+ ## Strategy Lens
53
+
54
+ ### Scope Modes
55
+ - **Expansion** — "10x better for 2x effort?" Present as AskUserQuestion.
56
+ - **Selective** — Surface cherry-pickable expansions. Neutral posture.
57
+ - **Hold** — Bulletproof. Catch every failure. No silent reduction.
58
+ - **Reduction** — Ruthless cut to MVP.
59
+
60
+ ### Patterns (internalize)
61
+ - One-way vs two-way doors (Bezos) — most are two-way; move fast
62
+ - Inversion (Munger) — "how do we win?" + "what makes us fail?"
63
+ - Focus as subtraction (Jobs) — fewer things, better
64
+ - Proxy skepticism (Bezos) — metrics serving users or self-referential?
65
+ - Temporal depth — 5-10 year arcs. Regret minimization.
66
+
67
+ ### Prime Directives
68
+ - Zero silent failures. Every failure mode visible.
69
+ - Every error named — exception class, trigger, catch, message.
70
+ - Data flows have a happy path plus three shadow paths: nil, empty, upstream error. Trace all four.
71
+ - Observability is first-class — new dashboards/alerts are deliverables.
72
+ - Everything deferred written down or it doesn't exist.
73
+ - You have permission to say "scrap it and do this instead."
74
+
75
+ ## Rules
76
+
77
+ - **Interactive only.** One section at a time via AskUserQuestion.
78
+ - **Anti-skip.** Every section evaluated. Zero findings → say so.
79
+ - **Commit to lens.** Once scope agreed, don't re-argue earlier decisions.
80
+
81
+ ## Output
82
+
83
+ Plan file (`~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`) updated with findings and decisions.
84
+
85
+ ## Anti-patterns
86
+
87
+ - Using the wrong lens for the question
88
+ - Reviewing without reading the full plan first
89
+ - Merging concerns across lenses
90
+ - Skipping the interactive walkthrough
@@ -1,128 +1,33 @@
1
1
  ---
2
2
  name: oh-plan-review
3
- description: "Multi-lens plan review: 4 perspectives in one skill. Choose Engineering (architecture/scope), Design (UX/interaction), DX (API/CLI ergonomics), or Strategy (product/CEO). Interactive — walks through findings one section at a time."
3
+ description: "Use when a plan needs multi-perspective review before execution. Choose Engineering, Design, DX, or Strategy lens — walks through findings one section at a time."
4
4
  tier: 3
5
- benefits-from: [oh-planner, oh-expert]
6
- triggers:
7
- - "plan review"
8
- - "review the plan"
9
- - "architecture review"
10
- - "design review"
11
- - "ux review"
12
- - "dx review"
13
- - "strategy review"
14
- - "eng review"
15
- - "ceo review"
5
+ route:
6
+ pass:
7
+ - oh-grill
8
+ - oh-manifest
9
+ fail: oh-planner
10
+ blocker: surface
16
11
  ---
17
12
 
18
13
  # oh-plan-review
19
14
 
20
- Four review lenses in one skill. Pick the lens that fits the plan's scope or run multiple lenses in sequence for thorough coverage.
15
+ Four-lens plan review. Interactive — walk findings one section at a time.
21
16
 
22
- **Interactive.** Walk findings one section at a time with opinionated recommendations and AskUserQuestion gates. Never dump all findings at once.
17
+ ## Steps
23
18
 
24
- **Read-only.** No code changes. The output is a better plan, not a document about the plan.
25
-
26
- ## Lens Selection
27
-
28
- Ask the user which lens fits, or auto-detect from plan content:
29
-
30
- | Trigger keywords | Recommended lens |
31
- |---|---|
32
- | architecture, data model, API design, file structure, types, modules | Engineering |
33
- | UI, layout, colors, components, screens, mockups, user interface | Design |
34
- | CLI, SDK, developer tool, API, npm package, documentation, onboarding | DX |
35
- | product, strategy, scope, roadmap, competition, business model | Strategy |
36
-
37
- ### Engineering Lens
38
- Scope challenge, architecture review, cognitive patterns for eng managers.
39
-
40
- **Scope Challenge** — Before reviewing anything:
41
- 1. What existing code already partially solves each sub-problem?
42
- 2. What is the minimum set of changes that achieves the stated goal?
43
- 3. Complexity check: 8+ files or 2+ new classes/services → smell. Challenge it.
44
- 4. Search check: does the runtime/framework have built-in support for each pattern the plan introduces?
45
- 5. Completeness check: with AI-assisted coding, the cost of completeness is 10-100x cheaper. Recommend complete lakes over shortcuts.
46
- 6. Distribution check: new artifact types need build/publish pipelines.
47
-
48
- **Architecture Review** — Walk through one section at a time: Architecture → Code Quality → Tests → Performance. Max 8 top issues per section. Use AskUserQuestion to discuss each finding.
49
-
50
- **Anti-skip rule:** Never condense or skip a section. If a section has zero findings, say so — but evaluate it.
51
-
52
- **Cognitive patterns** (internalize, don't enumerate):
53
- - State diagnosis (Larson) — Is your team falling behind, treading water, repaying debt, or innovating?
54
- - Blast radius instinct — What's the worst case and how many systems does it affect?
55
- - Boring by default (McKinley) — Proven technology unless you have innovation tokens to spend.
56
- - Reversibility preference — Feature flags, incremental rollouts. Make wrong answers cheap.
57
- - Essential vs accidental complexity (Brooks) — Is this solving a real problem or one we created?
58
-
59
- ### Design Lens
60
- UX review, interaction state coverage, AI slop detection.
61
-
62
- **Evaluate:**
63
- - Empty states — every screen without data needs warmth, action, context
64
- - Visual hierarchy — what does the user see first, second, third?
65
- - Edge cases — 47-char names, zero results, error states, first-time vs power user
66
- - AI slop — generic card grids, hero sections, 3-column features? Flag them.
67
- - Responsive — every viewport gets intentional design, not just stack-on-mobile
68
- - Accessibility — keyboard nav, screen readers, contrast, touch targets
69
-
70
- **Principle:** Specificity over vibes. "Clean, modern UI" is not a design decision. Name the font, spacing scale, interaction pattern, and motion.
71
-
72
- ### DX Lens
73
- Developer experience audit for APIs, CLIs, SDKs, libraries, platforms.
74
-
75
- **Evaluate:**
76
- - Time to Hello World — target < 2 minutes. Every extra minute drops adoption 20-30%.
77
- - Error quality — every error = problem + cause + fix. No "something went wrong."
78
- - First five minutes — one click to start. No credit card. No demo call.
79
- - Progressive disclosure — simple case is production-ready. Complex case uses the same API.
80
- - Pit of Success — make the right thing easy, the wrong thing hard.
81
-
82
- **Three modes:**
83
- - **DX Expansion** — competitive advantage. Design magical moments. Benchmark competitors.
84
- - **DX Polish** — bulletproof every touchpoint. No friction, no uncertainty.
85
- - **DX Triage** — critical gaps only. Minimum viable DX investment.
86
-
87
- ### Strategy Lens
88
- Product/CEO review with 4 scope modes.
89
-
90
- **Select mode:**
91
- - **Scope Expansion** — "What would make this 10x better for 2x the effort?" Push scope up. Present each expansion as an AskUserQuestion. The user opts in or out.
92
- - **Selective Expansion** — Hold the baseline. Surface expansion opportunities for cherry-picking. Neutral recommendation posture.
93
- - **Hold Scope** — Make it bulletproof. Catch every failure mode. No silent reduction or expansion.
94
- - **Scope Reduction** — Find the minimum viable version. Be ruthless. Cut everything non-essential.
95
-
96
- **Cognitive patterns** (internalize):
97
- - Classification instinct (Bezos) — One-way vs two-way doors. Most things are two-way; move fast.
98
- - Inversion reflex (Munger) — For every "how do we win?" also ask "what would make us fail?"
99
- - Focus as subtraction (Jobs) — Default: do fewer things, better. 350 products → 10.
100
- - Proxy skepticism (Bezos) — Are our metrics still serving users or self-referential?
101
- - Temporal depth — Think in 5-10 year arcs. Apply regret minimization for major bets.
102
-
103
- **Prime directives:**
104
- - Zero silent failures. Every failure mode must be visible.
105
- - Every error has a name. Don't say "handle errors." Name the exception class, trigger, catch, user-facing message.
106
- - Data flows have shadow paths: nil, empty, upstream error. Trace all four.
107
- - Observability is scope, not afterthought. New dashboards and alerts are first-class deliverables.
108
- - Everything deferred must be written down. TODOS.md or it doesn't exist.
109
- - You have permission to say "scrap it and do this instead."
110
-
111
- ## Output
112
-
113
- After each lens, the plan file (`/.opencode/plan.md`) is updated with findings and decisions. The user reviews and accepts changes interactively.
114
-
115
- ## Rules
116
-
117
- - **Interactive only.** One section at a time. Use AskUserQuestion to discuss findings before writing.
118
- - **Anti-skip:** Every section must be evaluated. If zero findings, say "No issues found" and move on.
119
- - **Anti-shortcut:** The plan file is the OUTPUT of the interactive review, not a substitute for it. Findings go through AskUserQuestion before writing.
120
- - **Commit to the chosen lens.** Once scope is agreed, don't re-argue earlier decisions in later sections.
19
+ 1. Select lens — match keywords to lens using routing table (architecture → Engineering, UI → Design, CLI → DX, product → Strategy).
20
+ 2. Read the full plan before reviewing. Understand scope before evaluating.
21
+ 3. Walk through sections one at a time — interactive via AskUserQuestion.
22
+ 4. Apply lens-specific criteria — scope challenge, architecture review, cognitive patterns for Engineering; empty states, hierarchy, a11y for Design; Hello World time, error quality for DX; scope modes, prime directives for Strategy.
23
+ 5. Surface findings per section — max 8 issues per section. Zero findings → say so. Anti-skip: evaluate every section.
24
+ 6. Update plan file — record findings and decisions in canonical plan storage.
25
+ 7. Route result — pass to execution or stress-testing; fail back to revision.
121
26
 
122
27
  ## Routing
123
28
 
124
29
  | Outcome | Route |
125
30
  |---------|-------|
126
- | pass | → oh-grill (if concerns remain) or oh-manifest (execute plan) |
127
- | fail | → oh-planner (revise plan based on findings) |
128
- | blocker | → surface to user |
31
+ | pass | → oh-grill (if concerns remain) or oh-manifest (execute) |
32
+ | fail | → oh-planner (revise) |
33
+ | blocker | → surface |
@@ -0,0 +1,172 @@
1
+ # oh-planner — Deep Reference
2
+
3
+ ## Mode A: Brainstorm (fuzzy idea)
4
+
5
+ Use when the concept is vague ("what if", "I have an idea") and needs shaping into something concrete.
6
+
7
+ ### Process
8
+
9
+ Ask these 6 clarifying questions in order:
10
+
11
+ 1. **Who specifically needs this?** — Identify the exact user or stakeholder. Not "developers" but "frontend devs doing state management in React 19".
12
+ 2. **What do they do today?** — Current workflow, tooling, and pain points. What's the manual/partial solution?
13
+ 3. **What's the one concrete thing they can't do?** — The single capability gap. If they had one new thing, what would it be?
14
+ 4. **What's the smallest useful version?** — Minimum scope that delivers real value. Strip everything non-essential.
15
+ 5. **What signals success?** — Observable, measurable outcomes. Not "better DX" but "setup drops from 15min to 2min".
16
+ 6. **Does this compound or plateau?** — Will this unlock further improvements (compound) or is it a one-time fix (plateau)? Compound features get deeper investment.
17
+
18
+ ### Output
19
+ Structured design doc covering: user definition, current workflow, capability gap, minimum viable scope, success metrics, growth trajectory.
20
+
21
+ ## Mode B: Architecture Analysis (existing codebase)
22
+
23
+ Use when the codebase feels messy or you need to understand the surface before planning.
24
+
25
+ ### Process
26
+ 1. **Read domain** — Load `CONTEXT.md` (or equivalent domain doc). Understand the language, concepts, and shared terms before touching code. Domain-blind analysis produces wrong recommendations.
27
+ 2. **Map the surface** — Identify module boundaries and responsibilities, dependency direction, public API surfaces vs internal implementation, configuration and extension points.
28
+ 3. **Find deepening opportunities** — Look for duplication, over-coupling, grown-beyond-purpose files, dead code or unused abstractions, inconsistent patterns.
29
+ 4. **Rank by impact** — For each finding, assess effort, value, dependencies, risk.
30
+
31
+ ### Output
32
+ Ranked list of refactoring candidates with effort/value/risk assessment. Each candidate includes: location, problem description, recommended change, and estimated effort.
33
+
34
+ ## Mode C: Structured Plan (non-trivial feature)
35
+
36
+ Use when requirements exist and need a formal plan document to execute from.
37
+
38
+ ### 1. Scope Challenge
39
+ Before writing anything, challenge the scope:
40
+ - **What existing code partially solves it?** — Don't build from scratch if 60% exists.
41
+ - **Minimum changes?** — What's the smallest diff that ships the feature?
42
+ - **Complexity check:** 8+ files changed is a smell. Flag it. Reconsider the approach.
43
+ - **Search check:** For each architecture pattern in your approach, search `{framework} {pattern} built-in`. Flag custom solutions where framework built-ins exist.
44
+ - **Completeness check:** AI-assisted completeness is 10-100x cheaper than human teams. Default to full coverage, not minimal.
45
+ - **Distribution check:** New artifact types may need pipelines (build, test, deploy, publish). Include them.
46
+
47
+ ### 2. Strategy Review
48
+ Challenge premises: Is this the right problem to solve? Identify scope decisions explicitly (what's in, what's out, why). Consider 10x alternatives. Who owns the outcome? Who reviews?
49
+
50
+ ### 3. Architecture Review
51
+ Analyze: data flow, component boundaries, API surface, state model.
52
+
53
+ ### 4. Edge Case Analysis
54
+ Cover: error states, concurrency, failure modes, security.
55
+
56
+ ### 5. Dependency Mapping
57
+ Map what blocks what: identify parallelizable work streams, note external dependencies, order phases so nothing blocks on unfinished upstream work.
58
+
59
+ ### 6. Write Plan
60
+ Produce a structured artifact with: phases, dependencies, verification steps per phase, and exit criteria.
61
+
62
+ ### 7. Self-Review Checklist
63
+ 1. **Spec coverage** — Skim each requirement from the original request. Can you point to a task that implements it? List any gaps and add missing tasks.
64
+ 2. **Placeholder scan** — Search the plan for banned patterns: "TBD", "TODO", "implement later", "handle edge cases", "fill in details". Replace every instance with concrete content.
65
+ 3. **Type consistency** — Do types, method signatures, and property names match across tasks? A function called `clearLayers()` in Task 3 but `clearFullLayers()` in Task 7 is a bug. Fix cross-references.
66
+
67
+ Fix any issues inline — no need to re-review, just fix and move on. If a spec requirement has no task, add the task.
68
+
69
+ ## Mode D: Autoplan (existing plan needs full review)
70
+
71
+ Use when a plan exists and needs comprehensive automated review. Auto-decides 90% of intermediate questions.
72
+
73
+ ### Phase Order
74
+ Runs sequentially: **Strategy → Architecture → Design → Engineering → DX**. Each phase must complete before the next begins. No jumping ahead.
75
+
76
+ ### Auto-Resolution Principles
77
+ | # | Principle | Meaning |
78
+ |---|-----------|---------|
79
+ | 1 | **Completeness over cleverness** | Cover more cases. Clever shortcuts miss edge cases. |
80
+ | 2 | **Boil the lake** | Fix blast radius, not symptom. If a module is misdesigned, refactor it — don't patch around it. |
81
+ | 3 | **Pragmatic over perfect** | Ships today wins. Perfect designs that never ship are worthless. |
82
+ | 4 | **DRY but not premature** | Reuse what exists. But don't abstract until the 3rd concrete instance appears. |
83
+ | 5 | **Explicit over implicit** | Clear code over magic. Magic is fun to write, terrible to debug. |
84
+ | 6 | **Bias toward action** | When in doubt, make progress. Analysis paralysis is a decision too. |
85
+
86
+ ### Never Auto-Decide
87
+ - **Premises** — Core assumptions about what to build. These need human judgment.
88
+ - **Close calls** — Decisions where both options have strong, valid arguments. Surface for discussion.
89
+
90
+ ## Plan Artifact Format
91
+
92
+ Every plan written by oh-planner uses this canonical format.
93
+
94
+ ### Storage
95
+ Canonical path: `~/.local/share/opencode/openhermes/plans/<project>-plan-<nnn>.md`
96
+
97
+ ### Template
98
+ ```markdown
99
+ # PLAN: <project>
100
+
101
+ Plan ID: <project>-plan-<nnn>
102
+ Project: <project>
103
+ Status: active | in-progress | blocked | complete | abandoned
104
+ Created: <ts> | Updated: <ts>
105
+ Project Path: <absolute-path>
106
+ Plan Path: <canonical-path>/<project>-plan-<nnn>.md
107
+ Objective: <short>
108
+
109
+ ## Current State
110
+ — What exists now, what phase we're in.
111
+
112
+ ## Assumptions
113
+ — Decisions we're making without full information.
114
+
115
+ ## Tasks
116
+ - [ ] Task 1
117
+ - [ ] Subtask 1.1
118
+
119
+ ## Active Task
120
+ — What's being worked on right now.
121
+
122
+ ## Subagents
123
+ | Agent | Purpose | Status | Findings |
124
+
125
+ ## Completed
126
+ — Finished tasks with dates.
127
+
128
+ ## Work Log
129
+ — Running log of decisions and progress.
130
+
131
+ ## Blockers
132
+ — What's stopping progress.
133
+
134
+ ## Validation
135
+ - [ ] Static checks
136
+ - [ ] Unit tests
137
+ - [ ] Manual verification
138
+
139
+ ## Decisions
140
+ — Key decisions and their rationale.
141
+
142
+ ## Notes
143
+ — Miscellaneous context.
144
+ ```
145
+
146
+ ### Task Rules
147
+ - **Bite-Sized Granularity** — Each step is one action, 2-5 minutes.
148
+ - **No Placeholder Rule** — Banned: TBD, TODO, "implement later", "fill in details", "add appropriate error handling", "add validation", "handle edge cases", "write tests for the above" (without actual test code), "Similar to Task N".
149
+ - **Complete Code in Every Step** — If a step changes code, show the complete code inline. Use exact file paths always.
150
+ - **Expected Output** — Every test step must include the exact command to run and the expected output.
151
+
152
+ ### Execution Handoff
153
+ After saving a plan, offer the user an execution choice:
154
+ > **Plan saved. Two execution options:**
155
+ > **1. Subagent-Driven (recommended)** — I dispatch a fresh subagent per task with two-stage review between tasks for fast iteration.
156
+ > **2. Inline Execution** — Execute tasks in this session with batch execution and checkpoints.
157
+
158
+ ### Rules
159
+ - **Self-contained** — Tasks, Completed, Subagents, and Work Log live in this one file. No separate `todo.md` or `work-log.md`.
160
+ - **Status tracks lifecycle** — Only use: `active`, `in-progress`, `blocked`, `complete`, `abandoned`.
161
+ - **Validation lives with the plan** — Each plan defines its own verification criteria.
162
+
163
+ ## Anti-patterns
164
+ - Skipping strategy review for complex features (architecture mistakes compound)
165
+ - Wrong granularity — too vague to execute or too detailed to read
166
+ - Re-opening decided debates ("what if we rewrite in Rust?")
167
+ - Prioritizing perfect over shipped (progress > polish)
168
+ - Not flagging taste decisions to user
169
+ - Big bang rewrites — plan increments, not overhauls
170
+ - Skipping the user-approval gate — implementing before the user has reviewed and approved the design document
171
+ - Placeholders in plan tasks (TBD, TODO, "implement later" — makes plan unexecutable)
172
+ - Missing expected output in test steps