openhermes 4.3.0 → 4.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CONTEXT.md +9 -0
  2. package/README.md +26 -15
  3. package/bootstrap.ts +161 -124
  4. package/harness/agents/oh-browser.md +97 -0
  5. package/harness/agents/oh-builder.md +78 -0
  6. package/harness/agents/oh-facade.md +75 -0
  7. package/harness/agents/oh-fusion.md +45 -0
  8. package/harness/agents/oh-gauntlet.md +71 -0
  9. package/harness/agents/oh-grill.md +71 -0
  10. package/harness/agents/oh-investigate.md +60 -0
  11. package/harness/agents/oh-manifest.md +95 -0
  12. package/harness/agents/oh-plan-review.md +40 -0
  13. package/harness/agents/oh-planner.md +50 -0
  14. package/harness/agents/oh-refactor.md +37 -0
  15. package/harness/agents/oh-retro.md +46 -0
  16. package/harness/agents/oh-review.md +85 -0
  17. package/harness/agents/oh-security.md +83 -0
  18. package/harness/agents/oh-ship.md +76 -0
  19. package/harness/agents/oh-skill-craft.md +38 -0
  20. package/harness/agents/openhermes.md +107 -53
  21. package/harness/codex/AUTOPILOT.md +143 -91
  22. package/harness/codex/CHARTER.md +81 -0
  23. package/harness/commands/oh-doctor.md +193 -14
  24. package/harness/instructions/SHELL.md +76 -0
  25. package/harness/skills/oh-ascii/DEEP.md +292 -0
  26. package/harness/skills/oh-ascii/SKILL.md +31 -0
  27. package/harness/skills/oh-ascii/scripts/check_ascii_alignment.py +596 -0
  28. package/harness/skills/oh-browser/DEEP.md +54 -0
  29. package/harness/skills/oh-browser/SKILL.md +30 -0
  30. package/harness/skills/oh-builder/DEEP.md +63 -0
  31. package/harness/skills/oh-builder/SKILL.md +12 -90
  32. package/harness/skills/oh-expert/DEEP.md +85 -0
  33. package/harness/skills/oh-expert/SKILL.md +13 -106
  34. package/harness/skills/oh-facade/DEEP.md +182 -0
  35. package/harness/skills/oh-facade/SKILL.md +15 -279
  36. package/harness/skills/oh-freeze/DEEP.md +18 -0
  37. package/harness/skills/oh-freeze/SKILL.md +10 -19
  38. package/harness/skills/oh-full-output/DEEP.md +25 -0
  39. package/harness/skills/oh-full-output/SKILL.md +12 -65
  40. package/harness/skills/oh-fusion/DEEP.md +120 -0
  41. package/harness/skills/oh-fusion/SKILL.md +17 -295
  42. package/harness/skills/oh-gauntlet/DEEP.md +77 -0
  43. package/harness/skills/oh-gauntlet/SKILL.md +13 -105
  44. package/harness/skills/oh-grill/DEEP.md +51 -0
  45. package/harness/skills/oh-grill/SKILL.md +12 -63
  46. package/harness/skills/oh-guard/DEEP.md +19 -0
  47. package/harness/skills/oh-guard/SKILL.md +10 -24
  48. package/harness/skills/oh-handoff/DEEP.md +48 -0
  49. package/harness/skills/oh-handoff/SKILL.md +13 -23
  50. package/harness/skills/oh-health/DEEP.md +74 -0
  51. package/harness/skills/oh-health/SKILL.md +13 -76
  52. package/harness/skills/oh-init/DEEP.md +85 -0
  53. package/harness/skills/oh-init/SKILL.md +13 -127
  54. package/harness/skills/oh-investigate/DEEP.md +171 -0
  55. package/harness/skills/oh-investigate/SKILL.md +13 -66
  56. package/harness/skills/oh-issue/DEEP.md +21 -0
  57. package/harness/skills/oh-issue/SKILL.md +11 -27
  58. package/harness/skills/oh-learn/DEEP.md +44 -0
  59. package/harness/skills/oh-learn/SKILL.md +12 -83
  60. package/harness/skills/oh-manifest/DEEP.md +92 -0
  61. package/harness/skills/oh-manifest/SKILL.md +11 -108
  62. package/harness/skills/oh-plan-review/DEEP.md +90 -0
  63. package/harness/skills/oh-plan-review/SKILL.md +13 -115
  64. package/harness/skills/oh-planner/DEEP.md +172 -0
  65. package/harness/skills/oh-planner/SKILL.md +12 -149
  66. package/harness/skills/oh-prd/DEEP.md +45 -0
  67. package/harness/skills/oh-prd/SKILL.md +10 -26
  68. package/harness/skills/oh-refactor/DEEP.md +122 -0
  69. package/harness/skills/oh-refactor/SKILL.md +17 -410
  70. package/harness/skills/oh-retro/DEEP.md +26 -0
  71. package/harness/skills/oh-retro/SKILL.md +12 -24
  72. package/harness/skills/oh-review/DEEP.md +87 -0
  73. package/harness/skills/oh-review/SKILL.md +11 -97
  74. package/harness/skills/oh-security/DEEP.md +83 -0
  75. package/harness/skills/oh-security/SKILL.md +14 -96
  76. package/harness/skills/oh-ship/DEEP.md +141 -0
  77. package/harness/skills/oh-ship/SKILL.md +13 -31
  78. package/harness/skills/oh-skill-craft/DEEP.md +369 -0
  79. package/harness/skills/oh-skill-craft/SKILL.md +17 -178
  80. package/harness/skills/oh-skills-link/DEEP.md +16 -0
  81. package/harness/skills/oh-skills-link/SKILL.md +10 -20
  82. package/harness/skills/oh-skills-list/DEEP.md +20 -0
  83. package/harness/skills/oh-skills-list/SKILL.md +9 -22
  84. package/harness/skills/oh-triage/DEEP.md +23 -0
  85. package/harness/skills/oh-triage/SKILL.md +8 -24
  86. package/harness/skills/oh-worktree/DEEP.md +169 -0
  87. package/harness/skills/oh-worktree/SKILL.md +32 -0
  88. package/lib/harness-resolver.ts +8 -10
  89. package/package.json +5 -3
  90. package/scripts/count-tokens.mjs +158 -0
  91. package/scripts/oh-doctor.ps1 +342 -0
  92. package/harness/codex/CONSTITUTION.md +0 -73
  93. package/harness/codex/ROUTING.md +0 -92
  94. package/harness/instructions/RUNTIME.md +0 -30
  95. package/harness/skills/oh-caveman/SKILL.md +0 -42
  96. package/lib/logger.ts +0 -75
@@ -0,0 +1,92 @@
1
+ # oh-manifest — Deep Reference
2
+
3
+ ## Phase 0: Pre-Flight
4
+
5
+ ALL must pass before any work:
6
+
7
+ - ☐ **Quality baseline** — existing tests pass. Capture before/after.
8
+ - ☐ **Rollback path** — clean `git stash` or committed state to return to.
9
+ - ☐ **Branch isolation** — working branch, not main/master.
10
+ - ☐ **Scope documented** — plan exists and is unambiguous.
11
+
12
+ Any check fails → STOP. Report which. Do not proceed until resolved.
13
+
14
+ **Continuous execution:** Execute all tasks without pausing for progress check-ins between them. Only stop for BLOCKED, genuine ambiguity, or all tasks complete.
15
+
16
+ ## Pipeline
17
+
18
+ ### Step 1: Plan
19
+ If plan exists, load. If not, run oh-planner. Auto-decide minor scope via decision principles. Surface only: premises needing human judgment, or plan/alternative conflicts.
20
+
21
+ ### Step 2: Build
22
+ Run oh-builder for each plan phase in dependency order. Parallelizable phases → sub-agents. Auto-decide implementation choices.
23
+
24
+ **Two-stage review (in order — never reverse):**
25
+ 1. **Spec compliance first** — Does the output match the plan/spec requirements? Quote the spec. No scope creep, no missing requirements.
26
+ 2. **Code quality second** — Only after spec compliance is ✅. Architecture, readability, test quality, edge cases.
27
+
28
+ **Implementer status protocol** — Implementers report one of:
29
+
30
+ | Status | Action |
31
+ |--------|--------|
32
+ | **DONE** | Proceed to spec review |
33
+ | **DONE_WITH_CONCERNS** | Read concerns before proceeding |
34
+ | **NEEDS_CONTEXT** | Provide context, re-dispatch |
35
+ | **BLOCKED** | Assess: context problem? capability gap? task too large? plan wrong? |
36
+
37
+ Never ignore BLOCKED or retry same approach without changes.
38
+
39
+ ### Step 3: Verify
40
+ Check each phase against verification criteria. Tests pass → mark complete. Fail → diagnose (oh-expert), fix, re-verify.
41
+
42
+ ### Step 4: Loop
43
+ All done → DONE. Phase fails → BLOCKER (surface). New work discovered → add to plan, continue.
44
+
45
+ ## Loop Patterns
46
+
47
+ | Pattern | Use | Behavior |
48
+ |---------|-----|----------|
49
+ | sequential | Normal features | One phase at a time, verify each |
50
+ | continuous-pr | Multi-step refactors | Per-phase PRs |
51
+ | infinite | Watch mode, CI repair | Continue until stop signal |
52
+ | rfc-dag | Complex deps | DAG resolution, parallelize independent branches |
53
+
54
+ Default: sequential.
55
+
56
+ ## Escalation Triggers
57
+
58
+ | Trigger | Condition | Action |
59
+ |---------|-----------|--------|
60
+ | Stall | 2 consecutive zero-progress checkpoints | Pause, report attempts |
61
+ | Retry storm | Same error 5+ times | Stop, surface with fixes tried |
62
+ | Cost drift | Cumulative changes exceed scope | Pause, show diff |
63
+ | Quality regression | Verify scores lower than baseline | Pause, report |
64
+
65
+ These are not optional. When triggered, loop **must** pause.
66
+
67
+ ## Decision Principles
68
+
69
+ Auto-resolve: completeness > cleverness, boil the lake, pragmatic > perfect, DRY at 3rd instance, explicit > implicit, bias toward action.
70
+
71
+ Surface only: premises, dead ends, cross-model disagreement.
72
+
73
+ **Model selection guidance:**
74
+ - Mechanical tasks (isolated, 1-2 files, clear spec) → fast cheap model
75
+ - Integration tasks (multi-file, coordination) → standard model
76
+ - Architecture/design/review tasks → most capable model
77
+
78
+ ## Blocker Protocol
79
+
80
+ `BLOCKER: <what> | Options: A, B, C` → wait for decision.
81
+
82
+ ## Anti-patterns
83
+ - Skipping pre-flight
84
+ - Auto-deciding premises
85
+ - Pushing through blockers without surfacing
86
+ - Skipping verification
87
+ - Parallelizing dependent phases
88
+ - Not updating plan file
89
+ - Ignoring escalation triggers
90
+ - Starting code quality review before spec compliance is ✅
91
+ - Ignoring implementer BLOCKED status and retrying with same approach
92
+ - Pausing between tasks for progress updates (breaks flow)
@@ -1,17 +1,7 @@
1
1
  ---
2
2
  name: oh-manifest
3
- description: "Full build loop: plan → build → verify → loop until done or blocker. Orchestrates oh-planner + oh-builder with auto-decisions."
3
+ description: "Use when running a complete implementation pipeline from plan through verification. Orchestrates oh-planner + oh-builder with auto-decisions."
4
4
  tier: 4
5
- benefits-from: [oh-planner, oh-builder, oh-expert]
6
- triggers:
7
- - "run the full build"
8
- - "full build pipeline"
9
- - "build loop"
10
- - "build until done"
11
- - "orchestrate this build"
12
- - "pipeline from plan"
13
- - "run the plan"
14
- - "manifest this"
15
5
  route:
16
6
  pass: oh-planner
17
7
  fail: oh-expert
@@ -20,104 +10,17 @@ route:
20
10
 
21
11
  # oh-manifest
22
12
 
23
- Full build orchestration loop. Runs pre-flight checks → planner → builder → verify → repeat until done or a blocker is surfaced. Uses decision principles to auto-resolve intermediate questions. Only interrupts the user for genuine blockers.
13
+ Full build orchestration loop: pre-flight → plan → build → verify → loop.
24
14
 
25
- ## Pipeline
15
+ ## Steps
26
16
 
27
- ### Phase 0: Pre-Flight
28
-
29
- Before any work begins, ALL of these MUST pass:
30
-
31
- - ☐ **Quality baseline** — existing tests pass (if any). Capture output for before/after comparison.
32
- - ☐ **Rollback path** — clean `git stash` or a committed state you can return to.
33
- - ☐ **Branch isolation** — confirm you are on a working branch, not main/master.
34
- - ☐ **Scope documented** — plan or task description exists and is unambiguous.
35
-
36
- If any check fails → **STOP**. Report which check failed and why. Do not proceed to Phase 1 until the blocker is resolved.
37
-
38
- ### Step 1: Plan
39
- - If a plan file (`~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`) exists, load and verify it is current
40
- - If not, run `oh-planner` (Mode A, B, or C depending on context)
41
- - Auto-decide minor scope decisions using decision principles
42
- - Surface only: premises that need human judgment, or plan/alternative conflicts
43
-
44
- ### Step 2: Build
45
- - For each phase in the plan file, run `oh-builder` (Mode D: From Plan)
46
- - Implements phases in dependency order
47
- - Parallelizable phases may be delegated to sub-agents
48
- - Auto-decide implementation choices using decision principles
49
-
50
- ### Step 3: Verify
51
- - Check each phase against its verification criteria in the plan file
52
- - Run tests if they exist
53
- - If phase passes: mark complete in plan file, proceed to next
54
- - If phase fails: diagnose (use oh-expert self-diagnosis), fix, re-verify
55
- - If fix is impossible within scope: surface blocker
56
-
57
- ### Step 4: Loop or Done
58
- - All phases complete and verified → DONE
59
- - Phase failed and cannot be fixed → BLOCKER (surface to user with context)
60
- - Phase passed but new work discovered → add to plan, continue loop
61
-
62
- ## Loop Patterns
63
-
64
- Select a pattern based on the nature of the work:
65
-
66
- | Pattern | Use When | Behavior |
67
- |---------|----------|----------|
68
- | **sequential** | Normal feature work | One phase at a time, verify each before next |
69
- | **continuous-pr** | Multi-step refactors | Each phase is its own PR — commit, push, PR per phase |
70
- | **infinite** | Watch mode, CI repair | Continue until external stop signal or budget exhausted |
71
- | **rfc-dag** | Complex dependency chains | Resolve phase ordering by DAG; parallelize independent branches |
72
-
73
- Default is **sequential**. Switch patterns only when the work structure demands it.
74
-
75
- ## Escalation Triggers
76
-
77
- These conditions cause the loop to **pause** and surface to the user:
78
-
79
- | Trigger | Condition | Action |
80
- |---------|-----------|--------|
81
- | **Stall** | 2 consecutive checkpoints with zero measurable progress | Pause. Report what was attempted, what blocked. |
82
- | **Retry storm** | Same error message 3+ times in the loop | Stop retrying. Surface error with attempted fixes. |
83
- | **Cost drift** | Cumulative changes exceed scope documented in pre-flight | Pause. Show diff between planned and actual scope. |
84
- | **Quality regression** | Verify phase scores lower than pre-flight baseline | Pause. Report degraded metrics. Do not push through. |
85
-
86
- These are not optional suggestions. When a trigger fires, the loop **must** pause and report.
87
-
88
- ## Decision Principles
89
-
90
- Auto-resolve these without asking the user:
91
-
92
- 1. **Completeness over cleverness** — cover more cases
93
- 2. **Boil the lake** — fix blast radius, not symptom
94
- 3. **Pragmatic over perfect** — cleaner option that ships today
95
- 4. **DRY but not premature** — third instance is the time to abstract
96
- 5. **Explicit over implicit** — clear code over magic
97
- 6. **Bias toward action** — when in doubt, make progress
98
-
99
- Surface to user only:
100
- - **Premises** — fundamental assumptions that change the nature of the build
101
- - **Dead end** — all viable paths have significant trade-offs
102
- - **Cross-model disagreement** — two approaches both have strong arguments
103
-
104
- ## Blocker Protocol
105
-
106
- When a blocker is encountered:
107
-
108
- 1. **Describe the blocker** — what was attempted, what failed, why it cannot proceed
109
- 2. **Propose alternatives** — scope reduction, dependency change, architectural shift
110
- 3. **Surface to user** with: `BLOCKER: <description> | Options: <A, B, C>`
111
- 4. **Wait for user decision** before continuing
112
-
113
- ## Anti-patterns
114
- - Skipping pre-flight (every loop needs a baseline and a rollback plan)
115
- - Auto-deciding premises (fundamental assumptions need user input)
116
- - Pushing through blockers (surface immediately, don't try 5 workarounds silently)
117
- - Skipping verification (verify every phase, not just the final result)
118
- - Parallelizing dependent phases (respect the dependency order in the plan file)
119
- - Forgetting to update the plan file with completion status
120
- - Ignoring escalation triggers (stall means pause, not try harder)
17
+ 1. Run pre-flight — verify quality baseline (tests pass), rollback path (clean stash or committed state), branch isolation, scope documented.
18
+ 2. Load or create plan — load existing plan or dispatch oh-planner. Auto-decide minor scope via decision principles.
19
+ 3. Dispatch build — run oh-builder for each phase in dependency order. Parallelize independent phases via sub-agents.
20
+ 4. Review output — spec compliance first, then code quality. Never reverse the order.
21
+ 5. Verify — check each phase against verification criteria. Tests pass → mark complete. Fail → diagnose, fix, re-verify.
22
+ 6. Handle implementer status — DONE (proceed), DONE_WITH_CONCERNS (read before proceeding), NEEDS_CONTEXT (provide and re-dispatch), BLOCKED (assess type).
23
+ 7. Loop — all done → DONE. Phase fails → BLOCKER (surface with options). New work → add to plan, continue.
121
24
 
122
25
  ## Routing
123
26
 
@@ -125,4 +28,4 @@ When a blocker is encountered:
125
28
  |---------|-------|
126
29
  | pass | → pipeline continues (planner→builder→gauntlet→ship) |
127
30
  | fail | → oh-expert (diagnose loop failure) |
128
- | blocker | → surface to user with context and options |
31
+ | blocker | → surface with context and options |
@@ -0,0 +1,90 @@
1
+ # oh-plan-review — Deep Reference
2
+
3
+ ## Lens Selection
4
+
5
+ | Keywords | Lens |
6
+ |----------|------|
7
+ | architecture, data model, API, types, modules | Engineering |
8
+ | UI, layout, colors, components, screens | Design |
9
+ | CLI, SDK, dev tool, API, npm package, docs | DX |
10
+ | product, strategy, scope, roadmap, business | Strategy |
11
+
12
+ ## Engineering Lens
13
+
14
+ ### Scope Challenge (before reviewing)
15
+ 1. Does existing code already solve any sub-problem?
16
+ 2. Minimum changes to achieve goal?
17
+ 3. 8+ files or 2+ new classes/services → smell. Challenge.
18
+ 4. Does framework have built-in for each pattern?
19
+ 5. AI completeness is cheap — recommend full over shortcuts.
20
+ 6. New artifact types need build/publish pipelines.
21
+
22
+ ### Architecture Review
23
+ One section at a time: Architecture → Code Quality → Tests → Performance. Max 8 issues per section. Discuss each via AskUserQuestion. Anti-skip: evaluate every section; say "No issues found" if clean.
24
+
25
+ ### Cognitive patterns (internalize)
26
+ - State diagnosis (Larson) — falling behind, treading, repaying debt, innovating?
27
+ - Blast radius — worst case = how many systems?
28
+ - Boring by default (McKinley) — proven tech unless you have innovation tokens
29
+ - Reversibility — make wrong answers cheap. Feature flags, incremental rollouts.
30
+ - Essential vs accidental complexity (Brooks) — real problem or self-inflicted?
31
+
32
+ ## Design Lens
33
+
34
+ - Empty states — warmth, action, context when no data
35
+ - Visual hierarchy — what's seen first, second, third?
36
+ - Edge cases — long names, zero results, error, first-time vs power
37
+ - AI slop — generic card grids, 3-column features, hero sections? Flag.
38
+ - Responsive — every viewport intentional, not just stack-on-mobile
39
+ - A11y — keyboard, screen readers, contrast, touch targets
40
+
41
+ **Rule:** Specificity over vibes. "Clean, modern" is not a decision. Name the font, spacing, interaction, motion.
42
+
43
+ ## DX Lens
44
+
45
+ **Evaluate:** Time to Hello World (< 2 min target). Error quality (problem + cause + fix). First 5 min friction. Progressive disclosure — simple case is prod-ready. Pit of Success — right thing is easy, wrong thing is hard.
46
+
47
+ **Modes:**
48
+ - **Expansion** — competitive advantage. Benchmark competitors.
49
+ - **Polish** — bulletproof every touchpoint.
50
+ - **Triage** — critical gaps only. Minimum viable DX.
51
+
52
+ ## Strategy Lens
53
+
54
+ ### Scope Modes
55
+ - **Expansion** — "10x better for 2x effort?" Present as AskUserQuestion.
56
+ - **Selective** — Surface cherry-pickable expansions. Neutral posture.
57
+ - **Hold** — Bulletproof. Catch every failure. No silent reduction.
58
+ - **Reduction** — Ruthless cut to MVP.
59
+
60
+ ### Patterns (internalize)
61
+ - One-way vs two-way doors (Bezos) — most are two-way; move fast
62
+ - Inversion (Munger) — "how do we win?" + "what makes us fail?"
63
+ - Focus as subtraction (Jobs) — fewer things, better
64
+ - Proxy skepticism (Bezos) — metrics serving users or self-referential?
65
+ - Temporal depth — 5-10 year arcs. Regret minimization.
66
+
67
+ ### Prime Directives
68
+ - Zero silent failures. Every failure mode visible.
69
+ - Every error named — exception class, trigger, catch, message.
70
+ - Data flows have a happy path plus three shadow paths: nil, empty, upstream error. Trace all four.
71
+ - Observability is first-class — new dashboards/alerts are deliverables.
72
+ - Everything deferred written down or it doesn't exist.
73
+ - You have permission to say "scrap it and do this instead."
74
+
75
+ ## Rules
76
+
77
+ - **Interactive only.** One section at a time via AskUserQuestion.
78
+ - **Anti-skip.** Every section evaluated. Zero findings → say so.
79
+ - **Commit to lens.** Once scope agreed, don't re-argue earlier decisions.
80
+
81
+ ## Output
82
+
83
+ Plan file (`~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`) updated with findings and decisions.
84
+
85
+ ## Anti-patterns
86
+
87
+ - Using the wrong lens for the question
88
+ - Reviewing without reading the full plan first
89
+ - Merging concerns across lenses
90
+ - Skipping the interactive walkthrough
@@ -1,19 +1,7 @@
1
1
  ---
2
2
  name: oh-plan-review
3
- description: "Multi-lens plan review: 4 perspectives in one skill. Choose Engineering (architecture/scope), Design (UX/interaction), DX (API/CLI ergonomics), or Strategy (product/CEO). Interactive — walks through findings one section at a time."
3
+ description: "Use when a plan needs multi-perspective review before execution. Choose Engineering, Design, DX, or Strategy lens — walks through findings one section at a time."
4
4
  tier: 3
5
- benefits-from: [oh-planner, oh-expert]
6
- triggers:
7
- - "review this plan"
8
- - "review the plan file"
9
- - "architecture review of"
10
- - "design review the plan"
11
- - "ux review this plan"
12
- - "dx review the plan"
13
- - "strategy review"
14
- - "engineering review"
15
- - "ceo review"
16
- - "review plan from"
17
5
  route:
18
6
  pass:
19
7
  - oh-grill
@@ -24,112 +12,22 @@ route:
24
12
 
25
13
  # oh-plan-review
26
14
 
27
- Four review lenses in one skill. Pick the lens that fits the plan's scope or run multiple lenses in sequence for thorough coverage.
15
+ Four-lens plan review. Interactive — walk findings one section at a time.
28
16
 
29
- **Interactive.** Walk findings one section at a time with opinionated recommendations and AskUserQuestion gates. Never dump all findings at once.
17
+ ## Steps
30
18
 
31
- **Read-only.** No code changes. The output is a better plan, not a document about the plan.
32
-
33
- ## Lens Selection
34
-
35
- Ask the user which lens fits, or auto-detect from plan content:
36
-
37
- | Trigger keywords | Recommended lens |
38
- |---|---|
39
- | architecture, data model, API design, file structure, types, modules | Engineering |
40
- | UI, layout, colors, components, screens, mockups, user interface | Design |
41
- | CLI, SDK, developer tool, API, npm package, documentation, onboarding | DX |
42
- | product, strategy, scope, roadmap, competition, business model | Strategy |
43
-
44
- ### Engineering Lens
45
- Scope challenge, architecture review, cognitive patterns for eng managers.
46
-
47
- **Scope Challenge** — Before reviewing anything:
48
- 1. What existing code already partially solves each sub-problem?
49
- 2. What is the minimum set of changes that achieves the stated goal?
50
- 3. Complexity check: 8+ files or 2+ new classes/services → smell. Challenge it.
51
- 4. Search check: does the runtime/framework have built-in support for each pattern the plan introduces?
52
- 5. Completeness check: with AI-assisted coding, the cost of completeness is 10-100x cheaper. Recommend complete lakes over shortcuts.
53
- 6. Distribution check: new artifact types need build/publish pipelines.
54
-
55
- **Architecture Review** — Walk through one section at a time: Architecture → Code Quality → Tests → Performance. Max 8 top issues per section. Use AskUserQuestion to discuss each finding.
56
-
57
- **Anti-skip rule:** Never condense or skip a section. If a section has zero findings, say so — but evaluate it.
58
-
59
- **Cognitive patterns** (internalize, don't enumerate):
60
- - State diagnosis (Larson) — Is your team falling behind, treading water, repaying debt, or innovating?
61
- - Blast radius instinct — What's the worst case and how many systems does it affect?
62
- - Boring by default (McKinley) — Proven technology unless you have innovation tokens to spend.
63
- - Reversibility preference — Feature flags, incremental rollouts. Make wrong answers cheap.
64
- - Essential vs accidental complexity (Brooks) — Is this solving a real problem or one we created?
65
-
66
- ### Design Lens
67
- UX review, interaction state coverage, AI slop detection.
68
-
69
- **Evaluate:**
70
- - Empty states — every screen without data needs warmth, action, context
71
- - Visual hierarchy — what does the user see first, second, third?
72
- - Edge cases — 47-char names, zero results, error states, first-time vs power user
73
- - AI slop — generic card grids, hero sections, 3-column features? Flag them.
74
- - Responsive — every viewport gets intentional design, not just stack-on-mobile
75
- - Accessibility — keyboard nav, screen readers, contrast, touch targets
76
-
77
- **Principle:** Specificity over vibes. "Clean, modern UI" is not a design decision. Name the font, spacing scale, interaction pattern, and motion.
78
-
79
- ### DX Lens
80
- Developer experience audit for APIs, CLIs, SDKs, libraries, platforms.
81
-
82
- **Evaluate:**
83
- - Time to Hello World — target < 2 minutes. Every extra minute drops adoption 20-30%.
84
- - Error quality — every error = problem + cause + fix. No "something went wrong."
85
- - First five minutes — one click to start. No credit card. No demo call.
86
- - Progressive disclosure — simple case is production-ready. Complex case uses the same API.
87
- - Pit of Success — make the right thing easy, the wrong thing hard.
88
-
89
- **Three modes:**
90
- - **DX Expansion** — competitive advantage. Design magical moments. Benchmark competitors.
91
- - **DX Polish** — bulletproof every touchpoint. No friction, no uncertainty.
92
- - **DX Triage** — critical gaps only. Minimum viable DX investment.
93
-
94
- ### Strategy Lens
95
- Product/CEO review with 4 scope modes.
96
-
97
- **Select mode:**
98
- - **Scope Expansion** — "What would make this 10x better for 2x the effort?" Push scope up. Present each expansion as an AskUserQuestion. The user opts in or out.
99
- - **Selective Expansion** — Hold the baseline. Surface expansion opportunities for cherry-picking. Neutral recommendation posture.
100
- - **Hold Scope** — Make it bulletproof. Catch every failure mode. No silent reduction or expansion.
101
- - **Scope Reduction** — Find the minimum viable version. Be ruthless. Cut everything non-essential.
102
-
103
- **Cognitive patterns** (internalize):
104
- - Classification instinct (Bezos) — One-way vs two-way doors. Most things are two-way; move fast.
105
- - Inversion reflex (Munger) — For every "how do we win?" also ask "what would make us fail?"
106
- - Focus as subtraction (Jobs) — Default: do fewer things, better. 350 products → 10.
107
- - Proxy skepticism (Bezos) — Are our metrics still serving users or self-referential?
108
- - Temporal depth — Think in 5-10 year arcs. Apply regret minimization for major bets.
109
-
110
- **Prime directives:**
111
- - Zero silent failures. Every failure mode must be visible.
112
- - Every error has a name. Don't say "handle errors." Name the exception class, trigger, catch, user-facing message.
113
- - Data flows have shadow paths: nil, empty, upstream error. Trace all four.
114
- - Observability is scope, not afterthought. New dashboards and alerts are first-class deliverables.
115
- - Everything deferred must be written down. TODOS.md or it doesn't exist.
116
- - You have permission to say "scrap it and do this instead."
117
-
118
- ## Output
119
-
120
- After each lens, the plan file (`~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`) is updated with findings and decisions. The user reviews and accepts changes interactively.
121
-
122
- ## Rules
123
-
124
- - **Interactive only.** One section at a time. Use AskUserQuestion to discuss findings before writing.
125
- - **Anti-skip:** Every section must be evaluated. If zero findings, say "No issues found" and move on.
126
- - **Anti-shortcut:** The plan file is the OUTPUT of the interactive review, not a substitute for it. Findings go through AskUserQuestion before writing.
127
- - **Commit to the chosen lens.** Once scope is agreed, don't re-argue earlier decisions in later sections.
19
+ 1. Select lens — match keywords to lens using routing table (architecture → Engineering, UI → Design, CLI → DX, product → Strategy).
20
+ 2. Read the full plan before reviewing. Understand scope before evaluating.
21
+ 3. Walk through sections one at a time — interactive via AskUserQuestion.
22
+ 4. Apply lens-specific criteria — scope challenge, architecture review, cognitive patterns for Engineering; empty states, hierarchy, a11y for Design; Hello World time, error quality for DX; scope modes, prime directives for Strategy.
23
+ 5. Surface findings per section — max 8 issues per section. Zero findings → say so. Anti-skip: evaluate every section.
24
+ 6. Update plan file — record findings and decisions in canonical plan storage.
25
+ 7. Route result — pass to execution or stress-testing, fail back to revision.
128
26
 
129
27
  ## Routing
130
28
 
131
29
  | Outcome | Route |
132
30
  |---------|-------|
133
- | pass | → oh-grill (if concerns remain) or oh-manifest (execute plan) |
134
- | fail | → oh-planner (revise plan based on findings) |
135
- | blocker | → surface to user |
31
+ | pass | → oh-grill (if concerns remain) or oh-manifest (execute) |
32
+ | fail | → oh-planner (revise) |
33
+ | blocker | → surface |
@@ -0,0 +1,172 @@
1
+ # oh-planner — Deep Reference
2
+
3
+ ## Mode A: Brainstorm (fuzzy idea)
4
+
5
+ Use when the concept is vague ("what if", "I have an idea") and needs shaping into something concrete.
6
+
7
+ ### Process
8
+
9
+ Ask these 6 clarifying questions in order:
10
+
11
+ 1. **Who specifically needs this?** — Identify the exact user or stakeholder. Not "developers" but "frontend devs doing state management in React 19".
12
+ 2. **What do they do today?** — Current workflow, tooling, and pain points. What's the manual/partial solution?
13
+ 3. **What's the one concrete thing they can't do?** — The single capability gap. If they had one new thing, what would it be?
14
+ 4. **What's the smallest useful version?** — Minimum scope that delivers real value. Strip everything non-essential.
15
+ 5. **What signals success?** — Observable, measurable outcomes. Not "better DX" but "setup drops from 15min to 2min".
16
+ 6. **Does this compound or plateau?** — Will this unlock further improvements (compound) or is it a one-time fix (plateau)? Compound features get deeper investment.
17
+
18
+ ### Output
19
+ Structured design doc covering: user definition, current workflow, capability gap, minimum viable scope, success metrics, growth trajectory.
20
+
21
+ ## Mode B: Architecture Analysis (existing codebase)
22
+
23
+ Use when the codebase feels messy or you need to understand the surface before planning.
24
+
25
+ ### Process
26
+ 1. **Read domain** — Load `CONTEXT.md` (or equivalent domain doc). Understand the language, concepts, and shared terms before touching code. Domain-blind analysis produces wrong recommendations.
27
+ 2. **Map the surface** — Identify module boundaries and responsibilities, dependency direction, public API surfaces vs internal implementation, configuration and extension points.
28
+ 3. **Find deepening opportunities** — Look for duplication, over-coupling, grown-beyond-purpose files, dead code or unused abstractions, inconsistent patterns.
29
+ 4. **Rank by impact** — For each finding, assess effort, value, dependencies, risk.
30
+
31
+ ### Output
32
+ Ranked list of refactoring candidates with effort/value/risk assessment. Each candidate includes: location, problem description, recommended change, and estimated effort.
33
+
34
+ ## Mode C: Structured Plan (non-trivial feature)
35
+
36
+ Use when requirements exist and need a formal plan document to execute from.
37
+
38
+ ### 1. Scope Challenge
39
+ Before writing anything, challenge the scope:
40
+ - **What existing code partially solves it?** — Don't build from scratch if 60% exists.
41
+ - **Minimum changes?** — What's the smallest diff that ships the feature?
42
+ - **Complexity check:** 8+ files changed is a smell. Flag it. Reconsider the approach.
43
+ - **Search check:** For each architecture pattern in your approach, search `{framework} {pattern} built-in`. Flag custom solutions where framework built-ins exist.
44
+ - **Completeness check:** AI-assisted completeness is 10-100x cheaper than human teams. Default to full coverage, not minimal.
45
+ - **Distribution check:** New artifact types may need pipelines (build, test, deploy, publish). Include them.
46
+
47
+ ### 2. Strategy Review
48
+ Challenge premises: Is this the right problem to solve? Identify scope decisions explicitly (what's in, what's out, why). Consider 10x alternatives. Who owns the outcome? Who reviews?
49
+
50
+ ### 3. Architecture Review
51
+ Analyze: data flow, component boundaries, API surface, state model.
52
+
53
+ ### 4. Edge Case Analysis
54
+ Cover: error states, concurrency, failure modes, security.
55
+
56
+ ### 5. Dependency Mapping
57
+ Map what blocks what: identify parallelizable work streams, note external dependencies, order phases so nothing blocks on unfinished upstream work.
58
+
59
+ ### 6. Write Plan
60
+ Produce a structured artifact with: phases, dependencies, verification steps per phase, and exit criteria.
61
+
62
+ ### 7. Self-Review Checklist
63
+ 1. **Spec coverage** — Skim each requirement from the original request. Can you point to a task that implements it? List any gaps and add missing tasks.
64
+ 2. **Placeholder scan** — Search the plan for banned patterns: "TBD", "TODO", "implement later", "handle edge cases", "fill in details". Replace every instance with concrete content.
65
+ 3. **Type consistency** — Do types, method signatures, and property names match across tasks? A function called `clearLayers()` in Task 3 but `clearFullLayers()` in Task 7 is a bug. Fix cross-references.
66
+
67
+ Fix any issues inline — no need to re-review, just fix and move on. If a spec requirement has no task, add the task.
68
+
69
+ ## Mode D: Autoplan (existing plan needs full review)
70
+
71
+ Use when a plan exists and needs comprehensive automated review. Auto-decides 90% of intermediate questions.
72
+
73
+ ### Phase Order
74
+ Runs sequentially: **Strategy → Architecture → Design → Engineering → DX**. Each phase must complete before the next begins. No jumping ahead.
75
+
76
+ ### Auto-Resolution Principles
77
+ | # | Principle | Meaning |
78
+ |---|-----------|---------|
79
+ | 1 | **Completeness over cleverness** | Cover more cases. Clever shortcuts miss edge cases. |
80
+ | 2 | **Boil the lake** | Fix blast radius, not symptom. If a module is misdesigned, refactor it — don't patch around it. |
81
+ | 3 | **Pragmatic over perfect** | Ships today wins. Perfect designs that never ship are worthless. |
82
+ | 4 | **DRY but not premature** | Reuse what exists. But don't abstract until the 3rd concrete instance appears. |
83
+ | 5 | **Explicit over implicit** | Clear code over magic. Magic is fun to write, terrible to debug. |
84
+ | 6 | **Bias toward action** | When in doubt, make progress. Analysis paralysis is a decision too. |
85
+
86
+ ### Never Auto-Decide
87
+ - **Premises** — Core assumptions about what to build. These need human judgment.
88
+ - **Close calls** — Decisions where both options have strong, valid arguments. Surface for discussion.
89
+
90
+ ## Plan Artifact Format
91
+
92
+ Every plan written by oh-planner uses this canonical format.
93
+
94
+ ### Storage
95
+ Canonical path: `~/.local/share/opencode/openhermes/plans/<project>-plan-<nnn>.md`
96
+
97
+ ### Template
98
+ ```markdown
99
+ # PLAN: <project>
100
+
101
+ Plan ID: <project>-plan-<nnn>
102
+ Project: <project>
103
+ Status: active | in-progress | blocked | complete | abandoned
104
+ Created: <ts> | Updated: <ts>
105
+ Project Path: <absolute-path>
106
+ Plan Path: <canonical-path>/<project>-plan-<nnn>.md
107
+ Objective: <short>
108
+
109
+ ## Current State
110
+ — What exists now, what phase we're in.
111
+
112
+ ## Assumptions
113
+ — Decisions we're making without full information.
114
+
115
+ ## Tasks
116
+ - [ ] Task 1
117
+ - [ ] Subtask 1.1
118
+
119
+ ## Active Task
120
+ — What's being worked on right now.
121
+
122
+ ## Subagents
123
+ | Agent | Purpose | Status | Findings |
124
+
125
+ ## Completed
126
+ — Finished tasks with dates.
127
+
128
+ ## Work Log
129
+ — Running log of decisions and progress.
130
+
131
+ ## Blockers
132
+ — What's stopping progress.
133
+
134
+ ## Validation
135
+ - [ ] Static checks
136
+ - [ ] Unit tests
137
+ - [ ] Manual verification
138
+
139
+ ## Decisions
140
+ — Key decisions and their rationale.
141
+
142
+ ## Notes
143
+ — Miscellaneous context.
144
+ ```
145
+
146
+ ### Task Rules
147
+ - **Bite-Sized Granularity** — Each step is one action, 2-5 minutes.
148
+ - **No Placeholder Rule** — Banned: TBD, TODO, "implement later", "fill in details", "add appropriate error handling", "add validation", "handle edge cases", "write tests for the above" (without actual test code), "Similar to Task N".
149
+ - **Complete Code in Every Step** — If a step changes code, show the complete code inline. Use exact file paths always.
150
+ - **Expected Output** — Every test step must include the exact command to run and the expected output.
151
+
152
+ ### Execution Handoff
153
+ After saving a plan, offer the user an execution choice:
154
+ > **Plan saved. Two execution options:**
155
+ > **1. Subagent-Driven (recommended)** — I dispatch a fresh subagent per task with two-stage review between tasks for fast iteration.
156
+ > **2. Inline Execution** — Execute tasks in this session with batch execution and checkpoints.
157
+
158
+ ### Rules
159
+ - **Self-contained** — Tasks, Completed, Subagents, and Work Log live in this one file. No separate `todo.md` or `work-log.md`.
160
+ - **Status tracks lifecycle** — Only use: `active`, `in-progress`, `blocked`, `complete`, `abandoned`.
161
+ - **Validation lives with the plan** — Each plan defines its own verification criteria.
162
+
163
+ ## Anti-patterns
164
+ - Skipping strategy review for complex features (architecture mistakes compound)
165
+ - Wrong granularity — too vague to execute or too detailed to read
166
+ - Re-opening decided debates ("what if we rewrite in Rust?")
167
+ - Prioritizing perfect over shipped (remember: progress > polish)
168
+ - Not flagging taste decisions to user
169
+ - Big bang rewrites — plan increments, not overhauls
170
+ - Skipping the user-approval gate — implementing before the user has reviewed and approved the design document
171
+ - Placeholders in plan tasks (TBD, TODO, "implement later" — makes plan unexecutable)
172
+ - Missing expected output in test steps