openhermes 4.3.0 → 4.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CONTEXT.md +9 -0
  2. package/README.md +26 -15
  3. package/bootstrap.ts +161 -124
  4. package/harness/agents/oh-browser.md +97 -0
  5. package/harness/agents/oh-builder.md +78 -0
  6. package/harness/agents/oh-facade.md +75 -0
  7. package/harness/agents/oh-fusion.md +45 -0
  8. package/harness/agents/oh-gauntlet.md +71 -0
  9. package/harness/agents/oh-grill.md +71 -0
  10. package/harness/agents/oh-investigate.md +60 -0
  11. package/harness/agents/oh-manifest.md +95 -0
  12. package/harness/agents/oh-plan-review.md +40 -0
  13. package/harness/agents/oh-planner.md +50 -0
  14. package/harness/agents/oh-refactor.md +37 -0
  15. package/harness/agents/oh-retro.md +46 -0
  16. package/harness/agents/oh-review.md +85 -0
  17. package/harness/agents/oh-security.md +83 -0
  18. package/harness/agents/oh-ship.md +76 -0
  19. package/harness/agents/oh-skill-craft.md +38 -0
  20. package/harness/agents/openhermes.md +107 -53
  21. package/harness/codex/AUTOPILOT.md +143 -91
  22. package/harness/codex/CHARTER.md +81 -0
  23. package/harness/commands/oh-doctor.md +193 -14
  24. package/harness/instructions/SHELL.md +76 -0
  25. package/harness/skills/oh-ascii/DEEP.md +292 -0
  26. package/harness/skills/oh-ascii/SKILL.md +31 -0
  27. package/harness/skills/oh-ascii/scripts/check_ascii_alignment.py +596 -0
  28. package/harness/skills/oh-browser/DEEP.md +54 -0
  29. package/harness/skills/oh-browser/SKILL.md +30 -0
  30. package/harness/skills/oh-builder/DEEP.md +63 -0
  31. package/harness/skills/oh-builder/SKILL.md +12 -90
  32. package/harness/skills/oh-expert/DEEP.md +85 -0
  33. package/harness/skills/oh-expert/SKILL.md +13 -106
  34. package/harness/skills/oh-facade/DEEP.md +182 -0
  35. package/harness/skills/oh-facade/SKILL.md +15 -279
  36. package/harness/skills/oh-freeze/DEEP.md +18 -0
  37. package/harness/skills/oh-freeze/SKILL.md +10 -19
  38. package/harness/skills/oh-full-output/DEEP.md +25 -0
  39. package/harness/skills/oh-full-output/SKILL.md +12 -65
  40. package/harness/skills/oh-fusion/DEEP.md +120 -0
  41. package/harness/skills/oh-fusion/SKILL.md +17 -295
  42. package/harness/skills/oh-gauntlet/DEEP.md +77 -0
  43. package/harness/skills/oh-gauntlet/SKILL.md +13 -105
  44. package/harness/skills/oh-grill/DEEP.md +51 -0
  45. package/harness/skills/oh-grill/SKILL.md +12 -63
  46. package/harness/skills/oh-guard/DEEP.md +19 -0
  47. package/harness/skills/oh-guard/SKILL.md +10 -24
  48. package/harness/skills/oh-handoff/DEEP.md +48 -0
  49. package/harness/skills/oh-handoff/SKILL.md +13 -23
  50. package/harness/skills/oh-health/DEEP.md +74 -0
  51. package/harness/skills/oh-health/SKILL.md +13 -76
  52. package/harness/skills/oh-init/DEEP.md +85 -0
  53. package/harness/skills/oh-init/SKILL.md +13 -127
  54. package/harness/skills/oh-investigate/DEEP.md +171 -0
  55. package/harness/skills/oh-investigate/SKILL.md +13 -66
  56. package/harness/skills/oh-issue/DEEP.md +21 -0
  57. package/harness/skills/oh-issue/SKILL.md +11 -27
  58. package/harness/skills/oh-learn/DEEP.md +44 -0
  59. package/harness/skills/oh-learn/SKILL.md +12 -83
  60. package/harness/skills/oh-manifest/DEEP.md +92 -0
  61. package/harness/skills/oh-manifest/SKILL.md +11 -108
  62. package/harness/skills/oh-plan-review/DEEP.md +90 -0
  63. package/harness/skills/oh-plan-review/SKILL.md +13 -115
  64. package/harness/skills/oh-planner/DEEP.md +172 -0
  65. package/harness/skills/oh-planner/SKILL.md +12 -149
  66. package/harness/skills/oh-prd/DEEP.md +45 -0
  67. package/harness/skills/oh-prd/SKILL.md +10 -26
  68. package/harness/skills/oh-refactor/DEEP.md +122 -0
  69. package/harness/skills/oh-refactor/SKILL.md +17 -410
  70. package/harness/skills/oh-retro/DEEP.md +26 -0
  71. package/harness/skills/oh-retro/SKILL.md +12 -24
  72. package/harness/skills/oh-review/DEEP.md +87 -0
  73. package/harness/skills/oh-review/SKILL.md +11 -97
  74. package/harness/skills/oh-security/DEEP.md +83 -0
  75. package/harness/skills/oh-security/SKILL.md +14 -96
  76. package/harness/skills/oh-ship/DEEP.md +141 -0
  77. package/harness/skills/oh-ship/SKILL.md +13 -31
  78. package/harness/skills/oh-skill-craft/DEEP.md +369 -0
  79. package/harness/skills/oh-skill-craft/SKILL.md +17 -178
  80. package/harness/skills/oh-skills-link/DEEP.md +16 -0
  81. package/harness/skills/oh-skills-link/SKILL.md +10 -20
  82. package/harness/skills/oh-skills-list/DEEP.md +20 -0
  83. package/harness/skills/oh-skills-list/SKILL.md +9 -22
  84. package/harness/skills/oh-triage/DEEP.md +23 -0
  85. package/harness/skills/oh-triage/SKILL.md +8 -24
  86. package/harness/skills/oh-worktree/DEEP.md +169 -0
  87. package/harness/skills/oh-worktree/SKILL.md +32 -0
  88. package/lib/harness-resolver.ts +8 -10
  89. package/package.json +5 -3
  90. package/scripts/count-tokens.mjs +158 -0
  91. package/scripts/oh-doctor.ps1 +342 -0
  92. package/harness/codex/CONSTITUTION.md +0 -73
  93. package/harness/codex/ROUTING.md +0 -92
  94. package/harness/instructions/RUNTIME.md +0 -30
  95. package/harness/skills/oh-caveman/SKILL.md +0 -42
  96. package/lib/logger.ts +0 -75
@@ -1,16 +1,7 @@
1
1
  ---
2
2
  name: oh-review
3
- description: "Two-axis code and design review: Standards (conformance) + Spec (fidelity) in parallel sub-agents. Includes architecture deepening analysis."
3
+ description: "Use when code, design, or PR changes need review before merging. Runs Standards + Spec review in parallel sub-agents. Includes architecture deepening and receiving-review feedback handling."
4
4
  tier: 3
5
- benefits-from: [oh-expert]
6
- triggers:
7
- - "code review please"
8
- - "review the code"
9
- - "review the PR"
10
- - "review changes since"
11
- - "pr review"
12
- - "design review"
13
- - "review this code"
14
5
  route:
15
6
  pass:
16
7
  - oh-gauntlet
@@ -21,97 +12,20 @@ route:
21
12
 
22
13
  # oh-review
23
14
 
24
- Two-axis review of the diff between HEAD and a fixed point. Both axes run as parallel sub-agents, then findings are aggregated. Three modes: **Diff Review**, **Architecture Deepening**, or both in sequence.
15
+ Two-axis review: Standards + Spec, parallel sub-agents. Three modes: Diff Review, Architecture Deepening, Receiving Feedback.
25
16
 
26
- ## When to Use
27
- Before merging any PR or landing changes. When you need a quality gate that catches both code-quality violations and spec deviations.
17
+ ## Steps
28
18
 
29
- ## Mode Selection
30
- - **Diff Review** (default) — Standards + Spec review of a changeset
31
- - **Architecture Deepening** — Surface refactoring opportunities in the codebase
32
- - **Full Review** — Both: diff review first, then architecture deepening pass
33
-
34
- ---
35
-
36
- ## Mode A: Diff Review
37
-
38
- ### 1. Pin the Fixed Point
39
- The user provides a branch, commit SHA, or tag. Capture `git diff <fixed-point>...HEAD` and `git log <fixed-point>..HEAD --oneline`.
40
-
41
- ### 2. Identify the Spec Source
42
- Look for the originating spec in this order:
43
- 1. Issue references in commit messages (`#123`, `Closes #45`) — fetch via `docs/agents/issue-tracker.md`
44
- 2. A path the user passed as an argument
45
- 3. A PRD/spec file under `docs/`, `specs/`, or `.scratch/`
46
- 4. Ask the user
47
-
48
- If no spec exists, the Spec sub-agent skips and reports "no spec available."
49
-
50
- ### 3. Identify the Standards Sources
51
- Collect all files documenting how code should be written:
52
- - AGENTS.md, CLAUDE.md, CONTRIBUTING.md
53
- - CONTEXT.md, ADRs
54
- - eslint/biome/prettier config (note tool-enforced ones — don't re-check)
55
-
56
-
57
- ### 4. Spawn Both Sub-Agents (parallel)
58
-
59
- **Standards sub-agent:** Read the standards docs and the diff. Report per-file/hunk every place the diff violates a documented standard. Cite the standard source + rule. Distinguish hard violations from judgement calls. Skip anything tooling enforces.
60
-
61
- **Spec sub-agent:** Read the spec and the diff. Report: (a) requirements missing or partial, (b) scope creep, (c) requirements implemented but wrong. Quote the spec line for each finding.
62
-
63
- ### 5. Aggregate
64
- Present findings under `## Standards` and `## Spec` headings. Do NOT merge or rerank — the two axes are deliberately separate. End with one-line summary: total findings per axis and the worst single issue.
65
-
66
- ### Safety Check (always run inline before spawning sub-agents)
67
- - SQL injection vectors
68
- - LLM trust boundary violations
69
- - Conditional side effects (test vs prod)
70
- - Hardcoded secrets
71
-
72
- Block immediately if critical safety issue found — do not spawn sub-agents.
73
-
74
- ---
75
-
76
- ## Mode B: Architecture Deepening
77
-
78
- Surface deepening opportunities — refactors that turn shallow modules into deep ones. Uses the **deletion test**: if deleting a module would concentrate complexity (not just move it), the module is earning its keep. If complexity vanishes, the module was a pass-through.
79
-
80
- ### Vocabulary
81
- Use these terms exactly:
82
- - **Module** — anything with an interface and an implementation
83
- - **Depth** — leverage at the interface: lots of behavior behind a small interface
84
- - **Seam** — where an interface lives; a place behavior can be altered without editing in place
85
- - **Leverage** — what callers get from depth
86
- - **Locality** — what maintainers get from depth: change concentrated in one place
87
-
88
- ### Process
89
- 1. **Explore** — Read CONTEXT.md and ADRs. Walk the codebase noting friction:
90
- - Where does understanding one concept require bouncing between many small modules?
91
- - Where are modules shallow (interface as complex as implementation)?
92
- - Where are pure functions extracted for testability but real bugs hide in how they're called?
93
- - Apply the deletion test to suspected shallow modules
94
- 2. **Present candidates** — Numbered list. For each: files, problem, solution, benefits in terms of locality/leverage. Flag ADR conflicts.
95
- 3. **Grilling loop** — Walk the design tree with the user. Side effects: update CONTEXT.md for new terms, offer ADRs for rejected candidates.
96
- 4. **Output** — Ranked refactoring candidates with collision warnings.
97
-
98
- ## Scoring
99
- - Critical safety issue → block immediately (before sub-agents)
100
- - Structural concern → changes requested
101
- - Spec deviation → changes requested (with reference)
102
- - Style/nit → note for follow-up
103
-
104
- ## Anti-patterns
105
- - Reviewing style before safety (wrong priority order)
106
- - Rubber-stamping without reading the diff
107
- - Requesting changes for subjective preferences
108
- - Merging Standards and Spec findings (one axis masks the other)
109
- - Proposing interfaces in deepening mode before the user picks a candidate
19
+ 1. Pin fixed point — capture `git diff <fixed>...HEAD` and `git log <fixed>..HEAD --oneline`.
20
+ 2. Find spec and standards sources — issues, user paths, docs, AGENTS.md, ADRs, lint config.
21
+ 3. Run safety check — SQL injection, trust boundaries, hardcoded secrets. Block immediately if critical.
22
+ 4. Spawn parallel sub-agents — Standards (cite violations per standard) and Spec (quote requirements). Report independently.
23
+ 5. Aggregate findings — present under Standards/Spec sections. Do not merge. End with total + worst issue.
110
24
 
111
25
  ## Routing
112
26
 
113
27
  | Outcome | Route |
114
28
  |---------|-------|
115
- | pass | → oh-gauntlet (if code changes needed) or oh-ship |
116
- | fail | → oh-builder (fix violations found) |
117
- | blocker | → surface to user |
29
+ | pass | → oh-gauntlet or oh-ship |
30
+ | fail | → oh-builder |
31
+ | blocker | → surface |
@@ -0,0 +1,83 @@
1
+ # oh-security — Deep Reference
2
+
3
+ ## When to Use
4
+
5
+ Use when the codebase needs a security audit — secrets scanning, dependency checks, CI/CD review, and threat modeling. Two modes: daily (fast) and comprehensive (deep).
6
+
7
+ Benefits from: oh-expert
8
+
9
+ Triggers: "security audit", "threat model", "check for vulnerabilities", "owasp review", "pentest", "security review", "cso"
10
+
11
+ ## Modes
12
+
13
+ - **Daily** (default) — only flag findings with strong evidence. Skips speculative checks.
14
+ - **Comprehensive** (`--comprehensive`) — surface everything plausible. User decides.
15
+
16
+ ## Phases
17
+
18
+ ### Phase 0: Stack + Architecture Mental Model
19
+
20
+ Detect language, framework, components, trust boundaries, data flows, attack surface.
21
+
22
+ ### Phase 1: Attack Surface Census
23
+
24
+ Public vs authed vs admin endpoints. File uploads, external integrations, WebSocket, webhooks. CI/CD workflows, containers, IaC, deploy targets.
25
+
26
+ ### Phase 2: Secrets Archaeology
27
+
28
+ Git history for leaked credentials (AWS, OpenAI, GitHub, Slack, generic). .env tracking status. CI inline secrets.
29
+
30
+ ### Phase 3: Dependency Supply Chain
31
+
32
+ CVEs in direct deps, install scripts in production deps, lockfile integrity, abandoned packages. Diff-mode limits to changed deps.
33
+
34
+ ### Phase 4: CI/CD Security
35
+
36
+ Unpinned third-party actions, `pull_request_target` misuse, script injection via `${{ github.event.* }}`, secrets as env vars, CODEOWNERS on workflows.
37
+
38
+ ### Phase 5: Infrastructure Shadow
39
+
40
+ Dockerfiles (root, secrets in ARG, missing USER), configs with prod DB URLs, IaC (overly permissive IAM, privileged K8s). Staging → prod refs.
41
+
42
+ ### Phase 6: Webhooks
43
+
44
+ Endpoints without signature verification, TLS verification disabled, overly broad OAuth scopes.
45
+
46
+ ### Phase 7: LLM Security
47
+
48
+ Prompt injection (user input → system prompts), unsanitized LLM output in UI, tool calls without validation, hardcoded AI keys.
49
+
50
+ ### Phase 8: OWASP + STRIDE
51
+
52
+ Map findings to OWASP Top 10 and STRIDE. Coverage gaps identified.
53
+
54
+ ## Anti-patterns
55
+
56
+ - Running daily mode for comprehensive needs (misses deep issues)
57
+ - Skipping secrets archaeology in git history
58
+ - Relying only on automated scanners without manual review
59
+ - Not updating after dependencies change
60
+
61
+ ## Reference
62
+
63
+ ### Output Format
64
+
65
+ ```
66
+ Security Posture Report
67
+ Critical (n): finding — file:line — remediation
68
+ High (n):
69
+ Medium (n):
70
+ Low (n):
71
+ OWASP Coverage: A01-A10
72
+ STRIDE: Spoofing..Elevation of Privilege
73
+ ```
74
+
75
+ ### Rules
76
+
77
+ - Read-only (diagnosis only). Auto-fix low severity only if explicitly asked.
78
+ - Daily: 8/10 gate. Would you stake reputation on it?
79
+ - Comprehensive: 2/10 gate. Surface everything.
80
+ - No false positives on git history. Placeholder values excluded. Rotated secrets still flagged.
81
+ - Prioritize by blast radius: RCE > credential exposure > info leak > best-practice.
82
+ - Distinguish direct vs transitive dependency findings.
83
+ - Use Grep/Glob tools, not bash grep.
@@ -1,16 +1,7 @@
1
1
  ---
2
2
  name: oh-security
3
- description: "Security audit: secrets archaeology, dependency supply chain, CI/CD security, OWASP Top 10, STRIDE threat modeling, LLM security. Two modes: daily (8/10 confidence gate) and comprehensive (2/10 bar)."
3
+ description: "Security audit — secrets, dependencies, CI/CD, threat modeling"
4
4
  tier: 3
5
- benefits-from: [oh-expert]
6
- triggers:
7
- - "security audit"
8
- - "threat model"
9
- - "check for vulnerabilities"
10
- - "owasp review"
11
- - "pentest"
12
- - "security review"
13
- - "cso"
14
5
  route:
15
6
  pass: surface
16
7
  fail: oh-investigate
@@ -19,96 +10,23 @@ route:
19
10
 
20
11
  # oh-security
21
12
 
22
- Security audit that finds the doors that are actually unlocked. Two modes: **daily** (8/10 confidence gate — low noise, high signal) and **comprehensive** (2/10 bar — casts a wider net, more findings). Output is a Security Posture Report with severity ratings and remediation plans. Does NOT make code changes — diagnosis only.
13
+ Security audit: secrets scanning, dependency checks, CI/CD review, threat modeling. Read-only — diagnosis only.
23
14
 
24
- ## Mode Selection
15
+ ## Steps
25
16
 
26
- - **Daily** (default) — 8/10 confidence gate. Only flag findings with strong evidence. Skips speculative or trace-only checks. Runs all phases but reports only clear findings.
27
- - **Comprehensive** (`--comprehensive`) — 2/10 bar. Surfaces more. Includes trace-only flags, speculative dependency issues, and historical pattern matching.
28
-
29
- ## Phases
30
-
31
- ### Phase 0: Stack Detection + Architecture Mental Model
32
- Detect the project's language stack and framework. Build an explicit mental model: what are the components, trust boundaries, data flows, and attack surface.
33
-
34
- ### Phase 1: Attack Surface Census
35
- Map what an attacker sees:
36
- - Public vs authenticated vs admin endpoints
37
- - File upload points, external integrations, background jobs
38
- - WebSocket channels, webhook receivers
39
- - CI/CD workflows, container configs, IaC, deploy targets
40
-
41
- ### Phase 2: Secrets Archaeology
42
- Scan git history for leaked credentials (AWS keys, OpenAI keys, GitHub tokens, Slack tokens, generic secrets). Check `.env` tracking status. Scan CI configs for inline secrets.
43
-
44
- ### Phase 3: Dependency Supply Chain
45
- Check beyond `npm audit`: known CVEs in direct deps, install scripts in production deps, lockfile integrity, abandoned packages. Diff-mode limits to changed deps.
46
-
47
- ### Phase 4: CI/CD Pipeline Security
48
- Check for unpinned third-party actions, `pull_request_target` misuse, script injection via `${{ github.event.* }}`, secrets exposed as env vars, CODEOWNERS protection on workflow files.
49
-
50
- ### Phase 5: Infrastructure Shadow Surface
51
- Dockerfiles (root user, secrets in ARG, missing USER), config files with prod DB URLs, IaC (overly permissive IAM, privileged K8s). Staging configs referencing prod.
52
-
53
- ### Phase 6: Webhook & Integration Audit
54
- Webhook endpoints without signature verification, TLS verification disabled in prod, overly broad OAuth scopes.
55
-
56
- ### Phase 7: LLM & AI Security
57
- Prompt injection vectors (user input flowing into system prompts), unsanitized LLM output rendered in UI, tool/function calling without validation, hardcoded AI API keys.
58
-
59
- ### Phase 8: OWASP Top 10 + STRIDE
60
- Map findings to OWASP Top 10 categories and STRIDE threat model. Identify gaps in coverage across categories.
61
-
62
- ## Output Format
63
-
64
- ```
65
- Security Posture Report
66
- ══════════════════════
67
- Project: <name>
68
- Branch: <branch>
69
- Mode: daily | comprehensive
70
- Date: <date>
71
-
72
- Critical (n):
73
- - <finding> — <file:line> — <remediation>
74
-
75
- High (n):
76
- - <finding> — <file:line> — <remediation>
77
-
78
- Medium (n):
79
- - <finding> — <file:line> — <remediation>
80
-
81
- Low (n):
82
- - <finding> — <file:line> — <remediation>
83
-
84
- OWASP Coverage:
85
- A01:Broken Access Control — n findings
86
- A02:Cryptographic Failures — n findings
87
- ...
88
-
89
- STRIDE:
90
- Spoofing — n
91
- Tampering — n
92
- Repudiation — n
93
- Info Disclosure — n
94
- Denial of Service — n
95
- Elevation of Privilege — n
96
- ```
97
-
98
- ## Rules
99
-
100
- - **Read-only.** No fixes. Diagnosis only, except for auto-fixable low-severity findings when explicitly asked.
101
- - **Daily mode** gates findings at 8/10 confidence. If you would not stake your reputation on it, skip it in daily mode.
102
- - **Comprehensive mode** gates at 2/10. Surface everything plausible. The user decides.
103
- - **No false positives on git history.** Placeholder values ("your_", "changeme") excluded. Rotated secrets still flagged (they were exposed).
104
- - **Prioritize by blast radius.** Remote code execution > credential exposure > info leak > best-practice gap.
105
- - **Always distinguish direct vs transitive** dependencies in supply chain findings.
106
- - **Use Grep/Glob tools** for searches, not bash grep. The bash blocks below show WHAT to search, not HOW.
17
+ 1. Detect mode: daily (default, 8/10 confidence gate) or comprehensive (`--comprehensive`, 2/10 gate)
18
+ 2. Map stack, architecture, trust boundaries, data flows, and attack surface
19
+ 3. Scan git history, .env files, and CI inline configs for leaked secrets
20
+ 4. Audit dependencies for CVEs, install scripts, lockfile integrity, and abandoned packages
21
+ 5. Review CI/CD security — unpinned actions, pull_request_target misuse, script injection, secrets exposure
22
+ 6. Check infrastructure — Dockerfiles, IaC, prod DB URLs, staging-to-prod references
23
+ 7. Assess OWASP Top 10 and STRIDE coverage gaps
24
+ 8. Produce Security Posture Report with criticality-ranked findings and route
107
25
 
108
26
  ## Routing
109
27
 
110
28
  | Outcome | Route |
111
29
  |---------|-------|
112
- | pass | [report findings to user] |
113
- | fail | → oh-investigate (deepen on findings) |
114
- | blocker | → surface to user |
30
+ | pass | → surface (report findings) |
31
+ | fail | → oh-investigate (deepen) |
32
+ | blocker | → surface |
@@ -0,0 +1,141 @@
1
+ # oh-ship — Deep Reference
2
+
3
+ ## When to Use
4
+
5
+ Code ready to ship. Ships to the **current branch**. PRs are only created when explicitly stated or requested by the user — never automatically.
6
+
7
+ ## Workflow (Steps 1–4)
8
+
9
+ ### 1. Pre-flight
10
+ Run tests, lint, typecheck. If any fail, stop and surface.
11
+
12
+ ### 2. Version bump (conditional)
13
+ Check if a version bump is applicable:
14
+ - If `package.json` or `VERSION` exists and user mentioned a release/bump → semver bump
15
+ - If no version file exists or user didn't request a bump → skip
16
+ - If unsure whether to bump → ask the user
17
+
18
+ ### 3. Changelog
19
+ Generate from commits since last tag. Polish: consistent tense, group by type (features, fixes, breaking). Skip if no tag history.
20
+
21
+ ### 4. Commit
22
+ Stage all changes. Commit message uses conventional commit format with **vague, professional descriptions** — do not leak implementation details. Use the git-commit skill conventions: `<type>[scope]: <short description>`.
23
+
24
+ ## Environment & Options (Steps 5–6)
25
+
26
+ ### 5. Detect Environment
27
+ Before proceeding, determine workspace state:
28
+
29
+ ```bash
30
+ GIT_DIR=$(cd "$(git rev-parse --git-dir)" 2>/dev/null && pwd -P)
31
+ GIT_COMMON=$(cd "$(git rev-parse --git-common-dir)" 2>/dev/null && pwd -P)
32
+ ```
33
+
34
+ | State | Type | Options |
35
+ |-------|------|---------|
36
+ | `GIT_DIR == GIT_COMMON` | Normal repo | Standard 4 |
37
+ | `GIT_DIR != GIT_COMMON`, named branch | Worktree | Standard 4 |
38
+ | `GIT_DIR != GIT_COMMON`, detached HEAD | Externally managed | Reduced 3 |
39
+
40
+ ### 6. Option Presentation
41
+ Core principle: Verify → Detect → Present → Execute → Clean up.
42
+
43
+ Determine base branch (`git merge-base HEAD main` or `git merge-base HEAD master`). Present structured options based on detected environment.
44
+
45
+ **Normal repo or named-branch worktree (4 options):**
46
+ ```
47
+ Implementation complete. What would you like to do?
48
+ 1. Merge into <base> locally
49
+ 2. Push + create a Pull Request
50
+ 3. Keep branch as-is
51
+ 4. Discard this work
52
+ Which option?
53
+ ```
54
+
55
+ **Detached HEAD — externally managed (3 options):**
56
+ ```
57
+ Implementation complete. Detached HEAD — options:
58
+ 1. Push as new branch + create a Pull Request
59
+ 2. Keep as-is
60
+ 3. Discard this work
61
+ Which option?
62
+ ```
63
+
64
+ - **Option 1 (Merge):** Checkout base, pull, merge feature branch, verify tests on merged result. Run Provenance-Based Cleanup, then `git branch -d <branch>`. Done. Steps 7–11 are skipped.
65
+ - **Option 2 (Push + PR):** Continue to Steps 7–11 (Push, PR, Deploy, Verify, Docs Sync).
66
+ - **Option 3 (Keep):** Report "Keeping branch `<name>`." No cleanup. Stop.
67
+ - **Option 4 (Discard):** Require typed "discard" confirmation. On confirm, run Provenance-Based Cleanup, then `git branch -D <branch>`. Done.
68
+
69
+ ## Push & Deploy (Steps 7–11)
70
+
71
+ ### 7. Push to current branch
72
+ `git push origin <current-branch>`. Always the current branch. Never assume a different target.
73
+
74
+ ### 8. PR (only if requested)
75
+ If the user explicitly said "create a PR", "open a pull request", or similar → create PR with summary and test evidence. If the change is very large, you may **suggest** a PR, but do not create one without explicit user confirmation.
76
+
77
+ ### 9. Deploy
78
+ Trigger deploy (platform-specific). If no deploy target is configured, skip.
79
+
80
+ ### 10. Verify
81
+ Smoke test or health check if applicable.
82
+
83
+ ### 11. Post-ship docs sync
84
+ Cross-reference diff against README, CHANGELOG, ARCHITECTURE.md, CONTRIBUTING.md. Update to match what shipped.
85
+
86
+ ## Cleanup
87
+
88
+ ### Provenance-Based Cleanup
89
+ Only runs for Option 1 (Merge) and Option 4 (Discard). Options 2 (Push + PR) and 3 (Keep) always preserve the worktree.
90
+
91
+ 1. **Detect provenance:**
92
+ - `GIT_DIR == GIT_COMMON` → normal repo, no worktree to clean. Done.
93
+ - Worktree path is under `.worktrees/`, `worktrees/`, or similar known paths → we own cleanup.
94
+ - Otherwise → harness-owned workspace. Do NOT remove.
95
+
96
+ 2. **Cleanup (only for owned worktrees):**
97
+ ```bash
98
+ MAIN_ROOT=$(git -C "$(git rev-parse --git-common-dir)/.." rev-parse --show-toplevel)
99
+ cd "$MAIN_ROOT"
100
+ git worktree remove "$WORKTREE_PATH"
101
+ git worktree prune
102
+ ```
103
+
104
+ 3. **Never clean up** harness-owned workspaces. If the platform provides a workspace-exit tool, use it. Otherwise leave in place.
105
+
106
+ ### Correct Ordering
107
+ Merge → verify → cleanup → delete branch. Never delete before cleanup — `git branch -d` fails when worktree still references the branch. Always `cd` to main repo root before `git worktree remove`.
108
+
109
+ ## Quick Reference
110
+
111
+ | Option | Merge | Push | Keep Worktree | Cleanup | Delete Branch |
112
+ |--------|-------|------|---------------|---------|---------------|
113
+ | 1. Merge locally | yes | — | — | yes | yes (`-d`) |
114
+ | 2. Push + PR | — | yes | yes | — | — |
115
+ | 3. Keep as-is | — | — | yes | — | — |
116
+ | 4. Discard | — | — | — | yes | yes (`-D`) |
117
+
118
+ ## Branch Protocol
119
+ - **Always push to the current branch.** Detect it with `git branch --show-current`.
120
+ - **Always confirm before any branch-sensitive operation.** If the current branch is `main` or `master`, ask: *"Current branch is main. Are you sure? Do you mean a feature/dev branch?"*
121
+ - **Never auto-create a PR.** The user must explicitly say "create a PR" or you may suggest one for massive changes, but never execute without confirmation.
122
+ - **Never merge on your own initiative.** Merging is the user's decision — merge only when the user explicitly chooses Option 1 (Merge locally).
123
+
124
+ ## Branch Confirmation Rules
125
+ Before these operations, ALWAYS confirm the branch with the user:
126
+ - Pushing to `main` / `master` / `production` — ask "Are you sure? Do you mean a dev branch?"
127
+ - Creating a PR — confirm source and target branches
128
+ - Deploying — confirm which environment
129
+ - Version bump — confirm the bump type (major/minor/patch)
130
+
131
+ ## Anti-patterns
132
+ - Skipping pre-flight ("just a quick fix")
133
+ - Auto-creating a PR without the user asking
134
+ - Pushing to main without confirmation
135
+ - Merging without user instruction
136
+ - Deploy without post-deploy verification
137
+ - Not tagging releases
138
+ - Deleting branch before removing worktree
139
+ - Running `git worktree remove` from inside the worktree
140
+ - Cleaning up harness-owned worktrees (provenance check required)
141
+ - Discarding work without typed confirmation
@@ -1,12 +1,7 @@
1
1
  ---
2
2
  name: oh-ship
3
- description: "Deploy and PR pipeline — test, bump, changelog, PR, deploy, verify"
3
+ description: "Use when code is ready to ship. Tests, version bump, commit, push to current branch, deploy, and verify. PRs only on request."
4
4
  tier: 4
5
- triggers:
6
- - "ship this"
7
- - "create a PR"
8
- - "version bump"
9
- - "publish"
10
5
  route:
11
6
  pass: oh-retro
12
7
  fail: oh-expert
@@ -15,35 +10,22 @@ route:
15
10
 
16
11
  # oh-ship
17
12
 
18
- ## When to Use
19
- When code is ready to ship. Runs the full release pipeline from test to PR to deploy verification.
13
+ Complete ship pipeline: pre-flight → version → changelog → commit → detect → present → execute.
20
14
 
21
- ## Workflow
22
- 1. **Pre-flight** — run test suite, lint, typecheck
23
- 2. **Version bump** — read VERSION file or package.json, bump according to semver
24
- 3. **Changelog** — generate from commit history since last tag. Polish voice: consistent tense, group by type (features, fixes, breaking)
25
- 4. **Commit + push** — create release commit with changelog
26
- 5. **PR** — create GitHub PR with summary, test evidence, deploy plan
27
- 6. **Merge** — merge PR after CI passes
28
- 7. **Deploy** — trigger deploy (platform-specific)
29
- 8. **Verify** — canary check, health endpoints, smoke tests
30
- 9. **Post-ship docs sync** — read all project docs (README, ARCHITECTURE.md, CONTRIBUTING.md), cross-reference the diff, update to match what shipped:
31
- - README: new features, changed APIs, updated examples
32
- - CHANGELOG: verify polish against what actually merged
33
- - ARCHITECTURE.md / CONTRIBUTING.md: reflect any structural or workflow changes
34
- - TODOS.md: remove completed items, add any new deferred items discovered during ship
35
- - VERSION file: bump if not already done
15
+ ## Steps
36
16
 
37
- ## Anti-patterns
38
- - Skipping pre-flight checks ("just a quick fix")
39
- - Bumping version without changelog
40
- - Deploying without post-deploy verification
41
- - Not tagging releases
17
+ 1. Run pre-flight — tests, lint, typecheck. Stop and surface if any fail.
18
+ 2. Version bump — conditional. If `package.json` or `VERSION` exists and user mentioned release, semver bump. Skip or ask if unsure.
19
+ 3. Generate changelog from commits since last tag. Group by type (features, fixes, breaking). Skip if no tag history.
20
+ 4. Commit — stage all changes. Use conventional commit format with vague professional descriptions.
21
+ 5. Detect environment — normal repo, worktree, or detached HEAD. Determine base branch.
22
+ 6. Present structured options — Merge locally, Push + PR, Keep branch, or Discard.
23
+ 7. Execute chosen option — merge (verify + cleanup + delete), push (push + PR + deploy + verify + docs sync), keep, or discard (require typed confirmation).
42
24
 
43
25
  ## Routing
44
26
 
45
27
  | Outcome | Route |
46
28
  |---------|-------|
47
- | pass | → oh-retro (post-ship review) |
48
- | fail | → oh-expert (diagnose deployment failure) |
49
- | blocker | → surface to user |
29
+ | pass | → oh-retro (post-ship review) |
30
+ | fail | → oh-expert (diagnose) |
31
+ | blocker | → surface |