@pieerry/harness-kit 3.3.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.claude/{plugins → agents}/product-manager/README.md +32 -30
  2. package/.claude/{plugins → agents}/product-manager/guides/examples/good-prp-example.md +2 -2
  3. package/.claude/{plugins → agents}/product-manager/guides/pipeline.md +7 -7
  4. package/.claude/{plugins → agents}/product-manager/sensors/prp-links.md +1 -1
  5. package/.claude/{plugins → agents}/product-manager/skills/prd/SKILL.md +2 -2
  6. package/.claude/{plugins → agents}/product-manager/skills/prp/SKILL.md +4 -4
  7. package/.claude/agents/product-manager.md +2 -2
  8. package/.claude/agents/staff-software-engineer/README.md +87 -0
  9. package/.claude/{plugins → agents}/staff-software-engineer/guides/conventions-override.md +3 -3
  10. package/.claude/{plugins → agents}/staff-software-engineer/guides/pipeline.md +4 -4
  11. package/.claude/{plugins → agents}/staff-software-engineer/sensors/dev-structure.md +2 -2
  12. package/.claude/{plugins → agents}/staff-software-engineer/sensors/pr-structure.md +3 -3
  13. package/.claude/{plugins → agents}/staff-software-engineer/sensors/test-structure.md +2 -2
  14. package/.claude/agents/staff-software-engineer.md +4 -4
  15. package/.claude/commands/pipeline/reset.md +1 -1
  16. package/.claude/commands/product-manager/prd.md +11 -11
  17. package/.claude/commands/product-manager/prp.md +12 -12
  18. package/.claude/commands/product-manager/run.md +4 -4
  19. package/.claude/commands/sse/dev.md +11 -11
  20. package/.claude/commands/sse/plan.md +11 -12
  21. package/.claude/commands/sse/pr.md +6 -7
  22. package/.claude/commands/sse/run.md +4 -4
  23. package/.claude/commands/sse/test.md +5 -5
  24. package/.claude/conventions/README.md +1 -1
  25. package/.claude/hooks/activity-pre-read.sh +4 -4
  26. package/.claude/hooks/status-line.sh +11 -11
  27. package/.claude/{plugins/product-manager/hooks → runtime/hooks/product-manager}/post-eval-prd.sh +11 -9
  28. package/.claude/{plugins/product-manager/hooks → runtime/hooks/product-manager}/post-eval-prp.sh +11 -9
  29. package/.claude/{plugins/product-manager/hooks → runtime/hooks/product-manager}/post-write-prd.sh +7 -5
  30. package/.claude/{plugins/product-manager/hooks → runtime/hooks/product-manager}/post-write-prp.sh +8 -6
  31. package/.claude/{plugins/product-manager/hooks → runtime/hooks/product-manager}/pre-prp-check.sh +5 -3
  32. package/.claude/{plugins/staff-software-engineer/hooks → runtime/hooks/staff-software-engineer}/post-eval-sse.sh +13 -11
  33. package/.claude/{plugins/staff-software-engineer/hooks → runtime/hooks/staff-software-engineer}/post-write-sse.sh +11 -9
  34. package/.claude/runtime/scripts/product-manager/__pycache__/confluence-publish.cpython-314.pyc +0 -0
  35. package/.claude/runtime/scripts/product-manager/__pycache__/link-validator.cpython-314.pyc +0 -0
  36. package/.claude/runtime/scripts/product-manager/__pycache__/sensor-runner.cpython-314.pyc +0 -0
  37. package/.claude/runtime/scripts/product-manager/__pycache__/token-phase.cpython-314.pyc +0 -0
  38. package/.claude/runtime/scripts/product-manager/confluence-publish.py +206 -0
  39. package/.claude/{plugins/product-manager/scripts → runtime/scripts/product-manager}/link-validator.py +1 -1
  40. package/.claude/{plugins/product-manager/scripts → runtime/scripts/product-manager}/token-phase.py +2 -2
  41. package/.claude/scripts/__pycache__/activity.cpython-314.pyc +0 -0
  42. package/.claude/scripts/__pycache__/pipeline.cpython-314.pyc +0 -0
  43. package/.claude/scripts/__pycache__/pr-monitor.cpython-314.pyc +0 -0
  44. package/.claude/scripts/pipeline.py +6 -6
  45. package/.claude/settings.json +7 -7
  46. package/.claude/settings.local.json +11 -3
  47. package/AGENTS.md +141 -0
  48. package/CLAUDE.md +9 -7
  49. package/README.md +122 -269
  50. package/VERSION +1 -1
  51. package/bin/hk.js +16 -8
  52. package/package.json +5 -3
  53. package/setup/install.sh +63 -44
  54. package/setup/update.sh +5 -0
  55. package/.claude/plugins/product-manager/scripts/confluence-publish.py +0 -205
  56. package/.claude/plugins/staff-software-engineer/README.md +0 -90
  57. /package/.claude/{plugins → agents}/product-manager/evals/prd-quality.md +0 -0
  58. /package/.claude/{plugins → agents}/product-manager/evals/prd-readiness.md +0 -0
  59. /package/.claude/{plugins → agents}/product-manager/evals/prp-context-readiness.md +0 -0
  60. /package/.claude/{plugins → agents}/product-manager/evals/prp-quality.md +0 -0
  61. /package/.claude/{plugins → agents}/product-manager/guides/examples/good-prd-example.md +0 -0
  62. /package/.claude/{plugins → agents}/product-manager/guides/prd-guidelines.md +0 -0
  63. /package/.claude/{plugins → agents}/product-manager/guides/product-guidelines.md +0 -0
  64. /package/.claude/{plugins → agents}/product-manager/guides/prp-guidelines.md +0 -0
  65. /package/.claude/{plugins → agents}/product-manager/guides/templates/prd.md +0 -0
  66. /package/.claude/{plugins → agents}/product-manager/guides/templates/prp.md +0 -0
  67. /package/.claude/{plugins → agents}/product-manager/guides/writing-style.md +0 -0
  68. /package/.claude/{plugins → agents}/product-manager/sensors/prd-acceptance-criteria.md +0 -0
  69. /package/.claude/{plugins → agents}/product-manager/sensors/prd-structure.md +0 -0
  70. /package/.claude/{plugins → agents}/product-manager/sensors/prp-context-quality.md +0 -0
  71. /package/.claude/{plugins → agents}/product-manager/sensors/prp-structure.md +0 -0
  72. /package/.claude/{plugins → agents}/staff-software-engineer/evals/dev-quality.md +0 -0
  73. /package/.claude/{plugins → agents}/staff-software-engineer/evals/plan-quality.md +0 -0
  74. /package/.claude/{plugins → agents}/staff-software-engineer/evals/pr-quality.md +0 -0
  75. /package/.claude/{plugins → agents}/staff-software-engineer/evals/test-quality.md +0 -0
  76. /package/.claude/{plugins → agents}/staff-software-engineer/guides/coding-style.md +0 -0
  77. /package/.claude/{plugins → agents}/staff-software-engineer/guides/commit-style.md +0 -0
  78. /package/.claude/{plugins → agents}/staff-software-engineer/sensors/code-conventions.md +0 -0
  79. /package/.claude/{plugins → agents}/staff-software-engineer/sensors/plan-structure.md +0 -0
  80. /package/.claude/{plugins → agents}/staff-software-engineer/sensors/test-coverage.md +0 -0
  81. /package/.claude/{plugins → agents}/staff-software-engineer/skills/backend/SKILL.md +0 -0
  82. /package/.claude/{plugins → agents}/staff-software-engineer/skills/devops/SKILL.md +0 -0
  83. /package/.claude/{plugins → agents}/staff-software-engineer/skills/mobile/SKILL.md +0 -0
  84. /package/.claude/{plugins → agents}/staff-software-engineer/skills/web/SKILL.md +0 -0
  85. /package/.claude/{plugins/product-manager/outputs → runtime/outputs/pm}/.markers/.gitkeep +0 -0
  86. /package/.claude/{plugins/staff-software-engineer/outputs → runtime/outputs/sse}/.markers/.gitkeep +0 -0
  87. /package/.claude/{plugins/product-manager/scripts → runtime/scripts/product-manager}/sensor-runner.py +0 -0
package/README.md CHANGED
@@ -2,365 +2,217 @@
2
2
 
3
3
  # harness-kit
4
4
 
5
- Claude Code harness for product + engineering delivery.
6
- From idea to merged PR, one pipeline.
5
+ From idea to merged PR. One pipeline. Six stages.
7
6
 
8
- [![Version](https://img.shields.io/badge/version-3.3.1-blue.svg)](VERSION)
9
- [![Claude Code](https://img.shields.io/badge/Claude%20Code-plugin-8b5cf6.svg)](https://claude.ai/code)
10
- [![Plugins](https://img.shields.io/badge/plugins-2-success.svg)](#layout)
11
- [![Pipeline](https://img.shields.io/badge/stages-6-informational.svg)](#usage)
7
+ [![Version](https://img.shields.io/badge/version-4.0.1-blue.svg)](VERSION)
8
+ [![Claude Code](https://img.shields.io/badge/Claude%20Code-AGENTS.md-8b5cf6.svg)](https://claude.ai/code)
9
+ [![Agents](https://img.shields.io/badge/agents-2-success.svg)](#agents)
12
10
  [![License](https://img.shields.io/badge/license-MIT-lightgrey.svg)](LICENSE)
13
11
 
14
12
  <br/>
15
13
 
16
14
  ![harness-kit demo](demo/preview.gif)
17
15
 
18
- <sub>110s walkthrough · install → 6 commands → auto-watch PR until merged. Each command scene names its **guide · ref · sensor · eval**; final summary shows token spend per phase.</sub>
16
+ <sub>~2min walkthrough · agents · skills · install · 6 commands · sensors+evals matrix · auto-watch PR until merged.</sub>
19
17
 
20
18
  </div>
21
19
 
22
20
  ---
23
21
 
24
- ## What this is
22
+ ## What it is
25
23
 
26
- harness-kit turns a target repo into a Claude Code workspace where **product and engineering share one pipeline**. You go from a problem statement to a merged PR through six gated stages — `prd → prp → plan → dev → test → pr` — each producing a markdown artifact, each gated by deterministic sensors and an LLM-judged eval, each accounting for its own token spend.
24
+ Two Claude Code agents — `product-manager` and `staff-software-engineer` — sharing one pipeline:
27
25
 
28
- The pipeline is two Claude Code plugins (`product-manager`, `staff-software-engineer`) wired together by a small shell+python harness that tracks state, runs gates, and renders a live status bar. After a PR opens, an in-session monitor polls GitHub on backoff until the PR merges, then auto-clears state so the next feature starts clean.
29
-
30
- **Who it's for:** PMs and engineers who want their `/`-commands to (1) produce real artifacts with named gates, (2) survive session restarts via persisted state, (3) record per-phase token spend so usage is auditable.
31
-
32
- **What it's not:** a code generator, a CI replacement, or an opinionated agent framework. It's a thin harness that defers all model work to Claude Code and all VCS work to `git`/`gh`.
33
-
34
- **Internal docs use caveman-full style** (drop articles + filler, fragments OK) to save input tokens — see [CLAUDE.md](CLAUDE.md). Generated artifacts (PRDs, PRPs, plans, dev/test/pr reports) stay natural English for external stakeholders. Reference templates and good-example artifacts are deliberately left in natural prose so produced docs inherit the right pattern.
35
-
36
- ---
26
+ ```
27
+ prd → prp → plan → dev → test → pr
28
+ ```
37
29
 
38
- ## Table of Contents
39
-
40
- - [Getting Started](#getting-started)
41
- - [Install](#install)
42
- - [Update](#update)
43
- - [Usage](#usage)
44
- - [Workflow](#workflow)
45
- - [Samples](#samples)
46
- - [How it works](#how-it-works)
47
- - [Anatomy of a stage](#anatomy-of-a-stage)
48
- - [Sensors + evals matrix](#sensors--evals-matrix)
49
- - [Status bar](#status-bar)
50
- - [Live activity indicator](#live-activity-indicator)
51
- - [PR monitor](#pr-monitor)
52
- - [Token accounting](#token-accounting)
53
- - [Session-start auto-clear](#session-start-auto-clear)
54
- - [Layout](#layout)
55
- - [Project conventions](#project-conventions)
56
- - [Tooling](#tooling)
30
+ Each stage produces a markdown artifact, gated by **deterministic sensors** (pass/fail) and a **scored eval** (≥ 8.0). After the PR opens, an in-session monitor watches for merge.
57
31
 
58
32
  ---
59
33
 
60
- ## Getting Started
61
-
62
- ### Install
34
+ ## Install
63
35
 
64
36
  ```bash
65
37
  npm i -g @pieerry/harness-kit
66
38
  hk install
67
39
  ```
68
40
 
69
- `hk install` writes plugins into `.claude/plugins/`, drops hooks in `.claude/hooks/` (status-line, pipeline tracking, activity tracker), copies state managers to `.claude/scripts/` (`pipeline.py`, `activity.py`, `pr-monitor.py`), registers slash commands under `.claude/commands/`, generates `.claude/settings.json`, and scaffolds `.claude/conventions/` for your project overrides. Run from the target repo or pass `[target]`. Restart Claude Code after.
70
-
71
- Reinstalling on top of an existing setup backs up the previous `settings.json` to `.claude/settings.json.bak.{timestamp}` before overwriting, so manual customizations are recoverable.
72
-
73
- CLI subcommands:
41
+ Restart Claude Code. Done.
74
42
 
75
- | Command | What it does |
76
- |---------|--------------|
77
- | `hk install [target]` | install plugins into target repo (default: cwd) |
78
- | `hk update [target]` | pull latest source and reinstall |
79
- | `hk uninstall [target]` | remove plugins, hooks, settings, agents (keeps `outputs/` and `conventions/`) |
80
- | `hk status [target]` | installed version + active pipeline stage |
81
- | `hk version` | source version |
82
-
83
- No-npm path:
43
+ Without npm:
84
44
 
85
45
  ```bash
86
46
  git clone https://github.com/Pierry/harness-kit ~/.harness-kit
87
47
  bash ~/.harness-kit/setup/install.sh
88
48
  ```
89
49
 
90
- ### Update
50
+ CLI: `hk install` · `hk update` · `hk uninstall` · `hk status` · `hk version`.
91
51
 
92
- For npm installs:
52
+ ---
93
53
 
94
- ```bash
95
- npm i -g @pieerry/harness-kit@latest
96
- hk update
97
- ```
54
+ ## Getting started
98
55
 
99
- For git-clone installs:
56
+ Pick the flow that matches the task. All of them share the same pipeline state, so you can switch between them mid-feature.
100
57
 
101
- ```bash
102
- hk update
103
- ```
58
+ ### Big task — full pipeline (PM + Eng)
104
59
 
105
- `hk update` pulls latest source (git installs only) and reinstalls. Idempotent. Version is read from the package `VERSION` and recorded in your target at `.claude/.hk-version`. npm users must bump the package first — `hk update` alone won't reach the registry.
60
+ A new feature with stakes, ambiguity, or a Jira ticket attached. You want a written PRD, a thought-through PRP, a plan, code, tests, and a PR.
106
61
 
107
- ### Usage
62
+ ```
63
+ /product-manager:run # drafts PRD then PRP, with sensor + eval gates
64
+ /sse:run # plans, implements, tests, opens PR, watches for merge
65
+ ```
108
66
 
109
- | Command | What it does |
110
- |---------|--------------|
111
- | `/product-manager:prd` | Draft a PRD |
112
- | `/product-manager:prp` | Draft a PRP (needs an approved PRD) |
113
- | `/product-manager:run` | Full PM pipeline (PRD then PRP) |
114
- | `/sse:plan` | Generate plan from an approved PRP |
115
- | `/sse:dev` | Implement the plan, run convention + structure + quality gates |
116
- | `/sse:test` | Run the project test suite + write a structured test report |
117
- | `/sse:pr` | Open the draft PR, then auto-arm `pr-monitor` |
118
- | `/sse:pr-monitor` | Watch the active PR for merge with backoff polling (auto-invoked by `/sse:pr`) |
119
- | `/sse:run` | Full SSE pipeline (plan, dev, test, pr) |
120
- | `/pipeline:continue` | Resume the active pipeline at its next pending stage |
121
- | `/pipeline:reset` | Abandon the active pipeline run (clears state, keeps artifacts) |
67
+ Approve each artifact when prompted. The status bar tracks where you are in the six stages.
122
68
 
123
- Pipeline order: `prd → prp → plan → dev → test → pr`. Each stage gets an approval marker. Approval requires both the sensor gate (pass) and the eval gate (score ≥ 8.0).
69
+ ### Spec only — no code yet
124
70
 
125
- You can enter the pipeline at any stage:
71
+ You need the PRD and PRP to align with stakeholders before any engineering work. Stop after the PRP.
126
72
 
127
- - `prd → prp → plan → dev → test → pr` (full PM + SSE)
128
- - `prd → prp` (PM only, hand off to a separate engineering process)
129
- - `plan → dev → test → pr` (SSE only, when discovery already happened elsewhere)
73
+ ```
74
+ /product-manager:run
75
+ ```
130
76
 
131
- Status bar tracks the shape you started with. Close the session and reopen later — `/pipeline:continue` picks up at the next pending stage. `/pipeline:reset` clears the run if you decide to abandon it.
77
+ When eng is ready, hand them the repo and they run `/sse:run` against the approved PRP.
132
78
 
133
- ### Workflow
79
+ ### Dev only — small change, plan in your head
134
80
 
135
- PM session in this workspace:
81
+ A bug fix, a small enhancement, or a refactor where writing a PRD would be theatre. Skip PM, run engineering directly.
136
82
 
137
83
  ```
138
- $ /product-manager:run
139
- > squad? billing
140
- > problem? invoice generation fails for multi-currency customers
141
- > ...
142
- PRD saved at outputs/prd/2026-05-12-billing-multi-currency.md.
143
- sensors: prd-structure ok, prd-acceptance-criteria ok
144
- eval: prd-quality 8.6/10, prd-readiness 8.9/10
145
- guides: prd-guidelines.md, writing-style.md, templates/prd.md
146
- refs: business-info.md, squads/billing/context.md
147
- next: /product-manager:prp
148
-
149
- PRP saved at outputs/prp/2026-05-12-billing-multi-currency.md.
150
- sensors: prp-structure ok, prp-context-quality ok, prp-links ok
151
- eval: prp-quality 8.4/10, prp-context-readiness 9.0/10
152
- guides: prp-guidelines.md, templates/prp.md
153
- refs: prd/2026-05-12-billing-multi-currency.md
154
- next: /sse:plan (ready for handoff)
84
+ /sse:run # plan → dev → test → PR
155
85
  ```
156
86
 
157
- Engineering session in the target service repo:
87
+ Or run a single stage if that's all you need:
158
88
 
159
89
  ```
160
- $ /sse:run
161
- > source PRP? outputs/prp/2026-05-12-billing-multi-currency.md
162
- > area? backend
163
- Plan saved at outputs/plan/2026-05-12-billing-multi-currency.md.
164
- sensors: plan-structure ok (problem, files, gates, scope)
165
- eval: plan-quality 8.3/10
166
- guides: pipeline.md, coding-style.md, skills/backend/SKILL.md
167
- refs: prp/..., conventions/backend.md
168
- next: /sse:dev
169
-
170
- Dev complete. branch feat/PROJ-123-multi-currency.
171
- files changed: 5
172
- commits: 3 (a1b2c3d, d4e5f6g, h7i8j9k)
173
- sensors: code-conventions ok, test-coverage ok, dev-structure ok
174
- eval: dev-quality 8.4/10
175
- guides: coding-style.md, commit-style.md, skills/backend/SKILL.md
176
- next: /sse:test
177
-
178
- Tests passed.
179
- command: ./mvnw test
180
- passed: 24, failed: 0
181
- duration: 12.4s
182
- sensors: test-structure ok
183
- eval: test-quality 8.7/10
184
- next: /sse:pr
185
-
186
- PR opened: https://github.com/your-org/billing-service/pull/567
187
- title: feat(PROJ-123): timezone-aware deadline check
188
- draft: yes
189
- sensors: pr-structure ok
190
- eval: pr-quality 8.9/10
191
-
192
- PR monitor armed for #567. First check in 3min, escalates to 30min cap.
90
+ /sse:plan # just the plan
91
+ /sse:dev # just the code (against an approved plan)
92
+ /sse:test # just the tests
93
+ /sse:pr # just open the PR
193
94
  ```
194
95
 
195
- Every reply names the actual sensors that ran, evals with scores, and guides loaded — no generic "ok" lines. The `/sse:run` and `/product-manager:run` summaries aggregate the same shape across phases, plus per-phase token totals from `outputs/tokens/{feature_id}.json`.
96
+ ### Resume — pick up where you left off
196
97
 
197
- ### Samples
98
+ Closed the session, restarted Claude Code, or got interrupted. State persists at `.claude/.pipeline-state.json`.
198
99
 
199
- Reference artifacts ship inside the plugins:
100
+ ```
101
+ /pipeline:continue # next pending stage for the active feature
102
+ /pipeline:reset # abandon the active run and start fresh
103
+ ```
200
104
 
201
- - [good PRD example](.claude/plugins/product-manager/guides/examples/good-prd-example.md)
202
- - [good PRP example](.claude/plugins/product-manager/guides/examples/good-prp-example.md)
105
+ When the PR merges, the in-session monitor clears state automatically.
203
106
 
204
107
  ---
205
108
 
206
- ## How it works
207
-
208
- ### Anatomy of a stage
209
-
210
- Every stage in the pipeline runs the same loop. Same four ingredients, every time:
211
-
212
- | Ingredient | What it is | Example |
213
- |-----------|------------|---------|
214
- | **guide** | How to write the artifact. Style + structure rules the LLM follows. | `prd-guidelines.md`, `coding-style.md` |
215
- | **ref** | Context pulled in before drafting. Org/squad data + prior artifacts. | `business-info.md`, `outputs/prp/...md`, `.claude/conventions/backend.md` |
216
- | **sensor** | Must-pass structural check. Deterministic, fast. Blocks approval. | `prd-structure`, `prp-links`, `dev-structure`, `test-structure`, `pr-structure`, `code-conventions`, `test-coverage` |
217
- | **eval** | Scored quality rubric. LLM-judge, threshold 8.0, retried until pass or max attempts. | `prd-quality`, `prp-context-readiness`, `plan-quality`, `dev-quality`, `test-quality`, `pr-quality` |
218
-
219
- Sensors are pass/fail. Evals are scored. Approval markers (`<!-- approved: -->`) gate the next stage. Token totals get appended as an inline `<!-- tokens: ... -->` reference after publish.
220
-
221
- ### Sensors + evals matrix
222
-
223
- | Stage | Sensors (deterministic) | Eval (LLM-judge, ≥ 8.0) |
224
- |-------|-------------------------|--------------------------|
225
- | `prd` | `prd-structure`, `prd-acceptance-criteria` | `prd-quality`, `prd-readiness` |
226
- | `prp` | `prp-structure`, `prp-context-quality`, `prp-links` | `prp-quality`, `prp-context-readiness` |
227
- | `plan` | `plan-structure` | `plan-quality` |
228
- | `dev` | `code-conventions`, `test-coverage`, `dev-structure` | `dev-quality` |
229
- | `test` | `test-structure` | `test-quality` |
230
- | `pr` | `pr-structure` | `pr-quality` |
231
-
232
- Document sensors (`*-structure`) are auto-run by the post-write hook when the artifact lands on disk. Code sensors (`code-conventions`, `test-coverage`) are invoked by `/sse:dev` after each commit. Evals are scored by Claude inside the slash command. Convention: sensor files live at `plugins/{plugin}/sensors/{phase}-*.md`; evals at `plugins/{plugin}/evals/{phase}-quality.md`.
233
-
234
- ### Status bar
235
-
236
- The status line follows the active feature through whatever pipeline shape you started. It is dynamic: a `UserPromptSubmit` hook records intent the moment you type a slash command, and `PostToolUse` hooks update state as artifact files land on disk.
109
+ ## Use it
237
110
 
238
111
  ```
239
- idle · /product-manager:run · /sse:run · /pipeline:continue
240
- starting sse-run [plan+dev+test+pr] · plan pending · next /sse:plan
241
- multi-currency [plan+dev+test+pr] · plan drafting · next /sse:plan · sensor: plan-structure
242
- multi-currency [plan+dev+test+pr] · plan approved · dev pending · next /sse:dev
243
- multi-currency [prd+prp+plan+dev+test+pr] · prp approved · plan drafting · next /sse:plan
244
- multi-currency · complete (prd+prp+plan+dev+test+pr)
112
+ /product-manager:run draft PRD then PRP
113
+ /sse:run plan, dev, test, open PR, watch for merge
114
+ /pipeline:continue resume next pending stage
115
+ /pipeline:reset abandon active run
245
116
  ```
246
117
 
247
- The bracketed list is the pipeline shape — the stages this run will execute. The shape is inferred from the slash command you invoked and extended when you chain commands (e.g. running `/sse:run` after `/product-manager:run` appends `plan+dev+test+pr` to the existing `prd+prp`).
118
+ Need just one stage? Each is its own slash command:
248
119
 
249
- State lives at `.claude/.pipeline-state.json`. Close the session and reopen — the `SessionStart` hook prints a one-line resume hint, and `/pipeline:continue` invokes the next pending stage. `/pipeline:reset` clears the file. Output artifacts under `.claude/plugins/*/outputs/` are never deleted by reset.
120
+ | Stage | Command | Gates |
121
+ |---|---|---|
122
+ | `prd` | `/product-manager:prd` | `prd-structure`, `prd-acceptance-criteria` · `prd-quality`, `prd-readiness` |
123
+ | `prp` | `/product-manager:prp` | `prp-structure`, `prp-context-quality`, `prp-links`, `link-validator` · `prp-quality`, `prp-context-readiness` |
124
+ | `plan` | `/sse:plan` | `plan-structure` · `plan-quality` |
125
+ | `dev` | `/sse:dev` | `code-conventions`, `test-coverage`, `dev-structure` · `dev-quality` |
126
+ | `test` | `/sse:test` | `test-structure` · `test-quality` |
127
+ | `pr` | `/sse:pr` | `pr-structure` · `pr-quality` · auto-arms `/sse:pr-monitor` |
250
128
 
251
- ### Live activity indicator
129
+ Sensors block on failure (Claude regenerates). Evals score; threshold 8.0; retried up to 3 times.
252
130
 
253
- While Claude is reading a sensor, eval, or guide, the status bar appends a cyan tag with the file being touched:
254
-
255
- ```
256
- multi-currency [plan+dev+test+pr] · plan drafting · next /sse:plan · sensor: plan-structure
257
- multi-currency [plan+dev+test+pr] · dev drafting · next /sse:dev · guide: coding-style
258
- multi-currency [plan+dev+test+pr] · plan drafting · next /sse:plan · eval: plan-quality
259
- ```
260
-
261
- Mechanism: a `PreToolUse` Read hook (`activity-pre-read.sh`) detects when Claude reads a file under `plugins/*/sensors/`, `plugins/*/evals/`, or `plugins/*/guides/` and writes the activity to `.claude/.activity` with a 60s TTL. The status-line reads it on each render and clears stale entries. The Claude Code top-of-screen "thinking…" indicator is rendered by the CLI itself and cannot be augmented; the bottom status bar is the available channel.
131
+ ---
262
132
 
263
- ### PR monitor
133
+ ## Agents
264
134
 
265
- After `/sse:pr` opens a PR, it auto-invokes `/sse:pr-monitor`, which polls `gh pr view --json state` on backoff and stays in the session until the PR transitions out of `OPEN`.
135
+ Registered in [`AGENTS.md`](./AGENTS.md) at the repo root. Each ships its own sensors, evals, guides, skills.
266
136
 
267
- | Rung | Interval | Attempts | Cumulative |
268
- |------|----------|----------|------------|
269
- | 1 | 3 min | 5 | 15 min |
270
- | 2 | 6 min | 5 | 45 min |
271
- | 3 | 12 min | 5 | 1h 45m |
272
- | 4 | 24 min | 5 | 3h 45m |
273
- | 5 (cap) | 30 min | ∞ | until merged/closed |
137
+ ### `product-manager` — turns a problem into an engineering-ready spec
274
138
 
275
- On `MERGED`: notifies and clears both `.pipeline-state.json` and `.pr-monitor-state.json`. On `CLOSED` without merge: stops cleanly. Mechanism: `ScheduleWakeup` in the active session — closing the session ends the monitor.
139
+ - Skills: `prd`, `prp`
140
+ - Sensors: 5 (structure + acceptance criteria + cross-links)
141
+ - Evals: 4 (quality + readiness for each of PRD, PRP)
142
+ - Guides: `pipeline.md`, `prd-guidelines.md`, `prp-guidelines.md`, `writing-style.md`, `templates/`, `examples/`
143
+ - [Full docs →](.claude/agents/product-manager/README.md)
276
144
 
277
- State: `.claude/.pr-monitor-state.json` records PR number, URL, branch, current interval, and attempt counts.
145
+ ### `staff-software-engineer` — turns an approved PRP into a merged PR
278
146
 
279
- ### Token accounting
147
+ - Skills: `backend`, `web`, `mobile`, `devops` (auto-detected from repo)
148
+ - Sensors: 6 (`plan-structure`, `code-conventions`, `test-coverage`, `dev-structure`, `test-structure`, `pr-structure`)
149
+ - Evals: 4 (`plan`, `dev`, `test`, `pr` quality)
150
+ - Guides: `pipeline.md`, `coding-style.md`, `commit-style.md`, `conventions-override.md`
151
+ - [Full docs →](.claude/agents/staff-software-engineer/README.md)
280
152
 
281
- Every phase has its own start/end marker written to `outputs/.markers/{feature_id}.{phase}-{generate|validate}.{start|end}`. When the artifact is approved, the post-eval hook runs `scripts/token-phase.py` for both phases — it reads the Claude session transcript JSONL, sums input/output/cache-read/cache-creation tokens within each window, and appends an entry to `outputs/tokens/{feature_id}.json`.
153
+ ---
282
154
 
283
- Schema:
155
+ ## Anatomy of every stage
284
156
 
285
- ```json
286
- {
287
- "feature_id": "2026-05-12-billing-multi-currency",
288
- "files": { "prd": "outputs/prd/...md", "prp": "outputs/prp/...md" },
289
- "phases": [
290
- { "phase": "prd-generate", "started_at": "...", "ended_at": "...", "tokens": { "input": 1234, "output": 567, "cache_read": 8910, "cache_creation": 234 }, "attempts": 1 }
291
- ],
292
- "totals": { "input": 0, "output": 0, "cache_read": 0, "cache_creation": 0 }
293
- }
294
157
  ```
158
+ GUIDE how to write it pipeline.md · coding-style.md
159
+ REF context to pull in AGENTS.md · prp/<feature>.md · conventions/{area}.md
160
+ SENSOR must-pass structure deterministic, blocks approval
161
+ EVAL scored rubric LLM-judge, threshold 8.0
162
+ ```
163
+
164
+ Approval marker (`<!-- approved: -->`) gates the next stage. Token spend per phase appended as inline `<!-- tokens: ... -->`.
295
165
 
296
- Each plugin keeps its own `outputs/tokens/{feature_id}.json`. The artifact gets an inline `<!-- tokens: outputs/tokens/{feature_id}.json in=N out=N cache_r=N -->` reference appended after approval so the totals are visible from the artifact itself. Query examples in the [product-manager README](.claude/plugins/product-manager/README.md#token-accounting).
166
+ ---
297
167
 
298
- ### Session-start auto-clear
168
+ ## Status bar
299
169
 
300
- When you reopen a session on a branch whose PR is `MERGED` or `CLOSED` and the pipeline state still has `feature_id: null` (i.e. the run was never linked to a feature), the `SessionStart` hook auto-clears `.pipeline-state.json` and prints:
170
+ Live indicator at the bottom of every Claude Code session:
301
171
 
302
172
  ```
303
- previous feature shipped (PR #271 MERGED). pipeline state cleared.
304
- start next with /product-manager:run or /sse:run
173
+ idle · /product-manager:run · /sse:run · /pipeline:continue
174
+ billing-fix [prd+prp+plan+dev+test+pr] · prp approved · plan drafting · next /sse:plan · sensor: plan-structure
175
+ billing-fix · complete (prd/prp/plan/dev/test/pr)
305
176
  ```
306
177
 
307
- This avoids the stale `next /sse:plan` nag after work has already shipped.
178
+ State persists at `.claude/.pipeline-state.json`. Close the session and reopen — `/pipeline:continue` picks up at the next pending stage. When the PR merges, state auto-clears.
308
179
 
309
180
  ---
310
181
 
311
- ## Layout
182
+ ## Project conventions
183
+
184
+ The SSE agent has defaults per area. Override per repo:
312
185
 
313
186
  ```
314
- .
315
- ├── .claude/
316
- │ ├── plugins/
317
- │ │ ├── product-manager/ PRD + PRP plugin
318
- │ │ └── staff-software-engineer/ plan, dev, test, pr plugin
319
- │ ├── commands/ slash commands per plugin namespace
320
- │ ├── agents/ Task-tool-invokable orchestrators
321
- │ ├── hooks/
322
- │ │ ├── status-line.sh pipeline status indicator (with cyan activity)
323
- │ │ ├── pipeline-prompt.sh slash-command intent tracking
324
- │ │ ├── pipeline-postwrite.sh stage-state from artifact writes
325
- │ │ ├── pipeline-postedit.sh stage-state from approval marker
326
- │ │ ├── pipeline-session-start.sh resume hint + PR-merged auto-clear
327
- │ │ └── activity-pre-read.sh surfaces current sensor/eval/guide
328
- │ ├── scripts/
329
- │ │ ├── pipeline.py pipeline state CRUD
330
- │ │ ├── activity.py live activity CRUD (60s TTL)
331
- │ │ ├── pr-monitor.py PR-watch state + backoff schedule
332
- │ │ └── stage-card.md header/footer card convention
333
- │ ├── .pipeline-state.json active feature + per-stage state
334
- │ ├── .pr-monitor-state.json PR being watched
335
- │ ├── .activity current sensor/eval/guide being touched
336
- │ └── settings.json hooks wiring + permissions
337
- ├── context-library/ reusable org/squad context
338
- ├── setup/
339
- │ ├── install.sh target-repo installer
340
- │ └── update.sh pull + reinstall
341
- └── VERSION source of truth for installer
187
+ {your-repo}/.claude/conventions/{backend,web,mobile,devops}.md
342
188
  ```
343
189
 
344
- Plugin documentation:
345
-
346
- - [product-manager](.claude/plugins/product-manager/README.md): PRD and PRP generation, sensor and eval gates, retry loop, token accounting, optional Confluence publish.
347
- - [staff-software-engineer](.claude/plugins/staff-software-engineer/README.md): plan, dev, test, pr stages with per-project conventions override, document + code sensors, quality evals, PR monitor.
190
+ Only the area files you need. The agent reads them on top of defaults. See [`conventions-override.md`](.claude/agents/staff-software-engineer/guides/conventions-override.md).
348
191
 
349
192
  ---
350
193
 
351
- ## Project conventions
194
+ ## Layout
352
195
 
353
- Each target repo can override the SSE plugin defaults with its own files:
196
+ What `hk install` lays down in your repo:
354
197
 
355
198
  ```
356
- {repo}/.claude/conventions/
357
- ├── backend.md
358
- ├── web.md
359
- ├── mobile.md
360
- └── devops.md
199
+ {your-repo}/
200
+ ├── AGENTS.md agent registry + routing
201
+ ├── CLAUDE.md workspace style + role
202
+ └── .claude/
203
+ ├── agents/ agent definitions (sensors, evals, guides, skills)
204
+ ├── commands/ slash command entry points
205
+ ├── hooks/ status-line + lifecycle hooks
206
+ ├── scripts/ pipeline.py · activity.py · pr-monitor.py
207
+ ├── runtime/
208
+ │ ├── hooks/<agent>/ per-agent lifecycle (post-write, post-eval, pre-prp-check)
209
+ │ ├── scripts/<agent>/ per-agent utilities (sensor-runner, token-phase, link-validator)
210
+ │ └── outputs/{pm,sse}/ generated artifacts, markers, tokens
211
+ ├── conventions/ your per-repo overrides
212
+ └── settings.json hook wiring
361
213
  ```
362
214
 
363
- The installer scaffolds `.claude/conventions/README.md` to remind you of the contract. Fill only the area files relevant to the repo. Plugin reads them on top of its defaults. See [conventions-override.md](.claude/plugins/staff-software-engineer/guides/conventions-override.md) for the override mechanics and examples.
215
+ Full path-by-path map in [`AGENTS.md`](./AGENTS.md).
364
216
 
365
217
  ---
366
218
 
@@ -368,12 +220,13 @@ The installer scaffolds `.claude/conventions/README.md` to remind you of the con
368
220
 
369
221
  | Tool | Why |
370
222
  |------|-----|
371
- | [Claude Code](https://claude.ai/code) | the agent runtime |
372
- | [git](https://git-scm.com/) | version control + status bar branch detection |
373
- | [python3](https://www.python.org/) | sensor runner, token accounting, pipeline state, activity tracker, PR monitor |
374
- | [gh CLI](https://cli.github.com/) | install, update, opening PRs via `/sse:pr`, polling merge via `/sse:pr-monitor` |
223
+ | [Claude Code](https://claude.ai/code) | agent runtime |
224
+ | python3 | sensors, token accounting, pipeline state |
225
+ | [gh CLI](https://cli.github.com/) | opens PR, polls for merge |
226
+ | git | branch + commit ops |
227
+
228
+ Optional: `jq` for querying the token JSON files. Set `JIRA_USERNAME` and `JIRA_API_TOKEN` to publish PRD/PRP to Confluence.
375
229
 
376
- Optional:
230
+ ---
377
231
 
378
- - [jq](https://stedolan.github.io/jq/) for querying the token JSON files
379
- - `JIRA_USERNAME` and `JIRA_API_TOKEN` env vars to enable Confluence publish (details in the [product-manager README](.claude/plugins/product-manager/README.md#confluence-publish))
232
+ MIT. Built on [Claude Code](https://claude.ai/code). Works in any repo Claude Code touches.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 3.3.1
1
+ 4.0.1
package/bin/hk.js CHANGED
@@ -58,13 +58,18 @@ function cmdUninstall(target) {
58
58
  process.exit(1);
59
59
  }
60
60
  const toRemove = [
61
- '.claude/plugins/product-manager',
62
- '.claude/plugins/staff-software-engineer',
61
+ 'AGENTS.md',
62
+ '.claude/agents/product-manager',
63
+ '.claude/agents/staff-software-engineer',
64
+ '.claude/agents/product-manager.md',
65
+ '.claude/agents/staff-software-engineer.md',
63
66
  '.claude/commands/product-manager',
64
67
  '.claude/commands/sse',
65
68
  '.claude/commands/pipeline',
66
- '.claude/agents/product-manager.md',
67
- '.claude/agents/staff-software-engineer.md',
69
+ '.claude/runtime/hooks/product-manager',
70
+ '.claude/runtime/hooks/staff-software-engineer',
71
+ '.claude/runtime/scripts/product-manager',
72
+ '.claude/runtime/scripts/staff-software-engineer',
68
73
  '.claude/hooks/status-line.sh',
69
74
  '.claude/hooks/pipeline-prompt.sh',
70
75
  '.claude/hooks/pipeline-postwrite.sh',
@@ -80,6 +85,9 @@ function cmdUninstall(target) {
80
85
  '.claude/.activity',
81
86
  '.claude/settings.json',
82
87
  '.claude/.hk-version',
88
+ // legacy v3.x layout (best-effort cleanup if still present)
89
+ '.claude/plugins/product-manager',
90
+ '.claude/plugins/staff-software-engineer',
83
91
  ];
84
92
  for (const rel of toRemove) {
85
93
  const p = path.join(target, rel);
@@ -89,7 +97,7 @@ function cmdUninstall(target) {
89
97
  }
90
98
  }
91
99
  console.log(`uninstalled harness-kit v${v} from ${target}`);
92
- console.log(`note: .claude/conventions/ and outputs/ kept. delete manually if desired.`);
100
+ console.log(`note: CLAUDE.md, .claude/conventions/, .claude/runtime/outputs/, and .claude/.legacy-v3-backup/ kept. delete manually if desired.`);
93
101
  }
94
102
 
95
103
  function cmdStatus(target) {
@@ -117,9 +125,9 @@ function cmdHelp() {
117
125
  console.log(`hk - harness-kit CLI (v${pkgVersion()})
118
126
 
119
127
  usage:
120
- hk install [target] install plugins into target repo (default: cwd)
121
- hk update [target] pull latest source and reinstall
122
- hk uninstall [target] remove installed plugins from target
128
+ hk install [target] install harness into target repo (default: cwd)
129
+ hk update [target] pull latest source and reinstall (auto-backs up v3.x plugins/)
130
+ hk uninstall [target] remove installed harness from target
123
131
  hk status [target] show installed version + active pipeline stage
124
132
  hk version source version
125
133
  hk help this message
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pieerry/harness-kit",
3
- "version": "3.3.1",
3
+ "version": "4.0.1",
4
4
  "description": "Claude Code harness for product + engineering delivery. From idea to merged PR, one pipeline.",
5
5
  "author": "Space Metrics AI",
6
6
  "license": "MIT",
@@ -21,7 +21,8 @@
21
21
  "product-management",
22
22
  "delivery",
23
23
  "pipeline",
24
- "plugin"
24
+ "agents-md",
25
+ "subagent"
25
26
  ],
26
27
  "bin": {
27
28
  "hk": "bin/hk.js",
@@ -35,7 +36,8 @@
35
36
  "VERSION",
36
37
  "README.md",
37
38
  "LICENSE",
38
- "CLAUDE.md"
39
+ "CLAUDE.md",
40
+ "AGENTS.md"
39
41
  ],
40
42
  "engines": {
41
43
  "node": ">=14.0.0"