@harness-engineering/cli 1.8.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/skills/claude-code/cleanup-dead-code/SKILL.md +3 -3
- package/dist/agents/skills/claude-code/harness-autopilot/SKILL.md +20 -3
- package/dist/agents/skills/claude-code/harness-brainstorming/SKILL.md +55 -5
- package/dist/agents/skills/claude-code/harness-code-review/SKILL.md +36 -15
- package/dist/agents/skills/claude-code/harness-codebase-cleanup/SKILL.md +1 -1
- package/dist/agents/skills/claude-code/harness-execution/SKILL.md +70 -13
- package/dist/agents/skills/claude-code/harness-planning/SKILL.md +41 -3
- package/dist/agents/skills/claude-code/harness-pre-commit-review/SKILL.md +28 -3
- package/dist/agents/skills/claude-code/harness-release-readiness/SKILL.md +14 -2
- package/dist/agents/skills/claude-code/harness-verification/SKILL.md +18 -2
- package/dist/agents/skills/gemini-cli/cleanup-dead-code/SKILL.md +3 -3
- package/dist/agents/skills/gemini-cli/harness-autopilot/SKILL.md +20 -3
- package/dist/agents/skills/gemini-cli/harness-brainstorming/SKILL.md +55 -5
- package/dist/agents/skills/gemini-cli/harness-code-review/SKILL.md +36 -15
- package/dist/agents/skills/gemini-cli/harness-codebase-cleanup/SKILL.md +1 -1
- package/dist/agents/skills/gemini-cli/harness-execution/SKILL.md +70 -13
- package/dist/agents/skills/gemini-cli/harness-planning/SKILL.md +41 -3
- package/dist/agents/skills/gemini-cli/harness-pre-commit-review/SKILL.md +28 -3
- package/dist/agents/skills/gemini-cli/harness-release-readiness/SKILL.md +14 -2
- package/dist/agents/skills/gemini-cli/harness-verification/SKILL.md +18 -2
- package/dist/agents-md-EMRFLNBC.js +8 -0
- package/dist/architecture-5JNN5L3M.js +13 -0
- package/dist/bin/harness-mcp.d.ts +1 -0
- package/dist/bin/harness-mcp.js +28 -0
- package/dist/bin/harness.js +42 -8
- package/dist/check-phase-gate-WOKIYGAM.js +12 -0
- package/dist/chunk-46YA6FI3.js +293 -0
- package/dist/chunk-4PFMY3H7.js +248 -0
- package/dist/{chunk-LB4GRDDV.js → chunk-72GHBOL2.js} +1 -1
- package/dist/chunk-7X7ZAYMY.js +373 -0
- package/dist/chunk-B7HFEHWP.js +35 -0
- package/dist/chunk-BM3PWGXQ.js +14 -0
- package/dist/chunk-C2ERUR3L.js +255 -0
- package/dist/chunk-CWZ4Y2PO.js +189 -0
- package/dist/{chunk-ULSRSP53.js → chunk-ECUJQS3B.js} +11 -112
- package/dist/chunk-EOLRW32Q.js +72 -0
- package/dist/chunk-F3YDAJFQ.js +125 -0
- package/dist/chunk-F4PTVZWA.js +116 -0
- package/dist/chunk-FPIPT36X.js +187 -0
- package/dist/chunk-FX7SQHGD.js +103 -0
- package/dist/chunk-HIOXKZYF.js +15 -0
- package/dist/chunk-IDZNPTYD.js +16 -0
- package/dist/chunk-JSTQ3AWB.js +31 -0
- package/dist/chunk-K6XAPGML.js +27 -0
- package/dist/chunk-KET4QQZB.js +8 -0
- package/dist/chunk-LXU5M77O.js +4028 -0
- package/dist/chunk-MDUK2J2O.js +67 -0
- package/dist/chunk-MHBMTPW7.js +29 -0
- package/dist/chunk-MO4YQOMB.js +85 -0
- package/dist/chunk-NKDM3FMH.js +52 -0
- package/dist/{chunk-SAB3VXOW.js → chunk-NX6DSZSM.js} +144 -111
- package/dist/chunk-OPXH4CQN.js +62 -0
- package/dist/{chunk-Y7U5AYAL.js → chunk-PAHHT2IK.js} +471 -2719
- package/dist/chunk-PMTFPOCT.js +122 -0
- package/dist/chunk-PSXF277V.js +89 -0
- package/dist/chunk-Q6AB7W5Z.js +135 -0
- package/dist/chunk-QPEH2QPG.js +347 -0
- package/dist/chunk-TEFCFC4H.js +15 -0
- package/dist/chunk-TRAPF4IX.js +185 -0
- package/dist/chunk-VUCPTQ6G.js +67 -0
- package/dist/chunk-W6Y7ZW3Y.js +13 -0
- package/dist/chunk-ZOAWBDWU.js +72 -0
- package/dist/ci-workflow-ZBBUNTHQ.js +8 -0
- package/dist/constants-5JGUXPEK.js +6 -0
- package/dist/create-skill-LUWO46WF.js +11 -0
- package/dist/dist-D4RYGUZE.js +14 -0
- package/dist/{dist-K6KTTN3I.js → dist-I7DB5VKB.js} +237 -0
- package/dist/dist-L7LAAQAS.js +18 -0
- package/dist/{dist-ZODQVGC4.js → dist-PBTNVK6K.js} +8 -6
- package/dist/docs-PTJGD6XI.js +12 -0
- package/dist/engine-SCMZ3G3E.js +8 -0
- package/dist/entropy-YIUBGKY7.js +12 -0
- package/dist/feedback-WEVQSLAA.js +18 -0
- package/dist/generate-agent-definitions-BU5LOJTI.js +15 -0
- package/dist/glob-helper-5OHBUQAI.js +52 -0
- package/dist/graph-loader-RLO3KRIX.js +8 -0
- package/dist/index.d.ts +11 -1
- package/dist/index.js +84 -33
- package/dist/loader-6S6PVGSF.js +10 -0
- package/dist/mcp-BNLBTCXZ.js +34 -0
- package/dist/performance-5TVW6SA6.js +24 -0
- package/dist/review-pipeline-4JTQAWKW.js +9 -0
- package/dist/runner-VMYLHWOC.js +6 -0
- package/dist/runtime-PXIM7UV6.js +9 -0
- package/dist/security-URYTKLGK.js +9 -0
- package/dist/skill-executor-KVS47DAU.js +8 -0
- package/dist/validate-KSDUUK2M.js +12 -0
- package/dist/validate-cross-check-WZAX357V.js +8 -0
- package/dist/version-KFFPOQAX.js +6 -0
- package/package.json +7 -5
- package/dist/create-skill-UZOHMXRU.js +0 -8
- package/dist/validate-cross-check-DLNK423G.js +0 -7
|
@@ -95,7 +95,18 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
|
|
|
95
95
|
}
|
|
96
96
|
```
|
|
97
97
|
|
|
98
|
-
5. **Load context.**
|
|
98
|
+
5. **Load context via gather_context.** Use the `gather_context` MCP tool to load all working context efficiently:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
gather_context({
|
|
102
|
+
path: "<project-root>",
|
|
103
|
+
intent: "Autopilot phase execution for <spec name>",
|
|
104
|
+
skill: "harness-autopilot",
|
|
105
|
+
include: ["state", "learnings", "handoff", "validation"]
|
|
106
|
+
})
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
This loads learnings (including failure entries tagged `[outcome:failure]`), handoff context, state, and validation results in a single call. Note any relevant learnings or known dead ends for the current phase from the returned `learnings` array.
|
|
99
110
|
|
|
100
111
|
6. **Load roadmap context.** If `docs/roadmap.md` exists, read it to understand:
|
|
101
112
|
- Current project priorities (which features are `in-progress`)
|
|
@@ -328,7 +339,9 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
|
|
|
328
339
|
|
|
329
340
|
3. **Mark phase as `complete`** in state.
|
|
330
341
|
|
|
331
|
-
4. **
|
|
342
|
+
4. **Sync roadmap.** If `docs/roadmap.md` exists, call `manage_roadmap` with action `sync` and `apply: true`. This reflects the just-completed phase in the roadmap (e.g., updating the feature from `planned` to `in-progress`). If `manage_roadmap` is unavailable, fall back to direct file manipulation using `syncRoadmap()` from core. Skip silently if no roadmap exists. Do not use `force_sync: true` — the human-always-wins rule applies.
|
|
343
|
+
|
|
344
|
+
5. **Check for next phase:**
|
|
332
345
|
- If more phases remain: "Phase {N} complete. Next: Phase {N+1}: {name} (complexity: {level}). Continue? (yes / stop)"
|
|
333
346
|
- **yes** — Increment `currentPhase`, reset `retryBudget`, transition to ASSESS.
|
|
334
347
|
- **stop** — Save state and exit.
|
|
@@ -372,16 +385,20 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
|
|
|
372
385
|
- [skill:harness-autopilot] [outcome:observation] {any notable patterns from the run}
|
|
373
386
|
```
|
|
374
387
|
|
|
375
|
-
5. **
|
|
388
|
+
5. **Update roadmap to done.** If `docs/roadmap.md` exists and the current spec maps to a roadmap feature, call `manage_roadmap` with action `update` to set the feature status to `done`. Derive the feature name from the spec title (H1 heading) or the session's `handoff.json` `summary` field. If `manage_roadmap` is unavailable, fall back to direct file manipulation using `updateFeature()` from core. Skip silently if no roadmap exists or if the feature is not found. Do not use `force_sync: true`.
|
|
389
|
+
|
|
390
|
+
6. **Clean up state:** Set `currentState: "DONE"` in `{sessionDir}/autopilot-state.json`. Do not delete the file — it serves as a record.
|
|
376
391
|
|
|
377
392
|
## Harness Integration
|
|
378
393
|
|
|
379
394
|
- **`harness validate`** — Run during INIT to verify project health. Included in every execution task via harness-execution delegation.
|
|
395
|
+
- **`gather_context`** — Used in the INIT phase to load learnings, state, handoff, and validation in a single call instead of reading files individually.
|
|
380
396
|
- **`harness check-deps`** — Delegated to harness-execution (included in task steps).
|
|
381
397
|
- **State file** — `.harness/sessions/<slug>/autopilot-state.json` tracks the orchestration state machine. `.harness/sessions/<slug>/state.json` tracks task-level execution state (managed by harness-execution). The slug is derived from the spec path during INIT.
|
|
382
398
|
- **Handoff** — `.harness/sessions/<slug>/handoff.json` is written by each delegated skill and read by the next. Autopilot writes a final handoff on DONE.
|
|
383
399
|
- **Learnings** — `.harness/learnings.md` (global) is appended by both delegated skills and autopilot itself.
|
|
384
400
|
- **Roadmap context** — During INIT, reads `docs/roadmap.md` (if present) for project-level priorities, blockers, and milestone status. Provides broader context for phase execution decisions.
|
|
401
|
+
- **Roadmap sync** — During PHASE_COMPLETE, calls `manage_roadmap` with action `sync` and `apply: true` to reflect phase progress. During DONE, calls `manage_roadmap` with action `update` to set the feature status to `done`. Both skip silently when no roadmap exists. Neither uses `force_sync: true`.
|
|
385
402
|
|
|
386
403
|
## Success Criteria
|
|
387
404
|
|
|
@@ -45,8 +45,35 @@ If you find yourself writing production code, tests, or scaffolding before the h
|
|
|
45
45
|
path: "<project-root>",
|
|
46
46
|
type: "question",
|
|
47
47
|
question: {
|
|
48
|
-
text: "For auth, should we use
|
|
49
|
-
options: [
|
|
48
|
+
text: "For auth, which approach should we use?",
|
|
49
|
+
options: [
|
|
50
|
+
{
|
|
51
|
+
label: "A) Existing JWT middleware",
|
|
52
|
+
pros: ["Already in codebase", "Team has experience"],
|
|
53
|
+
cons: ["No refresh token support", "Session-only"],
|
|
54
|
+
risk: "low",
|
|
55
|
+
effort: "low"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
label: "B) OAuth2 via provider X",
|
|
59
|
+
pros: ["Industry standard", "Refresh tokens built-in"],
|
|
60
|
+
cons: ["New dependency", "Learning curve"],
|
|
61
|
+
risk: "medium",
|
|
62
|
+
effort: "medium"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
label: "C) External auth service",
|
|
66
|
+
pros: ["Zero maintenance", "Enterprise features included"],
|
|
67
|
+
cons: ["Vendor lock-in", "Monthly cost", "Latency"],
|
|
68
|
+
risk: "medium",
|
|
69
|
+
effort: "low"
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
recommendation: {
|
|
73
|
+
optionIndex: 0,
|
|
74
|
+
reason: "Sufficient for current requirements. OAuth2 adds complexity we don't need yet.",
|
|
75
|
+
confidence: "high"
|
|
76
|
+
}
|
|
50
77
|
}
|
|
51
78
|
})
|
|
52
79
|
```
|
|
@@ -120,14 +147,24 @@ These keywords flow into the `handoff.json` `contextKeywords` field when the spe
|
|
|
120
147
|
type: "confirmation",
|
|
121
148
|
confirmation: {
|
|
122
149
|
text: "Approve spec at <file-path>?",
|
|
123
|
-
context: "<one-paragraph summary of the design>"
|
|
150
|
+
context: "<one-paragraph summary of the design>",
|
|
151
|
+
impact: "Spec approval unlocks implementation planning. No code changes yet.",
|
|
152
|
+
risk: "low"
|
|
124
153
|
}
|
|
125
154
|
})
|
|
126
155
|
```
|
|
127
156
|
|
|
128
157
|
The human must explicitly approve before this skill is complete.
|
|
129
158
|
|
|
130
|
-
6. **
|
|
159
|
+
6. **Add feature to roadmap.** If `docs/roadmap.md` exists:
|
|
160
|
+
- Derive the feature name from the spec title (the H1 heading of the proposal).
|
|
161
|
+
- Call `manage_roadmap` with action `add`, `status: "planned"`, `milestone: "Current Work"`, and the spec path. Include a one-line summary from the spec overview.
|
|
162
|
+
- If the feature already exists in the roadmap (duplicate name), skip silently — the feature was likely added manually or by a prior brainstorming session.
|
|
163
|
+
- When the feature is actually added (not skipped as a duplicate), log: `"Added '<feature-name>' to roadmap as planned"` (informational, not a prompt).
|
|
164
|
+
- If `manage_roadmap` is unavailable, fall back to direct file manipulation using `addFeature()` from core.
|
|
165
|
+
- If `docs/roadmap.md` does not exist, skip this entire step silently.
|
|
166
|
+
|
|
167
|
+
7. **Write handoff and suggest transition.** After the human approves the spec:
|
|
131
168
|
|
|
132
169
|
Write `.harness/handoff.json`:
|
|
133
170
|
|
|
@@ -153,7 +190,19 @@ These keywords flow into the `handoff.json` `contextKeywords` field when the spe
|
|
|
153
190
|
"reason": "Spec approved and written to docs/",
|
|
154
191
|
"artifacts": ["<spec file path>"],
|
|
155
192
|
"requiresConfirmation": true,
|
|
156
|
-
"summary": "<Spec title> -- <key design choices>. <N> success criteria, <N> implementation phases."
|
|
193
|
+
"summary": "<Spec title> -- <key design choices>. <N> success criteria, <N> implementation phases.",
|
|
194
|
+
"qualityGate": {
|
|
195
|
+
"checks": [
|
|
196
|
+
{
|
|
197
|
+
"name": "spec-written",
|
|
198
|
+
"passed": true,
|
|
199
|
+
"detail": "Written to docs/changes/<feature>/proposal.md"
|
|
200
|
+
},
|
|
201
|
+
{ "name": "harness-validate", "passed": true },
|
|
202
|
+
{ "name": "human-approved", "passed": true }
|
|
203
|
+
],
|
|
204
|
+
"allPassed": true
|
|
205
|
+
}
|
|
157
206
|
}
|
|
158
207
|
}
|
|
159
208
|
```
|
|
@@ -216,6 +265,7 @@ Converge on a recommendation that addresses all concerns before presenting the d
|
|
|
216
265
|
- **`harness check-docs`** — Run to verify the spec does not conflict with existing documentation.
|
|
217
266
|
- **Spec location** — Specs go to `docs/changes/<feature>/proposal.md`. Follow existing naming patterns.
|
|
218
267
|
- **Handoff to harness-planning** — Once the spec is approved, invoke harness-planning to create the implementation plan from the spec.
|
|
268
|
+
- **Roadmap sync** — After spec approval, call `manage_roadmap` with action `add` to register the new feature as `planned` in `docs/roadmap.md`. Skip silently if no roadmap exists. Duplicates are silently ignored.
|
|
219
269
|
- **`emit_interaction`** — Call at the end of Phase 4 to suggest transitioning to harness-planning. Uses confirmed transition (waits for user approval).
|
|
220
270
|
|
|
221
271
|
#### Requirement Phrasing
|
|
@@ -122,12 +122,15 @@ Run mechanical checks to establish an exclusion boundary. Any issue caught mecha
|
|
|
122
122
|
|
|
123
123
|
**Checks:**
|
|
124
124
|
|
|
125
|
-
1. **Harness validation:**
|
|
126
|
-
```
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
125
|
+
1. **Harness validation:** Use `assess_project` to run all harness health checks in parallel:
|
|
126
|
+
```json
|
|
127
|
+
assess_project({
|
|
128
|
+
path: "<project-root>",
|
|
129
|
+
checks: ["validate", "deps", "docs"],
|
|
130
|
+
mode: "detailed"
|
|
131
|
+
})
|
|
130
132
|
```
|
|
133
|
+
This runs `harness validate`, `harness check-deps`, and `harness check-docs` in parallel and returns a unified report. Any check failure is reported in the `checks` array with `passed: false`.
|
|
131
134
|
2. **Security scan:** Run `run_security_scan` MCP tool on changed files. Record findings with rule ID, file, line, and remediation.
|
|
132
135
|
3. **Type checking:** Run the project's type checker (e.g., `tsc --noEmit`). Record any type errors.
|
|
133
136
|
4. **Linting:** Run the project's linter (e.g., `eslint`). Record any lint violations.
|
|
@@ -202,13 +205,21 @@ Gather context in this order until the ratio is met:
|
|
|
202
205
|
|
|
203
206
|
#### Graph-Enhanced Context (when available)
|
|
204
207
|
|
|
205
|
-
When a knowledge graph exists at `.harness/graph/`, use
|
|
208
|
+
When a knowledge graph exists at `.harness/graph/`, use `gather_context` for efficient context assembly:
|
|
206
209
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
-
|
|
210
|
+
```json
|
|
211
|
+
gather_context({
|
|
212
|
+
path: "<project-root>",
|
|
213
|
+
intent: "Code review of <change description>",
|
|
214
|
+
skill: "harness-code-review",
|
|
215
|
+
tokenBudget: 8000,
|
|
216
|
+
include: ["graph", "learnings", "validation"]
|
|
217
|
+
})
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
This replaces manual `query_graph` + `get_impact` + `find_context_for` calls with a single composite call that assembles review context in parallel, ranked by relevance. It falls back gracefully when no graph is available (`meta.graphAvailable: false`).
|
|
210
221
|
|
|
211
|
-
|
|
222
|
+
For domain-specific scoping (compliance, bug detection, security, architecture), supplement `gather_context` output with targeted `query_graph` calls as needed.
|
|
212
223
|
|
|
213
224
|
#### Context Assembly Commands
|
|
214
225
|
|
|
@@ -493,7 +504,9 @@ emit_interaction({
|
|
|
493
504
|
type: "confirmation",
|
|
494
505
|
confirmation: {
|
|
495
506
|
text: "Review complete: <Assessment>. Accept review?",
|
|
496
|
-
context: "<N critical, N important, N suggestion findings>"
|
|
507
|
+
context: "<N critical, N important, N suggestion findings>",
|
|
508
|
+
impact: "Accepting the review finalizes findings. If 'approve', ready for merge. If 'request-changes', fixes are needed.",
|
|
509
|
+
risk: "<low if approve, high if critical findings>"
|
|
497
510
|
}
|
|
498
511
|
})
|
|
499
512
|
```
|
|
@@ -528,7 +541,16 @@ Call `emit_interaction`:
|
|
|
528
541
|
"reason": "Review approved with no blocking issues",
|
|
529
542
|
"artifacts": ["<reviewed files>"],
|
|
530
543
|
"requiresConfirmation": true,
|
|
531
|
-
"summary": "Review approved. <N> suggestions noted. Ready to create PR or merge."
|
|
544
|
+
"summary": "Review approved. <N> suggestions noted. Ready to create PR or merge.",
|
|
545
|
+
"qualityGate": {
|
|
546
|
+
"checks": [
|
|
547
|
+
{ "name": "mechanical-checks", "passed": true },
|
|
548
|
+
{ "name": "no-critical-findings", "passed": true },
|
|
549
|
+
{ "name": "no-important-findings", "passed": true },
|
|
550
|
+
{ "name": "harness-validate", "passed": true }
|
|
551
|
+
],
|
|
552
|
+
"allPassed": true
|
|
553
|
+
}
|
|
532
554
|
}
|
|
533
555
|
}
|
|
534
556
|
```
|
|
@@ -591,9 +613,8 @@ _This section is not part of the pipeline. It documents the process for respondi
|
|
|
591
613
|
|
|
592
614
|
## Harness Integration
|
|
593
615
|
|
|
594
|
-
- **`
|
|
595
|
-
- **`
|
|
596
|
-
- **`harness check-docs`** — Run in Phase 2 (MECHANICAL). Documentation drift findings are recorded for the exclusion set.
|
|
616
|
+
- **`assess_project`** — Used in Phase 2 (MECHANICAL) to run `validate`, `deps`, and `docs` checks in parallel. Must pass for the pipeline to continue to AI review. Failures are Critical issues that stop the pipeline.
|
|
617
|
+
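- **`gather_context`** — Used in Phase 3 (CONTEXT) for efficient parallel context assembly. Replaces the separate manual `query_graph`, `get_impact`, and `find_context_for` calls; targeted `query_graph` calls may still supplement it for domain-specific scoping.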
|
|
597
618
|
- **`harness cleanup`** — Optional check during Phase 2 for entropy accumulation in changed files.
|
|
598
619
|
- **Graph queries** — Used in Phase 3 (CONTEXT) for dependency-scoped context and in Phase 5 (VALIDATE) for reachability verification. Graceful fallback when no graph exists.
|
|
599
620
|
- **`emit_interaction`** — Call after review approval to suggest transitioning to merge/PR creation. Only emitted on APPROVE assessment. Uses confirmed transition (waits for user approval).
|
|
@@ -205,7 +205,7 @@ After removing the `legacy-auth` module:
|
|
|
205
205
|
- **`harness cleanup --type dead-code --json`** -- Dead code detection input
|
|
206
206
|
- **`harness check-deps --json`** -- Architecture violation detection input
|
|
207
207
|
- **`harness skill run harness-hotspot-detector`** -- Hotspot context for safety classification
|
|
208
|
-
- **`
|
|
208
|
+
- **`detect_entropy` MCP tool with `autoFix: true`** -- Detects entropy and applies safe fixes via the MCP server
|
|
209
209
|
- **`harness validate`** -- Final validation after all fixes
|
|
210
210
|
- **`harness check-deps`** -- Final architecture check after all fixes
|
|
211
211
|
|
|
@@ -26,20 +26,27 @@ Deviating from the plan mid-execution introduces untested assumptions, breaks ta
|
|
|
26
26
|
|
|
27
27
|
1. **Load the plan.** Read the plan document from `docs/plans/`. Identify the total task count and any checkpoints.
|
|
28
28
|
|
|
29
|
-
2. **
|
|
29
|
+
2. **Gather context in one call.** Use the `gather_context` MCP tool to load all working context at once:
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
```json
|
|
32
|
+
gather_context({
|
|
33
|
+
path: "<project-root>",
|
|
34
|
+
intent: "Execute plan tasks starting from current position",
|
|
35
|
+
skill: "harness-execution",
|
|
36
|
+
include: ["state", "learnings", "handoff", "validation"]
|
|
37
|
+
})
|
|
38
|
+
```
|
|
32
39
|
|
|
33
|
-
|
|
40
|
+
This returns `state` (current position — if null, this is a fresh start at Task 1), `learnings` (hard-won insights from previous sessions — do not ignore them), `handoff` (structured context from the previous skill), and `validation` (current project health). If any constituent fails, its field is null and the error is reported in `meta.errors`.
|
|
34
41
|
|
|
35
|
-
|
|
42
|
+
3. **Check for known dead ends.** Review `learnings` entries tagged `[outcome:failure]`. If any match approaches in the current plan, surface warnings before proceeding.
|
|
36
43
|
|
|
37
|
-
|
|
44
|
+
4. **Verify prerequisites.** For the current task:
|
|
38
45
|
- Are dependency tasks marked complete in state?
|
|
39
46
|
- Do the files referenced in the task exist as expected?
|
|
40
47
|
- Does the test suite pass? Run `harness validate` to confirm a clean baseline.
|
|
41
48
|
|
|
42
|
-
|
|
49
|
+
5. **If prerequisites fail,** do not proceed. Report what is missing and which task is blocked.
|
|
43
50
|
|
|
44
51
|
### Graph-Enhanced Context (when available)
|
|
45
52
|
|
|
@@ -75,7 +82,17 @@ For each task, starting from the current position:
|
|
|
75
82
|
|
|
76
83
|
4. **Commit atomically.** Each task produces exactly one commit. Use the commit message specified in the plan. If no message is specified, write a descriptive message in the project's convention.
|
|
77
84
|
|
|
78
|
-
5. **Run mechanical gate.** After each task commit, run the full gate check
|
|
85
|
+
5. **Run mechanical gate.** After each task commit, run the full gate check. Use `assess_project` to run harness checks (including lint) in parallel, then run the test suite:
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
assess_project({
|
|
89
|
+
path: "<project-root>",
|
|
90
|
+
checks: ["validate", "deps", "lint"],
|
|
91
|
+
mode: "summary"
|
|
92
|
+
})
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Then run the project's test suite (`npx turbo run test` or equivalent). This is binary pass/fail.
|
|
79
96
|
- **All pass →** proceed to the next task.
|
|
80
97
|
- **Any fail →** retry with error context (max 2 attempts).
|
|
81
98
|
- **Still failing after retries →** record the failure in `.harness/failures.md`, escalate, and stop.
|
|
@@ -109,7 +126,9 @@ Plans contain three types of checkpoints. Each requires pausing execution.
|
|
|
109
126
|
type: "confirmation",
|
|
110
127
|
confirmation: {
|
|
111
128
|
text: "Task N complete. Output: <summary>. Continue to Task N+1?",
|
|
112
|
-
context: "<test output or file diff summary>"
|
|
129
|
+
context: "<test output or file diff summary>",
|
|
130
|
+
impact: "Continuing proceeds to the next task. Declining pauses execution for review.",
|
|
131
|
+
risk: "low"
|
|
113
132
|
}
|
|
114
133
|
})
|
|
115
134
|
```
|
|
@@ -125,7 +144,27 @@ Plans contain three types of checkpoints. Each requires pausing execution.
|
|
|
125
144
|
type: "question",
|
|
126
145
|
question: {
|
|
127
146
|
text: "Task N requires a decision: <description>",
|
|
128
|
-
options: [
|
|
147
|
+
options: [
|
|
148
|
+
{
|
|
149
|
+
label: "<option A>",
|
|
150
|
+
pros: ["<pro 1>", "<pro 2>"],
|
|
151
|
+
cons: ["<con 1>"],
|
|
152
|
+
risk: "low",
|
|
153
|
+
effort: "low"
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
label: "<option B>",
|
|
157
|
+
pros: ["<pro 1>"],
|
|
158
|
+
cons: ["<con 1>", "<con 2>"],
|
|
159
|
+
risk: "medium",
|
|
160
|
+
effort: "medium"
|
|
161
|
+
}
|
|
162
|
+
],
|
|
163
|
+
recommendation: {
|
|
164
|
+
optionIndex: 0,
|
|
165
|
+
reason: "<why this option is recommended>",
|
|
166
|
+
confidence: "medium"
|
|
167
|
+
}
|
|
129
168
|
}
|
|
130
169
|
})
|
|
131
170
|
```
|
|
@@ -162,7 +201,15 @@ emit_interaction({
|
|
|
162
201
|
completedPhase: "execution",
|
|
163
202
|
suggestedNext: "verification",
|
|
164
203
|
reason: "All plan tasks executed and verified",
|
|
165
|
-
artifacts: ["<list of created/modified files>"]
|
|
204
|
+
artifacts: ["<list of created/modified files>"],
|
|
205
|
+
qualityGate: {
|
|
206
|
+
checks: [
|
|
207
|
+
{ name: "all-tasks-complete", passed: true, detail: "<N>/<N> tasks" },
|
|
208
|
+
{ name: "harness-validate", passed: true },
|
|
209
|
+
{ name: "tests-pass", passed: true }
|
|
210
|
+
],
|
|
211
|
+
allPassed: true
|
|
212
|
+
}
|
|
166
213
|
}
|
|
167
214
|
})
|
|
168
215
|
```
|
|
@@ -219,7 +266,7 @@ Skipping this step means subsequent graph queries (impact analysis, dependency h
|
|
|
219
266
|
}
|
|
220
267
|
```
|
|
221
268
|
|
|
222
|
-
5. **Sync roadmap (
|
|
269
|
+
5. **Sync roadmap (mandatory when present).** If `docs/roadmap.md` exists, call `manage_roadmap` with action `sync` and `apply: true` to update linked feature statuses from the just-completed execution state. Do not use `force_sync: true` — the human-always-wins rule applies. If `manage_roadmap` is unavailable, fall back to direct file manipulation using `syncRoadmap()` from core. If no roadmap exists, skip silently.
|
|
223
270
|
|
|
224
271
|
6. **Learnings are append-only.** Never edit or delete previous learnings. They are a chronological record.
|
|
225
272
|
|
|
@@ -236,7 +283,16 @@ Skipping this step means subsequent graph queries (impact analysis, dependency h
|
|
|
236
283
|
"reason": "All tasks complete",
|
|
237
284
|
"artifacts": ["<list of created/modified files>"],
|
|
238
285
|
"requiresConfirmation": false,
|
|
239
|
-
"summary": "Completed <N> tasks. <N> files created, <N> modified. All quick gates passed."
|
|
286
|
+
"summary": "Completed <N> tasks. <N> files created, <N> modified. All quick gates passed.",
|
|
287
|
+
"qualityGate": {
|
|
288
|
+
"checks": [
|
|
289
|
+
{ "name": "all-tasks-complete", "passed": true, "detail": "<N>/<N> tasks" },
|
|
290
|
+
{ "name": "harness-validate", "passed": true },
|
|
291
|
+
{ "name": "tests-pass", "passed": true },
|
|
292
|
+
{ "name": "no-blockers", "passed": true }
|
|
293
|
+
],
|
|
294
|
+
"allPassed": true
|
|
295
|
+
}
|
|
240
296
|
}
|
|
241
297
|
}
|
|
242
298
|
```
|
|
@@ -265,12 +321,13 @@ These are non-negotiable. When any condition is met, stop immediately.
|
|
|
265
321
|
## Harness Integration
|
|
266
322
|
|
|
267
323
|
- **`harness validate`** — Run after every task completion. Mandatory. No task is complete without a passing validation.
|
|
324
|
+
- **`gather_context`** — Used in the PREPARE phase to load state, learnings, handoff, and validation in a single call instead of 4+ separate reads.
|
|
268
325
|
- **`harness check-deps`** — Run when tasks add new imports or modules. Catches boundary violations early.
|
|
269
326
|
- **`harness state show`** — View current execution position and progress.
|
|
270
327
|
- **`harness state learn "<message>"`** — Append a learning from the command line.
|
|
271
328
|
- **`.harness/state.json`** — Read at session start to resume position. Updated after every task.
|
|
272
329
|
- **`.harness/learnings.md`** — Append-only knowledge capture. Read at session start for prior context.
|
|
273
|
-
- **Roadmap sync** — After completing plan execution,
|
|
330
|
+
- **Roadmap sync** — After completing plan execution, call `manage_roadmap` with action `sync` and `apply: true` to update roadmap status. Mandatory when `docs/roadmap.md` exists. Do not use `force_sync: true`. Falls back to `syncRoadmap()` from core if the MCP tool is unavailable.
|
|
274
331
|
- **`emit_interaction`** — Call at plan completion to auto-transition to harness-verification. Uses auto-transition (proceeds immediately without user confirmation).
|
|
275
332
|
|
|
276
333
|
## Success Criteria
|
|
@@ -46,7 +46,34 @@ Work backward from the goal. Do not start with "what should we build?" Start wit
|
|
|
46
46
|
type: "question",
|
|
47
47
|
question: {
|
|
48
48
|
text: "The spec mentions X but does not define behavior for Y. Should we:",
|
|
49
|
-
options: [
|
|
49
|
+
options: [
|
|
50
|
+
{
|
|
51
|
+
label: "A) Include Y in this plan",
|
|
52
|
+
pros: ["Complete feature in one pass", "No follow-up coordination needed"],
|
|
53
|
+
cons: ["Increases plan scope and time", "May delay delivery"],
|
|
54
|
+
risk: "medium",
|
|
55
|
+
effort: "high"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
label: "B) Defer Y to a follow-up plan",
|
|
59
|
+
pros: ["Keeps current plan focused", "Ship sooner"],
|
|
60
|
+
cons: ["Y remains unhandled", "May need rework when Y is added"],
|
|
61
|
+
risk: "low",
|
|
62
|
+
effort: "low"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
label: "C) Update the spec first",
|
|
66
|
+
pros: ["Design is complete before planning", "No surprises during execution"],
|
|
67
|
+
cons: ["Blocks planning until spec is updated", "Extra round-trip"],
|
|
68
|
+
risk: "low",
|
|
69
|
+
effort: "medium"
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
recommendation: {
|
|
73
|
+
optionIndex: 1,
|
|
74
|
+
reason: "Keeping the current plan focused reduces risk. Y can be addressed in a dedicated follow-up.",
|
|
75
|
+
confidence: "medium"
|
|
76
|
+
}
|
|
50
77
|
}
|
|
51
78
|
})
|
|
52
79
|
```
|
|
@@ -174,7 +201,9 @@ When presenting the task breakdown, use progress markers:
|
|
|
174
201
|
type: "confirmation",
|
|
175
202
|
confirmation: {
|
|
176
203
|
text: "Approve plan at <plan-file-path>?",
|
|
177
|
-
context: "<task count> tasks, <estimated time> minutes. <one-sentence summary>"
|
|
204
|
+
context: "<task count> tasks, <estimated time> minutes. <one-sentence summary>",
|
|
205
|
+
impact: "Approving unlocks task-by-task execution. Plan defines exact file paths, code, and commands.",
|
|
206
|
+
risk: "low"
|
|
178
207
|
}
|
|
179
208
|
})
|
|
180
209
|
```
|
|
@@ -192,7 +221,16 @@ When presenting the task breakdown, use progress markers:
|
|
|
192
221
|
"reason": "Plan approved with all tasks defined",
|
|
193
222
|
"artifacts": ["<plan file path>"],
|
|
194
223
|
"requiresConfirmation": true,
|
|
195
|
-
"summary": "<Plan title> -- <N> tasks, <N> checkpoints. Estimated <time>."
|
|
224
|
+
"summary": "<Plan title> -- <N> tasks, <N> checkpoints. Estimated <time>.",
|
|
225
|
+
"qualityGate": {
|
|
226
|
+
"checks": [
|
|
227
|
+
{ "name": "plan-written", "passed": true, "detail": "Written to docs/plans/" },
|
|
228
|
+
{ "name": "harness-validate", "passed": true },
|
|
229
|
+
{ "name": "observable-truths-traced", "passed": true },
|
|
230
|
+
{ "name": "human-approved", "passed": true }
|
|
231
|
+
],
|
|
232
|
+
"allPassed": true
|
|
233
|
+
}
|
|
196
234
|
}
|
|
197
235
|
}
|
|
198
236
|
```
|
|
@@ -43,6 +43,20 @@ pnpm typecheck 2>&1 || npx tsc --noEmit 2>&1 || make typecheck 2>&1
|
|
|
43
43
|
pnpm test 2>&1 || npm test 2>&1 || make test 2>&1
|
|
44
44
|
```
|
|
45
45
|
|
|
46
|
+
#### 2b. Harness Health Check
|
|
47
|
+
|
|
48
|
+
If the project uses harness, run `assess_project` for harness-specific validation:
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
assess_project({
|
|
52
|
+
path: "<project-root>",
|
|
53
|
+
checks: ["validate", "deps"],
|
|
54
|
+
mode: "summary"
|
|
55
|
+
})
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
If `healthy: false`, include harness check failures in the mechanical check report. This replaces manually running `harness validate` and `harness check-deps` as separate commands.
|
|
59
|
+
|
|
46
60
|
#### 3. Gate Decision
|
|
47
61
|
|
|
48
62
|
- **Any check fails:** STOP. Report the failure. Do not proceed to AI review. The author must fix mechanical issues first.
|
|
@@ -133,12 +147,21 @@ If no source files are staged, skip the security scan.
|
|
|
133
147
|
|
|
134
148
|
Perform a focused, lightweight review of staged changes. This is NOT a full code review — it catches obvious issues only.
|
|
135
149
|
|
|
136
|
-
#### 1.
|
|
150
|
+
#### 1. Quick Review via review_changes
|
|
137
151
|
|
|
138
|
-
|
|
139
|
-
|
|
152
|
+
Use the `review_changes` MCP tool with `depth: 'quick'` for fast pre-commit analysis:
|
|
153
|
+
|
|
154
|
+
```js
|
|
155
|
+
review_changes({
|
|
156
|
+
path: "<project-root>",
|
|
157
|
+
diff: "<output of git diff --cached>",
|
|
158
|
+
depth: "quick",
|
|
159
|
+
mode: "summary"
|
|
160
|
+
})
|
|
140
161
|
```
|
|
141
162
|
|
|
163
|
+
The quick review runs forbidden-pattern checks and size analysis. For the semantic review items below, supplement it with a manual reading of the diff.
|
|
164
|
+
|
|
142
165
|
#### 2. Quick Review Checklist
|
|
143
166
|
|
|
144
167
|
Review the staged diff for these high-signal issues only:
|
|
@@ -219,6 +242,8 @@ fi
|
|
|
219
242
|
- Follows Principle 7 (Deterministic-vs-LLM Split) — mechanical checks first, AI review second
|
|
220
243
|
- Reads `.harness/review-learnings.md` for calibration (if present)
|
|
221
244
|
- Complements harness-code-review (full review) — use pre-commit for quick checks, code-review for thorough analysis
|
|
245
|
+
- **`assess_project`** — Used in Phase 1 for harness-specific health checks (validate + deps) in a single call.
|
|
246
|
+
- **`review_changes`** — Used in Phase 4 with `depth: 'quick'` for fast pre-commit diff analysis.
|
|
222
247
|
|
|
223
248
|
## Success Criteria
|
|
224
249
|
|
|
@@ -111,7 +111,19 @@ Run every check below. Record each as **pass**, **warn**, or **fail**:
|
|
|
111
111
|
| `test` script exists in root `package.json` | fail |
|
|
112
112
|
| `lint` script exists in root `package.json` | fail |
|
|
113
113
|
| `typecheck` or `tsc` script exists in root `package.json` | fail |
|
|
114
|
-
| `
|
|
114
|
+
| `assess_project` passes (harness health + lint gate) | fail |
|
|
115
|
+
|
|
116
|
+
For the `assess_project` check, run it with all harness-specific checks including lint:
|
|
117
|
+
|
|
118
|
+
```js
|
|
119
|
+
assess_project({
|
|
120
|
+
path: "<project-root>",
|
|
121
|
+
checks: ["validate", "deps", "docs", "lint"],
|
|
122
|
+
mode: "detailed"
|
|
123
|
+
})
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
If the result reports `healthy: false`, each failing check in the `checks` array maps to a separate finding with its `topIssue`. This single call replaces running `harness validate`, `harness check-deps`, and lint as separate commands.
|
|
115
127
|
|
|
116
128
|
##### i18n Coverage (conditional)
|
|
117
129
|
|
|
@@ -507,7 +519,7 @@ This framing is informational — it does not block anything. It gives the team
|
|
|
507
519
|
|
|
508
520
|
## Harness Integration
|
|
509
521
|
|
|
510
|
-
- **`
|
|
522
|
+
- **`assess_project`** — Used in AUDIT Phase 1 (CI/CD section) to run harness validation, dependency checks, doc coverage, and lint in a single parallel call. Also run after auto-fixes in Phase 3 to verify project health. The skill automatically inherits new checks added to `assess_project`.
|
|
511
523
|
- **Sub-skill invocations** — Phase 2 dispatches `detect-doc-drift`, `cleanup-dead-code`, `enforce-architecture`, and `diagnostics` as parallel agents. Phase 3 delegates fixes to `align-documentation` and `cleanup-dead-code`.
|
|
512
524
|
- **State file** — `.harness/release-readiness.json` enables session resumption and progress tracking. This file is read at the start of each invocation and written at the end.
|
|
513
525
|
- **Report file** — `release-readiness-report.md` is written to the project root. It is a snapshot, not a tracked artifact — regenerate it on each run.
|
|
@@ -177,7 +177,9 @@ emit_interaction({
|
|
|
177
177
|
type: "confirmation",
|
|
178
178
|
confirmation: {
|
|
179
179
|
text: "Verification report: <VERDICT>. Accept and proceed?",
|
|
180
|
-
context: "<summary: N artifacts checked, N gaps found>"
|
|
180
|
+
context: "<summary: N artifacts checked, N gaps found>",
|
|
181
|
+
impact: "Accepting proceeds to code review. Declining requires gap resolution first.",
|
|
182
|
+
risk: "<low if PASS, high if gaps remain>"
|
|
181
183
|
}
|
|
182
184
|
})
|
|
183
185
|
```
|
|
@@ -212,7 +214,21 @@ Call `emit_interaction`:
|
|
|
212
214
|
"reason": "Verification passed at all 3 levels",
|
|
213
215
|
"artifacts": ["<verified file paths>"],
|
|
214
216
|
"requiresConfirmation": false,
|
|
215
|
-
"summary": "Verification passed: <N> artifacts checked. EXISTS, SUBSTANTIVE, WIRED all passed."
|
|
217
|
+
"summary": "Verification passed: <N> artifacts checked. EXISTS, SUBSTANTIVE, WIRED all passed.",
|
|
218
|
+
"qualityGate": {
|
|
219
|
+
"checks": [
|
|
220
|
+
{ "name": "level1-exists", "passed": true, "detail": "<N> artifacts present" },
|
|
221
|
+
{ "name": "level2-substantive", "passed": true, "detail": "No stubs or placeholders" },
|
|
222
|
+
{
|
|
223
|
+
"name": "level3-wired",
|
|
224
|
+
"passed": true,
|
|
225
|
+
"detail": "All artifacts imported, tested, integrated"
|
|
226
|
+
},
|
|
227
|
+
{ "name": "anti-pattern-scan", "passed": true, "detail": "No matches" },
|
|
228
|
+
{ "name": "harness-validate", "passed": true }
|
|
229
|
+
],
|
|
230
|
+
"allPassed": true
|
|
231
|
+
}
|
|
216
232
|
}
|
|
217
233
|
}
|
|
218
234
|
```
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import {
|
|
2
|
+
checkDependenciesDefinition,
|
|
3
|
+
handleCheckDependencies
|
|
4
|
+
} from "./chunk-MO4YQOMB.js";
|
|
5
|
+
import "./chunk-K6XAPGML.js";
|
|
6
|
+
import "./chunk-IDZNPTYD.js";
|
|
7
|
+
import "./chunk-W6Y7ZW3Y.js";
|
|
8
|
+
import "./chunk-NX6DSZSM.js";
|
|
9
|
+
import "./chunk-MHBMTPW7.js";
|
|
10
|
+
export {
|
|
11
|
+
checkDependenciesDefinition,
|
|
12
|
+
handleCheckDependencies
|
|
13
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
startServer
|
|
4
|
+
} from "../chunk-LXU5M77O.js";
|
|
5
|
+
import "../chunk-CWZ4Y2PO.js";
|
|
6
|
+
import "../chunk-FPIPT36X.js";
|
|
7
|
+
import "../chunk-4PFMY3H7.js";
|
|
8
|
+
import "../chunk-PSXF277V.js";
|
|
9
|
+
import "../chunk-PMTFPOCT.js";
|
|
10
|
+
import "../chunk-ZOAWBDWU.js";
|
|
11
|
+
import "../chunk-FX7SQHGD.js";
|
|
12
|
+
import "../chunk-OPXH4CQN.js";
|
|
13
|
+
import "../chunk-MO4YQOMB.js";
|
|
14
|
+
import "../chunk-K6XAPGML.js";
|
|
15
|
+
import "../chunk-F4PTVZWA.js";
|
|
16
|
+
import "../chunk-IDZNPTYD.js";
|
|
17
|
+
import "../chunk-W6Y7ZW3Y.js";
|
|
18
|
+
import "../chunk-EOLRW32Q.js";
|
|
19
|
+
import "../chunk-B7HFEHWP.js";
|
|
20
|
+
import "../chunk-MDUK2J2O.js";
|
|
21
|
+
import "../chunk-NX6DSZSM.js";
|
|
22
|
+
import "../chunk-MHBMTPW7.js";
|
|
23
|
+
|
|
24
|
+
// src/bin/harness-mcp.ts
|
|
25
|
+
startServer().catch((error) => {
|
|
26
|
+
console.error("Failed to start MCP server:", error);
|
|
27
|
+
process.exit(1);
|
|
28
|
+
});
|