@harness-engineering/cli 1.8.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/dist/agents/skills/claude-code/cleanup-dead-code/SKILL.md +3 -3
  2. package/dist/agents/skills/claude-code/harness-autopilot/SKILL.md +20 -3
  3. package/dist/agents/skills/claude-code/harness-brainstorming/SKILL.md +55 -5
  4. package/dist/agents/skills/claude-code/harness-code-review/SKILL.md +36 -15
  5. package/dist/agents/skills/claude-code/harness-codebase-cleanup/SKILL.md +1 -1
  6. package/dist/agents/skills/claude-code/harness-execution/SKILL.md +70 -13
  7. package/dist/agents/skills/claude-code/harness-planning/SKILL.md +41 -3
  8. package/dist/agents/skills/claude-code/harness-pre-commit-review/SKILL.md +28 -3
  9. package/dist/agents/skills/claude-code/harness-release-readiness/SKILL.md +14 -2
  10. package/dist/agents/skills/claude-code/harness-verification/SKILL.md +18 -2
  11. package/dist/agents/skills/gemini-cli/cleanup-dead-code/SKILL.md +3 -3
  12. package/dist/agents/skills/gemini-cli/harness-autopilot/SKILL.md +20 -3
  13. package/dist/agents/skills/gemini-cli/harness-brainstorming/SKILL.md +55 -5
  14. package/dist/agents/skills/gemini-cli/harness-code-review/SKILL.md +36 -15
  15. package/dist/agents/skills/gemini-cli/harness-codebase-cleanup/SKILL.md +1 -1
  16. package/dist/agents/skills/gemini-cli/harness-execution/SKILL.md +70 -13
  17. package/dist/agents/skills/gemini-cli/harness-planning/SKILL.md +41 -3
  18. package/dist/agents/skills/gemini-cli/harness-pre-commit-review/SKILL.md +28 -3
  19. package/dist/agents/skills/gemini-cli/harness-release-readiness/SKILL.md +14 -2
  20. package/dist/agents/skills/gemini-cli/harness-verification/SKILL.md +18 -2
  21. package/dist/agents-md-EMRFLNBC.js +8 -0
  22. package/dist/architecture-5JNN5L3M.js +13 -0
  23. package/dist/bin/harness-mcp.d.ts +1 -0
  24. package/dist/bin/harness-mcp.js +28 -0
  25. package/dist/bin/harness.js +42 -8
  26. package/dist/check-phase-gate-WOKIYGAM.js +12 -0
  27. package/dist/chunk-46YA6FI3.js +293 -0
  28. package/dist/chunk-4PFMY3H7.js +248 -0
  29. package/dist/{chunk-LB4GRDDV.js → chunk-72GHBOL2.js} +1 -1
  30. package/dist/chunk-7X7ZAYMY.js +373 -0
  31. package/dist/chunk-B7HFEHWP.js +35 -0
  32. package/dist/chunk-BM3PWGXQ.js +14 -0
  33. package/dist/chunk-C2ERUR3L.js +255 -0
  34. package/dist/chunk-CWZ4Y2PO.js +189 -0
  35. package/dist/{chunk-ULSRSP53.js → chunk-ECUJQS3B.js} +11 -112
  36. package/dist/chunk-EOLRW32Q.js +72 -0
  37. package/dist/chunk-F3YDAJFQ.js +125 -0
  38. package/dist/chunk-F4PTVZWA.js +116 -0
  39. package/dist/chunk-FPIPT36X.js +187 -0
  40. package/dist/chunk-FX7SQHGD.js +103 -0
  41. package/dist/chunk-HIOXKZYF.js +15 -0
  42. package/dist/chunk-IDZNPTYD.js +16 -0
  43. package/dist/chunk-JSTQ3AWB.js +31 -0
  44. package/dist/chunk-K6XAPGML.js +27 -0
  45. package/dist/chunk-KET4QQZB.js +8 -0
  46. package/dist/chunk-LXU5M77O.js +4028 -0
  47. package/dist/chunk-MDUK2J2O.js +67 -0
  48. package/dist/chunk-MHBMTPW7.js +29 -0
  49. package/dist/chunk-MO4YQOMB.js +85 -0
  50. package/dist/chunk-NKDM3FMH.js +52 -0
  51. package/dist/{chunk-SAB3VXOW.js → chunk-NX6DSZSM.js} +144 -111
  52. package/dist/chunk-OPXH4CQN.js +62 -0
  53. package/dist/{chunk-Y7U5AYAL.js → chunk-PAHHT2IK.js} +471 -2719
  54. package/dist/chunk-PMTFPOCT.js +122 -0
  55. package/dist/chunk-PSXF277V.js +89 -0
  56. package/dist/chunk-Q6AB7W5Z.js +135 -0
  57. package/dist/chunk-QPEH2QPG.js +347 -0
  58. package/dist/chunk-TEFCFC4H.js +15 -0
  59. package/dist/chunk-TRAPF4IX.js +185 -0
  60. package/dist/chunk-VUCPTQ6G.js +67 -0
  61. package/dist/chunk-W6Y7ZW3Y.js +13 -0
  62. package/dist/chunk-ZOAWBDWU.js +72 -0
  63. package/dist/ci-workflow-ZBBUNTHQ.js +8 -0
  64. package/dist/constants-5JGUXPEK.js +6 -0
  65. package/dist/create-skill-LUWO46WF.js +11 -0
  66. package/dist/dist-D4RYGUZE.js +14 -0
  67. package/dist/{dist-K6KTTN3I.js → dist-I7DB5VKB.js} +237 -0
  68. package/dist/dist-L7LAAQAS.js +18 -0
  69. package/dist/{dist-ZODQVGC4.js → dist-PBTNVK6K.js} +8 -6
  70. package/dist/docs-PTJGD6XI.js +12 -0
  71. package/dist/engine-SCMZ3G3E.js +8 -0
  72. package/dist/entropy-YIUBGKY7.js +12 -0
  73. package/dist/feedback-WEVQSLAA.js +18 -0
  74. package/dist/generate-agent-definitions-BU5LOJTI.js +15 -0
  75. package/dist/glob-helper-5OHBUQAI.js +52 -0
  76. package/dist/graph-loader-RLO3KRIX.js +8 -0
  77. package/dist/index.d.ts +11 -1
  78. package/dist/index.js +84 -33
  79. package/dist/loader-6S6PVGSF.js +10 -0
  80. package/dist/mcp-BNLBTCXZ.js +34 -0
  81. package/dist/performance-5TVW6SA6.js +24 -0
  82. package/dist/review-pipeline-4JTQAWKW.js +9 -0
  83. package/dist/runner-VMYLHWOC.js +6 -0
  84. package/dist/runtime-PXIM7UV6.js +9 -0
  85. package/dist/security-URYTKLGK.js +9 -0
  86. package/dist/skill-executor-KVS47DAU.js +8 -0
  87. package/dist/validate-KSDUUK2M.js +12 -0
  88. package/dist/validate-cross-check-WZAX357V.js +8 -0
  89. package/dist/version-KFFPOQAX.js +6 -0
  90. package/package.json +7 -5
  91. package/dist/create-skill-UZOHMXRU.js +0 -8
  92. package/dist/validate-cross-check-DLNK423G.js +0 -7
@@ -74,9 +74,9 @@ For each item categorized as safe:
74
74
 
75
75
  **New fix types:**
76
76
 
77
- - **Dead exports (non-public):** Use `apply_fixes` with `fixTypes: ['dead-exports']`. The tool removes the `export` keyword. If the function/class has zero internal callers too, delete the entire declaration.
78
- - **Commented-out code:** Use `apply_fixes` with `fixTypes: ['commented-code']`. The tool deletes commented-out code blocks. This is cosmetic and only needs lint verification.
79
- - **Orphaned dependencies:** Use `apply_fixes` with `fixTypes: ['orphaned-deps']`. The tool removes the dep from package.json. **Must run `pnpm install && pnpm test` after** to verify nothing breaks.
77
+ - **Dead exports (non-public):** Use `detect_entropy` with `autoFix: true, fixTypes: ['dead-exports']`. The tool removes the `export` keyword. If the function/class has zero internal callers too, delete the entire declaration.
78
+ - **Commented-out code:** Use `detect_entropy` with `autoFix: true, fixTypes: ['commented-code']`. The tool deletes commented-out code blocks. This is cosmetic and only needs lint verification.
79
+ - **Orphaned dependencies:** Use `detect_entropy` with `autoFix: true, fixTypes: ['orphaned-deps']`. The tool removes the dep from package.json. **Must run `pnpm install && pnpm test` after** to verify nothing breaks.
80
80
 
81
81
  ### Phase 3.5: Convergence Loop (Standalone)
82
82
 
@@ -95,7 +95,18 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
95
95
  }
96
96
  ```
97
97
 
98
- 5. **Load context.** Read `.harness/learnings.md` and `.harness/failures.md` (global, at `.harness/` root) if they exist. Note any relevant learnings or known dead ends for the current phase.
98
+ 5. **Load context via gather_context.** Use the `gather_context` MCP tool to load all working context efficiently:
99
+
100
+ ```json
101
+ gather_context({
102
+ path: "<project-root>",
103
+ intent: "Autopilot phase execution for <spec name>",
104
+ skill: "harness-autopilot",
105
+ include: ["state", "learnings", "handoff", "validation"]
106
+ })
107
+ ```
108
+
109
+ This loads learnings (including failure entries tagged `[outcome:failure]`), handoff context, state, and validation results in a single call. Note any relevant learnings or known dead ends for the current phase from the returned `learnings` array.
99
110
 
100
111
  6. **Load roadmap context.** If `docs/roadmap.md` exists, read it to understand:
101
112
  - Current project priorities (which features are `in-progress`)
@@ -328,7 +339,9 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
328
339
 
329
340
  3. **Mark phase as `complete`** in state.
330
341
 
331
- 4. **Check for next phase:**
342
+ 4. **Sync roadmap.** If `docs/roadmap.md` exists, call `manage_roadmap` with action `sync` and `apply: true`. This reflects the just-completed phase in the roadmap (e.g., updating the feature from `planned` to `in-progress`). If `manage_roadmap` is unavailable, fall back to direct file manipulation using `syncRoadmap()` from core. Skip silently if no roadmap exists. Do not use `force_sync: true` — the human-always-wins rule applies.
343
+
344
+ 5. **Check for next phase:**
332
345
  - If more phases remain: "Phase {N} complete. Next: Phase {N+1}: {name} (complexity: {level}). Continue? (yes / stop)"
333
346
  - **yes** — Increment `currentPhase`, reset `retryBudget`, transition to ASSESS.
334
347
  - **stop** — Save state and exit.
@@ -372,16 +385,20 @@ INIT → ASSESS → PLAN → APPROVE_PLAN → EXECUTE → VERIFY → REVIEW →
372
385
  - [skill:harness-autopilot] [outcome:observation] {any notable patterns from the run}
373
386
  ```
374
387
 
375
- 5. **Clean up state:** Set `currentState: "DONE"` in `{sessionDir}/autopilot-state.json`. Do not delete the file it serves as a record.
388
+ 5. **Update roadmap to done.** If `docs/roadmap.md` exists and the current spec maps to a roadmap feature, call `manage_roadmap` with action `update` to set the feature status to `done`. Derive the feature name from the spec title (H1 heading) or the session's `handoff.json` `summary` field. If `manage_roadmap` is unavailable, fall back to direct file manipulation using `updateFeature()` from core. Skip silently if no roadmap exists or if the feature is not found. Do not use `force_sync: true`.
389
+
390
+ 6. **Clean up state:** Set `currentState: "DONE"` in `{sessionDir}/autopilot-state.json`. Do not delete the file — it serves as a record.
376
391
 
377
392
  ## Harness Integration
378
393
 
379
394
  - **`harness validate`** — Run during INIT to verify project health. Included in every execution task via harness-execution delegation.
395
+ - **`gather_context`** — Used in INIT phase to load learnings, state, handoff, and validation in a single call instead of reading files individually.
380
396
  - **`harness check-deps`** — Delegated to harness-execution (included in task steps).
381
397
  - **State file** — `.harness/sessions/<slug>/autopilot-state.json` tracks the orchestration state machine. `.harness/sessions/<slug>/state.json` tracks task-level execution state (managed by harness-execution). The slug is derived from the spec path during INIT.
382
398
  - **Handoff** — `.harness/sessions/<slug>/handoff.json` is written by each delegated skill and read by the next. Autopilot writes a final handoff on DONE.
383
399
  - **Learnings** — `.harness/learnings.md` (global) is appended by both delegated skills and autopilot itself.
384
400
  - **Roadmap context** — During INIT, reads `docs/roadmap.md` (if present) for project-level priorities, blockers, and milestone status. Provides broader context for phase execution decisions.
401
+ - **Roadmap sync** — During PHASE_COMPLETE, calls `manage_roadmap` with `sync` and `apply: true` to reflect phase progress. During DONE, calls `manage_roadmap` with `update` to set feature status to `done`. Both skip silently when no roadmap exists. Neither uses `force_sync: true`.
385
402
 
386
403
  ## Success Criteria
387
404
 
@@ -45,8 +45,35 @@ If you find yourself writing production code, tests, or scaffolding before the h
45
45
  path: "<project-root>",
46
46
  type: "question",
47
47
  question: {
48
- text: "For auth, should we use:",
49
- options: ["A) existing JWT middleware", "B) OAuth2 via provider X", "C) external service"]
48
+ text: "For auth, which approach should we use?",
49
+ options: [
50
+ {
51
+ label: "A) Existing JWT middleware",
52
+ pros: ["Already in codebase", "Team has experience"],
53
+ cons: ["No refresh token support", "Session-only"],
54
+ risk: "low",
55
+ effort: "low"
56
+ },
57
+ {
58
+ label: "B) OAuth2 via provider X",
59
+ pros: ["Industry standard", "Refresh tokens built-in"],
60
+ cons: ["New dependency", "Learning curve"],
61
+ risk: "medium",
62
+ effort: "medium"
63
+ },
64
+ {
65
+ label: "C) External auth service",
66
+ pros: ["Zero maintenance", "Enterprise features included"],
67
+ cons: ["Vendor lock-in", "Monthly cost", "Latency"],
68
+ risk: "medium",
69
+ effort: "low"
70
+ }
71
+ ],
72
+ recommendation: {
73
+ optionIndex: 0,
74
+ reason: "Sufficient for current requirements. OAuth2 adds complexity we don't need yet.",
75
+ confidence: "high"
76
+ }
50
77
  }
51
78
  })
52
79
  ```
@@ -120,14 +147,24 @@ These keywords flow into the `handoff.json` `contextKeywords` field when the spe
120
147
  type: "confirmation",
121
148
  confirmation: {
122
149
  text: "Approve spec at <file-path>?",
123
- context: "<one-paragraph summary of the design>"
150
+ context: "<one-paragraph summary of the design>",
151
+ impact: "Spec approval unlocks implementation planning. No code changes yet.",
152
+ risk: "low"
124
153
  }
125
154
  })
126
155
  ```
127
156
 
128
157
  The human must explicitly approve before this skill is complete.
129
158
 
130
- 6. **Write handoff and suggest transition.** After the human approves the spec:
159
+ 6. **Add feature to roadmap.** If `docs/roadmap.md` exists:
160
+ - Derive the feature name from the spec title (the H1 heading of the proposal).
161
+ - Call `manage_roadmap` with action `add`, `status: "planned"`, `milestone: "Current Work"`, and the spec path. Include a one-line summary from the spec overview.
162
+ - If the feature already exists in the roadmap (duplicate name), skip silently — the feature was likely added manually or by a prior brainstorming session.
163
+ - Log: `"Added '<feature-name>' to roadmap as planned"` (informational, not a prompt).
164
+ - If `manage_roadmap` is unavailable, fall back to direct file manipulation using `addFeature()` from core.
165
+ - If no roadmap exists, skip this step silently.
166
+
167
+ 7. **Write handoff and suggest transition.** After the human approves the spec:
131
168
 
132
169
  Write `.harness/handoff.json`:
133
170
 
@@ -153,7 +190,19 @@ These keywords flow into the `handoff.json` `contextKeywords` field when the spe
153
190
  "reason": "Spec approved and written to docs/",
154
191
  "artifacts": ["<spec file path>"],
155
192
  "requiresConfirmation": true,
156
- "summary": "<Spec title> -- <key design choices>. <N> success criteria, <N> implementation phases."
193
+ "summary": "<Spec title> -- <key design choices>. <N> success criteria, <N> implementation phases.",
194
+ "qualityGate": {
195
+ "checks": [
196
+ {
197
+ "name": "spec-written",
198
+ "passed": true,
199
+ "detail": "Written to docs/changes/<feature>/proposal.md"
200
+ },
201
+ { "name": "harness-validate", "passed": true },
202
+ { "name": "human-approved", "passed": true }
203
+ ],
204
+ "allPassed": true
205
+ }
157
206
  }
158
207
  }
159
208
  ```
@@ -216,6 +265,7 @@ Converge on a recommendation that addresses all concerns before presenting the d
216
265
  - **`harness check-docs`** — Run to verify the spec does not conflict with existing documentation.
217
266
  - **Spec location** — Specs go to `docs/changes/<feature>/proposal.md`. Follow existing naming patterns.
218
267
  - **Handoff to harness-planning** — Once the spec is approved, invoke harness-planning to create the implementation plan from the spec.
268
+ - **Roadmap sync** — After spec approval, call `manage_roadmap` with action `add` to register the new feature as `planned` in `docs/roadmap.md`. Skip silently if no roadmap exists. Duplicates are silently ignored.
219
269
  - **`emit_interaction`** -- Call at the end of Phase 4 to suggest transitioning to harness-planning. Uses confirmed transition (waits for user approval).
220
270
 
221
271
  #### Requirement Phrasing
@@ -122,12 +122,15 @@ Run mechanical checks to establish an exclusion boundary. Any issue caught mecha
122
122
 
123
123
  **Checks:**
124
124
 
125
- 1. **Harness validation:**
126
- ```bash
127
- harness validate
128
- harness check-deps
129
- harness check-docs
125
+ 1. **Harness validation:** Use `assess_project` to run all harness health checks in parallel:
126
+ ```json
127
+ assess_project({
128
+ path: "<project-root>",
129
+ checks: ["validate", "deps", "docs"],
130
+ mode: "detailed"
131
+ })
130
132
  ```
133
+ This runs `harness validate`, `harness check-deps`, and `harness check-docs` in parallel and returns a unified report. Any check failure is reported in the `checks` array with `passed: false`.
131
134
  2. **Security scan:** Run `run_security_scan` MCP tool on changed files. Record findings with rule ID, file, line, and remediation.
132
135
  3. **Type checking:** Run the project's type checker (e.g., `tsc --noEmit`). Record any type errors.
133
136
  4. **Linting:** Run the project's linter (e.g., `eslint`). Record any lint violations.
@@ -202,13 +205,21 @@ Gather context in this order until the ratio is met:
202
205
 
203
206
  #### Graph-Enhanced Context (when available)
204
207
 
205
- When a knowledge graph exists at `.harness/graph/`, use graph queries for faster, more accurate context:
208
+ When a knowledge graph exists at `.harness/graph/`, use `gather_context` for efficient context assembly:
206
209
 
207
- - `query_graph` — traverse dependency chain from changed files to find all imports and transitive dependencies
208
- - `get_impact` — find all affected tests, docs, and downstream code
209
- - `find_context_for` — assemble review context within token budget, ranked by relevance
210
+ ```json
211
+ gather_context({
212
+ path: "<project-root>",
213
+ intent: "Code review of <change description>",
214
+ skill: "harness-code-review",
215
+ tokenBudget: 8000,
216
+ include: ["graph", "learnings", "validation"]
217
+ })
218
+ ```
219
+
220
+ This replaces manual `query_graph` + `get_impact` + `find_context_for` calls with a single composite call that assembles review context in parallel, ranked by relevance. Falls back gracefully when no graph is available (`meta.graphAvailable: false`).
210
221
 
211
- Graph queries replace manual grep/find commands and discover transitive dependencies that file search misses. Fall back to file-based commands if no graph is available.
222
+ For domain-specific scoping (compliance, bug detection, security, architecture), supplement `gather_context` output with targeted `query_graph` calls as needed.
212
223
 
213
224
  #### Context Assembly Commands
214
225
 
@@ -493,7 +504,9 @@ emit_interaction({
493
504
  type: "confirmation",
494
505
  confirmation: {
495
506
  text: "Review complete: <Assessment>. Accept review?",
496
- context: "<N critical, N important, N suggestion findings>"
507
+ context: "<N critical, N important, N suggestion findings>",
508
+ impact: "Accepting the review finalizes findings. If 'approve', ready for merge. If 'request-changes', fixes are needed.",
509
+ risk: "<low if approve, high if critical findings>"
497
510
  }
498
511
  })
499
512
  ```
@@ -528,7 +541,16 @@ Call `emit_interaction`:
528
541
  "reason": "Review approved with no blocking issues",
529
542
  "artifacts": ["<reviewed files>"],
530
543
  "requiresConfirmation": true,
531
- "summary": "Review approved. <N> suggestions noted. Ready to create PR or merge."
544
+ "summary": "Review approved. <N> suggestions noted. Ready to create PR or merge.",
545
+ "qualityGate": {
546
+ "checks": [
547
+ { "name": "mechanical-checks", "passed": true },
548
+ { "name": "no-critical-findings", "passed": true },
549
+ { "name": "no-important-findings", "passed": true },
550
+ { "name": "harness-validate", "passed": true }
551
+ ],
552
+ "allPassed": true
553
+ }
532
554
  }
533
555
  }
534
556
  ```
@@ -591,9 +613,8 @@ _This section is not part of the pipeline. It documents the process for respondi
591
613
 
592
614
  ## Harness Integration
593
615
 
594
- - **`harness validate`** — Run in Phase 2 (MECHANICAL). Must pass for the pipeline to continue to AI review.
595
- - **`harness check-deps`** — Run in Phase 2 (MECHANICAL). Failures are Critical issues that stop the pipeline.
596
- - **`harness check-docs`** — Run in Phase 2 (MECHANICAL). Documentation drift findings are recorded for the exclusion set.
616
+ - **`assess_project`** — Used in Phase 2 (MECHANICAL) to run `validate`, `deps`, and `docs` checks in parallel. Must pass for the pipeline to continue to AI review. Failures are Critical issues that stop the pipeline.
617
+ - **`gather_context`** — Used in Phase 3 (CONTEXT) for efficient parallel context assembly. Replaces separate graph query calls.
597
618
  - **`harness cleanup`** — Optional check during Phase 2 for entropy accumulation in changed files.
598
619
  - **Graph queries** — Used in Phase 3 (CONTEXT) for dependency-scoped context and in Phase 5 (VALIDATE) for reachability verification. Graceful fallback when no graph exists.
599
620
  - **`emit_interaction`** -- Call after review approval to suggest transitioning to merge/PR creation. Only emitted on APPROVE assessment. Uses confirmed transition (waits for user approval).
@@ -205,7 +205,7 @@ After removing the `legacy-auth` module:
205
205
  - **`harness cleanup --type dead-code --json`** -- Dead code detection input
206
206
  - **`harness check-deps --json`** -- Architecture violation detection input
207
207
  - **`harness skill run harness-hotspot-detector`** -- Hotspot context for safety classification
208
- - **`apply_fixes` MCP tool** -- Applies safe fixes via the MCP server
208
+ - **`detect_entropy` MCP tool with `autoFix: true`** -- Detects entropy and applies safe fixes via the MCP server
209
209
  - **`harness validate`** -- Final validation after all fixes
210
210
  - **`harness check-deps`** -- Final architecture check after all fixes
211
211
 
@@ -26,20 +26,27 @@ Deviating from the plan mid-execution introduces untested assumptions, breaks ta
26
26
 
27
27
  1. **Load the plan.** Read the plan document from `docs/plans/`. Identify the total task count and any checkpoints.
28
28
 
29
- 2. **Load state.** Read `.harness/state.json` to determine current position. If the file does not exist, this is a fresh start — position is Task 1.
29
+ 2. **Gather context in one call.** Use the `gather_context` MCP tool to load all working context at once:
30
30
 
31
- 3. **Load learnings.** Read `.harness/learnings.md` for context from previous sessions. These are hard-won insights — do not ignore them.
31
+ ```json
32
+ gather_context({
33
+ path: "<project-root>",
34
+ intent: "Execute plan tasks starting from current position",
35
+ skill: "harness-execution",
36
+ include: ["state", "learnings", "handoff", "validation"]
37
+ })
38
+ ```
32
39
 
33
- 4. **Load failures.** Read `.harness/failures.md` for known dead ends. If any entries match approaches in the current plan, surface warnings before proceeding.
40
+ This returns `state` (current position — if null, this is a fresh start at Task 1), `learnings` (hard-won insights from previous sessions — do not ignore them), `handoff` (structured context from the previous skill), and `validation` (current project health). If any constituent fails, its field is null and the error is reported in `meta.errors`.
34
41
 
35
- 5. **Load handoff.** Read `.harness/handoff.json` if it exists. Contains structured context from the previous skill (e.g., harness-planning passing context to harness-execution). Use this to prime session state.
42
+ 3. **Check for known dead ends.** Review `learnings` entries tagged `[outcome:failure]`. If any match approaches in the current plan, surface warnings before proceeding.
36
43
 
37
- 6. **Verify prerequisites.** For the current task:
44
+ 4. **Verify prerequisites.** For the current task:
38
45
  - Are dependency tasks marked complete in state?
39
46
  - Do the files referenced in the task exist as expected?
40
47
  - Does the test suite pass? Run `harness validate` to confirm a clean baseline.
41
48
 
42
- 7. **If prerequisites fail,** do not proceed. Report what is missing and which task is blocked.
49
+ 5. **If prerequisites fail,** do not proceed. Report what is missing and which task is blocked.
43
50
 
44
51
  ### Graph-Enhanced Context (when available)
45
52
 
@@ -75,7 +82,17 @@ For each task, starting from the current position:
75
82
 
76
83
  4. **Commit atomically.** Each task produces exactly one commit. Use the commit message specified in the plan. If no message is specified, write a descriptive message in the project's convention.
77
84
 
78
- 5. **Run mechanical gate.** After each task commit, run the full gate check: test suite, linter, type checker, build, and `harness validate`. This is binary pass/fail.
85
+ 5. **Run mechanical gate.** After each task commit, run the full gate check. Use `assess_project` to run harness checks (including lint) in parallel, then run the test suite:
86
+
87
+ ```json
88
+ assess_project({
89
+ path: "<project-root>",
90
+ checks: ["validate", "deps", "lint"],
91
+ mode: "summary"
92
+ })
93
+ ```
94
+
95
+ Then run the project's test suite (`npx turbo run test` or equivalent). This is binary pass/fail.
79
96
  - **All pass →** proceed to the next task.
80
97
  - **Any fail →** retry with error context (max 2 attempts).
81
98
  - **Still failing after retries →** record the failure in `.harness/failures.md`, escalate, and stop.
@@ -109,7 +126,9 @@ Plans contain three types of checkpoints. Each requires pausing execution.
109
126
  type: "confirmation",
110
127
  confirmation: {
111
128
  text: "Task N complete. Output: <summary>. Continue to Task N+1?",
112
- context: "<test output or file diff summary>"
129
+ context: "<test output or file diff summary>",
130
+ impact: "Continuing proceeds to the next task. Declining pauses execution for review.",
131
+ risk: "low"
113
132
  }
114
133
  })
115
134
  ```
@@ -125,7 +144,27 @@ Plans contain three types of checkpoints. Each requires pausing execution.
125
144
  type: "question",
126
145
  question: {
127
146
  text: "Task N requires a decision: <description>",
128
- options: ["<option A>", "<option B>"]
147
+ options: [
148
+ {
149
+ label: "<option A>",
150
+ pros: ["<pro 1>", "<pro 2>"],
151
+ cons: ["<con 1>"],
152
+ risk: "low",
153
+ effort: "low"
154
+ },
155
+ {
156
+ label: "<option B>",
157
+ pros: ["<pro 1>"],
158
+ cons: ["<con 1>", "<con 2>"],
159
+ risk: "medium",
160
+ effort: "medium"
161
+ }
162
+ ],
163
+ recommendation: {
164
+ optionIndex: 0,
165
+ reason: "<why this option is recommended>",
166
+ confidence: "medium"
167
+ }
129
168
  }
130
169
  })
131
170
  ```
@@ -162,7 +201,15 @@ emit_interaction({
162
201
  completedPhase: "execution",
163
202
  suggestedNext: "verification",
164
203
  reason: "All plan tasks executed and verified",
165
- artifacts: ["<list of created/modified files>"]
204
+ artifacts: ["<list of created/modified files>"],
205
+ qualityGate: {
206
+ checks: [
207
+ { name: "all-tasks-complete", passed: true, detail: "<N>/<N> tasks" },
208
+ { name: "harness-validate", passed: true },
209
+ { name: "tests-pass", passed: true }
210
+ ],
211
+ allPassed: true
212
+ }
166
213
  }
167
214
  })
168
215
  ```
@@ -219,7 +266,7 @@ Skipping this step means subsequent graph queries (impact analysis, dependency h
219
266
  }
220
267
  ```
221
268
 
222
- 5. **Sync roadmap (if present).** If `docs/roadmap.md` exists, trigger a roadmap sync to update linked feature statuses based on the just-completed execution state. Use the `manage_roadmap` MCP tool with `sync` action if available, or invoke `/harness:roadmap --sync`. This keeps the roadmap current as plans are executed. If no roadmap exists, skip this step silently.
269
+ 5. **Sync roadmap (mandatory when present).** If `docs/roadmap.md` exists, call `manage_roadmap` with action `sync` and `apply: true` to update linked feature statuses from the just-completed execution state. Do not use `force_sync: true` — the human-always-wins rule applies. If `manage_roadmap` is unavailable, fall back to direct file manipulation using `syncRoadmap()` from core. If no roadmap exists, skip silently.
223
270
 
224
271
  6. **Learnings are append-only.** Never edit or delete previous learnings. They are a chronological record.
225
272
 
@@ -236,7 +283,16 @@ Skipping this step means subsequent graph queries (impact analysis, dependency h
236
283
  "reason": "All tasks complete",
237
284
  "artifacts": ["<list of created/modified files>"],
238
285
  "requiresConfirmation": false,
239
- "summary": "Completed <N> tasks. <N> files created, <N> modified. All quick gates passed."
286
+ "summary": "Completed <N> tasks. <N> files created, <N> modified. All quick gates passed.",
287
+ "qualityGate": {
288
+ "checks": [
289
+ { "name": "all-tasks-complete", "passed": true, "detail": "<N>/<N> tasks" },
290
+ { "name": "harness-validate", "passed": true },
291
+ { "name": "tests-pass", "passed": true },
292
+ { "name": "no-blockers", "passed": true }
293
+ ],
294
+ "allPassed": true
295
+ }
240
296
  }
241
297
  }
242
298
  ```
@@ -265,12 +321,13 @@ These are non-negotiable. When any condition is met, stop immediately.
265
321
  ## Harness Integration
266
322
 
267
323
  - **`harness validate`** — Run after every task completion. Mandatory. No task is complete without a passing validation.
324
+ - **`gather_context`** — Used in PREPARE phase to load state, learnings, handoff, and validation in a single call instead of 4+ separate reads.
268
325
  - **`harness check-deps`** — Run when tasks add new imports or modules. Catches boundary violations early.
269
326
  - **`harness state show`** — View current execution position and progress.
270
327
  - **`harness state learn "<message>"`** — Append a learning from the command line.
271
328
  - **`.harness/state.json`** — Read at session start to resume position. Updated after every task.
272
329
  - **`.harness/learnings.md`** — Append-only knowledge capture. Read at session start for prior context.
273
- - **Roadmap sync** — After completing plan execution, sync roadmap status via `manage_roadmap sync` if `docs/roadmap.md` exists. Keeps roadmap current with execution progress.
330
+ - **Roadmap sync** — After completing plan execution, call `manage_roadmap` with action `sync` and `apply: true` to update roadmap status. Mandatory when `docs/roadmap.md` exists. Do not use `force_sync: true`. Falls back to `syncRoadmap()` from core if MCP tool is unavailable.
274
331
  - **`emit_interaction`** -- Call at plan completion to auto-transition to harness-verification. Uses auto-transition (proceeds immediately without user confirmation).
275
332
 
276
333
  ## Success Criteria
@@ -46,7 +46,34 @@ Work backward from the goal. Do not start with "what should we build?" Start wit
46
46
  type: "question",
47
47
  question: {
48
48
  text: "The spec mentions X but does not define behavior for Y. Should we:",
49
- options: ["A) Include Y in this plan", "B) Defer Y to a follow-up plan", "C) Update the spec first"]
49
+ options: [
50
+ {
51
+ label: "A) Include Y in this plan",
52
+ pros: ["Complete feature in one pass", "No follow-up coordination needed"],
53
+ cons: ["Increases plan scope and time", "May delay delivery"],
54
+ risk: "medium",
55
+ effort: "high"
56
+ },
57
+ {
58
+ label: "B) Defer Y to a follow-up plan",
59
+ pros: ["Keeps current plan focused", "Ship sooner"],
60
+ cons: ["Y remains unhandled", "May need rework when Y is added"],
61
+ risk: "low",
62
+ effort: "low"
63
+ },
64
+ {
65
+ label: "C) Update the spec first",
66
+ pros: ["Design is complete before planning", "No surprises during execution"],
67
+ cons: ["Blocks planning until spec is updated", "Extra round-trip"],
68
+ risk: "low",
69
+ effort: "medium"
70
+ }
71
+ ],
72
+ recommendation: {
73
+ optionIndex: 1,
74
+ reason: "Keeping the current plan focused reduces risk. Y can be addressed in a dedicated follow-up.",
75
+ confidence: "medium"
76
+ }
50
77
  }
51
78
  })
52
79
  ```
@@ -174,7 +201,9 @@ When presenting the task breakdown, use progress markers:
174
201
  type: "confirmation",
175
202
  confirmation: {
176
203
  text: "Approve plan at <plan-file-path>?",
177
- context: "<task count> tasks, <estimated time> minutes. <one-sentence summary>"
204
+ context: "<task count> tasks, <estimated time> minutes. <one-sentence summary>",
205
+ impact: "Approving unlocks task-by-task execution. Plan defines exact file paths, code, and commands.",
206
+ risk: "low"
178
207
  }
179
208
  })
180
209
  ```
@@ -192,7 +221,16 @@ When presenting the task breakdown, use progress markers:
192
221
  "reason": "Plan approved with all tasks defined",
193
222
  "artifacts": ["<plan file path>"],
194
223
  "requiresConfirmation": true,
195
- "summary": "<Plan title> -- <N> tasks, <N> checkpoints. Estimated <time>."
224
+ "summary": "<Plan title> -- <N> tasks, <N> checkpoints. Estimated <time>.",
225
+ "qualityGate": {
226
+ "checks": [
227
+ { "name": "plan-written", "passed": true, "detail": "Written to docs/plans/" },
228
+ { "name": "harness-validate", "passed": true },
229
+ { "name": "observable-truths-traced", "passed": true },
230
+ { "name": "human-approved", "passed": true }
231
+ ],
232
+ "allPassed": true
233
+ }
196
234
  }
197
235
  }
198
236
  ```
@@ -43,6 +43,20 @@ pnpm typecheck 2>&1 || npx tsc --noEmit 2>&1 || make typecheck 2>&1
43
43
  pnpm test 2>&1 || npm test 2>&1 || make test 2>&1
44
44
  ```
45
45
 
46
+ #### 2b. Harness Health Check
47
+
48
+ If the project uses harness, run `assess_project` for harness-specific validation:
49
+
50
+ ```javascript
51
+ assess_project({
52
+ path: "<project-root>",
53
+ checks: ["validate", "deps"],
54
+ mode: "summary"
55
+ })
56
+ ```
57
+
58
+ If `healthy: false`, include harness check failures in the mechanical check report. This replaces manually running `harness validate` and `harness check-deps` as separate commands.
59
+
46
60
  #### 3. Gate Decision
47
61
 
48
62
  - **Any check fails:** STOP. Report the failure. Do not proceed to AI review. The author must fix mechanical issues first.
@@ -133,12 +147,21 @@ If no source files are staged, skip the security scan.
133
147
 
134
148
  Perform a focused, lightweight review of staged changes. This is NOT a full code review — it catches obvious issues only.
135
149
 
136
- #### 1. Get the Staged Diff
150
+ #### 1. Quick Review via review_changes
137
151
 
138
- ```bash
139
- git diff --cached
152
+ Use the `review_changes` MCP tool with `depth: 'quick'` for fast pre-commit analysis:
153
+
154
+ ```javascript
155
+ review_changes({
156
+ path: "<project-root>",
157
+ diff: "<output of git diff --cached>",
158
+ depth: "quick",
159
+ mode: "summary"
160
+ })
140
161
  ```
141
162
 
163
+ This runs forbidden pattern checks and size analysis. For the semantic review items below, supplement with manual diff reading.
164
+
142
165
  #### 2. Quick Review Checklist
143
166
 
144
167
  Review the staged diff for these high-signal issues only:
@@ -219,6 +242,8 @@ fi
219
242
  - Follows Principle 7 (Deterministic-vs-LLM Split) — mechanical checks first, AI review second
220
243
  - Reads `.harness/review-learnings.md` for calibration (if present)
221
244
  - Complements harness-code-review (full review) — use pre-commit for quick checks, code-review for thorough analysis
245
+ - **`assess_project`** — Used in Phase 1 for harness-specific health checks (validate + deps) in a single call.
246
+ - **`review_changes`** — Used in Phase 4 with `depth: 'quick'` for fast pre-commit diff analysis.
222
247
 
223
248
  ## Success Criteria
224
249
 
@@ -111,7 +111,19 @@ Run every check below. Record each as **pass**, **warn**, or **fail**:
111
111
  | `test` script exists in root `package.json` | fail |
112
112
  | `lint` script exists in root `package.json` | fail |
113
113
  | `typecheck` or `tsc` script exists in root `package.json` | fail |
114
- | `harness validate` passes (project-level health check) | fail |
114
+ | `assess_project` passes (harness health + lint gate) | fail |
115
+
116
+ For the `assess_project` check, run it with all harness-specific checks including lint:
117
+
118
+ ```javascript
119
+ assess_project({
120
+ path: "<project-root>",
121
+ checks: ["validate", "deps", "docs", "lint"],
122
+ mode: "detailed"
123
+ })
124
+ ```
125
+
126
+ If `healthy: false`, each failing check in the `checks` array maps to a separate finding with its `topIssue`. This replaces running `harness validate`, `harness check-deps`, and lint as separate commands.
115
127
 
116
128
  ##### i18n Coverage (conditional)
117
129
 
@@ -507,7 +519,7 @@ This framing is informational — it does not block anything. It gives the team
507
519
 
508
520
  ## Harness Integration
509
521
 
510
- - **`harness validate`** — Run after auto-fixes to verify project health. Also included in AUDIT phase as a meta-check (does the project pass its own validation?).
522
+ - **`assess_project`** — Used in AUDIT Phase 1 (CI/CD section) to run harness validation, dependency checks, doc coverage, and lint in a single parallel call. Also run after auto-fixes in Phase 3 to verify project health. Automatically inherits new checks added to `assess_project`.
511
523
  - **Sub-skill invocations** — Phase 2 dispatches `detect-doc-drift`, `cleanup-dead-code`, `enforce-architecture`, and `diagnostics` as parallel agents. Phase 3 delegates fixes to `align-documentation` and `cleanup-dead-code`.
512
524
  - **State file** — `.harness/release-readiness.json` enables session resumption and progress tracking. This file is read at the start of each invocation and written at the end.
513
525
  - **Report file** — `release-readiness-report.md` is written to the project root. It is a snapshot, not a tracked artifact — regenerate it on each run.
@@ -177,7 +177,9 @@ emit_interaction({
177
177
  type: "confirmation",
178
178
  confirmation: {
179
179
  text: "Verification report: <VERDICT>. Accept and proceed?",
180
- context: "<summary: N artifacts checked, N gaps found>"
180
+ context: "<summary: N artifacts checked, N gaps found>",
181
+ impact: "Accepting proceeds to code review. Declining requires gap resolution first.",
182
+ risk: "<low if PASS, high if gaps remain>"
181
183
  }
182
184
  })
183
185
  ```
@@ -212,7 +214,21 @@ Call `emit_interaction`:
212
214
  "reason": "Verification passed at all 3 levels",
213
215
  "artifacts": ["<verified file paths>"],
214
216
  "requiresConfirmation": false,
215
- "summary": "Verification passed: <N> artifacts checked. EXISTS, SUBSTANTIVE, WIRED all passed."
217
+ "summary": "Verification passed: <N> artifacts checked. EXISTS, SUBSTANTIVE, WIRED all passed.",
218
+ "qualityGate": {
219
+ "checks": [
220
+ { "name": "level1-exists", "passed": true, "detail": "<N> artifacts present" },
221
+ { "name": "level2-substantive", "passed": true, "detail": "No stubs or placeholders" },
222
+ {
223
+ "name": "level3-wired",
224
+ "passed": true,
225
+ "detail": "All artifacts imported, tested, integrated"
226
+ },
227
+ { "name": "anti-pattern-scan", "passed": true, "detail": "No matches" },
228
+ { "name": "harness-validate", "passed": true }
229
+ ],
230
+ "allPassed": true
231
+ }
216
232
  }
217
233
  }
218
234
  ```
@@ -74,9 +74,9 @@ For each item categorized as safe:
74
74
 
75
75
  **New fix types:**
76
76
 
77
- - **Dead exports (non-public):** Use `apply_fixes` with `fixTypes: ['dead-exports']`. The tool removes the `export` keyword. If the function/class has zero internal callers too, delete the entire declaration.
78
- - **Commented-out code:** Use `apply_fixes` with `fixTypes: ['commented-code']`. The tool deletes commented-out code blocks. This is cosmetic and only needs lint verification.
79
- - **Orphaned dependencies:** Use `apply_fixes` with `fixTypes: ['orphaned-deps']`. The tool removes the dep from package.json. **Must run `pnpm install && pnpm test` after** to verify nothing breaks.
77
+ - **Dead exports (non-public):** Use `detect_entropy` with `autoFix: true, fixTypes: ['dead-exports']`. The tool removes the `export` keyword. If the function/class has zero internal callers too, delete the entire declaration.
78
+ - **Commented-out code:** Use `detect_entropy` with `autoFix: true, fixTypes: ['commented-code']`. The tool deletes commented-out code blocks. This is cosmetic and only needs lint verification.
79
+ - **Orphaned dependencies:** Use `detect_entropy` with `autoFix: true, fixTypes: ['orphaned-deps']`. The tool removes the dep from package.json. **Must run `pnpm install && pnpm test` after** to verify nothing breaks.
80
80
 
81
81
  ### Phase 3.5: Convergence Loop (Standalone)
82
82