@harness-engineering/cli 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/bin/harness.js +1 -1
  2. package/dist/{chunk-IXT3KLVN.js → chunk-APYEWOCR.js} +355 -19
  3. package/dist/index.js +1 -1
  4. package/package.json +6 -4
  5. package/dist/agents/commands/claude-code/harness/add-component.md +0 -34
  6. package/dist/agents/commands/claude-code/harness/align-documentation.md +0 -33
  7. package/dist/agents/commands/claude-code/harness/architecture-advisor.md +0 -41
  8. package/dist/agents/commands/claude-code/harness/brainstorming.md +0 -42
  9. package/dist/agents/commands/claude-code/harness/check-mechanical-constraints.md +0 -32
  10. package/dist/agents/commands/claude-code/harness/cleanup-dead-code.md +0 -33
  11. package/dist/agents/commands/claude-code/harness/code-review.md +0 -33
  12. package/dist/agents/commands/claude-code/harness/debugging.md +0 -43
  13. package/dist/agents/commands/claude-code/harness/detect-doc-drift.md +0 -32
  14. package/dist/agents/commands/claude-code/harness/diagnostics.md +0 -43
  15. package/dist/agents/commands/claude-code/harness/enforce-architecture.md +0 -32
  16. package/dist/agents/commands/claude-code/harness/execution.md +0 -43
  17. package/dist/agents/commands/claude-code/harness/git-workflow.md +0 -32
  18. package/dist/agents/commands/claude-code/harness/initialize-project.md +0 -33
  19. package/dist/agents/commands/claude-code/harness/onboarding.md +0 -32
  20. package/dist/agents/commands/claude-code/harness/parallel-agents.md +0 -35
  21. package/dist/agents/commands/claude-code/harness/planning.md +0 -41
  22. package/dist/agents/commands/claude-code/harness/pre-commit-review.md +0 -38
  23. package/dist/agents/commands/claude-code/harness/refactoring.md +0 -35
  24. package/dist/agents/commands/claude-code/harness/skill-authoring.md +0 -35
  25. package/dist/agents/commands/claude-code/harness/state-management.md +0 -35
  26. package/dist/agents/commands/claude-code/harness/tdd.md +0 -42
  27. package/dist/agents/commands/claude-code/harness/validate-context-engineering.md +0 -32
  28. package/dist/agents/commands/claude-code/harness/verification.md +0 -38
  29. package/dist/agents/commands/gemini-cli/harness/add-component.toml +0 -240
  30. package/dist/agents/commands/gemini-cli/harness/align-documentation.toml +0 -238
  31. package/dist/agents/commands/gemini-cli/harness/architecture-advisor.toml +0 -469
  32. package/dist/agents/commands/gemini-cli/harness/brainstorming.toml +0 -326
  33. package/dist/agents/commands/gemini-cli/harness/check-mechanical-constraints.toml +0 -249
  34. package/dist/agents/commands/gemini-cli/harness/cleanup-dead-code.toml +0 -258
  35. package/dist/agents/commands/gemini-cli/harness/code-review.toml +0 -461
  36. package/dist/agents/commands/gemini-cli/harness/debugging.toml +0 -436
  37. package/dist/agents/commands/gemini-cli/harness/detect-doc-drift.toml +0 -215
  38. package/dist/agents/commands/gemini-cli/harness/diagnostics.toml +0 -401
  39. package/dist/agents/commands/gemini-cli/harness/enforce-architecture.toml +0 -222
  40. package/dist/agents/commands/gemini-cli/harness/execution.toml +0 -381
  41. package/dist/agents/commands/gemini-cli/harness/git-workflow.toml +0 -325
  42. package/dist/agents/commands/gemini-cli/harness/initialize-project.toml +0 -257
  43. package/dist/agents/commands/gemini-cli/harness/onboarding.toml +0 -316
  44. package/dist/agents/commands/gemini-cli/harness/parallel-agents.toml +0 -221
  45. package/dist/agents/commands/gemini-cli/harness/planning.toml +0 -405
  46. package/dist/agents/commands/gemini-cli/harness/pre-commit-review.toml +0 -294
  47. package/dist/agents/commands/gemini-cli/harness/refactoring.toml +0 -209
  48. package/dist/agents/commands/gemini-cli/harness/skill-authoring.toml +0 -350
  49. package/dist/agents/commands/gemini-cli/harness/state-management.toml +0 -354
  50. package/dist/agents/commands/gemini-cli/harness/tdd.toml +0 -247
  51. package/dist/agents/commands/gemini-cli/harness/validate-context-engineering.toml +0 -186
  52. package/dist/agents/commands/gemini-cli/harness/verification.toml +0 -334
@@ -1,381 +0,0 @@
1
- # Generated by harness generate-slash-commands. Do not edit.
2
- description = "Execute a planned set of tasks with harness validation and state tracking"
3
- prompt = """
4
- <context>
5
- Cognitive mode: meticulous-implementer
6
- Type: rigid
7
- State: persistent (files: .harness/state.json, .harness/learnings.md)
8
- </context>
9
-
10
- <objective>
11
- Execute a planned set of tasks with harness validation and state tracking
12
-
13
- Phases:
14
- - prepare: Load state and verify prerequisites
15
- - execute: Implement planned tasks
16
- - verify: Validate each completed task
17
- - persist: Save progress to state files
18
- </objective>
19
-
20
- <execution_context>
21
- --- SKILL.md (agents/skills/claude-code/harness-execution/SKILL.md) ---
22
- # Harness Execution
23
-
24
- > Execute a plan task by task with atomic commits, checkpoint protocol, and persistent knowledge capture. Stop on blockers. Do not guess.
25
-
26
- ## When to Use
27
-
28
- - When an approved plan exists (output of harness-planning) and implementation should begin
29
- - When resuming execution of a previously started plan after a context reset
30
- - When `on_new_feature` or `on_bug_fix` triggers fire and a plan is already in place
31
- - NOT when no plan exists (use harness-planning first)
32
- - NOT when the plan needs revision (update the plan first, then resume execution)
33
- - NOT when exploring or brainstorming (use harness-brainstorming)
34
- - NOT for ad-hoc single-task work that does not follow a plan
35
-
36
- ## Process
37
-
38
- ### Iron Law
39
-
40
- **Execute the plan as written. If the plan is wrong, stop and fix the plan — do not improvise.**
41
-
42
- Deviating from the plan mid-execution introduces untested assumptions, breaks task atomicity, and makes progress untraceable. If a task cannot be completed as written, that is a blocker. Record it and stop.
43
-
44
- ---
45
-
46
- ### Phase 1: PREPARE — Load State and Verify Prerequisites
47
-
48
- 1. **Load the plan.** Read the plan document from `docs/plans/`. Identify the total task count and any checkpoints.
49
-
50
- 2. **Load state.** Read `.harness/state.json` to determine current position. If the file does not exist, this is a fresh start — position is Task 1.
51
-
52
- 3. **Load learnings.** Read `.harness/learnings.md` for context from previous sessions. These are hard-won insights — do not ignore them.
53
-
54
- 4. **Load failures.** Read `.harness/failures.md` for known dead ends. If any entries match approaches in the current plan, surface warnings before proceeding.
55
-
56
- 5. **Load handoff.** Read `.harness/handoff.json` if it exists. Contains structured context from the previous skill (e.g., harness-planning passing context to harness-execution). Use this to prime session state.
57
-
58
- 6. **Verify prerequisites.** For the current task:
59
- - Are dependency tasks marked complete in state?
60
- - Do the files referenced in the task exist as expected?
61
- - Does the test suite pass? Run `harness validate` to confirm a clean baseline.
62
-
63
- 7. **If prerequisites fail,** do not proceed. Report what is missing and which task is blocked.
64
-
65
- ---
66
-
67
- ### Phase 2: EXECUTE — Implement Tasks Atomically
68
-
69
- For each task, starting from the current position:
70
-
71
- 1. **Read the task instructions completely** before writing any code. Understand what files to touch, what tests to write, what the expected outcome is.
72
-
73
- 2. **Follow the task instructions exactly.** The plan contains exact file paths, exact code, and exact commands. Execute them as written.
74
-
75
- 3. **TDD rhythm within each task:**
76
- - Write the test as specified in the task
77
- - Run the test — observe it fail (for the right reason)
78
- - Write the implementation as specified in the task
79
- - Run the test — observe it pass
80
- - Run `harness validate`
81
-
82
- 4. **Commit atomically.** Each task produces exactly one commit. Use the commit message specified in the plan. If no message is specified, write a descriptive message in the project's convention.
83
-
84
- 5. **Run mechanical gate.** After each task commit, run the full gate check: test suite, linter, type checker, build, and `harness validate`. This is binary pass/fail.
85
- - **All pass →** proceed to the next task.
86
- - **Any fail →** retry with error context (max 2 retries, i.e., 3 attempts in total).
87
- - **Still failing after retries →** record the failure in `.harness/failures.md`, escalate, and stop.
88
-
89
- 6. **Update state after each task.** Write to `.harness/state.json`:
90
-
91
- ```json
92
- {
93
- "schemaVersion": 1,
94
- "position": { "phase": "execute", "task": "Task N" },
95
- "progress": { "Task 1": "complete", "Task 2": "complete", "Task 3": "in_progress" },
96
- "lastSession": { "date": "YYYY-MM-DD", "summary": "Completed Tasks 1-2, starting Task 3" }
97
- }
98
- ```
99
-
100
- 7. **Handle checkpoints** according to the checkpoint protocol (see below).
101
-
102
- ---
103
-
104
- ### Checkpoint Protocol
105
-
106
- Plans contain three types of checkpoints. Each requires pausing execution.
107
-
108
- **`[checkpoint:human-verify]` — Show and Confirm**
109
-
110
- 1. Stop execution.
111
- 2. Show the human what was just completed (test output, file diff, running application).
112
- 3. State: "Task N complete. Output: [summary]. Continue to Task N+1?"
113
- 4. Wait for the human to confirm before proceeding.
114
-
115
- **`[checkpoint:decision]` — Present Options and Wait**
116
-
117
- 1. Stop execution.
118
- 2. Present the decision with options exactly as described in the plan.
119
- 3. State: "Task N requires a decision: [options]. Which do you prefer?"
120
- 4. Wait for the human to choose.
121
- 5. Record the decision in `.harness/state.json` under `decisions`.
122
-
123
- **`[checkpoint:human-action]` — Instruct and Wait**
124
-
125
- 1. Stop execution.
126
- 2. Tell the human exactly what they need to do (e.g., "Create an API key at [URL] and paste it here").
127
- 3. State: "Task N requires your action: [instructions]. Let me know when done."
128
- 4. Wait for the human to complete the action and confirm.
129
-
130
- ---
131
-
132
- ### Phase 3: VERIFY — Two-Tier Validation
133
-
134
- **Quick gate (default):** The mechanical gate in Phase 2 Step 5 IS the standard verification. Every task commit must pass it before proceeding. No additional verification step is needed for normal execution.
135
-
136
- **Deep audit (on-demand):** When `--deep` is passed or at milestone boundaries (e.g., end of a phase, final task), invoke the full `harness-verification` skill for 3-level audit:
137
-
138
- 1. **EXISTS** — Do the artifacts the task claims to produce actually exist?
139
- 2. **SUBSTANTIVE** — Do those artifacts contain meaningful, correct content (not stubs or placeholders)?
140
- 3. **WIRED** — Are those artifacts integrated into the system (imported, routed, tested, reachable)?
141
-
142
- If the deep audit fails at any level, treat it as a blocker. Record it and stop.
143
-
144
- ---
145
-
146
- ### Phase 4: PERSIST — Save Progress and Learnings
147
-
148
- Between tasks (especially between sessions):
149
-
150
- 1. **Update `.harness/state.json`** with current position, progress, and `lastSession` context:
151
-
152
- ```json
153
- {
154
- "lastSession": {
155
- "lastSkill": "harness-execution",
156
- "pendingTasks": ["Task 4", "Task 5"]
157
- }
158
- }
159
- ```
160
-
161
- 2. **Append tagged learnings to `.harness/learnings.md`.** Tag every entry with skill and outcome:
162
-
163
- ```markdown
164
- ## YYYY-MM-DD — Task N: <task name>
165
-
166
- - [skill:harness-execution] [outcome:success] What was accomplished
167
- - [skill:harness-execution] [outcome:gotcha] What was surprising or non-obvious
168
- - [skill:harness-execution] [outcome:decision] What was decided and why
169
- ```
170
-
171
- 3. **Record failures in `.harness/failures.md`** if any task was escalated after retry exhaustion (from Phase 2 Step 5). Include the approach attempted and why it failed, so future sessions avoid the same dead end.
172
-
173
- 4. **Write `.harness/handoff.json`** with structured context for the next skill or session:
174
-
175
- ```json
176
- {
177
- "fromSkill": "harness-execution",
178
- "timestamp": "YYYY-MM-DDTHH:MM:SSZ",
179
- "summary": "Completed Tasks 1-3. Task 4 blocked on missing API endpoint.",
180
- "pendingTasks": ["Task 4", "Task 5"],
181
- "blockers": ["Task 4: /api/notifications endpoint not implemented"],
182
- "learnings": ["Date comparison needs UTC normalization"]
183
- }
184
- ```
185
-
186
- 5. **Learnings are append-only.** Never edit or delete previous learnings. They are a chronological record.
187
-
188
- ---
189
-
190
- ### Stopping Conditions
191
-
192
- These are non-negotiable. When any condition is met, stop immediately.
193
-
194
- - **Hit a blocker.** The task cannot be completed as written. Something is missing, broken, or wrong. Do not guess at a fix. Do not improvise. Record the blocker in state and report it: "Blocked on Task N: [specific issue]. The plan needs to be updated."
195
-
196
- - **Test failure after implementation.** The test was supposed to pass but does not. Do not retry blindly. Read the failure. Diagnose the root cause. If the fix is within the current task scope, fix it. If not, stop — the plan may be wrong.
197
-
198
- - **Unclear instruction.** The task says something ambiguous or contradictory. Do not interpret it. Ask: "Task N says [quote]. I interpret this as [interpretation]. Is that correct?"
199
-
200
- - **Harness validation failure.** `harness validate` fails after a task. Do not proceed. The task introduced an architectural violation or constraint breach. Fix it before moving on.
201
-
202
- - **Three consecutive failures on the same task.** After 3 attempts, the task design is likely wrong. Stop. Report: "Task N has failed 3 times. Root cause: [analysis]. The plan may need revision."
203
-
204
- ## Harness Integration
205
-
206
- - **`harness validate`** — Run after every task completion. Mandatory. No task is complete without a passing validation.
207
- - **`harness check-deps`** — Run when tasks add new imports or modules. Catches boundary violations early.
208
- - **`harness state show`** — View current execution position and progress.
209
- - **`harness state learn "<message>"`** — Append a learning from the command line.
210
- - **`.harness/state.json`** — Read at session start to resume position. Updated after every task.
211
- - **`.harness/learnings.md`** — Append-only knowledge capture. Read at session start for prior context.
212
-
213
- ## Success Criteria
214
-
215
- - Every task in the plan is executed in order, atomically, with one commit per task
216
- - `.harness/state.json` accurately reflects current position and progress
217
- - `.harness/learnings.md` contains entries for every session with non-trivial discoveries
218
- - `harness validate` passes after every task
219
- - Checkpoints were honored: execution paused at every `[checkpoint:*]` marker
220
- - No improvisation: tasks were executed as written, or execution was stopped and the blocker was reported
221
- - All stopping conditions were respected (no guessing past blockers, no blind retries)
222
-
223
- ## Examples
224
-
225
- ### Example: Executing a 5-Task Notification Plan
226
-
227
- **Session Start (fresh):**
228
-
229
- ```
230
- Read plan: docs/plans/2026-03-14-notifications-plan.md (5 tasks)
231
- Read state: .harness/state.json — file not found (fresh start, position: Task 1)
232
- Read learnings: .harness/learnings.md — file not found (no prior context)
233
- Run: harness validate — passes. Clean baseline confirmed.
234
- ```
235
-
236
- **Task 1: Define notification types**
237
-
238
- ```
239
- 1. Create src/types/notification.ts with Notification interface
240
- 2. Run: harness validate — passes
241
- 3. Commit: "feat(notifications): define Notification type"
242
- 4. Update state: { position: Task 2, progress: { "Task 1": "complete" } }
243
- ```
244
-
245
- **Task 2: Create notification service (TDD)**
246
-
247
- ```
248
- 1. Write test: src/services/notification-service.test.ts
249
- 2. Run test: FAIL — NotificationService is not defined (correct failure)
250
- 3. Implement: src/services/notification-service.ts
251
- 4. Run test: PASS
252
- 5. Run: harness validate — passes
253
- 6. Commit: "feat(notifications): add NotificationService.create"
254
- 7. Update state: { position: Task 3, progress: { "Task 1": "complete", "Task 2": "complete" } }
255
- ```
256
-
257
- **Task 3: Add list and expiry (TDD) — has checkpoint**
258
-
259
- ```
260
- [checkpoint:human-verify] — "Tasks 1-2 complete. NotificationService can create
261
- notifications. Tests pass. Continue to Task 3 (list and expiry methods)?"
262
- Human: "Continue."
263
-
264
- 1. Write tests: list by userId, filter expired
265
- 2. Run tests: FAIL (methods not implemented)
266
- 3. Implement list() and isExpired()
267
- 4. Run tests: PASS
268
- 5. Run: harness validate — passes
269
- 6. Commit: "feat(notifications): add list and expiry to NotificationService"
270
- 7. Update state, append learning:
271
- "## 2026-03-14 — Task 3: list and expiry
272
- - [skill:harness-execution] [outcome:gotcha] Date comparison needed UTC normalization — used Date.now() not new Date()"
273
- ```
274
-
275
- **Context reset mid-plan (resume at Task 4):**
276
-
277
- ```
278
- Read plan: docs/plans/2026-03-14-notifications-plan.md
279
- Read state: .harness/state.json — position: Task 4, Tasks 1-3 complete
280
- Read learnings: .harness/learnings.md — "Date comparison needed UTC normalization"
281
- Run: harness validate — passes. Resume from Task 4.
282
- ```
283
-
284
- ## Gates
285
-
286
- These are hard stops. Violating any gate means the process has broken down.
287
-
288
- - **No execution without a plan.** If no plan document exists, do not start. Use harness-planning to create one.
289
- - **No improvisation.** Execute the plan as written. If the plan says "create file X with code Y," create file X with code Y. Do not add "improvements" or "optimizations" that are not in the plan.
290
- - **No skipping tasks.** Tasks are ordered by dependency. Skipping a task means later tasks may fail. Execute in order.
291
- - **No skipping validation.** `harness validate` runs after every task. No exceptions. A task that passes its tests but fails validation is not complete.
292
- - **No ignoring checkpoints.** If a task has a `[checkpoint:*]` marker, execution must pause. Do not auto-continue past checkpoints.
293
- - **No guessing past blockers.** If a task cannot be completed as written, stop. Report the blocker. Do not invent a workaround.
294
- - **State must be updated.** After every task, `.harness/state.json` must reflect the new position. Skipping state updates makes resume impossible.
295
-
296
- ## Escalation
297
-
298
- - **When a task fails and the fix is outside task scope:** Report: "Task N failed because [reason]. The fix requires changes to [files/tasks outside scope]. The plan needs to be updated at Tasks [X, Y] before I can continue."
299
- - **When the plan references files that do not exist:** The plan is out of date or was written against a different branch. Report: "Task N references [file] which does not exist. Plan may need regeneration."
300
- - **When tests pass but behavior seems wrong:** Do not ignore your instinct, but also do not act on it unilaterally. Report: "Task N passes all tests, but I notice [observation]. Should I investigate before proceeding?"
301
- - **When state is corrupted or inconsistent:** If `.harness/state.json` says Task 5 is complete but the code for Task 5 does not exist, the state is wrong. Report the inconsistency. Do not trust corrupted state — re-verify from Task 1 if needed.
302
- - **When the human wants to skip ahead:** Explain the risk: "Skipping Task N means Tasks [X, Y] that depend on it may fail. If you want to skip, we should update the plan to remove the dependency." Get explicit approval before skipping.
303
-
304
- ## Trace Output (Optional)
305
-
306
- When `.harness/gate.json` has `"trace": true` or `--verbose` is passed, append one-sentence reasoning at each phase boundary to `.harness/trace.md`.
307
-
308
- **Format:** `**[PHASE HH:MM:SS]** summary`
309
-
310
- Example:
311
-
312
- ```markdown
313
- **[PREPARE 14:32:07]** Loaded plan with 5 tasks, resuming from Task 3 per state.json.
314
- **[EXECUTE 14:32:15]** Task 3 committed; mechanical gate passed on first attempt.
315
- **[VERIFY 14:35:42]** Deep audit requested at milestone; all 3 levels passed.
316
- **[PERSIST 14:35:50]** State updated, handoff.json written with 2 pending tasks.
317
- ```
318
-
319
- This is for human debugging only. Not required for normal execution.
320
-
321
-
322
- --- skill.yaml (agents/skills/claude-code/harness-execution/skill.yaml) ---
323
- name: harness-execution
324
- version: "1.0.0"
325
- description: Execute a planned set of tasks with harness validation and state tracking
326
- cognitive_mode: meticulous-implementer
327
- triggers:
328
- - manual
329
- - on_new_feature
330
- - on_bug_fix
331
- platforms:
332
- - claude-code
333
- - gemini-cli
334
- tools:
335
- - Bash
336
- - Read
337
- - Write
338
- - Edit
339
- - Glob
340
- - Grep
341
- cli:
342
- command: harness skill run harness-execution
343
- args:
344
- - name: path
345
- description: Project root path
346
- required: false
347
- mcp:
348
- tool: run_skill
349
- input:
350
- skill: harness-execution
351
- path: string
352
- type: rigid
353
- phases:
354
- - name: prepare
355
- description: Load state and verify prerequisites
356
- required: true
357
- - name: execute
358
- description: Implement planned tasks
359
- required: true
360
- - name: verify
361
- description: Validate each completed task
362
- required: true
363
- - name: persist
364
- description: Save progress to state files
365
- required: true
366
- state:
367
- persistent: true
368
- files:
369
- - .harness/state.json
370
- - .harness/learnings.md
371
- depends_on:
372
- - harness-verification
373
-
374
- </execution_context>
375
-
376
- <process>
377
- 1. Try: invoke mcp__harness__run_skill with skill: "harness-execution"
378
- 2. If MCP unavailable: follow the SKILL.md workflow provided above directly
379
- 3. Pass through any arguments provided by the user
380
- </process>
381
- """