@harness-engineering/cli 1.8.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/agents/skills/claude-code/harness-autopilot/SKILL.md +2 -2
  2. package/dist/agents/skills/claude-code/harness-brainstorming/SKILL.md +5 -7
  3. package/dist/agents/skills/claude-code/harness-code-review/SKILL.md +2 -2
  4. package/dist/agents/skills/claude-code/harness-planning/SKILL.md +2 -2
  5. package/dist/agents/skills/claude-code/harness-roadmap/SKILL.md +4 -5
  6. package/dist/agents/skills/gemini-cli/add-harness-component/SKILL.md +192 -0
  7. package/dist/agents/skills/gemini-cli/add-harness-component/skill.yaml +32 -0
  8. package/dist/agents/skills/gemini-cli/align-documentation/SKILL.md +213 -0
  9. package/dist/agents/skills/gemini-cli/align-documentation/skill.yaml +31 -0
  10. package/dist/agents/skills/gemini-cli/check-mechanical-constraints/SKILL.md +191 -0
  11. package/dist/agents/skills/gemini-cli/check-mechanical-constraints/skill.yaml +32 -0
  12. package/dist/agents/skills/gemini-cli/cleanup-dead-code/SKILL.md +245 -0
  13. package/dist/agents/skills/gemini-cli/cleanup-dead-code/skill.yaml +33 -0
  14. package/dist/agents/skills/gemini-cli/detect-doc-drift/SKILL.md +179 -0
  15. package/dist/agents/skills/gemini-cli/detect-doc-drift/skill.yaml +30 -0
  16. package/dist/agents/skills/gemini-cli/enforce-architecture/SKILL.md +240 -0
  17. package/dist/agents/skills/gemini-cli/enforce-architecture/skill.yaml +34 -0
  18. package/dist/agents/skills/gemini-cli/harness-architecture-advisor/SKILL.md +397 -0
  19. package/dist/agents/skills/gemini-cli/harness-architecture-advisor/skill.yaml +48 -0
  20. package/dist/agents/skills/gemini-cli/harness-autopilot/SKILL.md +2 -2
  21. package/dist/agents/skills/gemini-cli/harness-brainstorming/SKILL.md +317 -0
  22. package/dist/agents/skills/gemini-cli/harness-brainstorming/skill.yaml +49 -0
  23. package/dist/agents/skills/gemini-cli/harness-code-review/SKILL.md +681 -0
  24. package/dist/agents/skills/gemini-cli/harness-code-review/skill.yaml +45 -0
  25. package/dist/agents/skills/gemini-cli/harness-debugging/SKILL.md +366 -0
  26. package/dist/agents/skills/gemini-cli/harness-debugging/skill.yaml +47 -0
  27. package/dist/agents/skills/gemini-cli/harness-diagnostics/SKILL.md +318 -0
  28. package/dist/agents/skills/gemini-cli/harness-diagnostics/skill.yaml +50 -0
  29. package/dist/agents/skills/gemini-cli/harness-execution/SKILL.md +382 -0
  30. package/dist/agents/skills/gemini-cli/harness-execution/skill.yaml +51 -0
  31. package/dist/agents/skills/gemini-cli/harness-git-workflow/SKILL.md +268 -0
  32. package/dist/agents/skills/gemini-cli/harness-git-workflow/skill.yaml +31 -0
  33. package/dist/agents/skills/gemini-cli/harness-integrity/SKILL.md +167 -0
  34. package/dist/agents/skills/gemini-cli/harness-integrity/skill.yaml +47 -0
  35. package/dist/agents/skills/gemini-cli/harness-onboarding/SKILL.md +288 -0
  36. package/dist/agents/skills/gemini-cli/harness-onboarding/skill.yaml +30 -0
  37. package/dist/agents/skills/gemini-cli/harness-parallel-agents/SKILL.md +171 -0
  38. package/dist/agents/skills/gemini-cli/harness-parallel-agents/skill.yaml +33 -0
  39. package/dist/agents/skills/gemini-cli/harness-planning/SKILL.md +389 -0
  40. package/dist/agents/skills/gemini-cli/harness-planning/skill.yaml +49 -0
  41. package/dist/agents/skills/gemini-cli/harness-pre-commit-review/SKILL.md +262 -0
  42. package/dist/agents/skills/gemini-cli/harness-pre-commit-review/skill.yaml +33 -0
  43. package/dist/agents/skills/gemini-cli/harness-refactoring/SKILL.md +169 -0
  44. package/dist/agents/skills/gemini-cli/harness-refactoring/skill.yaml +33 -0
  45. package/dist/agents/skills/gemini-cli/harness-roadmap/SKILL.md +4 -5
  46. package/dist/agents/skills/gemini-cli/harness-skill-authoring/SKILL.md +292 -0
  47. package/dist/agents/skills/gemini-cli/harness-skill-authoring/skill.yaml +32 -0
  48. package/dist/agents/skills/gemini-cli/harness-state-management/SKILL.md +309 -0
  49. package/dist/agents/skills/gemini-cli/harness-state-management/skill.yaml +32 -0
  50. package/dist/agents/skills/gemini-cli/harness-tdd/SKILL.md +177 -0
  51. package/dist/agents/skills/gemini-cli/harness-tdd/skill.yaml +48 -0
  52. package/dist/agents/skills/gemini-cli/harness-verification/SKILL.md +328 -0
  53. package/dist/agents/skills/gemini-cli/harness-verification/skill.yaml +42 -0
  54. package/dist/agents/skills/gemini-cli/harness-verify/SKILL.md +159 -0
  55. package/dist/agents/skills/gemini-cli/harness-verify/skill.yaml +40 -0
  56. package/dist/agents/skills/gemini-cli/initialize-harness-project/SKILL.md +224 -0
  57. package/dist/agents/skills/gemini-cli/initialize-harness-project/skill.yaml +31 -0
  58. package/dist/agents/skills/gemini-cli/validate-context-engineering/SKILL.md +150 -0
  59. package/dist/agents/skills/gemini-cli/validate-context-engineering/skill.yaml +31 -0
  60. package/dist/bin/harness.js +3 -3
  61. package/dist/{chunk-SJECMKSS.js → chunk-E2RTDBMG.js} +25 -13
  62. package/dist/{chunk-LNI4T7R6.js → chunk-KJANDVVC.js} +20 -18
  63. package/dist/{chunk-3JWCBVUZ.js → chunk-RT2LYQHF.js} +1 -1
  64. package/dist/{dist-NT3GXHQZ.js → dist-CCM3L3UE.js} +1 -1
  65. package/dist/{dist-BDO5GFEM.js → dist-K6KTTN3I.js} +3 -3
  66. package/dist/index.js +3 -3
  67. package/dist/validate-cross-check-ZGKFQY57.js +7 -0
  68. package/package.json +6 -6
  69. package/dist/agents/skills/node_modules/.bin/glob +0 -17
  70. package/dist/agents/skills/node_modules/.bin/vitest +0 -17
  71. package/dist/agents/skills/node_modules/.bin/yaml +0 -17
  72. package/dist/templates/advanced/docs/specs/.gitkeep +0 -0
  73. package/dist/templates/intermediate/docs/specs/.gitkeep +0 -0
  74. package/dist/validate-cross-check-2OPGCGGU.js +0 -7
@@ -0,0 +1,309 @@
1
+ # Harness State Management
2
+
3
+ > Manage persistent state across agent sessions so that context, decisions, progress, and learnings survive context resets. Load state at session start, track position and decisions throughout, and save state for the next session.
4
+
5
+ ## When to Use
6
+
7
+ - At the start of every session that continues previous work (load state)
8
+ - When completing a task, phase, or milestone (update progress)
9
+ - When making a decision that future sessions need to know about (record decision)
10
+ - When discovering something non-obvious that would be lost on context reset (capture learning)
11
+ - When hitting a blocker that cannot be resolved in the current session (log blocker)
12
+ - At the end of every session (save state)
13
+ - NOT for storing code — code belongs in git commits, not state files
14
+ - NOT for storing large outputs or logs — state should be concise and navigable
15
+ - NOT as a replacement for a plan document — plans live in `docs/`, state tracks progress through plans
16
+
17
+ ## Process
18
+
19
+ ### Phase 1: LOAD — Restore Context from Previous Sessions
20
+
21
+ 0. **Resolve the stream.** State is organized into streams — isolated directories under `.harness/streams/<name>/`. Before loading any state files:
22
+ - If you know which work item you're resuming, pass `--stream <name>` or use `manage_state` with `stream: "<name>"`.
23
+ - Otherwise, the system auto-resolves from the current git branch (e.g., `feature/auth-rework` → `auth-rework` stream) or falls back to the active stream.
24
+ - If resolution fails, ask the user: "Which stream should I use?" and list known streams via `harness state streams list` or the `list_streams` MCP tool.
25
+ - When starting new work on a new branch, create a new stream: `harness state streams create <name> --branch <branch>`.
26
+ - Announce which stream was resolved so the human has visibility.
27
+
28
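+ 1. **Read `.harness/state.json`.** This is the primary state file; when a stream has been resolved, it lives in that stream's directory (`.harness/streams/<name>/state.json`). It contains: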
29
+ - Current position (phase, task, step)
30
+ - Progress map (which tasks are complete, in progress, or blocked)
31
+ - Decisions made in previous sessions (date, what, why)
32
+ - Blockers encountered and their status
33
+ - Last session summary
34
+
35
+ 2. **Run `harness state show`** to get a formatted view of current state. This is equivalent to reading the JSON but formatted for readability.
36
+
37
+ 3. **Read `.harness/learnings.md`.** This is the append-only knowledge base. Scan for:
38
+ - Recent learnings (last 2-3 sessions) — these are most likely still relevant
39
+ - Gotchas and warnings — these prevent repeating mistakes
40
+ - Decisions with rationale — these explain why things are the way they are
41
+
42
+ 4. **Read `.harness/failures.md` if it exists.** Scan for active anti-patterns and dead ends.
43
+
44
+ 5. **Read `.harness/handoff.json` if it exists.** It holds structured context from the last skill that ran.
45
+
46
+ 6. **Check `.harness/archive/` for historical failure logs.**
47
+
48
+ 7. **If no state exists,** this is a fresh start. Create `.harness/state.json` with initial structure:
49
+
50
+ ```json
51
+ {
52
+ "schemaVersion": 1,
53
+ "position": { "phase": "start", "task": null },
54
+ "progress": {},
55
+ "decisions": [],
56
+ "blockers": [],
57
+ "lastSession": { "date": null, "summary": null }
58
+ }
59
+ ```
60
+
61
+ 8. **Announce the loaded context.** Briefly summarize: "Resuming from [position]. [N] tasks complete. [N] blockers. Key learnings: [summary]." This confirms the state was loaded and gives the human visibility.
62
+
63
+ ### Phase 2: TRACK — Maintain State During the Session
64
+
65
+ 1. **Update position when moving between phases or tasks.** Every time work shifts to a new task or phase, update `position` in state:
66
+
67
+ ```json
68
+ "position": { "phase": "execute", "task": "Task 3", "step": "writing tests" }
69
+ ```
70
+
71
+ 2. **Record decisions when they are made.** Decisions are choices that affect future work. Record them immediately — do not wait until the end of the session:
72
+
73
+ ```json
74
+ "decisions": [
75
+ {
76
+ "date": "2026-03-14",
77
+ "what": "Use WebSocket instead of SSE for real-time notifications",
78
+ "why": "SSE does not support bidirectional communication, which Task 5 requires"
79
+ }
80
+ ]
81
+ ```
82
+
83
+ 3. **Log blockers when encountered.** A blocker is anything that prevents the current task from completing:
84
+
85
+ ```json
86
+ "blockers": [
87
+ {
88
+ "date": "2026-03-14",
89
+ "task": "Task 4",
90
+ "description": "Payment gateway API returns 403 — API key may be expired",
91
+ "status": "open"
92
+ }
93
+ ]
94
+ ```
95
+
96
+ 4. **Update progress after each completed task:**
97
+
98
+ ```json
99
+ "progress": {
100
+ "Task 1": "complete",
101
+ "Task 2": "complete",
102
+ "Task 3": "in_progress",
103
+ "Task 4": "blocked"
104
+ }
105
+ ```
106
+
107
+ 5. **Keep state concise.** State is not a log. Each field should contain the current status, not a history of all changes. History belongs in `.harness/learnings.md` and git commits.
108
+
109
+ ### Phase 3: LEARN — Capture Knowledge for Future Sessions
110
+
111
+ 1. **Identify learnings as they happen.** A learning is anything that:
112
+ - Was surprising or non-obvious
113
+ - Took significant effort to figure out
114
+ - Would cause repeated wasted time if forgotten
115
+ - Represents a decision that needs rationale preserved
116
+
117
+ 2. **Capture learnings with `harness state learn`:**
118
+
119
+ ```bash
120
+ harness state learn "Date comparison needs UTC normalization — use Date.now() not new Date()"
121
+ ```
122
+
123
+ This appends to `.harness/learnings.md` with a timestamp.
124
+
125
+ 3. **Or append directly to `.harness/learnings.md`** with structured format:
126
+
127
+ ```markdown
128
+ ## 2026-03-14 — Task 3: Notification Expiry
129
+
130
+ - [learning]: PostgreSQL's `now()` returns timestamp with timezone, but our
131
+ application uses UTC epoch milliseconds. Always convert before comparing.
132
+ - [gotcha]: The notifications table has a unique constraint on (userId, type).
133
+ Use upsert (ON CONFLICT DO UPDATE) instead of plain INSERT.
134
+ - [decision]: Chose to store expiry as epoch milliseconds rather than
135
+ ISO timestamp for consistency with the rest of the codebase.
136
+ ```
137
+
138
+ 4. **Learnings are append-only.** Never edit or delete previous learnings. They are a chronological record. Even if a learning turns out to be wrong, append a correction rather than modifying the original.
139
+
140
+ 5. **What belongs in learnings vs. git commits:**
141
+ - **Learnings:** Context, rationale, gotchas, decisions, warnings — things that explain _why_ and _what to watch out for_
142
+ - **Git commits:** Code changes, what was done — things that explain _what_ changed
143
+ - Example: The commit says "feat: add UTC normalization to date comparison." The learning says "Date comparison needs UTC normalization because PostgreSQL returns timezone-aware timestamps but our app uses epoch milliseconds."
144
+
145
+ ### Phase 4: SAVE — Persist State for Next Session
146
+
147
+ 1. **Update `.harness/state.json`** with final position, progress, and session summary:
148
+
149
+ ```json
150
+ {
151
+ "schemaVersion": 1,
152
+ "position": { "phase": "execute", "task": "Task 4" },
153
+ "progress": {
154
+ "Task 1": "complete",
155
+ "Task 2": "complete",
156
+ "Task 3": "complete"
157
+ },
158
+ "decisions": [ ... ],
159
+ "blockers": [ ... ],
160
+ "lastSession": {
161
+ "date": "2026-03-14",
162
+ "summary": "Completed Tasks 2-3. Task 3 required UTC date normalization (see learnings). Starting Task 4 next session."
163
+ }
164
+ }
165
+ ```
166
+
167
+ 2. **Verify learnings were captured.** Review `.harness/learnings.md` — were all non-obvious discoveries recorded? If something was tricky during the session, it should be in learnings.
168
+
169
+ 3. **State is saved to the active stream.** All writes (state, learnings, handoff, failures) go to the resolved stream's directory (e.g., `.harness/streams/auth-rework/state.json`). Switching to a different stream in the next session does not affect the current stream's files.
170
+
171
+ 4. **Commit state files separately from code.** State files (`.harness/streams/*/state.json`, `.harness/streams/*/learnings.md`) should be committed to git so other team members and agents can access them. Commit state updates separately from code changes so they do not clutter code diffs.
172
+
173
+ ### Building Institutional Knowledge Over Time
174
+
175
+ The `.harness/learnings.md` file grows over the lifetime of the project. It becomes a valuable resource:
176
+
177
+ - **Week 1:** A few gotchas about the development environment and initial setup decisions.
178
+ - **Month 1:** Patterns emerge — recurring issues, architectural decisions with rationale, team conventions that were established through experience.
179
+ - **Month 6:** New team members read learnings and avoid months of rediscovery. The file captures knowledge that no single person holds.
180
+ - **Year 1:** Learnings are the project's institutional memory. They explain why the architecture looks the way it does, why certain patterns were adopted, and what was tried and abandoned.
181
+
182
+ Treat learnings as a first-class project artifact. They are as valuable as tests and documentation.
183
+
184
+ ### Archival Workflow
185
+
186
+ - **Archive failures:** Move `failures.md` to `.harness/archive/` at milestone boundaries.
187
+ - **Do NOT archive learnings** — they are permanent. Learnings accumulate for the lifetime of the project.
188
+ - **Do NOT archive state** — git handles history. The current `state.json` is always the source of truth.
189
+ - **Handoff is ephemeral** — overwritten by each skill. No archival needed.
190
+
191
+ ## Harness Integration
192
+
193
+ - **`harness state show [--stream <name>]`** — Display current state in a formatted, readable view. Use at session start to quickly orient.
194
+ - **`harness state reset [--stream <name>]`** — Reset state to initial values. Use when starting a completely new effort and old state is no longer relevant. Use with caution — this discards progress tracking.
195
+ - **`harness state learn "<message>" [--stream <name>]`** — Append a learning with automatic timestamp formatting.
196
+ - **`harness state streams list`** — List all known streams with branch associations and active status.
197
+ - **`harness state streams create <name> [--branch <branch>]`** — Create a new stream, optionally associated with a git branch.
198
+ - **`harness state streams archive <name>`** — Archive a completed stream.
199
+ - **`harness state streams activate <name>`** — Set the active stream for the project.
200
+ - **`.harness/streams/<name>/state.json`** — Primary state file per stream. Read at session start, updated throughout, saved at session end.
201
+ - **`.harness/streams/<name>/learnings.md`** — Append-only knowledge base per stream.
202
+ - **`.harness/streams/<name>/failures.md`** — Active anti-patterns per stream.
203
+ - **`.harness/streams/<name>/handoff.json`** — Structured context from last skill per stream.
204
+ - **`.harness/streams/index.json`** — Stream index tracking known streams, branch associations, and active stream.
205
+ - **`.harness/trace.md`** — Optional reasoning trace. Useful for debugging agent behavior across sessions.
206
+ - **`.harness/archive/`** — Archived failure logs. Check for historical context when encountering recurring issues.
207
+
208
+ ## Success Criteria
209
+
210
+ - State is loaded at the start of every session that continues previous work
211
+ - Position is updated whenever the current phase or task changes
212
+ - Decisions are recorded with date, what, and why — immediately when made, not deferred
213
+ - Blockers are logged with task reference, description, and status
214
+ - Progress is updated after each completed task
215
+ - Learnings are captured for every non-obvious discovery during the session
216
+ - `.harness/learnings.md` entries follow the structured format (date, task, tagged items)
217
+ - Learnings are append-only — no edits or deletions of previous entries
218
+ - State is saved before session end with an accurate session summary
219
+ - State files are committed to git separately from code changes
220
+
221
+ ## Examples
222
+
223
+ ### Example: Starting a New Session (Resuming Work)
224
+
225
+ **LOAD:**
226
+
227
+ ```
228
+ Run: harness state show
229
+ Output:
230
+ Position: execute / Task 3 (writing tests)
231
+ Progress: Task 1 complete, Task 2 complete
232
+ Blockers: none
233
+ Last session: 2026-03-13 — "Completed Tasks 1-2. Task 2 required
234
+ adding a new index on notifications.userId for query performance."
235
+
236
+ Read: .harness/learnings.md
237
+ Most recent:
238
+ - [gotcha]: notifications table needs index on userId — queries
239
+ were timing out without it
240
+ - [decision]: used partial index (WHERE deleted_at IS NULL) to
241
+ avoid indexing soft-deleted rows
242
+
243
+ Summary: "Resuming from Task 3 (writing tests). Tasks 1-2 complete.
244
+ Note: notifications table has a partial index on userId — see learnings."
245
+ ```
246
+
247
+ ### Example: Recording a Decision Mid-Session
248
+
249
+ ```
250
+ Context: Implementing Task 4, need to choose between polling and WebSocket.
251
+
252
+ Record decision:
253
+ date: "2026-03-14"
254
+ what: "Use WebSocket for real-time notification delivery"
255
+ why: "Polling would require 1-second intervals for acceptable latency,
256
+ which creates too much load. WebSocket gives instant delivery with
257
+ one persistent connection per client."
258
+
259
+ Capture learning:
260
+ harness state learn "WebSocket chosen over polling for notifications.
261
+ Polling at 1s intervals = ~86k requests/day per client. WebSocket =
262
+ 1 persistent connection. See Task 4 decision in state."
263
+ ```
264
+
265
+ ### Example: Ending a Session
266
+
267
+ **SAVE:**
268
+
269
+ ```
270
+ Update .harness/state.json:
271
+ {
272
+ "schemaVersion": 1,
273
+ "position": { "phase": "execute", "task": "Task 5" },
274
+ "progress": {
275
+ "Task 1": "complete",
276
+ "Task 2": "complete",
277
+ "Task 3": "complete",
278
+ "Task 4": "complete"
279
+ },
280
+ "decisions": [
281
+ {
282
+ "date": "2026-03-14",
283
+ "what": "Use WebSocket for real-time notification delivery",
284
+ "why": "Polling creates too much load at acceptable latency intervals"
285
+ }
286
+ ],
287
+ "blockers": [],
288
+ "lastSession": {
289
+ "date": "2026-03-14",
290
+ "summary": "Completed Tasks 3-4. Task 3 added expiry logic with UTC normalization. Task 4 implemented WebSocket delivery (chose over polling — see decisions). Starting Task 5 (UI integration) next session."
291
+ }
292
+ }
293
+
294
+ Verify: .harness/learnings.md has entries for UTC normalization and WebSocket decision.
295
+ Commit: git add .harness/ && git commit -m "chore: update harness state after Tasks 3-4"
296
+ ```
297
+
298
+ ### Example: What Belongs Where
299
+
300
+ | Information | Where It Goes | Why |
301
+ | ----------------------------------------------------- | ------------------------------- | ---------------------------------------------------- |
302
+ | "Added WebSocket handler in src/ws/" | Git commit message | Describes what changed in code |
303
+ | "Chose WebSocket over polling because..." | `.harness/state.json` decisions | Records the choice and rationale for future sessions |
304
+ | "WebSocket requires sticky sessions in load balancer" | `.harness/learnings.md` | Non-obvious operational concern future sessions need |
305
+ | "Task 4 complete" | `.harness/state.json` progress | Tracks execution position |
306
+ | "The WebSocket library auto-reconnects by default" | `.harness/learnings.md` | Gotcha that saves future debugging time |
307
+ | "Tried approach X, failed because Y" | `.harness/failures.md` | Active anti-pattern to avoid repeating |
308
+ | "Completed Tasks 1-3, Task 4 pending" | `.harness/handoff.json` | Structured context for next skill |
309
+ | "[PREPARE 10:30] Loaded 3 failures" | `.harness/trace.md` | Reasoning trace for debugging agent behavior |
@@ -0,0 +1,32 @@
1
+ name: harness-state-management
2
+ version: "1.0.0"
3
+ description: Manage persistent session state across harness agent sessions
4
+ cognitive_mode: meticulous-implementer
5
+ triggers:
6
+ - manual
7
+ platforms:
8
+ - claude-code
9
+ - gemini-cli
10
+ tools:
11
+ - Bash
12
+ - Read
13
+ - Write
14
+ - Edit
15
+ - Glob
16
+ cli:
17
+ command: harness skill run harness-state-management
18
+ args:
19
+ - name: path
20
+ description: Project root path
21
+ required: false
22
+ mcp:
23
+ tool: run_skill
24
+ input:
25
+ skill: harness-state-management
26
+ path: string
27
+ type: flexible
28
+ state:
29
+ persistent: true
30
+ files:
31
+ - .harness/state.json
32
+ depends_on: []
@@ -0,0 +1,177 @@
1
+ # Harness TDD
2
+
3
+ > Red-green-refactor cycle integrated with harness validation. No production code exists without a failing test first.
4
+
5
+ ## When to Use
6
+
7
+ - Implementing any new feature, function, module, or component
8
+ - Fixing any bug (write a test that reproduces the bug first)
9
+ - Adding behavior to existing code
10
+ - When `on_new_feature` or `on_bug_fix` triggers fire
11
+ - NOT when doing pure refactoring with existing test coverage (use harness-refactoring instead)
12
+ - NOT when writing documentation, configuration, or non-behavioral files
13
+ - NOT when spiking/prototyping (but convert spikes to TDD before merging)
14
+
15
+ ## Process
16
+
17
+ ### Iron Law
18
+
19
+ **No production code may exist without a failing test that demanded its creation.**
20
+
21
+ If you find yourself writing production code first, STOP. Delete it. Write the test first. This is not a guideline — it is a hard constraint.
22
+
23
+ ### Phase 1: RED — Write a Failing Test
24
+
25
+ 1. **Identify the smallest behavior to test.** One assertion per test. One behavior per cycle. If you are testing two things, split into two cycles.
26
+
27
+ 2. **Write the test file or add to the appropriate test file.** Follow the project's existing test conventions (file naming, framework, location).
28
+
29
+ 3. **Write ONE minimal test** that asserts the expected behavior. The test should:
30
+ - Have a clear, descriptive name that states what behavior is expected
31
+ - Set up only the minimal fixtures needed
32
+ - Make a single assertion about the expected outcome
33
+ - NOT test implementation details — test observable behavior
34
+
35
+ 4. **Run the test suite.** Use the project's test runner (e.g., `npx vitest run path/to/test`, `npm test`, `pytest`).
36
+
37
+ 5. **MANDATORY: Watch the test FAIL.** Read the failure message. Confirm it fails for the RIGHT reason — the behavior is not yet implemented, not because the test is broken. If the test passes, either the behavior already exists (skip this cycle) or the test is wrong (fix the test).
38
+
39
+ 6. **Record the failure.** Note the test name and failure reason. This is your contract for the GREEN phase.
40
+
41
+ ### Phase 2: GREEN — Write the Simplest Code to Pass
42
+
43
+ 1. **Write the MINIMUM production code** that makes the failing test pass. Do not write code for future tests. Do not add error handling you have not tested. Do not generalize.
44
+
45
+ 2. **Resist the urge to write "good" code.** The GREEN phase is about correctness, not elegance. Hardcoded values are acceptable if they pass the test. Duplication is acceptable. You will clean up in REFACTOR.
46
+
47
+ 3. **Run the FULL test suite** (not just the new test). All tests must pass.
48
+
49
+ 4. **MANDATORY: Watch the test PASS.** Read the output. Confirm all tests are green. If any test fails, fix the production code (not the tests) until all pass.
50
+
51
+ 5. **Do not proceed to REFACTOR if any test is red.** Fix first.
52
+
53
+ ### Phase 3: REFACTOR — Clean Up While Green
54
+
55
+ 1. **With all tests passing,** look for opportunities to improve:
56
+ - Remove duplication (DRY)
57
+ - Extract methods or functions for clarity
58
+ - Rename for better readability
59
+ - Simplify conditionals
60
+ - Improve structure without changing behavior
61
+
62
+ 2. **Run the full test suite after EVERY change.** If a test breaks during refactoring, undo the last change immediately. Refactoring must not change behavior.
63
+
64
+ 3. **Keep refactoring steps small.** One rename, one extraction, one simplification at a time. Run tests between each.
65
+
66
+ 4. **If no refactoring is needed, skip this phase.** Not every cycle requires cleanup.
67
+
68
+ ### Phase 4: VALIDATE — Run Harness Checks
69
+
70
+ 1. **Run `harness check-deps`** to verify dependency boundaries are respected. New code must not introduce forbidden imports or layer violations.
71
+
72
+ 2. **Run `harness validate`** to verify the full project health. This catches architectural drift, documentation gaps, and constraint violations.
73
+
74
+ 3. **If either check fails,** fix the issue before committing. The fix may require another RED-GREEN-REFACTOR cycle if it involves behavioral changes.
75
+
76
+ 4. **Commit the cycle.** Each RED-GREEN-REFACTOR-VALIDATE cycle produces one atomic commit. The commit message references what behavior was added (not "add test" — describe the behavior).
77
+
78
+ ### Graph Refresh
79
+
80
+ If a knowledge graph exists at `.harness/graph/`, refresh it after code changes to keep graph queries accurate:
81
+
82
+ ```
83
+ harness scan [path]
84
+ ```
85
+
86
+ Skipping this step means subsequent graph queries (impact analysis, dependency health, test advisor) may return stale results.
87
+
88
+ ### Cycle Rhythm
89
+
90
+ Repeat the 4 phases for each new behavior. A typical feature requires 3-10 cycles. Each cycle should take 2-15 minutes. If a cycle takes longer than 15 minutes, the step is too large — break it down.
91
+
92
+ **Ordering within a feature:**
93
+
94
+ 1. Start with the happy path (simplest success case)
95
+ 2. Add edge cases one at a time
96
+ 3. Add error handling cases
97
+ 4. Add integration points last
98
+
99
+ ## Harness Integration
100
+
101
+ - **`harness check-deps`** — Run in VALIDATE phase after each cycle. Catches forbidden imports and layer boundary violations introduced by new code.
102
+ - **`harness validate`** — Run in VALIDATE phase after each cycle. Full project health check including architecture, documentation, and constraints.
103
+ - **`harness cleanup`** — Run periodically (every 3-5 cycles) to detect entropy accumulation. Address any issues before they compound.
104
+ - **Test runner** — Use the project's configured test runner. Harness does not prescribe a test framework but the test must actually execute and report results.
105
+
106
+ ## Success Criteria
107
+
108
+ - Every production function/method has at least one corresponding test
109
+ - Every test was observed to fail before the production code was written
110
+ - Every test was observed to pass after the production code was written
111
+ - `harness check-deps` passes after each cycle
112
+ - `harness validate` passes after each cycle
113
+ - Each cycle is an atomic commit with a descriptive message
114
+ - No test tests implementation details (only observable behavior)
115
+ - No production code exists that was not demanded by a failing test
116
+
117
+ ## Examples
118
+
119
+ ### Example: Adding a `calculateTotal` function
120
+
121
+ **RED:**
122
+
123
+ ```typescript
124
+ // cart.test.ts
125
+ it('calculates total for items with quantity and price', () => {
126
+ const items = [
127
+ { name: 'Widget', price: 10, quantity: 2 },
128
+ { name: 'Gadget', price: 25, quantity: 1 },
129
+ ];
130
+ expect(calculateTotal(items)).toBe(45);
131
+ });
132
+ ```
133
+
134
+ Run tests. Observe: `ReferenceError: calculateTotal is not defined`. Correct failure — function does not exist yet.
135
+
136
+ **GREEN:**
137
+
138
+ ```typescript
139
+ // cart.ts
140
+ export function calculateTotal(items: Array<{ price: number; quantity: number }>): number {
141
+ return items.reduce((sum, item) => sum + item.price * item.quantity, 0);
142
+ }
143
+ ```
144
+
145
+ Run tests. Observe: all tests pass.
146
+
147
+ **REFACTOR:** No refactoring needed for this simple function. Skip.
148
+
149
+ **VALIDATE:**
150
+
151
+ ```bash
152
+ harness check-deps # Pass
153
+ harness validate # Pass
154
+ git add cart.ts cart.test.ts
155
+ git commit -m "feat(cart): calculate total from item price and quantity"
156
+ ```
157
+
158
+ **Next cycle (RED):** Write a test for empty array input. Watch it fail (or pass — if it passes, the behavior is already handled). Continue.
159
+
160
+ ## Gates
161
+
162
+ These are hard stops. Violating any gate means the process has broken down.
163
+
164
+ - **Code before test = delete it.** If production code is written before a failing test exists, delete the production code and start the cycle correctly.
165
+ - **Must watch fail.** If you did not observe the test fail with the correct failure reason, the RED phase is incomplete. Do not proceed to GREEN.
166
+ - **Must watch pass.** If you did not observe all tests pass after writing production code, the GREEN phase is incomplete. Do not proceed to REFACTOR.
167
+ - **No skipping VALIDATE.** Every cycle must end with `harness check-deps` and `harness validate`. Skipping creates architectural debt that compounds.
168
+ - **No multi-behavior tests.** One test, one assertion, one behavior. Tests that assert multiple unrelated things must be split.
169
+ - **No "I'll write tests later."** There is no later. The test comes first or the code does not get written.
170
+
171
+ ## Escalation
172
+
173
+ - **After 3 failed attempts to make a test pass:** Stop coding. The design may be wrong. Re-examine the interface, the test assumptions, or the architecture. Consider whether the feature needs a different approach. Consult the plan or spec.
174
+ - **When a test cannot be written without complex mocking:** This is a design smell. The code under test has too many dependencies. Refactor the existing code to be more testable before proceeding, or reconsider the abstraction boundary.
175
+ - **When harness checks repeatedly fail:** The new code may have a legitimate need that the current architectural constraints do not allow. Escalate to the human to decide whether to update the constraints or change the approach.
176
+ - **When the cycle is taking more than 15 minutes:** The step is too large. Break the current behavior into smaller sub-behaviors and test each one separately.
177
+ - **When you are unsure what to test next:** Review the spec or plan. If no spec exists, use the harness-brainstorming skill to clarify requirements before writing more tests.
@@ -0,0 +1,48 @@
1
+ name: harness-tdd
2
+ version: "1.0.0"
3
+ description: Test-driven development integrated with harness validation
4
+ cognitive_mode: meticulous-implementer
5
+ triggers:
6
+ - manual
7
+ - on_new_feature
8
+ - on_bug_fix
9
+ platforms:
10
+ - claude-code
11
+ - gemini-cli
12
+ tools:
13
+ - Bash
14
+ - Read
15
+ - Write
16
+ - Edit
17
+ - Glob
18
+ - Grep
19
+ cli:
20
+ command: harness skill run harness-tdd
21
+ args:
22
+ - name: path
23
+ description: Project root path
24
+ required: false
25
+ mcp:
26
+ tool: run_skill
27
+ input:
28
+ skill: harness-tdd
29
+ path: string
30
+ type: rigid
31
+ phases:
32
+ - name: red
33
+ description: Write failing test
34
+ required: true
35
+ - name: green
36
+ description: Implement minimal code to pass
37
+ required: true
38
+ - name: refactor
39
+ description: Clean up while keeping tests green
40
+ required: false
41
+ - name: validate
42
+ description: Run harness checks
43
+ required: true
44
+ state:
45
+ persistent: false
46
+ files: []
47
+ depends_on:
48
+ - harness-verification