@harness-engineering/cli 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/bin/harness.js +1 -1
  2. package/dist/{chunk-IXT3KLVN.js → chunk-APYEWOCR.js} +355 -19
  3. package/dist/index.js +1 -1
  4. package/package.json +6 -4
  5. package/dist/agents/commands/claude-code/harness/add-component.md +0 -34
  6. package/dist/agents/commands/claude-code/harness/align-documentation.md +0 -33
  7. package/dist/agents/commands/claude-code/harness/architecture-advisor.md +0 -41
  8. package/dist/agents/commands/claude-code/harness/brainstorming.md +0 -42
  9. package/dist/agents/commands/claude-code/harness/check-mechanical-constraints.md +0 -32
  10. package/dist/agents/commands/claude-code/harness/cleanup-dead-code.md +0 -33
  11. package/dist/agents/commands/claude-code/harness/code-review.md +0 -33
  12. package/dist/agents/commands/claude-code/harness/debugging.md +0 -43
  13. package/dist/agents/commands/claude-code/harness/detect-doc-drift.md +0 -32
  14. package/dist/agents/commands/claude-code/harness/diagnostics.md +0 -43
  15. package/dist/agents/commands/claude-code/harness/enforce-architecture.md +0 -32
  16. package/dist/agents/commands/claude-code/harness/execution.md +0 -43
  17. package/dist/agents/commands/claude-code/harness/git-workflow.md +0 -32
  18. package/dist/agents/commands/claude-code/harness/initialize-project.md +0 -33
  19. package/dist/agents/commands/claude-code/harness/onboarding.md +0 -32
  20. package/dist/agents/commands/claude-code/harness/parallel-agents.md +0 -35
  21. package/dist/agents/commands/claude-code/harness/planning.md +0 -41
  22. package/dist/agents/commands/claude-code/harness/pre-commit-review.md +0 -38
  23. package/dist/agents/commands/claude-code/harness/refactoring.md +0 -35
  24. package/dist/agents/commands/claude-code/harness/skill-authoring.md +0 -35
  25. package/dist/agents/commands/claude-code/harness/state-management.md +0 -35
  26. package/dist/agents/commands/claude-code/harness/tdd.md +0 -42
  27. package/dist/agents/commands/claude-code/harness/validate-context-engineering.md +0 -32
  28. package/dist/agents/commands/claude-code/harness/verification.md +0 -38
  29. package/dist/agents/commands/gemini-cli/harness/add-component.toml +0 -240
  30. package/dist/agents/commands/gemini-cli/harness/align-documentation.toml +0 -238
  31. package/dist/agents/commands/gemini-cli/harness/architecture-advisor.toml +0 -469
  32. package/dist/agents/commands/gemini-cli/harness/brainstorming.toml +0 -326
  33. package/dist/agents/commands/gemini-cli/harness/check-mechanical-constraints.toml +0 -249
  34. package/dist/agents/commands/gemini-cli/harness/cleanup-dead-code.toml +0 -258
  35. package/dist/agents/commands/gemini-cli/harness/code-review.toml +0 -461
  36. package/dist/agents/commands/gemini-cli/harness/debugging.toml +0 -436
  37. package/dist/agents/commands/gemini-cli/harness/detect-doc-drift.toml +0 -215
  38. package/dist/agents/commands/gemini-cli/harness/diagnostics.toml +0 -401
  39. package/dist/agents/commands/gemini-cli/harness/enforce-architecture.toml +0 -222
  40. package/dist/agents/commands/gemini-cli/harness/execution.toml +0 -381
  41. package/dist/agents/commands/gemini-cli/harness/git-workflow.toml +0 -325
  42. package/dist/agents/commands/gemini-cli/harness/initialize-project.toml +0 -257
  43. package/dist/agents/commands/gemini-cli/harness/onboarding.toml +0 -316
  44. package/dist/agents/commands/gemini-cli/harness/parallel-agents.toml +0 -221
  45. package/dist/agents/commands/gemini-cli/harness/planning.toml +0 -405
  46. package/dist/agents/commands/gemini-cli/harness/pre-commit-review.toml +0 -294
  47. package/dist/agents/commands/gemini-cli/harness/refactoring.toml +0 -209
  48. package/dist/agents/commands/gemini-cli/harness/skill-authoring.toml +0 -350
  49. package/dist/agents/commands/gemini-cli/harness/state-management.toml +0 -354
  50. package/dist/agents/commands/gemini-cli/harness/tdd.toml +0 -247
  51. package/dist/agents/commands/gemini-cli/harness/validate-context-engineering.toml +0 -186
  52. package/dist/agents/commands/gemini-cli/harness/verification.toml +0 -334
@@ -1,436 +0,0 @@
1
- # Generated by harness generate-slash-commands. Do not edit.
2
- description = "Systematic debugging with harness validation and state tracking"
3
- prompt = """
4
- <context>
5
- Cognitive mode: diagnostic-investigator
6
- Type: rigid
7
- State: persistent (files: .harness/debug/)
8
- </context>
9
-
10
- <objective>
11
- Systematic debugging with harness validation and state tracking
12
-
13
- Phases:
14
- - investigate: Entropy analysis and root cause search
15
- - analyze: Pattern matching against codebase
16
- - hypothesize: Form and test single hypothesis (optional)
17
- - fix: TDD-style regression test and fix
18
- </objective>
19
-
20
- <execution_context>
21
- --- SKILL.md (agents/skills/claude-code/harness-debugging/SKILL.md) ---
22
- # Harness Debugging
23
-
24
- > 4-phase systematic debugging with entropy analysis and persistent sessions. Phase 1 before ANY fix. "It's probably X" is not a diagnosis.
25
-
26
- ## When to Use
27
-
28
- - When a test fails and the cause is not immediately obvious
29
- - When a feature works in one context but fails in another
30
- - When an error message does not clearly indicate the root cause
31
- - When `on_bug_fix` triggers fire
32
- - When a previous fix attempt did not resolve the issue
33
- - NOT for known issues with documented solutions (apply the solution directly)
34
- - NOT for typos, syntax errors, or other obvious fixes (just fix them)
35
- - NOT for feature development (use harness-tdd instead)
36
-
37
- ## Process
38
-
39
- ### Prerequisite: Start a Debug Session
40
-
41
- Before beginning, create a persistent debug session. This survives context resets and tracks state across multiple attempts.
42
-
43
- ```
44
- .harness/debug/active/<session-id>.md
45
- ```
46
-
47
- Session file format:
48
-
49
- ```markdown
50
- # Debug Session: <brief-description>
51
-
52
- Status: gathering
53
- Started: <timestamp>
54
- Error: <the error message or symptom>
55
-
56
- ## Investigation Log
57
-
58
- (append entries as you go)
59
-
60
- ## Hypotheses
61
-
62
- (track what you have tried)
63
-
64
- ## Resolution
65
-
66
- (filled in when resolved)
67
- ```
68
-
69
- **Status transitions:** `gathering` -> `investigating` -> `fixing` -> `verifying` -> `resolved`
70
-
71
- ---
72
-
73
- ### Phase 1: INVESTIGATE — Understand Before Acting
74
-
75
- **You must complete Phase 1 before writing ANY fix code. No exceptions.**
76
-
77
- #### Step 1: Run Entropy Analysis
78
-
79
- ```bash
80
- harness cleanup
81
- ```
82
-
83
- Review the output. Entropy analysis reveals:
84
-
85
- - Dead code and unused imports near the failure
86
- - Pattern violations that may be contributing
87
- - Documentation drift that may have caused incorrect usage
88
- - Dependency issues that could affect behavior
89
-
90
- Record relevant findings in the session log.
91
-
92
- #### Step 2: Read the Error Carefully
93
-
94
- Read the COMPLETE error message. Not just the first line — the entire stack trace, every warning, every note. Errors often contain the answer.
95
-
96
- Ask yourself:
97
-
98
- - What exactly failed? (Not "it broke" — what specific operation?)
99
- - Where did it fail? (File, line, function)
100
- - What was the input that caused the failure?
101
- - What was the expected behavior vs actual behavior?
102
-
103
- Record the answers in the session log.
104
-
105
- #### Step 3: Reproduce Consistently
106
-
107
- Run the failing scenario multiple times. Confirm it fails every time with the same error. If it is intermittent, record:
108
-
109
- - How often it fails (1 in 3? 1 in 10?)
110
- - Whether the failure mode changes
111
- - Environmental factors (timing, ordering, state)
112
-
113
- If you cannot reproduce the failure, you cannot debug it. Escalate.
114
-
115
- #### Step 4: Check Recent Changes
116
-
117
- ```bash
118
- git log --oneline -20
119
- git diff HEAD~5
120
- ```
121
-
122
- What changed recently? Many bugs are caused by the most recent change. Compare the failing state to the last known working state.
123
-
124
- #### Step 5: Trace Data Flow Backward
125
-
126
- Start at the error location and trace backward:
127
-
128
- 1. What function threw the error?
129
- 2. What called that function? With what arguments?
130
- 3. Where did those arguments come from?
131
- 4. Continue until you find where the actual value diverges from the expected value.
132
-
133
- Read each function in the call chain completely. Do not skim.
134
-
135
- Update the session status to `investigating`.
136
-
137
- ---
138
-
139
- ### Phase 2: ANALYZE — Find the Pattern
140
-
141
- #### Step 1: Find Working Examples
142
-
143
- Search the codebase for similar functionality that WORKS. There is almost always a working example of what you are trying to do.
144
-
145
- ```
146
- Look for:
147
- - Other calls to the same function/API that succeed
148
- - Similar features that work correctly
149
- - Test fixtures that exercise the same code path
150
- - Documentation or comments that describe expected behavior
151
- ```
152
-
153
- #### Step 2: Read Reference Implementations Completely
154
-
155
- When you find a working example, read it in its entirety. Do not cherry-pick lines. Understand:
156
-
157
- - How it sets up the context
158
- - What arguments it passes
159
- - How it handles errors
160
- - What it does differently from the failing code
161
-
162
- #### Step 3: Identify Differences
163
-
164
- Compare the working example to the failing code line by line. The bug is in the differences. Common categories:
165
-
166
- - **Missing setup:** Working code initializes something the failing code skips
167
- - **Wrong arguments:** Type mismatch, wrong order, missing optional parameter
168
- - **State dependency:** Working code runs after some prerequisite; failing code does not
169
- - **Environment:** Working code runs in a different context (different config, different permissions)
170
- - **Timing:** Working code awaits something the failing code does not
171
-
172
- Record all differences in the session log.
173
-
174
- ---
175
-
176
- ### Phase 3: HYPOTHESIZE — One Variable at a Time
177
-
178
- #### Step 1: Form a Single Falsifiable Hypothesis
179
-
180
- Based on your investigation and analysis, state a specific hypothesis:
181
-
182
- ```
183
- "The failure occurs because [specific cause].
184
- If this hypothesis is correct, then [observable prediction].
185
- I can test this by [specific action]."
186
- ```
187
-
188
- A good hypothesis is falsifiable — there is a concrete test that would disprove it. "Something is wrong with the configuration" is not a hypothesis. "The database connection string is missing the port number, causing connection timeout" is a hypothesis.
189
-
190
- #### Step 2: Test ONE Variable
191
-
192
- Change exactly ONE thing to test your hypothesis. If you change multiple things, you cannot determine which one had the effect.
193
-
194
- - Add a single log statement to check a value
195
- - Change one argument to match the working example
196
- - Add one missing setup step
197
-
198
- #### Step 3: Observe the Result
199
-
200
- Run the failing scenario. Did the behavior change?
201
-
202
- - **Hypothesis confirmed:** The change fixed it (or changed the error in the predicted way). Proceed to Phase 4.
203
- - **Hypothesis rejected:** Revert the change. Form a new hypothesis based on what you learned. The rejection itself is valuable data — record it.
204
-
205
- #### Step 4: Create Minimal Reproduction
206
-
207
- If the bug is in a complex system, extract a minimal reproduction:
208
-
209
- - Smallest possible code that exhibits the bug
210
- - Fewest dependencies
211
- - Simplest configuration
212
-
213
- This serves two purposes: it confirms your understanding of the root cause, and it becomes the basis for a regression test.
214
-
215
- Update the session status to `fixing`.
216
-
217
- ---
218
-
219
- ### Phase 4: FIX — Root Cause, Not Symptoms
220
-
221
- #### Step 1: Write the Regression Test
222
-
223
- Before writing the fix, write a test that:
224
-
225
- - Reproduces the exact failure scenario
226
- - Asserts the correct behavior
227
- - Currently FAILS (proving it catches the bug)
228
-
229
- This follows harness-tdd discipline. The fix is driven by a failing test.
230
-
231
- #### Step 2: Implement the Fix
232
-
233
- Write a SINGLE fix that addresses the ROOT CAUSE identified in Phase 3. Not a workaround. Not a symptom suppression. The root cause.
234
-
235
- Characteristics of a good fix:
236
-
237
- - Changes as little code as possible
238
- - Addresses why the bug happened, not just what the bug did
239
- - Does not introduce new complexity
240
- - Would be obvious to someone reading the code later
241
-
242
- Characteristics of a bad fix (revert immediately):
243
-
244
- - Adds a special case or `if` branch for the specific failing input
245
- - Wraps the failure in a try-catch that swallows the error
246
- - Adds a retry loop or delay to "work around" a timing issue
247
- - Changes a type to `any` or removes a type check
248
-
249
- #### Step 3: Verify the Fix
250
-
251
- 1. Run the regression test — must PASS
252
- 2. Run the full test suite — all tests must PASS
253
- 3. Run `harness validate` — must PASS
254
- 4. Run `harness check-deps` — must PASS
255
- 5. Manually verify the original failing scenario works
256
-
257
- #### Step 4: Verify the Test Catches the Bug
258
-
259
- Apply the regression test verification protocol:
260
-
261
- 1. Temporarily revert the fix
262
- 2. Run the regression test — must FAIL
263
- 3. Restore the fix
264
- 4. Run the regression test — must PASS
265
-
266
- If the test passes without the fix, the test does not catch the bug. Rewrite the test.
267
-
268
- #### Step 5: Close the Session
269
-
270
- Update the debug session:
271
-
272
- ```markdown
273
- Status: resolved
274
- Resolved: <timestamp>
275
-
276
- ## Resolution
277
-
278
- Root cause: <what actually caused the bug>
279
- Fix: <what was changed and why>
280
- Regression test: <path to test file>
281
- Learnings: <what to remember for next time>
282
- ```
283
-
284
- Move the session file:
285
-
286
- ```bash
287
- mv .harness/debug/active/<session-id>.md .harness/debug/resolved/
288
- ```
289
-
290
- Append learnings to `.harness/learnings.md` if the bug revealed a pattern that should be remembered.
291
-
292
- Update the session status to `resolved`.
293
-
294
- ## Harness Integration
295
-
296
- - **`harness cleanup`** — Run in Phase 1 INVESTIGATE for entropy analysis. Reveals dead code, pattern violations, and drift near the failure site.
297
- - **`harness validate`** — Run in Phase 4 FIX (Step 3: Verify the Fix) after applying the fix. Confirms the fix does not break project-wide constraints.
298
- - **`harness check-deps`** — Run in Phase 4 FIX (Step 3: Verify the Fix). Confirms the fix does not introduce dependency violations.
299
- - **`harness state learn`** — Run after resolution to capture learnings for future sessions.
300
- - **Debug session files** — Stored in `.harness/debug/active/` (in progress) and `.harness/debug/resolved/` (completed). These persist across context resets.
301
-
302
- ## Success Criteria
303
-
304
- - Phase 1 INVESTIGATE was completed before any fix was attempted
305
- - Root cause was identified and documented (not just the symptom)
306
- - A regression test exists that fails without the fix and passes with it
307
- - The fix addresses the root cause, not a symptom
308
- - All harness checks pass after the fix
309
- - Debug session file is complete with investigation log, hypotheses, and resolution
310
- - Learnings were captured for future reference
311
-
312
- ## Examples
313
-
314
- ### Example: API Endpoint Returns 500 Instead of 400
315
-
316
- **Phase 1 — INVESTIGATE:**
317
-
318
- ```
319
- harness cleanup: No entropy issues near api/routes/users.ts
320
- Error: "Cannot read properties of undefined (reading 'email')"
321
- Stack trace points to: src/services/user-service.ts:34
322
- Reproduces consistently with POST /users and empty body {}
323
- Recent changes: Added input validation middleware (2 commits ago)
324
- Data flow: request.body -> validate() -> createUser(body.email)
325
- ```
326
-
327
- **Phase 2 — ANALYZE:**
328
-
329
- ```
330
- Working example: POST /orders handles empty body correctly
331
- Difference: /orders validates BEFORE destructuring; /users destructures BEFORE validating
332
- The validation middleware runs but its result is not checked
333
- ```
334
-
335
- **Phase 3 — HYPOTHESIZE:**
336
-
337
- ```
338
- Hypothesis: The validation middleware sets req.validationErrors but the route
339
- handler does not check it before accessing req.body.email.
340
- Test: Add a log before line 34 to check req.validationErrors.
341
- Result: Confirmed — validationErrors contains "email is required" but handler proceeds.
342
- ```
343
-
344
- **Phase 4 — FIX:**
345
-
346
- ```typescript
347
- // Regression test
348
- it('returns 400 when request body is empty', async () => {
349
- const res = await request(app).post('/users').send({});
350
- expect(res.status).toBe(400);
351
- expect(res.body.errors).toContain('email is required');
352
- });
353
-
354
- // Fix: Check validation result before processing
355
- if (req.validationErrors?.length) {
356
- return res.status(400).json({ errors: req.validationErrors });
357
- }
358
- ```
359
-
360
- Revert test: Commenting out the validation check causes the test to fail with 500. Confirmed.
361
-
362
- ## Gates
363
-
364
- - **Phase 1 before ANY fix.** You must complete investigation before writing fix code. Skipping investigation leads to symptom-chasing, which leads to more bugs.
365
- - **One variable at a time.** Changing multiple things simultaneously is forbidden. If you changed two things and the bug is fixed, you do not know which change fixed it (or if the other change introduced a new bug).
366
- - **After 3 failed fix attempts, question the architecture.** If three consecutive hypotheses were wrong or three fixes did not resolve the issue, the problem is likely not where you think it is. Step back. Re-read the investigation log. Consider that the bug might be in a different layer entirely.
367
- - **Never "quick fix now, investigate later."** There is no later. The quick fix becomes permanent. The investigation never happens. The root cause festers. Fix it right or do not fix it.
368
- - **Regression test must fail without fix.** A test that passes whether or not the fix is present is not a regression test. It provides no protection.
369
-
370
- ## Escalation
371
-
372
- - **Red flag: "It's probably X, let me fix that."** STOP. This is guessing, not debugging. You skipped Phase 1. Go back to investigation.
373
- - **Red flag: "One more fix attempt" after 2 failed attempts.** STOP. You are about to hit the 3-attempt wall. Step back and question your mental model of the system. Re-read the code from scratch. Consider that your understanding of how the system works may be wrong.
374
- - **Cannot reproduce the bug:** If you cannot make the bug happen consistently, you cannot debug it scientifically. Document exactly what you tried, what environment you tested in, and escalate. Do not guess at a fix for a bug you cannot reproduce.
375
- - **Bug is in a dependency you do not control:** Document the bug, write a test that demonstrates it, and escalate. If a workaround is needed, clearly mark it as a workaround with a reference to the upstream issue.
376
- - **Investigation reveals a systemic issue:** If the bug is a symptom of a larger architectural problem (e.g., widespread race conditions, fundamental type unsafety), escalate to the human. A local fix will not solve a systemic problem.
377
- - **Debug session exceeds 60 minutes without progress:** Something is wrong with the approach. Stop. Summarize what you know in the session file. Take a break (context reset). Return with fresh eyes and re-read the session file from the beginning.
378
-
379
-
380
- --- skill.yaml (agents/skills/claude-code/harness-debugging/skill.yaml) ---
381
- name: harness-debugging
382
- version: "1.0.0"
383
- description: Systematic debugging with harness validation and state tracking
384
- cognitive_mode: diagnostic-investigator
385
- triggers:
386
- - manual
387
- - on_bug_fix
388
- platforms:
389
- - claude-code
390
- - gemini-cli
391
- tools:
392
- - Bash
393
- - Read
394
- - Write
395
- - Edit
396
- - Glob
397
- - Grep
398
- cli:
399
- command: harness skill run harness-debugging
400
- args:
401
- - name: path
402
- description: Project root path
403
- required: false
404
- mcp:
405
- tool: run_skill
406
- input:
407
- skill: harness-debugging
408
- path: string
409
- type: rigid
410
- phases:
411
- - name: investigate
412
- description: Entropy analysis and root cause search
413
- required: true
414
- - name: analyze
415
- description: Pattern matching against codebase
416
- required: true
417
- - name: hypothesize
418
- description: Form and test single hypothesis
419
- required: false
420
- - name: fix
421
- description: TDD-style regression test and fix
422
- required: true
423
- state:
424
- persistent: true
425
- files:
426
- - .harness/debug/
427
- depends_on: []
428
-
429
- </execution_context>
430
-
431
- <process>
432
- 1. Try: invoke mcp__harness__run_skill with skill: "harness-debugging"
433
- 2. If MCP unavailable: follow the SKILL.md workflow provided above directly
434
- 3. Pass through any arguments provided by the user
435
- </process>
436
- """
@@ -1,215 +0,0 @@
1
- # Generated by harness generate-slash-commands. Do not edit.
2
- description = "Detect documentation that has drifted from code"
3
- prompt = """
4
- <context>
5
- Cognitive mode: diagnostic-investigator
6
- Type: flexible
7
- </context>
8
-
9
- <objective>
10
- Detect documentation that has drifted from code
11
- </objective>
12
-
13
- <execution_context>
14
- --- SKILL.md (agents/skills/claude-code/detect-doc-drift/SKILL.md) ---
15
- # Detect Doc Drift
16
-
17
- > Detect documentation that has drifted from code. Find stale docs before they mislead developers and AI agents.
18
-
19
- ## When to Use
20
-
21
- - After completing a feature, bug fix, or refactoring
22
- - During code review — check if the changed files have associated docs that need updating
23
- - As a periodic hygiene check (weekly or per-sprint)
24
- - When `on_post_feature` or `on_doc_check` triggers fire
25
- - When onboarding reveals confusion caused by outdated documentation
26
- - NOT during active development — wait until the code is stable before checking docs
27
- - NOT for writing new documentation from scratch (use align-documentation instead)
28
-
29
- ## Process
30
-
31
- ### Phase 1: Scan — Run Drift Detection
32
-
33
- 1. **Run `harness check-docs`** to identify all documentation issues. Capture the full output.
34
-
35
- 2. **Run `harness cleanup --type drift`** for a deeper analysis that cross-references code changes against documentation references.
36
-
37
- 3. **Optionally, run `git diff` against a baseline** (last release, last sprint, etc.) to identify which code files changed. This helps prioritize — docs for recently changed files are most likely to be drifted.
38
-
39
- ### Phase 2: Identify — Classify Drift Types
40
-
41
- Categorize each finding into one of these drift types:
42
-
43
- **Renamed but not updated:**
44
- A function, class, variable, or file was renamed in code, but documentation still references the old name. This is the most common type of drift.
45
-
46
- - Example: `calculateShipping()` was renamed to `computeShippingCost()`, but AGENTS.md and three inline comments still say `calculateShipping`.
47
-
48
- **New code with no docs:**
49
- A new module, function, or API was added but no documentation entry exists. This is not "drift" in the strict sense but a gap that grows into drift over time.
50
-
51
- - Example: `src/services/notification-service.ts` was added two sprints ago. It has 5 public exports. No AGENTS.md section, no doc page, no inline doc comments beyond basic JSDoc.
52
-
53
- **Deleted code still referenced:**
54
- A file, function, or feature was removed, but documentation still describes it as if it exists. This actively misleads readers.
55
-
56
- - Example: `src/utils/legacy-parser.ts` was deleted. The architecture doc still includes it in the data flow diagram. AGENTS.md still warns about its quirks.
57
-
58
- **Changed behavior not reflected:**
59
- A function's signature, return type, error handling, or side effects changed, but the documentation describes the old behavior.
60
-
61
- - Example: `createUser()` now throws `ValidationError` instead of returning `null` on invalid input. The API docs still say "returns null if validation fails."
62
-
63
- **Moved code with stale paths:**
64
- A file or module was moved to a different directory, but documentation references the old path.
65
-
66
- - Example: `src/helpers/format.ts` was moved to `src/utils/format.ts`. Three doc files and AGENTS.md reference the old path.
67
-
68
- ### Phase 3: Prioritize — Rank by Impact
69
-
70
- Not all drift is equally harmful. Prioritize fixes:
71
-
72
- **Critical (fix immediately):**
73
-
74
- - Public API documentation that describes wrong behavior — external consumers will write broken code
75
- - AGENTS.md sections that reference deleted files — AI agents will hallucinate about non-existent code
76
- - README getting-started guides with wrong commands — new developers cannot onboard
77
-
78
- **High (fix before next release):**
79
-
80
- - Internal API docs with wrong signatures — developers waste time debugging
81
- - Architecture docs with stale diagrams — wrong mental models lead to wrong decisions
82
- - Frequently accessed docs with broken links — high-traffic pages with dead ends
83
-
84
- **Medium (fix in next sprint):**
85
-
86
- - Internal docs for stable code — low change rate means low confusion rate
87
- - Comments in rarely modified files — few people read them
88
- - Edge case documentation — affects few users
89
-
90
- **Low (fix when convenient):**
91
-
92
- - Stylistic inconsistencies in docs (capitalization, formatting)
93
- - Redundant documentation that says the same thing in multiple places
94
- - Historical notes that are outdated but clearly marked as historical
95
-
96
- ### Phase 4: Report — Generate Actionable Output
97
-
98
- For each drift finding, provide:
99
-
100
- 1. **File and line number** of the drifted documentation
101
- 2. **The specific stale content** (quote the exact text that is wrong)
102
- 3. **What changed in code** (the commit, file, and nature of the change)
103
- 4. **Suggested fix** (the replacement text or action needed)
104
- 5. **Priority tier** (Critical / High / Medium / Low)
105
-
106
- Group findings by documentation file so that fixes can be applied file-by-file.
107
-
108
- ## Harness Integration
109
-
110
- - **`harness check-docs`** — Primary tool. Scans all documentation files for broken references, stale paths, and missing entries.
111
- - **`harness cleanup --type drift`** — Deeper analysis that cross-references git history with documentation references to detect semantic drift.
112
- - **`harness cleanup --type drift --json`** — Machine-readable output for automated pipelines.
113
- - **`harness fix-drift`** — Auto-fix simple drift issues after review (use align-documentation skill for applying fixes).
114
-
115
- ## Success Criteria
116
-
117
- - `harness check-docs` reports zero errors
118
- - All file paths referenced in documentation resolve to existing files
119
- - All function/class names referenced in documentation match current code
120
- - All API documentation matches current function signatures and behavior
121
- - No documentation references deleted files, functions, or features
122
- - Drift findings are prioritized and assigned to the appropriate fix cycle
123
-
124
- ## Examples
125
-
126
- ### Example: Renamed function detected
127
-
128
- **Drift finding:**
129
-
130
- ```
131
- DRIFT: Renamed reference detected
132
- Doc: AGENTS.md:47
133
- Stale text: "Use `calculateShipping()` to compute shipping costs"
134
- Code change: calculateShipping renamed to computeShippingCost (commit a1b2c3d)
135
- File: src/services/shipping.ts:24
136
- Priority: High
137
- Suggested fix: Replace `calculateShipping()` with `computeShippingCost()`
138
- ```
139
-
140
- ### Example: Deleted file still documented
141
-
142
- **Drift finding:**
143
-
144
- ```
145
- DRIFT: Reference to deleted file
146
- Doc: docs/architecture.md:112
147
- Stale text: "The legacy parser (src/utils/legacy-parser.ts) handles XML input"
148
- Code change: File deleted in commit d4e5f6a, functionality merged into unified-parser.ts
149
- Priority: Critical
150
- Suggested fix: Update section to reference unified-parser.ts, remove legacy parser description
151
- ```
152
-
153
- ### Example: New module with no documentation
154
-
155
- **Drift finding:**
156
-
157
- ```
158
- GAP: Undocumented module
159
- File: src/services/notification-service.ts
160
- Created: commit b7c8d9e (3 weeks ago)
161
- Public exports: NotificationService, NotificationType, sendNotification
162
- Imported by: 4 modules
163
- Documentation references: 0
164
- Priority: High
165
- Suggested fix: Add AGENTS.md section describing purpose, constraints, and public API
166
- ```
167
-
168
- ## Escalation
169
-
170
- - **When drift is extensive (>30 findings):** Do not try to fix everything. Focus on Critical and High priority items. Create a tracking issue for the remaining items and schedule them across sprints.
171
- - **When you cannot determine the correct replacement text:** The code change may have been complex. Check the commit message and PR description for context. If still unclear, flag the finding for the original author to resolve.
172
- - **When documentation is in a format you cannot parse:** Some docs may be in wiki pages, Confluence, or other external systems. Report the finding with a link and flag it for manual review.
173
- - **When drift reveals a deeper problem (code changed but nobody knew):** This suggests a process gap. Recommend adding `harness check-docs` to the CI pipeline or pre-merge hooks to catch drift at the source.
174
-
175
-
176
- --- skill.yaml (agents/skills/claude-code/detect-doc-drift/skill.yaml) ---
177
- name: detect-doc-drift
178
- version: "1.0.0"
179
- description: Detect documentation that has drifted from code
180
- cognitive_mode: diagnostic-investigator
181
- triggers:
182
- - manual
183
- - on_pr
184
- platforms:
185
- - claude-code
186
- - gemini-cli
187
- tools:
188
- - Bash
189
- - Read
190
- - Glob
191
- cli:
192
- command: harness skill run detect-doc-drift
193
- args:
194
- - name: path
195
- description: Project root path
196
- required: false
197
- mcp:
198
- tool: run_skill
199
- input:
200
- skill: detect-doc-drift
201
- path: string
202
- type: flexible
203
- state:
204
- persistent: false
205
- files: []
206
- depends_on: []
207
-
208
- </execution_context>
209
-
210
- <process>
211
- 1. Try: invoke mcp__harness__run_skill with skill: "detect-doc-drift"
212
- 2. If MCP unavailable: follow the SKILL.md workflow provided above directly
213
- 3. Pass through any arguments provided by the user
214
- </process>
215
- """