@harness-engineering/cli 1.4.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/agents/personas/architecture-enforcer.yaml +1 -0
  2. package/dist/agents/personas/code-reviewer.yaml +43 -0
  3. package/dist/agents/personas/codebase-health-analyst.yaml +32 -0
  4. package/dist/agents/personas/documentation-maintainer.yaml +2 -0
  5. package/dist/agents/personas/entropy-cleaner.yaml +3 -0
  6. package/dist/agents/personas/graph-maintainer.yaml +27 -0
  7. package/dist/agents/personas/parallel-coordinator.yaml +29 -0
  8. package/dist/agents/personas/performance-guardian.yaml +26 -0
  9. package/dist/agents/personas/security-reviewer.yaml +35 -0
  10. package/dist/agents/personas/task-executor.yaml +41 -0
  11. package/dist/agents/skills/README.md +8 -0
  12. package/dist/agents/skills/claude-code/add-harness-component/SKILL.md +10 -0
  13. package/dist/agents/skills/claude-code/align-documentation/SKILL.md +19 -0
  14. package/dist/agents/skills/claude-code/cleanup-dead-code/SKILL.md +19 -0
  15. package/dist/agents/skills/claude-code/detect-doc-drift/SKILL.md +8 -0
  16. package/dist/agents/skills/claude-code/enforce-architecture/SKILL.md +9 -0
  17. package/dist/agents/skills/claude-code/harness-architecture-advisor/SKILL.md +9 -0
  18. package/dist/agents/skills/claude-code/harness-autopilot/SKILL.md +494 -0
  19. package/dist/agents/skills/claude-code/harness-autopilot/skill.yaml +52 -0
  20. package/dist/agents/skills/claude-code/harness-code-review/SKILL.md +25 -0
  21. package/dist/agents/skills/claude-code/harness-debugging/SKILL.md +10 -0
  22. package/dist/agents/skills/claude-code/harness-dependency-health/SKILL.md +150 -0
  23. package/dist/agents/skills/claude-code/harness-dependency-health/skill.yaml +41 -0
  24. package/dist/agents/skills/claude-code/harness-execution/SKILL.md +19 -0
  25. package/dist/agents/skills/claude-code/harness-hotspot-detector/SKILL.md +135 -0
  26. package/dist/agents/skills/claude-code/harness-hotspot-detector/skill.yaml +44 -0
  27. package/dist/agents/skills/claude-code/harness-impact-analysis/SKILL.md +139 -0
  28. package/dist/agents/skills/claude-code/harness-impact-analysis/skill.yaml +44 -0
  29. package/dist/agents/skills/claude-code/harness-integrity/SKILL.md +20 -6
  30. package/dist/agents/skills/claude-code/harness-knowledge-mapper/SKILL.md +154 -0
  31. package/dist/agents/skills/claude-code/harness-knowledge-mapper/skill.yaml +49 -0
  32. package/dist/agents/skills/claude-code/harness-onboarding/SKILL.md +10 -0
  33. package/dist/agents/skills/claude-code/harness-parallel-agents/SKILL.md +9 -0
  34. package/dist/agents/skills/claude-code/harness-perf/SKILL.md +231 -0
  35. package/dist/agents/skills/claude-code/harness-perf/skill.yaml +47 -0
  36. package/dist/agents/skills/claude-code/harness-perf-tdd/SKILL.md +236 -0
  37. package/dist/agents/skills/claude-code/harness-perf-tdd/skill.yaml +47 -0
  38. package/dist/agents/skills/claude-code/harness-planning/SKILL.md +9 -0
  39. package/dist/agents/skills/claude-code/harness-pre-commit-review/SKILL.md +33 -2
  40. package/dist/agents/skills/claude-code/harness-refactoring/SKILL.md +19 -0
  41. package/dist/agents/skills/claude-code/harness-release-readiness/SKILL.md +657 -0
  42. package/dist/agents/skills/claude-code/harness-release-readiness/skill.yaml +57 -0
  43. package/dist/agents/skills/claude-code/harness-security-review/SKILL.md +206 -0
  44. package/dist/agents/skills/claude-code/harness-security-review/skill.yaml +50 -0
  45. package/dist/agents/skills/claude-code/harness-security-scan/SKILL.md +102 -0
  46. package/dist/agents/skills/claude-code/harness-security-scan/skill.yaml +41 -0
  47. package/dist/agents/skills/claude-code/harness-state-management/SKILL.md +22 -8
  48. package/dist/agents/skills/claude-code/harness-tdd/SKILL.md +10 -0
  49. package/dist/agents/skills/claude-code/harness-test-advisor/SKILL.md +131 -0
  50. package/dist/agents/skills/claude-code/harness-test-advisor/skill.yaml +44 -0
  51. package/dist/agents/skills/claude-code/initialize-harness-project/SKILL.md +10 -0
  52. package/dist/agents/skills/claude-code/validate-context-engineering/SKILL.md +9 -0
  53. package/dist/agents/skills/gemini-cli/harness-autopilot/SKILL.md +494 -0
  54. package/dist/agents/skills/gemini-cli/harness-autopilot/skill.yaml +52 -0
  55. package/dist/agents/skills/gemini-cli/harness-dependency-health/SKILL.md +150 -0
  56. package/dist/agents/skills/gemini-cli/harness-dependency-health/skill.yaml +41 -0
  57. package/dist/agents/skills/gemini-cli/harness-hotspot-detector/SKILL.md +135 -0
  58. package/dist/agents/skills/gemini-cli/harness-hotspot-detector/skill.yaml +44 -0
  59. package/dist/agents/skills/gemini-cli/harness-impact-analysis/SKILL.md +139 -0
  60. package/dist/agents/skills/gemini-cli/harness-impact-analysis/skill.yaml +44 -0
  61. package/dist/agents/skills/gemini-cli/harness-knowledge-mapper/SKILL.md +154 -0
  62. package/dist/agents/skills/gemini-cli/harness-knowledge-mapper/skill.yaml +49 -0
  63. package/dist/agents/skills/gemini-cli/harness-perf/SKILL.md +231 -0
  64. package/dist/agents/skills/gemini-cli/harness-perf/skill.yaml +47 -0
  65. package/dist/agents/skills/gemini-cli/harness-perf-tdd/SKILL.md +236 -0
  66. package/dist/agents/skills/gemini-cli/harness-perf-tdd/skill.yaml +47 -0
  67. package/dist/agents/skills/gemini-cli/harness-release-readiness/SKILL.md +657 -0
  68. package/dist/agents/skills/gemini-cli/harness-release-readiness/skill.yaml +57 -0
  69. package/dist/agents/skills/gemini-cli/harness-security-review/skill.yaml +50 -0
  70. package/dist/agents/skills/gemini-cli/harness-security-scan/SKILL.md +102 -0
  71. package/dist/agents/skills/gemini-cli/harness-security-scan/skill.yaml +41 -0
  72. package/dist/agents/skills/gemini-cli/harness-test-advisor/SKILL.md +131 -0
  73. package/dist/agents/skills/gemini-cli/harness-test-advisor/skill.yaml +44 -0
  74. package/dist/agents/skills/tests/platform-parity.test.ts +131 -0
  75. package/dist/agents/skills/tests/schema.ts +2 -0
  76. package/dist/bin/harness.js +2 -2
  77. package/dist/{chunk-EFZOLZFB.js → chunk-ACMDUQJG.js} +4 -2
  78. package/dist/{chunk-C3J2HW4Y.js → chunk-O6NEKDYP.js} +2002 -487
  79. package/dist/{create-skill-4GKJZB5R.js → create-skill-NZDLMMR6.js} +1 -1
  80. package/dist/index.d.ts +265 -143
  81. package/dist/index.js +30 -4
  82. package/package.json +3 -2
@@ -0,0 +1,236 @@
1
+ # Harness Perf TDD
2
+
3
+ > Red-Green-Refactor with performance assertions. Every feature gets a correctness test AND a benchmark. No optimization without measurement.
4
+
5
+ ## When to Use
6
+
7
+ - Implementing performance-critical features
8
+ - When the spec includes performance requirements (e.g., "must respond in < 100ms")
9
+ - When modifying `@perf-critical` annotated code
10
+ - When adding hot-path logic (parsers, serializers, query resolvers, middleware)
11
+ - NOT for non-performance-sensitive code (use harness-tdd instead)
12
+ - NOT for refactoring existing code that already has benchmarks (use harness-refactoring + harness-perf)
13
+
14
+ ## Process
15
+
16
+ ### Iron Law
17
+
18
+ **No production code exists without both a failing test AND a failing benchmark that demanded its creation.**
19
+
20
+ If you find yourself writing production code before both the test and the benchmark exist, STOP. Write the test. Write the benchmark. Then implement.
21
+
22
+ ---
23
+
24
+ ### Phase 1: RED — Write Failing Test + Benchmark
25
+
26
+ 1. **Write the correctness test** following the same process as harness-tdd Phase 1 (RED):
27
+ - Identify the smallest behavior to test
28
+ - Write ONE minimal test with a clear assertion
29
+ - Follow the project's test conventions
30
+
31
+ 2. **Write a `.bench.ts` benchmark file** alongside the test file:
32
+ - Co-locate with source: `handler.ts` -> `handler.bench.ts`
33
+ - Use Vitest bench syntax for benchmark definitions
34
+ - Set a performance assertion if the spec includes one
35
+
36
+ ```typescript
37
+ import { bench, describe } from 'vitest';
38
+ import { processData } from './handler';
39
+
40
+ describe('processData benchmarks', () => {
41
+ bench('processData with small input', () => {
42
+ processData(smallInput);
43
+ });
44
+
45
+ bench('processData with large input', () => {
46
+ processData(largeInput);
47
+ });
48
+ });
49
+ ```
50
+
51
+ 3. **Run the test** — observe failure. The function is not implemented yet, so the test should fail with "not defined" or "not a function."
52
+
53
+ 4. **Run the benchmark** — observe failure or no baseline. This establishes that the benchmark exists and will track performance once the implementation lands.
54
+
55
+ ---
56
+
57
+ ### Phase 2: GREEN — Pass Test and Benchmark
58
+
59
+ 1. **Write the minimum implementation** to make the correctness test pass. Do not optimize yet. The goal is correctness first.
60
+
61
+ 2. **Run the test** — observe pass. If it fails, fix the implementation until it passes.
62
+
63
+ 3. **Run the benchmark** — capture initial results. This is the first measurement. Note:
64
+ - If a performance assertion exists in the spec, verify it passes
65
+ - If no assertion exists, record the result as a baseline reference
66
+ - Do not optimize at this stage unless the assertion fails
67
+
68
+ 4. **If the performance assertion fails,** decide which of these two cases applies and act accordingly:
69
+ - The implementation approach is fundamentally wrong (e.g., O(n^2) when O(n) is needed) — revise the algorithm
70
+ - The assertion is too aggressive for a first pass — note it and defer to REFACTOR phase
71
+
72
+ ---
73
+
74
+ ### Phase 3: REFACTOR — Optimize While Green
75
+
76
+ This phase is optional unless a failing performance assertion was deferred here from GREEN. Enter it when:
77
+
78
+ - The benchmark shows room for improvement against the performance requirement
79
+ - Profiling reveals an obvious bottleneck
80
+ - The code can be simplified while maintaining or improving performance
81
+
82
+ 1. **Profile the implementation** if the benchmark result is far from the requirement. Use the benchmark output to identify the bottleneck.
83
+
84
+ 2. **Refactor for performance** — consider:
85
+ - Algorithm improvements (sort, search, data structure choice)
86
+ - Caching or memoization for repeated computations
87
+ - Reducing allocations (object pooling, buffer reuse)
88
+ - Eliminating unnecessary work (early returns, lazy evaluation)
89
+
90
+ 3. **After each change,** run both checks:
91
+ - **Test:** Still passing? If not, the refactor broke correctness. Revert.
92
+ - **Benchmark:** Improved? If not, the refactor was not effective. Consider reverting.
93
+
94
+ 4. **Stop when** the benchmark meets the performance requirement, or when further optimization yields diminishing returns (< 1% improvement per change).
95
+
96
+ 5. **Do not gold-plate.** If the requirement is "< 100ms" and you are at 40ms, stop. Move on.
97
+
98
+ ---
99
+
100
+ ### Phase 4: VALIDATE — Harness Checks
101
+
102
+ 1. **Run `harness check-perf`** to verify no Tier 1 or Tier 2 violations were introduced by the implementation:
103
+ - Cyclomatic complexity within thresholds
104
+ - Coupling metrics acceptable
105
+ - No benchmark regressions in other modules
106
+
107
+ 2. **Run `harness validate`** to verify overall project health:
108
+ - All tests pass
109
+ - Linter clean
110
+ - Type checks pass
111
+
112
+ 3. **Update baselines** if this is a new benchmark:
113
+
114
+ ```bash
115
+ harness perf baselines update
116
+ ```
117
+
118
+ This persists the current benchmark results so future runs can detect regressions.
119
+
120
+ 4. **Commit with a descriptive message** that mentions both the feature and its performance characteristics:
121
+ ```
122
+ feat(parser): add streaming JSON parser (<50ms for 1MB payloads)
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Benchmark File Convention
128
+
129
+ Benchmark files are co-located with their source files, using the `.bench.ts` extension:
130
+
131
+ | Source File | Benchmark File |
132
+ | ----------------------------- | ----------------------------------- |
133
+ | `src/parser/handler.ts` | `src/parser/handler.bench.ts` |
134
+ | `src/api/resolver.ts` | `src/api/resolver.bench.ts` |
135
+ | `packages/core/src/engine.ts` | `packages/core/src/engine.bench.ts` |
136
+
137
+ Each benchmark file should:
138
+
139
+ - Import only from the module under test (the benchmark framework itself, e.g. `vitest`, is the one exception)
140
+ - Define benchmarks in a `describe` block named after the module
141
+ - Include both small-input and large-input cases when applicable
142
+ - Use realistic data (not empty objects or trivial inputs)
143
+
144
+ ---
145
+
146
+ ## Harness Integration
147
+
148
+ - **`harness check-perf`** — Run after implementation to check for violations
149
+ - **`harness perf bench`** — Run benchmarks in isolation
150
+ - **`harness perf baselines update`** — Persist benchmark results as new baselines
151
+ - **`harness validate`** — Full project health check
152
+ - **`harness perf critical-paths`** — View critical path set to understand which benchmarks have stricter thresholds
153
+
154
+ ## Success Criteria
155
+
156
+ - Every new function has both a test file (`.test.ts`) and a bench file (`.bench.ts`)
157
+ - Benchmarks run without errors
158
+ - No Tier 1 performance violations after implementation
159
+ - Baselines are updated for new benchmarks
160
+ - Commit message includes performance context when relevant
161
+
162
+ ## Examples
163
+
164
+ ### Example: Implementing a Performance-Critical Parser
165
+
166
+ **Phase 1: RED**
167
+
168
+ ```typescript
169
+ // src/parser/json-stream.test.ts
170
+ it('parses a 1MB JSON payload correctly', () => {
171
+ const result = parseStream(largeMbPayload);
172
+ expect(result).toEqual(expectedOutput);
173
+ });
174
+
175
+ // src/parser/json-stream.bench.ts
176
+ bench('parseStream 1MB', () => {
177
+ parseStream(largeMbPayload);
178
+ });
179
+ ```
180
+
181
+ Run test: FAIL (parseStream not defined). Run benchmark: FAIL (no implementation).
182
+
183
+ **Phase 2: GREEN**
184
+
185
+ ```typescript
186
+ // src/parser/json-stream.ts
187
+ export function parseStream(input: string): ParsedResult {
188
+ return JSON.parse(input); // simplest correct implementation
189
+ }
190
+ ```
191
+
192
+ Run test: PASS. Run benchmark: 38ms average (meets <50ms requirement).
193
+
194
+ **Phase 3: REFACTOR** — skipped (38ms already meets 50ms target).
195
+
196
+ **Phase 4: VALIDATE**
197
+
198
+ ```
199
+ harness check-perf — no violations
200
+ harness validate — passes
201
+ harness perf baselines update — baseline saved
202
+ git commit -m "feat(parser): add streaming JSON parser (<50ms for 1MB payloads)"
203
+ ```
204
+
205
+ ### Example: Optimizing an Existing Hot Path
206
+
207
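+ **Phase 1: RED** and **Phase 2: GREEN** — already complete: the test and benchmark exist from the initial implementation and the test passes, but the benchmark misses the requirement, so work resumes at REFACTOR.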
208
+
209
+ **Phase 3: REFACTOR**
210
+
211
+ ```
212
+ Before: resolveImports 12ms (requirement: <5ms)
213
+ Change: switch from recursive descent to iterative with stack
214
+ After: resolveImports 3.8ms
215
+ Test: still passing
216
+ ```
217
+
218
+ **Phase 4: VALIDATE**
219
+
220
+ ```
221
+ harness check-perf — complexity reduced from 12 to 8 (improvement)
222
+ harness perf baselines update — new baseline saved
223
+ ```
224
+
225
+ ## Gates
226
+
227
+ - **No code before test AND benchmark.** Both must exist before implementation begins.
228
+ - **No optimization without measurement.** Run the benchmark before and after refactoring. Gut feelings are not measurements.
229
+ - **No skipping VALIDATE.** `harness check-perf` and `harness validate` must pass after every cycle.
230
+ - **No committing without updated baselines.** New benchmarks must have baselines persisted.
231
+
232
+ ## Escalation
233
+
234
+ - **When the performance requirement cannot be met:** Report the best achieved result and propose either relaxing the requirement or redesigning the approach. Include benchmark data.
235
+ - **When benchmarks are flaky:** Increase iteration count, add warmup, or isolate the benchmark from I/O. Report the variance so the team can decide on an acceptable noise margin.
236
+ - **When the test and benchmark have conflicting needs:** Correctness always wins. If a correct implementation cannot meet the performance requirement, escalate to the team for a design decision.
@@ -0,0 +1,47 @@
1
+ name: harness-perf-tdd
2
+ version: "1.0.0"
3
+ description: Performance-aware TDD with benchmark assertions in the red-green-refactor cycle
4
+ cognitive_mode: meticulous-implementer
5
+ triggers:
6
+ - manual
7
+ platforms:
8
+ - claude-code
9
+ - gemini-cli
10
+ tools:
11
+ - Bash
12
+ - Read
13
+ - Write
14
+ - Edit
15
+ - Glob
16
+ - Grep
17
+ cli:
18
+ command: harness skill run harness-perf-tdd
19
+ args:
20
+ - name: path
21
+ description: Project root path
22
+ required: false
23
+ mcp:
24
+ tool: run_skill
25
+ input:
26
+ skill: harness-perf-tdd
27
+ path: string
28
+ type: rigid
29
+ phases:
30
+ - name: red
31
+ description: Write failing test and benchmark assertion
32
+ required: true
33
+ - name: green
34
+ description: Implement to pass test and benchmark
35
+ required: true
36
+ - name: refactor
37
+ description: Optimize while keeping both green
38
+ required: false
39
+ - name: validate
40
+ description: Run harness check-perf and harness validate
41
+ required: true
42
+ state:
43
+ persistent: false
44
+ files: []
45
+ depends_on:
46
+ - harness-tdd
47
+ - harness-perf