agentic-team-templates 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -1
- package/bin/cli.js +4 -1
- package/package.json +8 -3
- package/src/index.js +471 -7
- package/src/index.test.js +947 -0
- package/templates/testing/.cursorrules/advanced-techniques.md +596 -0
- package/templates/testing/.cursorrules/ci-cd-integration.md +603 -0
- package/templates/testing/.cursorrules/overview.md +163 -0
- package/templates/testing/.cursorrules/performance-testing.md +536 -0
- package/templates/testing/.cursorrules/quality-metrics.md +456 -0
- package/templates/testing/.cursorrules/reliability.md +557 -0
- package/templates/testing/.cursorrules/tdd-methodology.md +294 -0
- package/templates/testing/.cursorrules/test-data.md +565 -0
- package/templates/testing/.cursorrules/test-design.md +511 -0
- package/templates/testing/.cursorrules/test-types.md +398 -0
- package/templates/testing/CLAUDE.md +1134 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
# Quality Metrics
|
|
2
|
+
|
|
3
|
+
Guidelines for measuring test quality beyond simple coverage percentages.
|
|
4
|
+
|
|
5
|
+
## The Problem with Coverage Alone
|
|
6
|
+
|
|
7
|
+
High code coverage doesn't guarantee effective tests. You can have 100% coverage with tests that:
|
|
8
|
+
- Never fail
|
|
9
|
+
- Don't check results
|
|
10
|
+
- Miss edge cases
|
|
11
|
+
- Test implementation, not behavior
|
|
12
|
+
|
|
13
|
+
**Goodhart's Law**: "When a measure becomes a target, it ceases to be a good measure."
|
|
14
|
+
|
|
15
|
+
## Multi-Dimensional Quality Metrics
|
|
16
|
+
|
|
17
|
+
### 1. Code Coverage (Baseline Hygiene)
|
|
18
|
+
|
|
19
|
+
Traditional metrics as a starting point:
|
|
20
|
+
|
|
21
|
+
| Metric | Target | Purpose |
|
|
22
|
+
|--------|--------|---------|
|
|
23
|
+
| Line Coverage | 80%+ | Code is executed |
|
|
24
|
+
| Branch Coverage | 75%+ | Decision paths tested |
|
|
25
|
+
| Function Coverage | 90%+ | Public APIs tested |
|
|
26
|
+
| Statement Coverage | 80%+ | Statements executed |
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
// vitest.config.ts
|
|
30
|
+
export default {
|
|
31
|
+
test: {
|
|
32
|
+
coverage: {
|
|
33
|
+
provider: 'v8',
|
|
34
|
+
reporter: ['text', 'json', 'json-summary', 'html'],
|
|
35
|
+
thresholds: {
|
|
36
|
+
lines: 80,
|
|
37
|
+
branches: 75,
|
|
38
|
+
functions: 90,
|
|
39
|
+
statements: 80,
|
|
40
|
+
},
|
|
41
|
+
exclude: [
|
|
42
|
+
'**/node_modules/**',
|
|
43
|
+
'**/test/**',
|
|
44
|
+
'**/*.d.ts',
|
|
45
|
+
'**/types/**',
|
|
46
|
+
],
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
};
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 2. Mutation Score (Test Effectiveness)
|
|
53
|
+
|
|
54
|
+
Mutation testing measures whether your tests actually catch bugs.
|
|
55
|
+
|
|
56
|
+
**How it works:**
|
|
57
|
+
1. Tool introduces small changes (mutations) to your code
|
|
58
|
+
2. Tests run against each mutant
|
|
59
|
+
3. "Killed" mutants = tests caught the bug
|
|
60
|
+
4. "Survived" mutants = tests missed the bug
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
// Original code
|
|
64
|
+
function isAdult(age: number): boolean {
|
|
65
|
+
return age >= 18;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Mutant 1: Change >= to >
|
|
69
|
+
return age > 18; // Should be killed by test for age=18
|
|
70
|
+
|
|
71
|
+
// Mutant 2: Change 18 to 17
|
|
72
|
+
return age >= 17; // Should be killed by boundary test
|
|
73
|
+
|
|
74
|
+
// Mutant 3: Return opposite
|
|
75
|
+
return age < 18; // Should be killed by any positive test
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Target**: 70%+ mutation score
|
|
79
|
+
|
|
80
|
+
**Tools:**
|
|
81
|
+
- Stryker (JavaScript/TypeScript)
|
|
82
|
+
- PITest (Java)
|
|
83
|
+
- mutmut (Python)
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
// stryker.conf.json
|
|
87
|
+
{
|
|
88
|
+
"$schema": "./node_modules/@stryker-mutator/core/schema/stryker-schema.json",
|
|
89
|
+
"mutate": ["src/**/*.ts", "!src/**/*.test.ts"],
|
|
90
|
+
"testRunner": "vitest",
|
|
91
|
+
"reporters": ["clear-text", "html", "dashboard"],
|
|
92
|
+
"thresholds": {
|
|
93
|
+
"high": 80,
|
|
94
|
+
"low": 60,
|
|
95
|
+
"break": 50
|
|
96
|
+
},
|
|
97
|
+
"mutator": {
|
|
98
|
+
"excludedMutations": ["StringLiteral"]
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### 3. Assertion Density
|
|
104
|
+
|
|
105
|
+
Tests should make meaningful assertions:
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
// Bad: No assertions (high coverage, low quality)
|
|
109
|
+
it('processes order', async () => {
|
|
110
|
+
await processOrder(orderData);
|
|
111
|
+
// No expect!
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// Bad: Weak assertion
|
|
115
|
+
it('processes order', async () => {
|
|
116
|
+
const result = await processOrder(orderData);
|
|
117
|
+
expect(result).toBeDefined(); // Too weak
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
// Good: Meaningful assertions
|
|
121
|
+
it('processes order', async () => {
|
|
122
|
+
const order = await processOrder(orderData);
|
|
123
|
+
expect(order.status).toBe('confirmed');
|
|
124
|
+
expect(order.total).toBe(150);
|
|
125
|
+
expect(order.items).toHaveLength(2);
|
|
126
|
+
});
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Target**: 2-5 assertions per test (one logical concept)
|
|
130
|
+
|
|
131
|
+
### 4. Test Execution Time
|
|
132
|
+
|
|
133
|
+
Fast tests get run. Slow tests get skipped.
|
|
134
|
+
|
|
135
|
+
| Test Type | Target | Max Acceptable |
|
|
136
|
+
|-----------|--------|----------------|
|
|
137
|
+
| Unit | <10ms | 50ms |
|
|
138
|
+
| Integration | <500ms | 2s |
|
|
139
|
+
| E2E | <30s | 2min |
|
|
140
|
+
| Full Suite | <5min | 15min |
|
|
141
|
+
|
|
142
|
+
Monitor and alert on regression:
|
|
143
|
+
|
|
144
|
+
```yaml
|
|
145
|
+
# CI quality gate
|
|
146
|
+
- name: Check Test Duration
|
|
147
|
+
run: |
|
|
148
|
+
START=$(date +%s)
|
|
149
|
+
npm test
|
|
150
|
+
END=$(date +%s)
|
|
151
|
+
DURATION=$((END - START))
|
|
152
|
+
|
|
153
|
+
if [ $DURATION -gt 300 ]; then
|
|
154
|
+
echo "Test suite took ${DURATION}s, exceeds 5 minute limit"
|
|
155
|
+
exit 1
|
|
156
|
+
fi
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### 5. Flaky Test Rate
|
|
160
|
+
|
|
161
|
+
Flaky tests erode confidence and waste time.
|
|
162
|
+
|
|
163
|
+
**Target**: 0% flaky tests
|
|
164
|
+
|
|
165
|
+
Track and quarantine:
|
|
166
|
+
|
|
167
|
+
```ts
|
|
168
|
+
// vitest.config.ts
|
|
169
|
+
export default {
|
|
170
|
+
test: {
|
|
171
|
+
retry: 2, // Retry failed tests to identify flakes
|
|
172
|
+
reporters: [
|
|
173
|
+
'default',
|
|
174
|
+
['vitest-flake-tracker', { outputFile: 'flaky-tests.json' }],
|
|
175
|
+
],
|
|
176
|
+
},
|
|
177
|
+
};
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### 6. Test-to-Code Ratio
|
|
181
|
+
|
|
182
|
+
Healthy projects have similar amounts of test code and production code.
|
|
183
|
+
|
|
184
|
+
**Target**: 1:1 to 1.5:1 (test lines : production lines)
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Measure ratio
|
|
188
|
+
PROD_LINES=$(find src -name '*.ts' ! -name '*.test.ts' | xargs wc -l | tail -1 | awk '{print $1}')
|
|
189
|
+
TEST_LINES=$(find src -name '*.test.ts' | xargs wc -l | tail -1 | awk '{print $1}')
|
|
190
|
+
RATIO=$(echo "scale=2; $TEST_LINES / $PROD_LINES" | bc)
|
|
191
|
+
echo "Test-to-code ratio: $RATIO"
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Quality Gates in CI
|
|
195
|
+
|
|
196
|
+
### Comprehensive Quality Pipeline
|
|
197
|
+
|
|
198
|
+
```yaml
|
|
199
|
+
# .github/workflows/quality.yml
|
|
200
|
+
name: Quality Gates
|
|
201
|
+
|
|
202
|
+
on: [push, pull_request]
|
|
203
|
+
|
|
204
|
+
jobs:
|
|
205
|
+
static-analysis:
|
|
206
|
+
runs-on: ubuntu-latest
|
|
207
|
+
steps:
|
|
208
|
+
- uses: actions/checkout@v4
|
|
209
|
+
- uses: actions/setup-node@v4
|
|
210
|
+
with:
|
|
211
|
+
node-version: '20'
|
|
212
|
+
cache: 'npm'
|
|
213
|
+
|
|
214
|
+
- run: npm ci
|
|
215
|
+
- run: npm run type-check
|
|
216
|
+
- run: npm run lint
|
|
217
|
+
- run: npm run format:check
|
|
218
|
+
|
|
219
|
+
unit-tests:
|
|
220
|
+
runs-on: ubuntu-latest
|
|
221
|
+
steps:
|
|
222
|
+
- uses: actions/checkout@v4
|
|
223
|
+
- uses: actions/setup-node@v4
|
|
224
|
+
|
|
225
|
+
- run: npm ci
|
|
226
|
+
- run: npm test -- --coverage --reporter=junit
|
|
227
|
+
|
|
228
|
+
- name: Check Coverage Thresholds
|
|
229
|
+
run: |
|
|
230
|
+
COVERAGE=$(jq '.total.lines.pct' coverage/coverage-summary.json)
|
|
231
|
+
if (( $(echo "$COVERAGE < 80" | bc -l) )); then
|
|
232
|
+
echo "Line coverage $COVERAGE% below 80% threshold"
|
|
233
|
+
exit 1
|
|
234
|
+
fi
|
|
235
|
+
|
|
236
|
+
mutation-testing:
|
|
237
|
+
runs-on: ubuntu-latest
|
|
238
|
+
steps:
|
|
239
|
+
- uses: actions/checkout@v4
|
|
240
|
+
- uses: actions/setup-node@v4
|
|
241
|
+
|
|
242
|
+
- run: npm ci
|
|
243
|
+
- run: npx stryker run
|
|
244
|
+
|
|
245
|
+
- name: Check Mutation Score
|
|
246
|
+
run: |
|
|
247
|
+
SCORE=$(jq '.mutationScore' reports/mutation/mutation-score.json)
|
|
248
|
+
if (( $(echo "$SCORE < 70" | bc -l) )); then
|
|
249
|
+
echo "Mutation score $SCORE% below 70% threshold"
|
|
250
|
+
exit 1
|
|
251
|
+
fi
|
|
252
|
+
|
|
253
|
+
test-quality:
|
|
254
|
+
runs-on: ubuntu-latest
|
|
255
|
+
steps:
|
|
256
|
+
- uses: actions/checkout@v4
|
|
257
|
+
- uses: actions/setup-node@v4
|
|
258
|
+
|
|
259
|
+
- run: npm ci
|
|
260
|
+
- run: npm test -- --passWithNoTests=false
|
|
261
|
+
|
|
262
|
+
- name: Check Test Duration
|
|
263
|
+
run: |
|
|
264
|
+
npm test -- --reporter=json > test-results.json
|
|
265
|
+
DURATION=$(jq '.numTotalTests * 100' test-results.json) # Rough estimate
|
|
266
|
+
if [ $DURATION -gt 300000 ]; then
|
|
267
|
+
echo "Tests taking too long"
|
|
268
|
+
exit 1
|
|
269
|
+
fi
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Pull Request Quality Checks
|
|
273
|
+
|
|
274
|
+
```yaml
|
|
275
|
+
# .github/workflows/pr-quality.yml
|
|
276
|
+
name: PR Quality
|
|
277
|
+
|
|
278
|
+
on: pull_request
|
|
279
|
+
|
|
280
|
+
jobs:
|
|
281
|
+
changed-files-coverage:
|
|
282
|
+
runs-on: ubuntu-latest
|
|
283
|
+
steps:
|
|
284
|
+
- uses: actions/checkout@v4
|
|
285
|
+
with:
|
|
286
|
+
fetch-depth: 0
|
|
287
|
+
|
|
288
|
+
- name: Get changed files
|
|
289
|
+
id: changed-files
|
|
290
|
+
run: |
|
|
291
|
+
echo "files=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep -E '\.(ts|tsx)$' | grep -v '.test.' | tr '\n' ',')" >> $GITHUB_OUTPUT
|
|
292
|
+
|
|
293
|
+
- name: Check new code has tests
|
|
294
|
+
run: |
|
|
295
|
+
for file in $(echo "${{ steps.changed-files.outputs.files }}" | tr ',' '\n'); do
|
|
296
|
+
TEST_FILE="${file%.*}.test.${file##*.}"
|
|
297
|
+
if [ ! -f "$TEST_FILE" ]; then
|
|
298
|
+
echo "Missing test file for: $file"
|
|
299
|
+
exit 1
|
|
300
|
+
fi
|
|
301
|
+
done
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Tracking Metrics Over Time
|
|
305
|
+
|
|
306
|
+
### Dashboard Metrics
|
|
307
|
+
|
|
308
|
+
```ts
|
|
309
|
+
// scripts/collect-metrics.ts
|
|
310
|
+
import fs from 'fs';
|
|
311
|
+
|
|
312
|
+
interface QualityMetrics {
|
|
313
|
+
date: string;
|
|
314
|
+
lineCoverage: number;
|
|
315
|
+
branchCoverage: number;
|
|
316
|
+
mutationScore: number;
|
|
317
|
+
testCount: number;
|
|
318
|
+
testDuration: number;
|
|
319
|
+
flakyTests: number;
|
|
320
|
+
testToCodeRatio: number;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
async function collectMetrics(): Promise<QualityMetrics> {
|
|
324
|
+
const coverage = JSON.parse(
|
|
325
|
+
fs.readFileSync('coverage/coverage-summary.json', 'utf8')
|
|
326
|
+
);
|
|
327
|
+
|
|
328
|
+
const mutation = JSON.parse(
|
|
329
|
+
fs.readFileSync('reports/mutation/mutation-score.json', 'utf8')
|
|
330
|
+
);
|
|
331
|
+
|
|
332
|
+
const testResults = JSON.parse(
|
|
333
|
+
fs.readFileSync('test-results.json', 'utf8')
|
|
334
|
+
);
|
|
335
|
+
|
|
336
|
+
return {
|
|
337
|
+
date: new Date().toISOString(),
|
|
338
|
+
lineCoverage: coverage.total.lines.pct,
|
|
339
|
+
branchCoverage: coverage.total.branches.pct,
|
|
340
|
+
mutationScore: mutation.mutationScore,
|
|
341
|
+
testCount: testResults.numTotalTests,
|
|
342
|
+
testDuration: testResults.testResults.reduce(
|
|
343
|
+
(sum, t) => sum + t.duration,
|
|
344
|
+
0
|
|
345
|
+
),
|
|
346
|
+
flakyTests: countFlakyTests(),
|
|
347
|
+
testToCodeRatio: calculateTestToCodeRatio(),
|
|
348
|
+
};
|
|
349
|
+
}
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### Trend Analysis
|
|
353
|
+
|
|
354
|
+
```ts
|
|
355
|
+
// Check for regressions
|
|
356
|
+
function checkForRegressions(
|
|
357
|
+
current: QualityMetrics,
|
|
358
|
+
previous: QualityMetrics
|
|
359
|
+
): string[] {
|
|
360
|
+
const issues: string[] = [];
|
|
361
|
+
|
|
362
|
+
if (current.lineCoverage < previous.lineCoverage - 2) {
|
|
363
|
+
issues.push(
|
|
364
|
+
`Line coverage dropped from ${previous.lineCoverage}% to ${current.lineCoverage}%`
|
|
365
|
+
);
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
if (current.mutationScore < previous.mutationScore - 5) {
|
|
369
|
+
issues.push(
|
|
370
|
+
`Mutation score dropped from ${previous.mutationScore}% to ${current.mutationScore}%`
|
|
371
|
+
);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
if (current.testDuration > previous.testDuration * 1.2) {
|
|
375
|
+
issues.push(
|
|
376
|
+
`Test duration increased by ${Math.round((current.testDuration / previous.testDuration - 1) * 100)}%`
|
|
377
|
+
);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
if (current.flakyTests > previous.flakyTests) {
|
|
381
|
+
issues.push(
|
|
382
|
+
`Flaky tests increased from ${previous.flakyTests} to ${current.flakyTests}`
|
|
383
|
+
);
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
return issues;
|
|
387
|
+
}
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
## Quality Improvement Strategies
|
|
391
|
+
|
|
392
|
+
### When Coverage is Low
|
|
393
|
+
|
|
394
|
+
1. Identify untested files: `npx vitest run --coverage --coverage.reporter=html`
|
|
395
|
+
2. Prioritize by risk (business logic, security, data handling)
|
|
396
|
+
3. Write integration tests first (higher coverage per test)
|
|
397
|
+
4. Use TDD for new features
|
|
398
|
+
|
|
399
|
+
### When Mutation Score is Low
|
|
400
|
+
|
|
401
|
+
1. Run mutation report: `npx stryker run`
|
|
402
|
+
2. Find survived mutants
|
|
403
|
+
3. Add assertions that would catch those mutations
|
|
404
|
+
4. Focus on boundary conditions and logic branches
|
|
405
|
+
|
|
406
|
+
### When Tests are Slow
|
|
407
|
+
|
|
408
|
+
1. Profile test execution
|
|
409
|
+
2. Parallelize independent tests
|
|
410
|
+
3. Use test database per worker
|
|
411
|
+
4. Mock expensive external calls
|
|
412
|
+
5. Split slow integration tests from fast unit tests
|
|
413
|
+
|
|
414
|
+
### When Flaky Tests Appear
|
|
415
|
+
|
|
416
|
+
1. Quarantine immediately
|
|
417
|
+
2. Add to flaky test tracking
|
|
418
|
+
3. Analyze root cause (timing, state, external deps)
|
|
419
|
+
4. Fix or delete - don't leave flaky tests running
|
|
420
|
+
|
|
421
|
+
## Reporting and Visibility
|
|
422
|
+
|
|
423
|
+
### Test Quality Badge
|
|
424
|
+
|
|
425
|
+
```markdown
|
|
426
|
+

|
|
427
|
+

|
|
428
|
+

|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Weekly Quality Report
|
|
432
|
+
|
|
433
|
+
```ts
|
|
434
|
+
function generateWeeklyReport(metrics: QualityMetrics[]): string {
|
|
435
|
+
const latest = metrics[metrics.length - 1];
|
|
436
|
+
const weekAgo = metrics[0];
|
|
437
|
+
|
|
438
|
+
return `
|
|
439
|
+
## Test Quality Report
|
|
440
|
+
|
|
441
|
+
### Current Metrics
|
|
442
|
+
- Line Coverage: ${latest.lineCoverage}% (target: 80%)
|
|
443
|
+
- Mutation Score: ${latest.mutationScore}% (target: 70%)
|
|
444
|
+
- Test Count: ${latest.testCount}
|
|
445
|
+
- Total Test Duration: ${latest.testDuration}ms
|
|
446
|
+
|
|
447
|
+
### Week-over-Week
|
|
448
|
+
- Coverage: ${(latest.lineCoverage - weekAgo.lineCoverage).toFixed(1)}%
|
|
449
|
+
- Mutation Score: ${(latest.mutationScore - weekAgo.mutationScore).toFixed(1)}%
|
|
450
|
+
- Tests Added: ${latest.testCount - weekAgo.testCount}
|
|
451
|
+
|
|
452
|
+
### Action Items
|
|
453
|
+
${generateActionItems(latest)}
|
|
454
|
+
`;
|
|
455
|
+
}
|
|
456
|
+
```
|