claude-flow-novice 2.14.6 → 2.14.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/cfn/run-tests.md +119 -0
- package/.claude/hooks/cfn-post-edit.config.json +11 -4
- package/.claude/skills/cfn-agent-selector/SKILL.md +3 -2
- package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +1 -1
- package/.claude/skills/cfn-product-owner-decision/execute-decision.sh +141 -114
- package/.claude/skills/cfn-redis-coordination/report-completion.sh +86 -0
- package/.claude/skills/cfn-redis-coordination/store-context.sh +34 -0
- package/.claude/skills/pre-edit-backup/backup.sh +130 -0
- package/.claude/skills/pre-edit-backup/cleanup.sh +155 -0
- package/.claude/skills/pre-edit-backup/restore.sh +128 -0
- package/.claude/skills/pre-edit-backup/revert-file.sh +168 -0
- package/claude-assets/agents/README-AGENT_LIFECYCLE.md +522 -0
- package/claude-assets/agents/cfn-dev-team/coordinators/cfn-v3-coordinator.md +6 -3
- package/claude-assets/agents/cfn-dev-team/product-owners/product-owner.md +1 -1
- package/claude-assets/agents/cfn-dev-team/test-agent.md +141 -0
- package/claude-assets/agents/cfn-dev-team/utility/agent-builder.md +35 -0
- package/claude-assets/commands/cfn/run-tests.md +119 -0
- package/claude-assets/hooks/cfn-post-edit.config.json +11 -4
- package/claude-assets/skills/agent-name-validation/README.md +28 -0
- package/claude-assets/skills/agent-name-validation/SKILL.md +168 -0
- package/claude-assets/skills/agent-name-validation/validate-agent-names.sh +47 -0
- package/claude-assets/skills/cfn-agent-selector/SKILL.md +3 -2
- package/claude-assets/skills/cfn-loop-orchestration/orchestrate.sh +1 -1
- package/claude-assets/skills/cfn-product-owner-decision/execute-decision.sh +141 -114
- package/claude-assets/skills/cfn-redis-coordination/report-completion.sh +86 -0
- package/claude-assets/skills/cfn-redis-coordination/store-context.sh +34 -0
- package/claude-assets/skills/cfn-task-classifier/SKILL.md +1 -1
- package/claude-assets/skills/cfn-test-runner/SKILL.md +288 -0
- package/claude-assets/skills/cfn-test-runner/detect-regressions.sh +55 -0
- package/claude-assets/skills/cfn-test-runner/init-benchmark-db.sh +48 -0
- package/claude-assets/skills/cfn-test-runner/run-all-tests.sh +222 -0
- package/claude-assets/skills/cfn-test-runner/store-benchmarks.sh +55 -0
- package/claude-assets/skills/cfn-test-runner/validate-redis-keys.sh +143 -0
- package/claude-assets/skills/hook-pipeline/bash-dependency-checker.sh +89 -0
- package/claude-assets/skills/hook-pipeline/bash-pipe-safety.sh +69 -0
- package/claude-assets/skills/hook-pipeline/enforce-lf.sh +36 -0
- package/claude-assets/skills/hook-pipeline/js-promise-safety.sh +110 -0
- package/claude-assets/skills/hook-pipeline/python-async-safety.py +124 -0
- package/claude-assets/skills/hook-pipeline/python-import-checker.py +114 -0
- package/claude-assets/skills/hook-pipeline/python-subprocess-safety.py +77 -0
- package/claude-assets/skills/hook-pipeline/rust-command-safety.sh +38 -0
- package/claude-assets/skills/hook-pipeline/rust-dependency-checker.sh +50 -0
- package/claude-assets/skills/hook-pipeline/rust-future-safety.sh +50 -0
- package/dist/cli/agent-executor.js +1 -1
- package/dist/cli/agent-executor.js.map +1 -1
- package/dist/cli/agent-prompt-builder.js +40 -30
- package/dist/cli/agent-prompt-builder.js.map +1 -1
- package/dist/cli/config-manager.js +109 -91
- package/dist/cli/config-manager.js.map +1 -1
- package/package.json +2 -1
- package/scripts/init-project.js +4 -1
- package/claude-assets/agents/cfn-dev-team/developers/dev-backend-api.md +0 -147
- package/claude-assets/agents/cfn-dev-team/developers/frontend/spec-mobile-react-native.md +0 -199
- package/claude-assets/agents/cfn-dev-team/documentation/docs-api-openapi.md +0 -98
- package/claude-assets/agents/cfn-dev-team/product-owners/product-owner-agent.md +0 -155
- package/claude-assets/agents/cfn-dev-team/reviewers/quality/analyze-code-quality.md +0 -141
- /package/claude-assets/agents/cfn-dev-team/developers/{backend-dev.md → backend-developer.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/documentation/{api-docs.md → api-documentation.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/documentation/{specification.md → specification-agent.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/reviewers/quality/{code-analyzer.md → code-quality-validator.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/testers/e2e/{playwright-agent.md → playwright-tester.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/testers/unit/{tdd-london-swarm.md → tdd-london-unit-swarm.md} +0 -0
- /package/claude-assets/agents/cfn-dev-team/testers/validation/{production-validator.md → validation-production-validator.md} +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# CFN Test Runner Skill
|
|
2
|
+
|
|
3
|
+
**Version:** 1.0.0
|
|
4
|
+
**Status:** Production
|
|
5
|
+
**Purpose:** Unified test execution with benchmarking and regression detection
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
Provides comprehensive test execution across all CFN test suites:
|
|
12
|
+
- **Hello World Tests** (Layer 0-7)
|
|
13
|
+
- **CFN Loop E2E Tests** (9 integration tests)
|
|
14
|
+
- **Benchmark Tracking** (SQLite storage)
|
|
15
|
+
- **Regression Detection** (automated alerts)
|
|
16
|
+
|
|
17
|
+
**Key Features:**
|
|
18
|
+
- Single command test execution
|
|
19
|
+
- Historical benchmark comparison
|
|
20
|
+
- Performance regression alerts
|
|
21
|
+
- Git-aware baseline tracking
|
|
22
|
+
- Parallel test execution
|
|
23
|
+
- Comprehensive reporting
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Architecture
|
|
28
|
+
|
|
29
|
+
### Skill Components
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
.claude/skills/cfn-test-runner/
|
|
33
|
+
├── SKILL.md # This file
|
|
34
|
+
├── run-all-tests.sh # Main test runner
|
|
35
|
+
├── store-benchmarks.sh # SQLite benchmark storage
|
|
36
|
+
├── detect-regressions.sh # Regression analysis
|
|
37
|
+
├── init-benchmark-db.sh # Database initialization
|
|
38
|
+
└── generate-report.sh # Test report generation (not shipped in this release)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Usage
|
|
44
|
+
|
|
45
|
+
### Run All Tests
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh \
|
|
49
|
+
--suite all \
|
|
50
|
+
--benchmark \
|
|
51
|
+
--detect-regressions
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Run Specific Suite
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Hello World only
|
|
58
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh --suite hello-world
|
|
59
|
+
|
|
60
|
+
# CFN E2E only
|
|
61
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh --suite cfn-e2e
|
|
62
|
+
|
|
63
|
+
# Both
|
|
64
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh --suite all
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### With Regression Detection
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh \
|
|
71
|
+
--suite all \
|
|
72
|
+
--benchmark \
|
|
73
|
+
--detect-regressions \
|
|
74
|
+
--threshold 0.10
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Parameters
|
|
80
|
+
|
|
81
|
+
### run-all-tests.sh
|
|
82
|
+
|
|
83
|
+
| Parameter | Required | Description | Default |
|
|
84
|
+
|-----------|----------|-------------|---------|
|
|
85
|
+
| `--suite` | No | Test suite: `all`, `hello-world`, `cfn-e2e` | `all` |
|
|
86
|
+
| `--benchmark` | No | Store results in SQLite | `false` |
|
|
87
|
+
| `--detect-regressions` | No | Run regression analysis | `false` |
|
|
88
|
+
| `--threshold` | No | Regression threshold as a fraction (`0.10` = 10%) | `0.10` |
|
|
89
|
+
| `--parallel` | No | Run tests in parallel (accepted, but not yet implemented by `run-all-tests.sh`) | `false` |
|
|
90
|
+
| `--output` | No | Output format: `text`, `json`, `html` (accepted, but only `text` output is currently produced) | `text` |
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Test Suites
|
|
95
|
+
|
|
96
|
+
### Hello World (Layer 0-7)
|
|
97
|
+
|
|
98
|
+
**Purpose:** Validate agent spawning patterns and coordination
|
|
99
|
+
|
|
100
|
+
| Layer | Test | Duration | Critical |
|
|
101
|
+
|-------|------|----------|----------|
|
|
102
|
+
| 0 | Tool Validation | 60s | ✅ |
|
|
103
|
+
| 1-4 | [Future] | - | - |
|
|
104
|
+
| 5 | Coordinator Spawning | 120s | ✅ |
|
|
105
|
+
| 6 | Review Handoff | 180s | ✅ |
|
|
106
|
+
| 7 | Error Retry | 150s | ✅ |
|
|
107
|
+
|
|
108
|
+
**Location:** `tests/hello-world/`
|
|
109
|
+
|
|
110
|
+
### CFN Loop E2E (9 Tests)
|
|
111
|
+
|
|
112
|
+
**Purpose:** Validate full CFN Loop workflow
|
|
113
|
+
|
|
114
|
+
| Test | Component | Duration | Critical |
|
|
115
|
+
|------|-----------|----------|----------|
|
|
116
|
+
| 1 | Coordinator → Orchestrator | 30s | ✅ |
|
|
117
|
+
| 2 | Loop 3 → Gate Check | 60s | ✅ |
|
|
118
|
+
| 3 | Gate Pass → Loop 2 | 30s | ⚠️ |
|
|
119
|
+
| 4 | Loop 2 → Product Owner | 90s | ✅ |
|
|
120
|
+
| 5 | Product Owner Decision | 60s | ✅ |
|
|
121
|
+
| 6 | Iteration Cycle | 120s | ✅ |
|
|
122
|
+
| 7 | Redis Key Structure | 10s | ✅ |
|
|
123
|
+
| 8 | Error Recovery | 60s | ⚠️ |
|
|
124
|
+
| 9 | Cleanup | 10s | ⚠️ |
|
|
125
|
+
|
|
126
|
+
**Location:** `tests/cfn-v3/test-e2e-cfn-loop.sh`
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Benchmark Storage
|
|
131
|
+
|
|
132
|
+
### SQLite Schema
|
|
133
|
+
|
|
134
|
+
```sql
|
|
135
|
+
test_suites -- Suite definitions
|
|
136
|
+
test_runs -- Execution records
|
|
137
|
+
test_cases -- Individual tests
|
|
138
|
+
test_results -- Per-test outcomes
|
|
139
|
+
performance_metrics -- Performance data
|
|
140
|
+
regression_alerts -- Detected regressions
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Database:** `.artifacts/test-benchmarks.db`
|
|
144
|
+
|
|
145
|
+
### Stored Metrics
|
|
146
|
+
|
|
147
|
+
**Test-Level:**
|
|
148
|
+
- Duration (ms)
|
|
149
|
+
- Status (passed/failed/skipped)
|
|
150
|
+
- Assertion count
|
|
151
|
+
- Error messages
|
|
152
|
+
|
|
153
|
+
**Suite-Level:**
|
|
154
|
+
- Total duration (seconds)
|
|
155
|
+
- Success rate (%)
|
|
156
|
+
- Pass/fail/skip counts
|
|
157
|
+
- Git commit/branch
|
|
158
|
+
|
|
159
|
+
**Performance:**
|
|
160
|
+
- Agent spawn time
|
|
161
|
+
- Redis operation latency
|
|
162
|
+
- File I/O duration
|
|
163
|
+
- Memory usage
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Regression Detection
|
|
168
|
+
|
|
169
|
+
### Automatic Alerts
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
# Critical: Test started failing
|
|
173
|
+
alert_type: test_failure
|
|
174
|
+
severity: critical
|
|
175
|
+
message: "TEST 5: Product Owner Decision FAILED (was passing)"
|
|
176
|
+
|
|
177
|
+
# Warning: Performance degraded
|
|
178
|
+
alert_type: performance_regression
|
|
179
|
+
severity: warning
|
|
180
|
+
message: "Loop 3 spawn time increased 25% (baseline: 5s, current: 6.25s)"
|
|
181
|
+
|
|
182
|
+
# Info: Success rate dropped
|
|
183
|
+
alert_type: success_rate_drop
|
|
184
|
+
severity: info
|
|
185
|
+
message: "Suite success rate: 88% → 77% (threshold: 10%)"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Threshold Configuration
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# Default: 10% regression threshold
|
|
192
|
+
--threshold 0.10
|
|
193
|
+
|
|
194
|
+
# Strict: 5% threshold
|
|
195
|
+
--threshold 0.05
|
|
196
|
+
|
|
197
|
+
# Relaxed: 20% threshold
|
|
198
|
+
--threshold 0.20
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Output Formats
|
|
204
|
+
|
|
205
|
+
### Text (Console)
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
==========================================
|
|
209
|
+
CFN Test Suite Results
|
|
210
|
+
==========================================
|
|
211
|
+
|
|
212
|
+
Suite: Hello World
|
|
213
|
+
Layer 0: ✅ PASSED (58.3s)
|
|
214
|
+
Layer 5: ✅ PASSED (115.7s)
|
|
215
|
+
Layer 6: ✅ PASSED (172.4s)
|
|
216
|
+
Layer 7: ✅ PASSED (148.2s)
|
|
217
|
+
|
|
218
|
+
Suite: CFN E2E
|
|
219
|
+
TEST 1: ✅ PASSED (28.1s)
|
|
220
|
+
TEST 2: ✅ PASSED (54.6s)
|
|
221
|
+
TEST 3: ⚠️ SKIPPED
|
|
222
|
+
TEST 4: ✅ PASSED (87.3s)
|
|
223
|
+
TEST 5: ✅ PASSED (59.2s)
|
|
224
|
+
TEST 6: ✅ PASSED (118.9s)
|
|
225
|
+
TEST 7: ✅ PASSED (9.4s)
|
|
226
|
+
|
|
227
|
+
Total: 11 tests, 9 passed, 0 failed, 2 skipped
|
|
228
|
+
Duration: 851.1s
|
|
229
|
+
Success Rate: 81.8%
|
|
230
|
+
|
|
231
|
+
Regressions Detected: 0
|
|
232
|
+
==========================================
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### JSON (API/CI)
|
|
236
|
+
|
|
237
|
+
```json
|
|
238
|
+
{
|
|
239
|
+
"timestamp": "2025-11-04T02:00:00Z",
|
|
240
|
+
"git_commit": "abc123",
|
|
241
|
+
"git_branch": "main",
|
|
242
|
+
"suites": {
|
|
243
|
+
"hello-world": {
|
|
244
|
+
"total": 4,
|
|
245
|
+
"passed": 4,
|
|
246
|
+
"failed": 0,
|
|
247
|
+
"skipped": 0,
|
|
248
|
+
"duration": 494.6,
|
|
249
|
+
"success_rate": 1.0
|
|
250
|
+
},
|
|
251
|
+
"cfn-e2e": {
|
|
252
|
+
"total": 9,
|
|
253
|
+
"passed": 7,
|
|
254
|
+
"failed": 0,
|
|
255
|
+
"skipped": 2,
|
|
256
|
+
"duration": 356.5,
|
|
257
|
+
"success_rate": 0.778
|
|
258
|
+
}
|
|
259
|
+
},
|
|
260
|
+
"regressions": [],
|
|
261
|
+
"baseline_comparison": {
|
|
262
|
+
"duration_change": "+2.3%",
|
|
263
|
+
"success_rate_change": "+0.0%"
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### HTML (Report)
|
|
269
|
+
|
|
270
|
+
Generates interactive HTML report with:
|
|
271
|
+
- Trend graphs (Chart.js)
|
|
272
|
+
- Regression highlights
|
|
273
|
+
- Test history
|
|
274
|
+
- Performance metrics
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Baseline Management
|
|
279
|
+
|
|
280
|
+
### Establish Baseline
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
# First run establishes baseline
|
|
284
|
+
./.claude/skills/cfn-test-runner/run-all-tests.sh --suite all --benchmark
|
|
285
|
+
|
|
286
|
+
# Mark as baseline
|
|
287
|
+
sqlite3 .artifacts/test-benchmarks.db << EOF
|
|
288
|
+
UPDATE test_runs SET environment = 'baseline' WHERE id = (SELECT MAX(id) FROM test_runs);
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Detect test regressions
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
7
|
+
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"
|
|
8
|
+
|
|
9
|
+
THRESHOLD=0.10
|
|
10
|
+
|
|
11
|
+
while [[ $# -gt 0 ]]; do
|
|
12
|
+
case $1 in
|
|
13
|
+
--threshold) THRESHOLD="$2"; shift 2 ;;
|
|
14
|
+
*) shift ;;
|
|
15
|
+
esac
|
|
16
|
+
done
|
|
17
|
+
|
|
18
|
+
# Get latest run
|
|
19
|
+
LATEST_RUN=$(sqlite3 "$DB_FILE" "SELECT id FROM test_runs ORDER BY run_timestamp DESC LIMIT 1")
|
|
20
|
+
|
|
21
|
+
if [ -z "$LATEST_RUN" ]; then
|
|
22
|
+
echo "No test runs found"
|
|
23
|
+
exit 0
|
|
24
|
+
fi
|
|
25
|
+
|
|
26
|
+
# Get baseline (average of last 10 runs excluding latest)
|
|
27
|
+
BASELINE_SUCCESS_RATE=$(sqlite3 "$DB_FILE" "
|
|
28
|
+
SELECT AVG(success_rate) FROM (
|
|
29
|
+
SELECT success_rate FROM test_runs
|
|
30
|
+
WHERE id != $LATEST_RUN
|
|
31
|
+
ORDER BY run_timestamp DESC
|
|
32
|
+
LIMIT 10
|
|
33
|
+
)
|
|
34
|
+
")
|
|
35
|
+
|
|
36
|
+
LATEST_SUCCESS_RATE=$(sqlite3 "$DB_FILE" "SELECT success_rate FROM test_runs WHERE id = $LATEST_RUN")
|
|
37
|
+
|
|
38
|
+
# Check for regression
|
|
39
|
+
# Decide whether the success rate dropped by more than THRESHOLD.
# Prints 1 for a regression, 0 otherwise.
# - The comparison is fully parenthesized: in awk an unparenthesized
#   `print expr > expr` is an output redirection to a file, not a comparison.
# - Values are passed with -v rather than interpolated into the program text,
#   so an empty baseline (no earlier runs) or empty latest value yields 0
#   instead of an awk syntax error.
REGRESSION=$(awk \
  -v base="${BASELINE_SUCCESS_RATE:-}" \
  -v cur="${LATEST_SUCCESS_RATE:-}" \
  -v thr="${THRESHOLD:-0.10}" \
  'BEGIN { print ((base != "" && cur != "" && (base - cur) > thr) ? 1 : 0) }')
|
|
40
|
+
|
|
41
|
+
if [ "$REGRESSION" = "1" ]; then
|
|
42
|
+
DIFF=$(awk "BEGIN {printf \"%.1f\", ($BASELINE_SUCCESS_RATE - $LATEST_SUCCESS_RATE) * 100}")
|
|
43
|
+
|
|
44
|
+
sqlite3 "$DB_FILE" << EOFSQL
|
|
45
|
+
INSERT INTO regression_alerts (run_id, alert_type, severity, message)
|
|
46
|
+
VALUES ($LATEST_RUN, 'success_rate_drop', 'warning',
|
|
47
|
+
'Success rate dropped ${DIFF}% (baseline: ${BASELINE_SUCCESS_RATE}, current: ${LATEST_SUCCESS_RATE})');
|
|
48
|
+
EOFSQL
|
|
49
|
+
|
|
50
|
+
echo "⚠️ Regression detected: Success rate dropped ${DIFF}%"
|
|
51
|
+
exit 1
|
|
52
|
+
fi
|
|
53
|
+
|
|
54
|
+
echo "✅ No regressions detected"
|
|
55
|
+
exit 0
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Initialize benchmark database
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
7
|
+
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"
|
|
8
|
+
|
|
9
|
+
mkdir -p "$PROJECT_ROOT/.artifacts"
|
|
10
|
+
|
|
11
|
+
sqlite3 "$DB_FILE" << 'EOFSQL'
|
|
12
|
+
CREATE TABLE IF NOT EXISTS test_suites (
|
|
13
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
14
|
+
name TEXT NOT NULL UNIQUE,
|
|
15
|
+
description TEXT,
|
|
16
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
17
|
+
);
|
|
18
|
+
|
|
19
|
+
CREATE TABLE IF NOT EXISTS test_runs (
|
|
20
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
21
|
+
suite_id INTEGER NOT NULL,
|
|
22
|
+
run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
23
|
+
git_commit TEXT,
|
|
24
|
+
git_branch TEXT,
|
|
25
|
+
total_tests INTEGER NOT NULL,
|
|
26
|
+
passed INTEGER NOT NULL,
|
|
27
|
+
failed INTEGER NOT NULL,
|
|
28
|
+
skipped INTEGER NOT NULL,
|
|
29
|
+
duration_seconds REAL NOT NULL,
|
|
30
|
+
success_rate REAL,
|
|
31
|
+
FOREIGN KEY (suite_id) REFERENCES test_suites(id)
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
CREATE TABLE IF NOT EXISTS regression_alerts (
|
|
35
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
36
|
+
run_id INTEGER NOT NULL,
|
|
37
|
+
alert_type TEXT NOT NULL,
|
|
38
|
+
severity TEXT NOT NULL,
|
|
39
|
+
message TEXT NOT NULL,
|
|
40
|
+
acknowledged INTEGER DEFAULT 0,
|
|
41
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
42
|
+
FOREIGN KEY (run_id) REFERENCES test_runs(id)
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
CREATE INDEX IF NOT EXISTS idx_runs_timestamp ON test_runs(run_timestamp DESC);
|
|
46
|
+
EOFSQL
|
|
47
|
+
|
|
48
|
+
echo "✅ Benchmark database initialized: $DB_FILE"
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# CFN Unified Test Runner
|
|
3
|
+
# Version: 1.0.0
|
|
4
|
+
# Purpose: Run all test suites with benchmarking and regression detection
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
# Colors
|
|
9
|
+
RED='\033[0;31m'
|
|
10
|
+
GREEN='\033[0;32m'
|
|
11
|
+
YELLOW='\033[1;33m'
|
|
12
|
+
BLUE='\033[0;34m'
|
|
13
|
+
NC='\033[0m'
|
|
14
|
+
|
|
15
|
+
# Configuration
|
|
16
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
17
|
+
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
18
|
+
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"
|
|
19
|
+
|
|
20
|
+
# Parameters
|
|
21
|
+
SUITE="all"
|
|
22
|
+
BENCHMARK=false
|
|
23
|
+
DETECT_REGRESSIONS=false
|
|
24
|
+
THRESHOLD=0.10
|
|
25
|
+
PARALLEL=false
|
|
26
|
+
OUTPUT_FORMAT="text"
|
|
27
|
+
|
|
28
|
+
# Parse arguments
|
|
29
|
+
while [[ $# -gt 0 ]]; do
|
|
30
|
+
case $1 in
|
|
31
|
+
--suite)
|
|
32
|
+
SUITE="$2"
|
|
33
|
+
shift 2
|
|
34
|
+
;;
|
|
35
|
+
--benchmark)
|
|
36
|
+
BENCHMARK=true
|
|
37
|
+
shift
|
|
38
|
+
;;
|
|
39
|
+
--detect-regressions)
|
|
40
|
+
DETECT_REGRESSIONS=true
|
|
41
|
+
shift
|
|
42
|
+
;;
|
|
43
|
+
--threshold)
|
|
44
|
+
THRESHOLD="$2"
|
|
45
|
+
shift 2
|
|
46
|
+
;;
|
|
47
|
+
--parallel)
|
|
48
|
+
PARALLEL=true
|
|
49
|
+
shift
|
|
50
|
+
;;
|
|
51
|
+
--output)
|
|
52
|
+
OUTPUT_FORMAT="$2"
|
|
53
|
+
shift 2
|
|
54
|
+
;;
|
|
55
|
+
*)
|
|
56
|
+
echo "Unknown parameter: $1"
|
|
57
|
+
exit 1
|
|
58
|
+
;;
|
|
59
|
+
esac
|
|
60
|
+
done
|
|
61
|
+
|
|
62
|
+
# Initialize benchmark database if needed
|
|
63
|
+
if [ "$BENCHMARK" = true ] && [ ! -f "$DB_FILE" ]; then
|
|
64
|
+
echo -e "${BLUE}[INFO]${NC} Initializing benchmark database..."
|
|
65
|
+
"$SCRIPT_DIR/init-benchmark-db.sh"
|
|
66
|
+
fi
|
|
67
|
+
|
|
68
|
+
# Get git info
|
|
69
|
+
GIT_COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
|
|
70
|
+
GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
|
|
71
|
+
|
|
72
|
+
# Start test run
|
|
73
|
+
START_TIME=$(date +%s)
|
|
74
|
+
TOTAL_TESTS=0
|
|
75
|
+
PASSED_TESTS=0
|
|
76
|
+
FAILED_TESTS=0
|
|
77
|
+
SKIPPED_TESTS=0
|
|
78
|
+
|
|
79
|
+
echo -e "${GREEN}=========================================="
|
|
80
|
+
echo "CFN Test Suite Runner"
|
|
81
|
+
echo -e "==========================================${NC}"
|
|
82
|
+
echo "Suite: $SUITE"
|
|
83
|
+
echo "Benchmark: $BENCHMARK"
|
|
84
|
+
echo "Detect Regressions: $DETECT_REGRESSIONS"
|
|
85
|
+
echo "Git: $GIT_BRANCH @ $GIT_COMMIT"
|
|
86
|
+
echo ""
|
|
87
|
+
|
|
88
|
+
# Run Hello World tests
|
|
89
|
+
if [ "$SUITE" = "all" ] || [ "$SUITE" = "hello-world" ]; then
|
|
90
|
+
echo -e "${BLUE}[INFO]${NC} Running Hello World tests..."
|
|
91
|
+
|
|
92
|
+
HW_START=$(date +%s)
|
|
93
|
+
HW_PASSED=0
|
|
94
|
+
HW_FAILED=0
|
|
95
|
+
HW_SKIPPED=0
|
|
96
|
+
|
|
97
|
+
# Layer 0
|
|
98
|
+
if timeout 90 node "$PROJECT_ROOT/tests/hello-world/layer0-tool-validation.js" > /tmp/hw-layer0.log 2>&1; then
|
|
99
|
+
echo -e "${GREEN}✅ Layer 0: PASSED${NC}"
|
|
100
|
+
# Plain assignment instead of ((HW_PASSED++)): the post-increment evaluates to 0
# on the first pass, returns status 1, and would abort the script under `set -e`.
HW_PASSED=$((HW_PASSED + 1))
|
|
101
|
+
else
|
|
102
|
+
echo -e "${RED}❌ Layer 0: FAILED${NC}"
|
|
103
|
+
# Plain assignment instead of ((HW_FAILED++)): the post-increment evaluates to 0
# on the first failure, returns status 1, and would abort the script under `set -e`.
HW_FAILED=$((HW_FAILED + 1))
|
|
104
|
+
fi
|
|
105
|
+
|
|
106
|
+
# Layer 5
|
|
107
|
+
if timeout 180 node "$PROJECT_ROOT/tests/hello-world/layer5-coordinator-spawning.js" > /tmp/hw-layer5.log 2>&1; then
|
|
108
|
+
echo -e "${GREEN}✅ Layer 5: PASSED${NC}"
|
|
109
|
+
# Plain assignment instead of ((HW_PASSED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_PASSED=$((HW_PASSED + 1))
|
|
110
|
+
else
|
|
111
|
+
echo -e "${RED}❌ Layer 5: FAILED${NC}"
|
|
112
|
+
# Plain assignment instead of ((HW_FAILED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_FAILED=$((HW_FAILED + 1))
|
|
113
|
+
fi
|
|
114
|
+
|
|
115
|
+
# Layer 6
|
|
116
|
+
if timeout 240 node "$PROJECT_ROOT/tests/hello-world/layer6-coordinator-review.js" > /tmp/hw-layer6.log 2>&1; then
|
|
117
|
+
echo -e "${GREEN}✅ Layer 6: PASSED${NC}"
|
|
118
|
+
# Plain assignment instead of ((HW_PASSED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_PASSED=$((HW_PASSED + 1))
|
|
119
|
+
else
|
|
120
|
+
echo -e "${RED}❌ Layer 6: FAILED${NC}"
|
|
121
|
+
# Plain assignment instead of ((HW_FAILED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_FAILED=$((HW_FAILED + 1))
|
|
122
|
+
fi
|
|
123
|
+
|
|
124
|
+
# Layer 7
|
|
125
|
+
if timeout 200 node "$PROJECT_ROOT/tests/hello-world/layer7-coordinator-error-retry.js" > /tmp/hw-layer7.log 2>&1; then
|
|
126
|
+
echo -e "${GREEN}✅ Layer 7: PASSED${NC}"
|
|
127
|
+
# Plain assignment instead of ((HW_PASSED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_PASSED=$((HW_PASSED + 1))
|
|
128
|
+
else
|
|
129
|
+
echo -e "${RED}❌ Layer 7: FAILED${NC}"
|
|
130
|
+
# Plain assignment instead of ((HW_FAILED++)): when the counter is still 0 the
# post-increment returns status 1 and would abort the script under `set -e`.
HW_FAILED=$((HW_FAILED + 1))
|
|
131
|
+
fi
|
|
132
|
+
|
|
133
|
+
HW_END=$(date +%s)
|
|
134
|
+
HW_DURATION=$((HW_END - HW_START))
|
|
135
|
+
|
|
136
|
+
echo -e "${BLUE}Hello World: ${HW_PASSED} passed, ${HW_FAILED} failed, ${HW_SKIPPED} skipped (${HW_DURATION}s)${NC}"
|
|
137
|
+
echo ""
|
|
138
|
+
|
|
139
|
+
TOTAL_TESTS=$((TOTAL_TESTS + HW_PASSED + HW_FAILED + HW_SKIPPED))
|
|
140
|
+
PASSED_TESTS=$((PASSED_TESTS + HW_PASSED))
|
|
141
|
+
FAILED_TESTS=$((FAILED_TESTS + HW_FAILED))
|
|
142
|
+
SKIPPED_TESTS=$((SKIPPED_TESTS + HW_SKIPPED))
|
|
143
|
+
fi
|
|
144
|
+
|
|
145
|
+
# Run CFN E2E tests
|
|
146
|
+
if [ "$SUITE" = "all" ] || [ "$SUITE" = "cfn-e2e" ]; then
|
|
147
|
+
echo -e "${BLUE}[INFO]${NC} Running CFN E2E tests..."
|
|
148
|
+
|
|
149
|
+
E2E_START=$(date +%s)
|
|
150
|
+
|
|
151
|
+
# Run E2E test and capture results
|
|
152
|
+
if timeout 600 bash "$PROJECT_ROOT/tests/cfn-v3/test-e2e-cfn-loop.sh" > /tmp/e2e-output.log 2>&1; then
|
|
153
|
+
E2E_EXIT=0
|
|
154
|
+
else
|
|
155
|
+
E2E_EXIT=$?
|
|
156
|
+
fi
|
|
157
|
+
|
|
158
|
+
# Parse results
|
|
159
|
+
# Count log lines containing "PASS". `grep -c` already prints 0 when nothing
# matches but exits 1, so the fallback is applied outside the substitution;
# `|| echo 0` inside it would yield the two-line value "0\n0" and break the
# arithmetic on the totals.
E2E_PASSED=$(grep -c "PASS" /tmp/e2e-output.log) || E2E_PASSED=0
|
|
160
|
+
# Count log lines containing "FAIL". `grep -c` already prints 0 when nothing
# matches but exits 1, so the fallback is applied outside the substitution;
# `|| echo 0` inside it would yield the two-line value "0\n0" and break the
# arithmetic on the totals.
E2E_FAILED=$(grep -c "FAIL" /tmp/e2e-output.log) || E2E_FAILED=0
|
|
161
|
+
# Count log lines containing "SKIPPED". `grep -c` already prints 0 when nothing
# matches but exits 1, so the fallback is applied outside the substitution;
# `|| echo 0` inside it would yield the two-line value "0\n0" and break the
# arithmetic on the totals.
E2E_SKIPPED=$(grep -c "SKIPPED" /tmp/e2e-output.log) || E2E_SKIPPED=0
|
|
162
|
+
|
|
163
|
+
if [ $E2E_EXIT -eq 0 ]; then
|
|
164
|
+
echo -e "${GREEN}✅ CFN E2E: PASSED${NC}"
|
|
165
|
+
else
|
|
166
|
+
echo -e "${RED}❌ CFN E2E: FAILED (exit $E2E_EXIT)${NC}"
# The suite can exit non-zero without logging any "FAIL" line (for example when
# `timeout` kills it). Count that as one failure so the runner's final exit
# code, which is derived from the failure count, reflects it.
if [ "${E2E_FAILED:-0}" = "0" ]; then
  E2E_FAILED=1
fi
|
|
167
|
+
fi
|
|
168
|
+
|
|
169
|
+
E2E_END=$(date +%s)
|
|
170
|
+
E2E_DURATION=$((E2E_END - E2E_START))
|
|
171
|
+
|
|
172
|
+
echo -e "${BLUE}CFN E2E: ${E2E_PASSED} passed, ${E2E_FAILED} failed, ${E2E_SKIPPED} skipped (${E2E_DURATION}s)${NC}"
|
|
173
|
+
echo ""
|
|
174
|
+
|
|
175
|
+
TOTAL_TESTS=$((TOTAL_TESTS + E2E_PASSED + E2E_FAILED + E2E_SKIPPED))
|
|
176
|
+
PASSED_TESTS=$((PASSED_TESTS + E2E_PASSED))
|
|
177
|
+
FAILED_TESTS=$((FAILED_TESTS + E2E_FAILED))
|
|
178
|
+
SKIPPED_TESTS=$((SKIPPED_TESTS + E2E_SKIPPED))
|
|
179
|
+
fi
|
|
180
|
+
|
|
181
|
+
END_TIME=$(date +%s)
|
|
182
|
+
TOTAL_DURATION=$((END_TIME - START_TIME))
|
|
183
|
+
# Overall success rate as a percentage with one decimal place.
# TOTAL_TESTS is 0 when no suite ran (e.g. an unrecognized --suite value);
# guard so awk never divides by zero and aborts the script under `set -e`.
# Values are passed with -v rather than interpolated into the awk program.
if [ "${TOTAL_TESTS:-0}" -gt 0 ]; then
  SUCCESS_RATE=$(awk -v p="${PASSED_TESTS:-0}" -v t="$TOTAL_TESTS" 'BEGIN {printf "%.1f", (p / t) * 100}')
else
  SUCCESS_RATE="0.0"
fi
|
|
184
|
+
|
|
185
|
+
# Summary
|
|
186
|
+
echo -e "${GREEN}=========================================="
|
|
187
|
+
echo "Test Summary"
|
|
188
|
+
echo -e "==========================================${NC}"
|
|
189
|
+
echo "Total: $TOTAL_TESTS tests"
|
|
190
|
+
echo "Passed: $PASSED_TESTS"
|
|
191
|
+
echo "Failed: $FAILED_TESTS"
|
|
192
|
+
echo "Skipped: $SKIPPED_TESTS"
|
|
193
|
+
echo "Duration: ${TOTAL_DURATION}s"
|
|
194
|
+
echo "Success Rate: ${SUCCESS_RATE}%"
|
|
195
|
+
echo ""
|
|
196
|
+
|
|
197
|
+
# Store benchmarks
|
|
198
|
+
if [ "$BENCHMARK" = true ]; then
|
|
199
|
+
echo -e "${BLUE}[INFO]${NC} Storing benchmarks..."
|
|
200
|
+
"$SCRIPT_DIR/store-benchmarks.sh" \
|
|
201
|
+
--suite "$SUITE" \
|
|
202
|
+
--total "$TOTAL_TESTS" \
|
|
203
|
+
--passed "$PASSED_TESTS" \
|
|
204
|
+
--failed "$FAILED_TESTS" \
|
|
205
|
+
--skipped "$SKIPPED_TESTS" \
|
|
206
|
+
--duration "$TOTAL_DURATION" \
|
|
207
|
+
--commit "$GIT_COMMIT" \
|
|
208
|
+
--branch "$GIT_BRANCH"
|
|
209
|
+
fi
|
|
210
|
+
|
|
211
|
+
# Detect regressions
|
|
212
|
+
if [ "$DETECT_REGRESSIONS" = true ]; then
|
|
213
|
+
echo -e "${BLUE}[INFO]${NC} Detecting regressions..."
|
|
214
|
+
if ! "$SCRIPT_DIR/detect-regressions.sh" --threshold "$THRESHOLD"; then
|
|
215
|
+
echo -e "${YELLOW}⚠️ Regressions detected!${NC}"
|
|
216
|
+
fi
|
|
217
|
+
fi
|
|
218
|
+
|
|
219
|
+
# Exit with failure if tests failed
|
|
220
|
+
if [ $FAILED_TESTS -gt 0 ]; then
|
|
221
|
+
exit 1
|
|
222
|
+
fi
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Store test benchmarks in SQLite
|
|
3
|
+
set -euo pipefail
|
|
4
|
+
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
7
|
+
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"
|
|
8
|
+
|
|
9
|
+
# Parse arguments
|
|
10
|
+
SUITE=""
|
|
11
|
+
TOTAL=0
|
|
12
|
+
PASSED=0
|
|
13
|
+
FAILED=0
|
|
14
|
+
SKIPPED=0
|
|
15
|
+
DURATION=0
|
|
16
|
+
COMMIT=""
|
|
17
|
+
BRANCH=""
|
|
18
|
+
|
|
19
|
+
while [[ $# -gt 0 ]]; do
|
|
20
|
+
case $1 in
|
|
21
|
+
--suite) SUITE="$2"; shift 2 ;;
|
|
22
|
+
--total) TOTAL="$2"; shift 2 ;;
|
|
23
|
+
--passed) PASSED="$2"; shift 2 ;;
|
|
24
|
+
--failed) FAILED="$2"; shift 2 ;;
|
|
25
|
+
--skipped) SKIPPED="$2"; shift 2 ;;
|
|
26
|
+
--duration) DURATION="$2"; shift 2 ;;
|
|
27
|
+
--commit) COMMIT="$2"; shift 2 ;;
|
|
28
|
+
--branch) BRANCH="$2"; shift 2 ;;
|
|
29
|
+
*) shift ;;
|
|
30
|
+
esac
|
|
31
|
+
done
|
|
32
|
+
|
|
33
|
+
# Success rate as a fraction with four decimal places.
# TOTAL defaults to 0 when --total is not supplied; guard so awk never divides
# by zero and aborts the script under `set -e`.
# Values are passed with -v rather than interpolated into the awk program.
if [ "${TOTAL:-0}" -gt 0 ]; then
  SUCCESS_RATE=$(awk -v p="${PASSED:-0}" -v t="$TOTAL" 'BEGIN {printf "%.4f", p / t}')
else
  SUCCESS_RATE="0.0000"
fi
|
|
34
|
+
|
|
35
|
+
# Get or create suite ID
|
|
36
|
+
SUITE_ID=$(sqlite3 "$DB_FILE" "SELECT id FROM test_suites WHERE name='$SUITE'")
|
|
37
|
+
if [ -z "$SUITE_ID" ]; then
|
|
38
|
+
sqlite3 "$DB_FILE" "INSERT INTO test_suites (name) VALUES ('$SUITE')"
|
|
39
|
+
# Look the new suite up by name. last_insert_rowid() is scoped to a single
# database connection, and each sqlite3 invocation opens a fresh one, so it
# would always report 0 here rather than the id of the row just inserted.
SUITE_ID=$(sqlite3 "$DB_FILE" "SELECT id FROM test_suites WHERE name='$SUITE'")
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
# Insert test run
|
|
43
|
+
sqlite3 "$DB_FILE" << EOFSQL
|
|
44
|
+
INSERT INTO test_runs (
|
|
45
|
+
suite_id, git_commit, git_branch,
|
|
46
|
+
total_tests, passed, failed, skipped,
|
|
47
|
+
duration_seconds, success_rate
|
|
48
|
+
) VALUES (
|
|
49
|
+
$SUITE_ID, '$COMMIT', '$BRANCH',
|
|
50
|
+
$TOTAL, $PASSED, $FAILED, $SKIPPED,
|
|
51
|
+
$DURATION, $SUCCESS_RATE
|
|
52
|
+
);
|
|
53
|
+
EOFSQL
|
|
54
|
+
|
|
55
|
+
# Report the id of the stored run. last_insert_rowid() is scoped to a single
# database connection and would always print 0 from a fresh sqlite3 process;
# test_runs.id is AUTOINCREMENT, so the highest id is the newest row.
echo "✅ Benchmark stored (run_id: $(sqlite3 "$DB_FILE" "SELECT MAX(id) FROM test_runs"))"
|