claude-flow-novice 2.14.6 → 2.14.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.claude/commands/cfn/run-tests.md +119 -0
  2. package/.claude/hooks/cfn-post-edit.config.json +11 -4
  3. package/.claude/skills/cfn-agent-selector/SKILL.md +3 -2
  4. package/.claude/skills/cfn-loop-orchestration/orchestrate.sh +1 -1
  5. package/.claude/skills/cfn-product-owner-decision/execute-decision.sh +207 -113
  6. package/.claude/skills/cfn-product-owner-decision/test-backlog-integration.sh +148 -0
  7. package/.claude/skills/cfn-redis-coordination/report-completion.sh +86 -0
  8. package/.claude/skills/cfn-redis-coordination/store-context.sh +34 -0
  9. package/.claude/skills/pre-edit-backup/backup.sh +130 -0
  10. package/.claude/skills/pre-edit-backup/cleanup.sh +155 -0
  11. package/.claude/skills/pre-edit-backup/restore.sh +128 -0
  12. package/.claude/skills/pre-edit-backup/revert-file.sh +168 -0
  13. package/claude-assets/agents/README-AGENT_LIFECYCLE.md +522 -0
  14. package/claude-assets/agents/cfn-dev-team/coordinators/cfn-v3-coordinator.md +13 -8
  15. package/claude-assets/agents/cfn-dev-team/product-owners/product-owner.md +1 -1
  16. package/claude-assets/agents/cfn-dev-team/test-agent.md +141 -0
  17. package/claude-assets/agents/cfn-dev-team/utility/agent-builder.md +35 -0
  18. package/claude-assets/commands/cfn/run-tests.md +119 -0
  19. package/claude-assets/hooks/cfn-post-edit.config.json +11 -4
  20. package/claude-assets/skills/agent-name-validation/README.md +28 -0
  21. package/claude-assets/skills/agent-name-validation/SKILL.md +168 -0
  22. package/claude-assets/skills/agent-name-validation/validate-agent-names.sh +47 -0
  23. package/claude-assets/skills/cfn-agent-selector/SKILL.md +3 -2
  24. package/claude-assets/skills/cfn-loop-orchestration/orchestrate.sh +1 -1
  25. package/claude-assets/skills/cfn-product-owner-decision/execute-decision.sh +207 -113
  26. package/claude-assets/skills/cfn-product-owner-decision/test-backlog-integration.sh +148 -0
  27. package/claude-assets/skills/cfn-redis-coordination/report-completion.sh +86 -0
  28. package/claude-assets/skills/cfn-redis-coordination/store-context.sh +34 -0
  29. package/claude-assets/skills/cfn-task-classifier/SKILL.md +1 -1
  30. package/claude-assets/skills/cfn-test-runner/SKILL.md +288 -0
  31. package/claude-assets/skills/cfn-test-runner/detect-regressions.sh +55 -0
  32. package/claude-assets/skills/cfn-test-runner/init-benchmark-db.sh +48 -0
  33. package/claude-assets/skills/cfn-test-runner/run-all-tests.sh +222 -0
  34. package/claude-assets/skills/cfn-test-runner/store-benchmarks.sh +55 -0
  35. package/claude-assets/skills/cfn-test-runner/validate-redis-keys.sh +143 -0
  36. package/claude-assets/skills/hook-pipeline/bash-dependency-checker.sh +89 -0
  37. package/claude-assets/skills/hook-pipeline/bash-pipe-safety.sh +69 -0
  38. package/claude-assets/skills/hook-pipeline/enforce-lf.sh +36 -0
  39. package/claude-assets/skills/hook-pipeline/js-promise-safety.sh +110 -0
  40. package/claude-assets/skills/hook-pipeline/python-async-safety.py +124 -0
  41. package/claude-assets/skills/hook-pipeline/python-import-checker.py +114 -0
  42. package/claude-assets/skills/hook-pipeline/python-subprocess-safety.py +77 -0
  43. package/claude-assets/skills/hook-pipeline/rust-command-safety.sh +38 -0
  44. package/claude-assets/skills/hook-pipeline/rust-dependency-checker.sh +50 -0
  45. package/claude-assets/skills/hook-pipeline/rust-future-safety.sh +50 -0
  46. package/dist/agents/agent-loader.js +146 -165
  47. package/dist/agents/agent-loader.js.map +1 -1
  48. package/dist/cli/agent-executor.js +1 -1
  49. package/dist/cli/agent-executor.js.map +1 -1
  50. package/dist/cli/agent-prompt-builder.js +40 -30
  51. package/dist/cli/agent-prompt-builder.js.map +1 -1
  52. package/package.json +2 -1
  53. package/scripts/init-project.js +4 -1
  54. package/scripts/switch-api.sh +7 -7
  55. package/claude-assets/agents/cfn-dev-team/developers/dev-backend-api.md +0 -147
  56. package/claude-assets/agents/cfn-dev-team/developers/frontend/spec-mobile-react-native.md +0 -199
  57. package/claude-assets/agents/cfn-dev-team/documentation/docs-api-openapi.md +0 -98
  58. package/claude-assets/agents/cfn-dev-team/product-owners/product-owner-agent.md +0 -155
  59. package/claude-assets/agents/cfn-dev-team/reviewers/quality/analyze-code-quality.md +0 -141
  60. /package/claude-assets/agents/cfn-dev-team/developers/{backend-dev.md → backend-developer.md} +0 -0
  61. /package/claude-assets/agents/cfn-dev-team/documentation/{api-docs.md → api-documentation.md} +0 -0
  62. /package/claude-assets/agents/cfn-dev-team/documentation/{specification.md → specification-agent.md} +0 -0
  63. /package/claude-assets/agents/cfn-dev-team/reviewers/quality/{code-analyzer.md → code-quality-validator.md} +0 -0
  64. /package/claude-assets/agents/cfn-dev-team/testers/e2e/{playwright-agent.md → playwright-tester.md} +0 -0
  65. /package/claude-assets/agents/cfn-dev-team/testers/unit/{tdd-london-swarm.md → tdd-london-unit-swarm.md} +0 -0
  66. /package/claude-assets/agents/cfn-dev-team/testers/validation/{production-validator.md → validation-production-validator.md} +0 -0
@@ -0,0 +1,288 @@
1
+ # CFN Test Runner Skill
2
+
3
+ **Version:** 1.0.0
4
+ **Status:** Production
5
+ **Purpose:** Unified test execution with benchmarking and regression detection
6
+
7
+ ---
8
+
9
+ ## Overview
10
+
11
+ Provides comprehensive test execution across all CFN test suites:
12
+ - **Hello World Tests** (Layer 0-7)
13
+ - **CFN Loop E2E Tests** (9 integration tests)
14
+ - **Benchmark Tracking** (SQLite storage)
15
+ - **Regression Detection** (automated alerts)
16
+
17
+ **Key Features:**
18
+ - Single command test execution
19
+ - Historical benchmark comparison
20
+ - Performance regression alerts
21
+ - Git-aware baseline tracking
22
+ - Parallel test execution
23
+ - Comprehensive reporting
24
+
25
+ ---
26
+
27
+ ## Architecture
28
+
29
+ ### Skill Components
30
+
31
+ ```
32
+ .claude/skills/cfn-test-runner/
33
+ ├── SKILL.md # This file
34
+ ├── run-all-tests.sh # Main test runner
35
+ ├── store-benchmarks.sh # SQLite benchmark storage
36
+ ├── detect-regressions.sh # Regression analysis
37
+ ├── init-benchmark-db.sh # Database initialization
38
+ └── generate-report.sh # Test report generation
39
+ ```
40
+
41
+ ---
42
+
43
+ ## Usage
44
+
45
+ ### Run All Tests
46
+
47
+ ```bash
48
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh \
49
+ --suite all \
50
+ --benchmark \
51
+ --detect-regressions
52
+ ```
53
+
54
+ ### Run Specific Suite
55
+
56
+ ```bash
57
+ # Hello World only
58
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh --suite hello-world
59
+
60
+ # CFN E2E only
61
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh --suite cfn-e2e
62
+
63
+ # Both
64
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh --suite all
65
+ ```
66
+
67
+ ### With Regression Detection
68
+
69
+ ```bash
70
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh \
71
+ --suite all \
72
+ --benchmark \
73
+ --detect-regressions \
74
+ --threshold 0.10
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Parameters
80
+
81
+ ### run-all-tests.sh
82
+
83
+ | Parameter | Required | Description | Default |
84
+ |-----------|----------|-------------|---------|
85
+ | `--suite` | No | Test suite: `all`, `hello-world`, `cfn-e2e` | `all` |
86
+ | `--benchmark` | No | Store results in SQLite | `false` |
87
+ | `--detect-regressions` | No | Run regression analysis | `false` |
88
+ | `--threshold` | No | Regression threshold as a fraction of the success rate (`0.10` = a drop of 10 percentage points) | `0.10` |
89
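+ | `--parallel` | No | Run tests in parallel (the flag is parsed, but `run-all-tests.sh` currently runs the suites sequentially) | `false` |
90
+ | `--output` | No | Output format: `text`, `json`, `html` (the flag is parsed, but `run-all-tests.sh` currently emits text only) | `text` |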
91
+
92
+ ---
93
+
94
+ ## Test Suites
95
+
96
+ ### Hello World (Layer 0-7)
97
+
98
+ **Purpose:** Validate agent spawning patterns and coordination
99
+
100
+ | Layer | Test | Duration | Critical |
101
+ |-------|------|----------|----------|
102
+ | 0 | Tool Validation | 60s | ✅ |
103
+ | 1-4 | [Future] | - | - |
104
+ | 5 | Coordinator Spawning | 120s | ✅ |
105
+ | 6 | Review Handoff | 180s | ✅ |
106
+ | 7 | Error Retry | 150s | ✅ |
107
+
108
+ **Location:** `tests/hello-world/`
109
+
110
+ ### CFN Loop E2E (9 Tests)
111
+
112
+ **Purpose:** Validate full CFN Loop workflow
113
+
114
+ | Test | Component | Duration | Critical |
115
+ |------|-----------|----------|----------|
116
+ | 1 | Coordinator → Orchestrator | 30s | ✅ |
117
+ | 2 | Loop 3 → Gate Check | 60s | ✅ |
118
+ | 3 | Gate Pass → Loop 2 | 30s | ⚠️ |
119
+ | 4 | Loop 2 → Product Owner | 90s | ✅ |
120
+ | 5 | Product Owner Decision | 60s | ✅ |
121
+ | 6 | Iteration Cycle | 120s | ✅ |
122
+ | 7 | Redis Key Structure | 10s | ✅ |
123
+ | 8 | Error Recovery | 60s | ⚠️ |
124
+ | 9 | Cleanup | 10s | ⚠️ |
125
+
126
+ **Location:** `tests/cfn-v3/test-e2e-cfn-loop.sh`
127
+
128
+ ---
129
+
130
+ ## Benchmark Storage
131
+
132
+ ### SQLite Schema
133
+
134
+ ```sql
135
+ test_suites -- Suite definitions
136
+ test_runs -- Execution records
137
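+ test_cases -- Individual tests (not created by init-benchmark-db.sh)
138
+ test_results -- Per-test outcomes (not created by init-benchmark-db.sh)
139
+ performance_metrics -- Performance data (not created by init-benchmark-db.sh)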
140
+ regression_alerts -- Detected regressions
141
+ ```
142
+
143
+ **Database:** `.artifacts/test-benchmarks.db`
144
+
145
+ ### Stored Metrics
146
+
147
+ **Test-Level:**
148
+ - Duration (ms)
149
+ - Status (passed/failed/skipped)
150
+ - Assertion count
151
+ - Error messages
152
+
153
+ **Suite-Level:**
154
+ - Total duration (seconds)
155
+ - Success rate (stored as a fraction between 0 and 1)
156
+ - Pass/fail/skip counts
157
+ - Git commit/branch
158
+
159
+ **Performance:**
160
+ - Agent spawn time
161
+ - Redis operation latency
162
+ - File I/O duration
163
+ - Memory usage
164
+
165
+ ---
166
+
167
+ ## Regression Detection
168
+
169
+ ### Automatic Alerts
170
+
171
+ ```bash
172
+ # Critical: Test started failing
173
+ alert_type: test_failure
174
+ severity: critical
175
+ message: "TEST 5: Product Owner Decision FAILED (was passing)"
176
+
177
+ # Warning: Performance degraded
178
+ alert_type: performance_regression
179
+ severity: warning
180
+ message: "Loop 3 spawn time increased 25% (baseline: 5s, current: 6.25s)"
181
+
182
+ # Warning: Success rate dropped
183
+ alert_type: success_rate_drop
184
+ severity: warning
185
+ message: "Suite success rate: 88% → 77% (threshold: 10%)"
186
+ ```
187
+
188
+ ### Threshold Configuration
189
+
190
+ ```bash
191
+ # Default: 10% regression threshold
192
+ --threshold 0.10
193
+
194
+ # Strict: 5% threshold
195
+ --threshold 0.05
196
+
197
+ # Relaxed: 20% threshold
198
+ --threshold 0.20
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Output Formats
204
+
205
+ ### Text (Console)
206
+
207
+ ```
208
+ ==========================================
209
+ CFN Test Suite Results
210
+ ==========================================
211
+
212
+ Suite: Hello World
213
+ Layer 0: ✅ PASSED (58.3s)
214
+ Layer 5: ✅ PASSED (115.7s)
215
+ Layer 6: ✅ PASSED (172.4s)
216
+ Layer 7: ✅ PASSED (148.2s)
217
+
218
+ Suite: CFN E2E
219
+ TEST 1: ✅ PASSED (28.1s)
220
+ TEST 2: ✅ PASSED (54.6s)
221
+ TEST 3: ⚠️ SKIPPED
222
+ TEST 4: ✅ PASSED (87.3s)
223
+ TEST 5: ✅ PASSED (59.2s)
224
+ TEST 6: ✅ PASSED (118.9s)
225
+ TEST 7: ✅ PASSED (9.4s)
226
+
227
+ Total: 11 tests, 9 passed, 0 failed, 2 skipped
228
+ Duration: 851.1s
229
+ Success Rate: 81.8%
230
+
231
+ Regressions Detected: 0
232
+ ==========================================
233
+ ```
234
+
235
+ ### JSON (API/CI)
236
+
237
+ ```json
238
+ {
239
+ "timestamp": "2025-11-04T02:00:00Z",
240
+ "git_commit": "abc123",
241
+ "git_branch": "main",
242
+ "suites": {
243
+ "hello-world": {
244
+ "total": 4,
245
+ "passed": 4,
246
+ "failed": 0,
247
+ "skipped": 0,
248
+ "duration": 494.6,
249
+ "success_rate": 1.0
250
+ },
251
+ "cfn-e2e": {
252
+ "total": 9,
253
+ "passed": 7,
254
+ "failed": 0,
255
+ "skipped": 2,
256
+ "duration": 356.5,
257
+ "success_rate": 0.778
258
+ }
259
+ },
260
+ "regressions": [],
261
+ "baseline_comparison": {
262
+ "duration_change": "+2.3%",
263
+ "success_rate_change": "+0.0%"
264
+ }
265
+ }
266
+ ```
267
+
268
+ ### HTML (Report)
269
+
270
+ Generates interactive HTML report with:
271
+ - Trend graphs (Chart.js)
272
+ - Regression highlights
273
+ - Test history
274
+ - Performance metrics
275
+
276
+ ---
277
+
278
+ ## Baseline Management
279
+
280
+ ### Establish Baseline
281
+
282
+ ```bash
283
+ # First run establishes baseline
284
+ ./.claude/skills/cfn-test-runner/run-all-tests.sh --suite all --benchmark
285
+
286
+ # Mark as baseline
287
+ sqlite3 .artifacts/test-benchmarks.db << EOF
288
+ UPDATE test_runs SET environment = 'baseline' WHERE id = (SELECT MAX(id) FROM test_runs);
@@ -0,0 +1,55 @@
#!/bin/bash
# Detect test regressions
#
# Compares the success rate of the newest row in test_runs with the average
# success rate of up to 10 runs that precede it. When the drop is larger than
# the threshold, a 'success_rate_drop' row is inserted into regression_alerts.
#
# Usage: detect-regressions.sh [--threshold FRACTION]
#   --threshold  Largest tolerated drop (default 0.10). The drop is multiplied
#                by 100 only for display, so rates and threshold share one
#                fractional scale.
#
# Exit status:
#   0  no regression, or nothing to compare against yet
#   1  regression detected (alert row inserted)
#   2  invalid command line
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"

THRESHOLD=0.10

while [[ $# -gt 0 ]]; do
  case "$1" in
    --threshold)
      # Without this guard a trailing "--threshold" dies on "$2" under `set -u`
      # with an opaque "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "Error: --threshold requires a value" >&2
        exit 2
      fi
      THRESHOLD="$2"
      shift 2
      ;;
    *) shift ;; # unknown arguments are ignored
  esac
done

# The threshold is handed to awk as a number; reject anything else up front.
NUMBER_RE='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
if ! [[ "$THRESHOLD" =~ $NUMBER_RE ]]; then
  echo "Error: --threshold must be a non-negative number (got '$THRESHOLD')" >&2
  exit 2
fi

# sqlite3 would silently create an empty database here and then fail with
# "no such table"; treat a missing file as "no history yet" instead.
if [[ ! -f "$DB_FILE" ]]; then
  echo "No test runs found"
  exit 0
fi

# Get latest run. CURRENT_TIMESTAMP has one-second resolution, so id breaks ties.
LATEST_RUN=$(sqlite3 "$DB_FILE" \
  "SELECT id FROM test_runs ORDER BY run_timestamp DESC, id DESC LIMIT 1")

if [[ -z "$LATEST_RUN" ]]; then
  echo "No test runs found"
  exit 0
fi

# Get baseline (average of last 10 runs excluding latest).
# NOTE(review): runs of every suite are averaged together; confirm whether the
# baseline should be restricted to the latest run's suite_id.
BASELINE_SUCCESS_RATE=$(sqlite3 "$DB_FILE" "
  SELECT AVG(success_rate) FROM (
    SELECT success_rate FROM test_runs
    WHERE id != $LATEST_RUN
    ORDER BY run_timestamp DESC, id DESC
    LIMIT 10
  )
")

LATEST_SUCCESS_RATE=$(sqlite3 "$DB_FILE" \
  "SELECT success_rate FROM test_runs WHERE id = $LATEST_RUN")

# AVG() over zero rows (first recorded run) and a NULL success_rate both come
# back as an empty string; there is nothing to compare in either case.
if [[ -z "$BASELINE_SUCCESS_RATE" || -z "$LATEST_SUCCESS_RATE" ]]; then
  echo "✅ No regressions detected (no baseline to compare against)"
  exit 0
fi

# Check for regression.
# The values are passed with -v rather than spliced into the program text, and
# the comparison is evaluated into a variable first: in awk,
# `print (a - b) > t` is output redirection to a file named after t, not a
# comparison.
REGRESSION=$(awk \
  -v base="$BASELINE_SUCCESS_RATE" \
  -v cur="$LATEST_SUCCESS_RATE" \
  -v thr="$THRESHOLD" \
  'BEGIN { hit = ((base - cur) > thr) ? 1 : 0; print hit }')

if [[ "$REGRESSION" == "1" ]]; then
  DIFF=$(awk \
    -v base="$BASELINE_SUCCESS_RATE" \
    -v cur="$LATEST_SUCCESS_RATE" \
    'BEGIN { printf "%.1f", (base - cur) * 100 }')

  # All interpolated values are numbers read back from the database or
  # computed above, so the statement cannot be broken by quoting.
  sqlite3 "$DB_FILE" << EOFSQL
INSERT INTO regression_alerts (run_id, alert_type, severity, message)
VALUES ($LATEST_RUN, 'success_rate_drop', 'warning',
'Success rate dropped ${DIFF}% (baseline: ${BASELINE_SUCCESS_RATE}, current: ${LATEST_SUCCESS_RATE})');
EOFSQL

  echo "⚠️ Regression detected: Success rate dropped ${DIFF}%"
  exit 1
fi

echo "✅ No regressions detected"
exit 0
@@ -0,0 +1,48 @@
#!/bin/bash
# Initialize benchmark database
#
# Creates the .artifacts directory under the project root if needed and
# applies the benchmark schema to test-benchmarks.db. Every statement uses
# IF NOT EXISTS, so the script can be run against an existing database.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"

# Directory that holds the database file ($PROJECT_ROOT/.artifacts).
ARTIFACTS_DIR="${DB_FILE%/*}"

# Print the schema DDL on stdout. The heredoc delimiter is quoted so nothing
# inside the SQL is expanded by the shell.
emit_schema() {
  cat << 'EOFSQL'
CREATE TABLE IF NOT EXISTS test_suites (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  name TEXT NOT NULL UNIQUE,
  description TEXT,
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS test_runs (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  suite_id INTEGER NOT NULL,
  run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  git_commit TEXT,
  git_branch TEXT,
  total_tests INTEGER NOT NULL,
  passed INTEGER NOT NULL,
  failed INTEGER NOT NULL,
  skipped INTEGER NOT NULL,
  duration_seconds REAL NOT NULL,
  success_rate REAL,
  FOREIGN KEY (suite_id) REFERENCES test_suites(id)
);

CREATE TABLE IF NOT EXISTS regression_alerts (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  run_id INTEGER NOT NULL,
  alert_type TEXT NOT NULL,
  severity TEXT NOT NULL,
  message TEXT NOT NULL,
  acknowledged INTEGER DEFAULT 0,
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  FOREIGN KEY (run_id) REFERENCES test_runs(id)
);

CREATE INDEX IF NOT EXISTS idx_runs_timestamp ON test_runs(run_timestamp DESC);
EOFSQL
}

mkdir -p "$ARTIFACTS_DIR"

# sqlite3 reads the statements from stdin.
emit_schema | sqlite3 "$DB_FILE"

printf '%s\n' "✅ Benchmark database initialized: $DB_FILE"
@@ -0,0 +1,222 @@
#!/bin/bash
# CFN Unified Test Runner
# Version: 1.0.0
# Purpose: Run all test suites with benchmarking and regression detection
#
# Usage:
#   run-all-tests.sh [--suite all|hello-world|cfn-e2e] [--benchmark]
#                    [--detect-regressions] [--threshold FRACTION]
#                    [--parallel] [--output text|json|html]
#
# Environment:
#   CFN_TEST_LOG_DIR  Directory that receives the per-test log files
#                     (hw-layer<N>.log, e2e-output.log). Default: /tmp
#
# Exit status:
#   0  no test failed
#   1  at least one test failed, or the command line was invalid

set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"
LOG_DIR="${CFN_TEST_LOG_DIR:-/tmp}"

# Parameters
SUITE="all"
BENCHMARK=false
DETECT_REGRESSIONS=false
THRESHOLD=0.10
PARALLEL=false
OUTPUT_FORMAT="text"

# Abort with a usage error when a value-taking option is the last argument.
# Without this, reading "$2" under `set -u` dies with "unbound variable".
# Arguments: $1 - option name, $2 - number of remaining arguments ($#)
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Missing value for parameter: $1" >&2
    exit 1
  fi
}

# Print how many lines of file $2 contain the fixed text $1.
# grep -c already prints "0" when nothing matches but exits 1; appending
# `|| echo 0` would therefore produce the two-line value "0\n0", so the exit
# status is masked instead.
count_matches() {
  local hits
  hits=$(grep -c -F -- "$1" "$2" 2>/dev/null || true)
  echo "${hits:-0}"
}

# Run one Hello World layer script under a timeout, write its output to
# $LOG_DIR/hw-layer<N>.log and bump HW_PASSED or HW_FAILED.
# Arguments: $1 - layer number, $2 - timeout in seconds, $3 - script file name
run_hw_layer() {
  local layer="$1" limit="$2" script="$3"

  if timeout "$limit" node "$PROJECT_ROOT/tests/hello-world/$script" \
    > "$LOG_DIR/hw-layer${layer}.log" 2>&1; then
    echo -e "${GREEN}✅ Layer ${layer}: PASSED${NC}"
    # Plain assignment instead of ((HW_PASSED++)): the arithmetic command
    # returns status 1 when the old value is 0, which aborts under `set -e`.
    HW_PASSED=$((HW_PASSED + 1))
  else
    echo -e "${RED}❌ Layer ${layer}: FAILED${NC}"
    HW_FAILED=$((HW_FAILED + 1))
  fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --suite)
      require_value "$1" "$#"
      SUITE="$2"
      shift 2
      ;;
    --benchmark)
      BENCHMARK=true
      shift
      ;;
    --detect-regressions)
      DETECT_REGRESSIONS=true
      shift
      ;;
    --threshold)
      require_value "$1" "$#"
      THRESHOLD="$2"
      shift 2
      ;;
    --parallel)
      PARALLEL=true
      shift
      ;;
    --output)
      require_value "$1" "$#"
      OUTPUT_FORMAT="$2"
      shift 2
      ;;
    *)
      echo "Unknown parameter: $1"
      exit 1
      ;;
  esac
done

# An unrecognised suite would run nothing and then divide by zero below.
case "$SUITE" in
  all | hello-world | cfn-e2e) ;;
  *)
    echo "Unknown suite: $SUITE (expected: all, hello-world, cfn-e2e)" >&2
    exit 1
    ;;
esac

# Both options are accepted, but nothing below reads them; say so instead of
# silently ignoring the request.
if [[ "$PARALLEL" == true ]]; then
  echo -e "${YELLOW}[WARN]${NC} --parallel is not implemented; running sequentially" >&2
fi
if [[ "$OUTPUT_FORMAT" != "text" ]]; then
  echo -e "${YELLOW}[WARN]${NC} --output $OUTPUT_FORMAT is not implemented; using text" >&2
fi

mkdir -p "$LOG_DIR"

# Initialize benchmark database if needed
if [[ "$BENCHMARK" == true && ! -f "$DB_FILE" ]]; then
  echo -e "${BLUE}[INFO]${NC} Initializing benchmark database..."
  "$SCRIPT_DIR/init-benchmark-db.sh"
fi

# Get git info
GIT_COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
GIT_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
# `git branch --show-current` succeeds with empty output on a detached HEAD.
[[ -n "$GIT_BRANCH" ]] || GIT_BRANCH="unknown"

# Start test run
START_TIME=$(date +%s)
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0
SKIPPED_TESTS=0

echo -e "${GREEN}=========================================="
echo "CFN Test Suite Runner"
echo -e "==========================================${NC}"
echo "Suite: $SUITE"
echo "Benchmark: $BENCHMARK"
echo "Detect Regressions: $DETECT_REGRESSIONS"
echo "Git: $GIT_BRANCH @ $GIT_COMMIT"
echo ""

# Run Hello World tests
if [[ "$SUITE" == "all" || "$SUITE" == "hello-world" ]]; then
  echo -e "${BLUE}[INFO]${NC} Running Hello World tests..."

  HW_START=$(date +%s)
  HW_PASSED=0
  HW_FAILED=0
  HW_SKIPPED=0

  run_hw_layer 0 90 "layer0-tool-validation.js"
  run_hw_layer 5 180 "layer5-coordinator-spawning.js"
  run_hw_layer 6 240 "layer6-coordinator-review.js"
  run_hw_layer 7 200 "layer7-coordinator-error-retry.js"

  HW_END=$(date +%s)
  HW_DURATION=$((HW_END - HW_START))

  echo -e "${BLUE}Hello World: ${HW_PASSED} passed, ${HW_FAILED} failed, ${HW_SKIPPED} skipped (${HW_DURATION}s)${NC}"
  echo ""

  TOTAL_TESTS=$((TOTAL_TESTS + HW_PASSED + HW_FAILED + HW_SKIPPED))
  PASSED_TESTS=$((PASSED_TESTS + HW_PASSED))
  FAILED_TESTS=$((FAILED_TESTS + HW_FAILED))
  SKIPPED_TESTS=$((SKIPPED_TESTS + HW_SKIPPED))
fi

# Run CFN E2E tests
if [[ "$SUITE" == "all" || "$SUITE" == "cfn-e2e" ]]; then
  echo -e "${BLUE}[INFO]${NC} Running CFN E2E tests..."

  E2E_START=$(date +%s)
  E2E_LOG="$LOG_DIR/e2e-output.log"

  # Run E2E test and capture its exit status without tripping `set -e`.
  E2E_EXIT=0
  timeout 600 bash "$PROJECT_ROOT/tests/cfn-v3/test-e2e-cfn-loop.sh" \
    > "$E2E_LOG" 2>&1 || E2E_EXIT=$?

  # Parse results: one test per log line containing the marker text.
  # NOTE(review): a line containing several markers is counted once per
  # marker; confirm against the E2E script's output format.
  E2E_PASSED=$(count_matches "PASS" "$E2E_LOG")
  E2E_FAILED=$(count_matches "FAIL" "$E2E_LOG")
  E2E_SKIPPED=$(count_matches "SKIPPED" "$E2E_LOG")

  # A timeout or crash may leave no FAIL line in the log. Record one failure
  # so the run is not summarised (and exited) as green.
  if [[ "$E2E_EXIT" -ne 0 && "$E2E_FAILED" -eq 0 ]]; then
    E2E_FAILED=1
  fi

  if [[ "$E2E_EXIT" -eq 0 ]]; then
    echo -e "${GREEN}✅ CFN E2E: PASSED${NC}"
  else
    echo -e "${RED}❌ CFN E2E: FAILED (exit $E2E_EXIT)${NC}"
  fi

  E2E_END=$(date +%s)
  E2E_DURATION=$((E2E_END - E2E_START))

  echo -e "${BLUE}CFN E2E: ${E2E_PASSED} passed, ${E2E_FAILED} failed, ${E2E_SKIPPED} skipped (${E2E_DURATION}s)${NC}"
  echo ""

  TOTAL_TESTS=$((TOTAL_TESTS + E2E_PASSED + E2E_FAILED + E2E_SKIPPED))
  PASSED_TESTS=$((PASSED_TESTS + E2E_PASSED))
  FAILED_TESTS=$((FAILED_TESTS + E2E_FAILED))
  SKIPPED_TESTS=$((SKIPPED_TESTS + E2E_SKIPPED))
fi

END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))

# awk aborts on division by zero, so an empty run is reported as 0.0%.
if [[ "$TOTAL_TESTS" -gt 0 ]]; then
  SUCCESS_RATE=$(awk -v passed="$PASSED_TESTS" -v total="$TOTAL_TESTS" \
    'BEGIN { printf "%.1f", (passed / total) * 100 }')
else
  SUCCESS_RATE="0.0"
fi

# Summary
echo -e "${GREEN}=========================================="
echo "Test Summary"
echo -e "==========================================${NC}"
echo "Total: $TOTAL_TESTS tests"
echo "Passed: $PASSED_TESTS"
echo "Failed: $FAILED_TESTS"
echo "Skipped: $SKIPPED_TESTS"
echo "Duration: ${TOTAL_DURATION}s"
echo "Success Rate: ${SUCCESS_RATE}%"
echo ""

# Store benchmarks
if [[ "$BENCHMARK" == true ]]; then
  if [[ "$TOTAL_TESTS" -gt 0 ]]; then
    echo -e "${BLUE}[INFO]${NC} Storing benchmarks..."
    "$SCRIPT_DIR/store-benchmarks.sh" \
      --suite "$SUITE" \
      --total "$TOTAL_TESTS" \
      --passed "$PASSED_TESTS" \
      --failed "$FAILED_TESTS" \
      --skipped "$SKIPPED_TESTS" \
      --duration "$TOTAL_DURATION" \
      --commit "$GIT_COMMIT" \
      --branch "$GIT_BRANCH"
  else
    # A run with no counted tests has no meaningful success rate to record.
    echo -e "${YELLOW}[WARN]${NC} No tests were counted; skipping benchmark storage" >&2
  fi
fi

# Detect regressions (reported as a warning; does not change the exit status)
if [[ "$DETECT_REGRESSIONS" == true ]]; then
  echo -e "${BLUE}[INFO]${NC} Detecting regressions..."
  if ! "$SCRIPT_DIR/detect-regressions.sh" --threshold "$THRESHOLD"; then
    echo -e "${YELLOW}⚠️ Regressions detected!${NC}"
  fi
fi

# Exit with failure if tests failed
if [[ "$FAILED_TESTS" -gt 0 ]]; then
  exit 1
fi
@@ -0,0 +1,55 @@
#!/bin/bash
# Store test benchmarks in SQLite
#
# Records one row in test_runs for the given suite, creating the suite's row
# in test_suites on first use.
#
# Usage:
#   store-benchmarks.sh --suite NAME --total N --passed N --failed N \
#                       --skipped N --duration SECONDS \
#                       [--commit SHA] [--branch NAME]
#
# Exit status: 0 on success, 1 on invalid arguments or a missing database.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
DB_FILE="$PROJECT_ROOT/.artifacts/test-benchmarks.db"

# Print an error on stderr and exit 1.
die() {
  echo "Error: $*" >&2
  exit 1
}

# Abort when a value-taking option is the last argument; reading "$2" under
# `set -u` would otherwise die with "unbound variable".
# Arguments: $1 - option name, $2 - number of remaining arguments ($#)
require_value() {
  [[ "$2" -ge 2 ]] || die "$1 requires a value"
}

# Print $1 with every single quote doubled, i.e. escaped for use inside a
# single-quoted SQL string literal.
sql_quote() {
  local q="'"
  printf '%s' "${1//$q/$q$q}"
}

# Parse arguments
SUITE=""
TOTAL=0
PASSED=0
FAILED=0
SKIPPED=0
DURATION=0
COMMIT=""
BRANCH=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --suite) require_value "$1" "$#"; SUITE="$2"; shift 2 ;;
    --total) require_value "$1" "$#"; TOTAL="$2"; shift 2 ;;
    --passed) require_value "$1" "$#"; PASSED="$2"; shift 2 ;;
    --failed) require_value "$1" "$#"; FAILED="$2"; shift 2 ;;
    --skipped) require_value "$1" "$#"; SKIPPED="$2"; shift 2 ;;
    --duration) require_value "$1" "$#"; DURATION="$2"; shift 2 ;;
    --commit) require_value "$1" "$#"; COMMIT="$2"; shift 2 ;;
    --branch) require_value "$1" "$#"; BRANCH="$2"; shift 2 ;;
    *) shift ;; # unknown arguments are ignored
  esac
done

# The counters and the duration are written into SQL unquoted, so they must
# be plain numbers.
UINT_RE='^[0-9]+$'
NUMBER_RE='^[0-9]+(\.[0-9]+)?$'
for name in TOTAL PASSED FAILED SKIPPED; do
  [[ "${!name}" =~ $UINT_RE ]] ||
    die "--${name,,} must be a non-negative integer (got '${!name}')"
done
[[ "$DURATION" =~ $NUMBER_RE ]] ||
  die "--duration must be a non-negative number (got '$DURATION')"

# sqlite3 would create an empty database and then fail with "no such table".
[[ -f "$DB_FILE" ]] ||
  die "benchmark database not found: $DB_FILE (run init-benchmark-db.sh first)"

# awk aborts on division by zero; an empty run is stored with rate 0 so that
# it shows up as a drop instead of crashing the script.
if [[ "$TOTAL" -gt 0 ]]; then
  SUCCESS_RATE=$(awk -v passed="$PASSED" -v total="$TOTAL" \
    'BEGIN { printf "%.4f", (passed / total) }')
else
  SUCCESS_RATE="0.0000"
fi

SUITE_SQL=$(sql_quote "$SUITE")
COMMIT_SQL=$(sql_quote "$COMMIT")
BRANCH_SQL=$(sql_quote "$BRANCH")

# Get or create suite ID.
# Both statements run in ONE sqlite3 process. last_insert_rowid() is tracked
# per connection, so asking for it from a second process always yields 0;
# selecting by the UNIQUE name avoids the problem entirely.
SUITE_ID=$(sqlite3 "$DB_FILE" "
  INSERT OR IGNORE INTO test_suites (name) VALUES ('$SUITE_SQL');
  SELECT id FROM test_suites WHERE name = '$SUITE_SQL';
")
[[ -n "$SUITE_ID" ]] || die "could not resolve suite id for '$SUITE'"

# Insert test run and read its id back on the same connection.
RUN_ID=$(
  sqlite3 "$DB_FILE" << EOFSQL
INSERT INTO test_runs (
  suite_id, git_commit, git_branch,
  total_tests, passed, failed, skipped,
  duration_seconds, success_rate
) VALUES (
  $SUITE_ID, '$COMMIT_SQL', '$BRANCH_SQL',
  $TOTAL, $PASSED, $FAILED, $SKIPPED,
  $DURATION, $SUCCESS_RATE
);
SELECT last_insert_rowid();
EOFSQL
)

echo "✅ Benchmark stored (run_id: $RUN_ID)"