telos-framework 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/behavioral-transformation-agent.md +144 -0
- package/.claude/agents/command-system-agent.md +335 -0
- package/.claude/agents/completion-gate.md +71 -0
- package/.claude/agents/component-implementation-agent.md +174 -0
- package/.claude/agents/devops-agent.md +128 -0
- package/.claude/agents/dynamic-agent-creator.md +103 -0
- package/.claude/agents/enhanced-project-manager-agent.md +145 -0
- package/.claude/agents/enhanced-quality-gate.md +54 -0
- package/.claude/agents/feature-implementation-agent.md +148 -0
- package/.claude/agents/functional-testing-agent.md +51 -0
- package/.claude/agents/hook-integration-agent.md +204 -0
- package/.claude/agents/infrastructure-implementation-agent.md +175 -0
- package/.claude/agents/lib/research-analyzer.js +470 -0
- package/.claude/agents/metrics-collection-agent.md +374 -0
- package/.claude/agents/npx-package-agent.md +246 -0
- package/.claude/agents/polish-implementation-agent.md +151 -0
- package/.claude/agents/prd-agent.md +76 -0
- package/.claude/agents/prd-mvp.md +101 -0
- package/.claude/agents/prd-research-agent.md +482 -0
- package/.claude/agents/quality-agent.md +128 -0
- package/.claude/agents/readiness-gate.md +104 -0
- package/.claude/agents/research-agent.md +173 -0
- package/.claude/agents/routing-agent.md +108 -0
- package/.claude/agents/task-checker.md +163 -0
- package/.claude/agents/task-executor.md +107 -0
- package/.claude/agents/task-orchestrator.md +343 -0
- package/.claude/agents/tdd-validation-agent.md +187 -0
- package/.claude/agents/testing-implementation-agent.md +151 -0
- package/.claude/agents/van-maintenance-agent.md +64 -0
- package/.claude/agents/workflow-agent.md +87 -0
- package/.claude/commands/autocompact.md +41 -0
- package/.claude/commands/continue-handoff.md +98 -0
- package/.claude/commands/mock.md +45 -0
- package/.claude/commands/reset-handoff.md +59 -0
- package/.claude/commands/telos/init.md +326 -0
- package/.claude/commands/telos/quick.md +90 -0
- package/.claude/commands/telos/reset.md +100 -0
- package/.claude/commands/telos/status.md +170 -0
- package/.claude/commands/telos/validate.md +143 -0
- package/.claude/commands/tm/add-dependency/add-dependency.md +55 -0
- package/.claude/commands/tm/add-subtask/add-subtask.md +76 -0
- package/.claude/commands/tm/add-subtask/convert-task-to-subtask.md +71 -0
- package/.claude/commands/tm/add-task/add-task.md +78 -0
- package/.claude/commands/tm/analyze-complexity/analyze-complexity.md +121 -0
- package/.claude/commands/tm/clear-subtasks/clear-all-subtasks.md +93 -0
- package/.claude/commands/tm/clear-subtasks/clear-subtasks.md +86 -0
- package/.claude/commands/tm/complexity-report/complexity-report.md +117 -0
- package/.claude/commands/tm/expand/expand-all-tasks.md +51 -0
- package/.claude/commands/tm/expand/expand-task.md +49 -0
- package/.claude/commands/tm/fix-dependencies/fix-dependencies.md +81 -0
- package/.claude/commands/tm/generate/generate-tasks.md +121 -0
- package/.claude/commands/tm/help.md +81 -0
- package/.claude/commands/tm/init/init-project-quick.md +46 -0
- package/.claude/commands/tm/init/init-project.md +50 -0
- package/.claude/commands/tm/learn.md +103 -0
- package/.claude/commands/tm/list/list-tasks-by-status.md +39 -0
- package/.claude/commands/tm/list/list-tasks-with-subtasks.md +29 -0
- package/.claude/commands/tm/list/list-tasks.md +43 -0
- package/.claude/commands/tm/models/setup-models.md +51 -0
- package/.claude/commands/tm/models/view-models.md +51 -0
- package/.claude/commands/tm/next/next-task.md +66 -0
- package/.claude/commands/tm/parse-prd/parse-prd-with-research.md +48 -0
- package/.claude/commands/tm/parse-prd/parse-prd.md +49 -0
- package/.claude/commands/tm/remove-dependency/remove-dependency.md +62 -0
- package/.claude/commands/tm/remove-subtask/remove-subtask.md +84 -0
- package/.claude/commands/tm/remove-task/remove-task.md +107 -0
- package/.claude/commands/tm/set-status/to-cancelled.md +55 -0
- package/.claude/commands/tm/set-status/to-deferred.md +47 -0
- package/.claude/commands/tm/set-status/to-done.md +44 -0
- package/.claude/commands/tm/set-status/to-in-progress.md +36 -0
- package/.claude/commands/tm/set-status/to-pending.md +32 -0
- package/.claude/commands/tm/set-status/to-review.md +40 -0
- package/.claude/commands/tm/setup/install-taskmaster.md +117 -0
- package/.claude/commands/tm/setup/quick-install-taskmaster.md +22 -0
- package/.claude/commands/tm/show/show-task.md +82 -0
- package/.claude/commands/tm/status/project-status.md +64 -0
- package/.claude/commands/tm/sync-readme/sync-readme.md +117 -0
- package/.claude/commands/tm/tm-main.md +146 -0
- package/.claude/commands/tm/update/update-single-task.md +119 -0
- package/.claude/commands/tm/update/update-task.md +72 -0
- package/.claude/commands/tm/update/update-tasks-from-id.md +108 -0
- package/.claude/commands/tm/utils/analyze-project.md +97 -0
- package/.claude/commands/tm/validate-dependencies/validate-dependencies.md +71 -0
- package/.claude/commands/tm/workflows/auto-implement-tasks.md +97 -0
- package/.claude/commands/tm/workflows/command-pipeline.md +77 -0
- package/.claude/commands/tm/workflows/smart-workflow.md +55 -0
- package/.claude/commands/van.md +150 -0
- package/.claude/docs/README.md +214 -0
- package/.claude/docs/TROUBLESHOOTING.md +126 -0
- package/.claude/hooks/block-destructive-commands.sh +243 -0
- package/.claude/hooks/collective-metrics.sh +291 -0
- package/.claude/hooks/directive-enforcer.sh +117 -0
- package/.claude/hooks/load-behavioral-system.sh +49 -0
- package/.claude/hooks/routing-executor.sh +4 -0
- package/.claude/hooks/test-driven-handoff.sh +653 -0
- package/.claude/settings.json +125 -0
- package/README.md +39 -15
- package/lib/commands/init.js +52 -157
- package/lib/installers/memory-files.js +77 -0
- package/lib/installers/slash-commands.js +77 -0
- package/package.json +7 -2
- package/templates/AGENTS.md +79 -0
- package/templates/CLAUDE.md +54 -0
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
#!/bin/bash
# test-driven-handoff.sh
# TRUE Test-Driven Handoffs with Contract Validation
# Executes actual test contracts to validate agent handoffs

# Destination file for every diagnostic line written by log().
LOG_FILE="/tmp/test-driven-handoff.log"

# Print the current local time as "YYYY-MM-DD HH:MM:SS" on stdout.
timestamp() {
    date '+%Y-%m-%d %H:%M:%S'
}

# Append one timestamped line to $LOG_FILE.
# Arguments: $1 - message text
# Outputs:   nothing on stdout/stderr; the line goes to the log file only
log() {
    local stamp
    stamp=$(timestamp)
    echo "[$stamp] $1" >> "$LOG_FILE"
}
|
|
13
|
+
|
|
14
|
+
# Read the hook payload (JSON) from stdin. The only clean-up applied is turning
# the invalid escape "\!" back into "!"; valid JSON escapes are left untouched.
INPUT_JSON=$(cat | sed 's/\\!/!/g')

# Fields extracted from the payload; they stay empty when the key is absent.
EVENT=""
SUBAGENT_NAME=""
TRANSCRIPT_PATH=""

# Preferred path: let jq parse the payload when it is installed.
# jq -r prints the literal string "null" for a missing key; handled below.
if command -v jq >/dev/null 2>&1; then
    EVENT=$(echo "$INPUT_JSON" | jq -r '.hook_event_name' 2>/dev/null)
    SUBAGENT_NAME=$(echo "$INPUT_JSON" | jq -r '.tool_input.subagent_type' 2>/dev/null)
    TRANSCRIPT_PATH=$(echo "$INPUT_JSON" | jq -r '.transcript_path' 2>/dev/null)
fi

# Fallback when jq is missing, failed, or returned null: pull the value out
# with grep/cut. The pattern tolerates whitespace around the colon so both
# compact ("k":"v") and pretty-printed ("k": "v") JSON are recognised, and
# head keeps only the first occurrence so the result is always a single line.
# With '"' as the delimiter the value is always the 4th field.
if [[ -z "$EVENT" || "$EVENT" == "null" ]]; then
    EVENT=$(echo "$INPUT_JSON" \
        | grep -o '"hook_event_name"[[:space:]]*:[[:space:]]*"[^"]*"' \
        | head -n 1 | cut -d'"' -f4)
fi

if [[ -z "$SUBAGENT_NAME" || "$SUBAGENT_NAME" == "null" ]]; then
    SUBAGENT_NAME=$(echo "$INPUT_JSON" \
        | grep -o '"subagent_type"[[:space:]]*:[[:space:]]*"[^"]*"' \
        | head -n 1 | cut -d'"' -f4)
fi

if [[ -z "$TRANSCRIPT_PATH" || "$TRANSCRIPT_PATH" == "null" ]]; then
    TRANSCRIPT_PATH=$(echo "$INPUT_JSON" \
        | grep -o '"transcript_path"[[:space:]]*:[[:space:]]*"[^"]*"' \
        | head -n 1 | cut -d'"' -f4)
fi

# Debug logging
log "DEBUG: Extracted EVENT='$EVENT', SUBAGENT_NAME='$SUBAGENT_NAME'"
|
|
44
|
+
|
|
45
|
+
# Get the agent's textual output, trying progressively more tolerant sources.
AGENT_OUTPUT=""

# 1) jq: emits every .tool_response.content[].text entry. It is expected to
#    fail when the payload contains raw newlines/control characters.
AGENT_OUTPUT=$(echo "$INPUT_JSON" | jq -r '.tool_response.content[].text' 2>/dev/null)

# 2) Python: used when jq produced nothing (or the string "null"). Prints the
#    first content item that has a 'text' key; any parsing problem results in
#    empty output rather than an error.
if [[ -z "$AGENT_OUTPUT" || "$AGENT_OUTPUT" == "null" ]]; then
    AGENT_OUTPUT=$(echo "$INPUT_JSON" | python3 -c "
import json, sys
try:
    data = json.load(sys.stdin)
    content = data.get('tool_response', {}).get('content', [])
    for item in content:
        if 'text' in item:
            print(item['text'])
            break
except Exception:
    # Best effort only: malformed input simply yields no output.
    pass
" 2>/dev/null)
fi

log "DEBUG: Extracted AGENT_OUTPUT length: ${#AGENT_OUTPUT} chars"

# 3) Transcript: when there is still no output and the transcript file exists,
#    take the last plain-string content entry of an assistant message found in
#    the final 10 lines of the transcript JSONL.
if [[ -z "$AGENT_OUTPUT" && -n "$TRANSCRIPT_PATH" && -f "$TRANSCRIPT_PATH" ]]; then
    AGENT_OUTPUT=$(tail -10 "$TRANSCRIPT_PATH" | jq -r 'select(.type == "assistant") | .message.content[]? | select(type == "string")' 2>/dev/null | tail -1)
    log "Extracted from transcript: $(echo "$AGENT_OUTPUT" | head -c 100)..."
fi

# Environment defaults. The project directory falls back to the current
# working directory instead of a path that only exists on one developer's
# machine.
HANDOFF_TOKEN=${HANDOFF_TOKEN:-""}
CLAUDE_PROJECT_DIR=${CLAUDE_PROJECT_DIR:-"$PWD"}

log "TRUE TEST-DRIVEN HANDOFF VALIDATION - Event: $EVENT, Agent: $SUBAGENT_NAME"
log "JSON INPUT: $INPUT_JSON"
|
|
81
|
+
|
|
82
|
+
# Check that a handoff token was supplied.
# Arguments: $1 - handoff token
# Outputs:   an error line on stderr when the token is empty; log entries
# Returns:   1 when the token is empty, 0 otherwise. A token that does not
#            look like UPPER_CASE_NAME_YYYYMMDD_HHMMSS is only logged as a
#            warning and never rejected.
validate_handoff_token() {
    local token="$1"
    local expected_shape="^[A-Z_]+_[0-9]{8}_[0-9]{6}$"

    if [[ -n "$token" ]]; then
        # Format mismatch is advisory only.
        echo "$token" | grep -q -E "$expected_shape" \
            || log "HANDOFF WARNING: Handoff token format may be non-standard: $token"

        log "Handoff token validated: $token"
        return 0
    fi

    log "HANDOFF ERROR: No handoff token provided"
    echo "❌ HANDOFF VALIDATION FAILED: Missing handoff token" >&2
    return 1
}
|
|
101
|
+
|
|
102
|
+
# Inspect an agent's output for implementation, test and quality keywords.
# Arguments: $1 - agent output text
#            $2 - agent name (used in the log line only)
# Outputs:   warnings/errors on stderr; log entries
# Returns:   1 when the output is empty, 0 otherwise (all keyword findings
#            are advisory and never fail the validation)
validate_agent_output() {
    local output="$1"
    local agent="$2"

    if [[ -z "$output" ]]; then
        log "HANDOFF ERROR: No agent output provided for validation"
        echo "❌ CONTRACT VALIDATION FAILED: Empty agent output" >&2
        return 1
    fi

    # Implementation evidence: words that suggest files were created/modified.
    local has_implementation=false
    if echo "$output" | grep -qi -E "(created|modified|updated|wrote|edited|implemented)"; then
        has_implementation=true
    fi

    # An implementation with no mention of testing earns a warning.
    if [[ "$has_implementation" == "true" ]]; then
        if ! echo "$output" | grep -qi -E "(test|spec|coverage|validation|verify)"; then
            log "CONTRACT WARNING: Implementation detected without test mention"
            echo "⚠️ CONTRACT WARNING: Implementation completed without test validation" >&2
            echo "📋 RECOMMENDATION: Include test validation for implemented changes" >&2
        fi
    fi

    # Quality score: one point each for documentation, error handling, testing.
    # The counter uses an arithmetic assignment rather than ((var++)): the
    # latter returns a non-zero status when the old value is 0, which would
    # abort the script if errexit were ever enabled.
    local quality_score=0

    if echo "$output" | grep -qi -E "(document|comment|readme|doc)"; then
        quality_score=$((quality_score + 1))
    fi

    if echo "$output" | grep -qi -E "(error|exception|handle|catch|validate)"; then
        quality_score=$((quality_score + 1))
    fi

    if echo "$output" | grep -qi -E "(test|spec|coverage|assert)"; then
        quality_score=$((quality_score + 1))
    fi

    log "Quality score for $agent: $quality_score/3"

    if [[ $quality_score -lt 1 ]]; then
        echo "⚠️ QUALITY WARNING: Low quality handoff detected (score: $quality_score/3)" >&2
        echo "📋 IMPROVEMENT NEEDED: Consider adding tests, documentation, or error handling" >&2
    fi

    return 0
}
|
|
155
|
+
|
|
156
|
+
# Verify that a handoff mentions at least one kind of state information.
# Arguments: $1 - agent output text
# Outputs:   error lines on stderr when nothing is found; log entries
# Returns:   0 when at least one of completion status, file changes or next
#            steps is mentioned (case-insensitive keyword match), 1 otherwise
validate_state_contract() {
    local output="$1"

    # "label:pattern" pairs, checked in this order.
    local checks=(
        "completion_status:(complet|finish|done|success)"
        "file_changes:(file|path|created|modified)"
        "next_steps:(next|route|handoff|continue)"
    )
    local found=()
    local entry label pattern

    for entry in "${checks[@]}"; do
        label=${entry%%:*}
        pattern=${entry#*:}
        if echo "$output" | grep -qi -E "$pattern"; then
            found+=("$label")
        fi
    done

    log "State elements found: ${found[*]}"

    if (( ${#found[@]} > 0 )); then
        return 0
    fi

    log "CONTRACT ERROR: No state elements found in handoff"
    echo "❌ STATE CONTRACT FAILED: Missing required state elements" >&2
    echo "📋 REQUIRED: Include completion status, file changes, or next steps" >&2
    return 1
}
|
|
189
|
+
|
|
190
|
+
# Report whether the agent output mentions running tests or test files.
# Arguments: $1 - agent output text
# Outputs:   exactly one status line on stderr and one log entry
# Returns:   always 0 - the result is informational only
validate_test_integration() {
    local output="$1"

    # Default verdict: nothing test-related was mentioned.
    local log_line="WARNING: No test framework integration detected"
    local status_line="⚠️ TEST INTEGRATION WARNING: No test framework usage detected"

    if echo "$output" | grep -qi -E "(jest|test.*pass|test.*fail|npm.*test|yarn.*test)"; then
        # A test runner or a pass/fail statement was mentioned.
        log_line="Test framework integration detected"
        status_line="✅ TEST INTEGRATION: Test framework usage confirmed"
    elif echo "$output" | grep -qi -E "(\.test\.|\.spec\.|__tests__|test/)"; then
        # Only test file paths were mentioned.
        log_line="Test files mentioned in handoff"
        status_line="✅ TEST FILES: Test file references found"
    fi

    log "$log_line"
    echo "$status_line" >&2
    return 0
}
|
|
212
|
+
|
|
213
|
+
# CHECKPOINT 1: Agent-level TDD validation with test execution.
# Runs `npx vitest run` inside .claude-collective (installing dependencies
# first when node_modules is missing) and, if a package.json exists in the
# current directory, `npm run build`.
# Arguments: $1 - agent name (also used in the /tmp log file names)
#            $2 - task context (accepted for callers; not used here)
# Outputs:   status and remediation messages on stderr; log entries;
#            /tmp/agent-test-<agent>.log and /tmp/agent-build-<agent>.log
# Returns:   0 when tests (exit code AND parsed output) and build pass,
#            1 otherwise
agent_tdd_checkpoint() {
    local agent_name="$1"
    # shellcheck disable=SC2034 # kept so the two-argument interface is explicit
    local task_context="$2"
    local test_log="/tmp/agent-test-$agent_name.log"
    local build_log="/tmp/agent-build-$agent_name.log"

    log "🧪 AGENT TDD CHECKPOINT: $agent_name"

    # Quick test validation - must pass to proceed.
    # Install dependencies first so a missing vitest is not reported as a
    # test failure.
    if [[ ! -d ".claude-collective/node_modules" ]]; then
        log "Installing dependencies in .claude-collective/ for testing..."
        (cd .claude-collective && npm install > /dev/null 2>&1) || log "Failed to install dependencies"
    fi

    # Run vitest from .claude-collective, where the dependencies live.
    # The log path is quoted so agent names containing whitespace do not
    # cause an "ambiguous redirect".
    log "🧪 Running vitest validation for $agent_name..."

    timeout 60 bash -c "cd .claude-collective && npx vitest run" > "$test_log" 2>&1
    local exit_code=$?

    # DUAL VALIDATION: check both the exit code AND the parsed output.
    local has_test_failures=false

    if [[ ! -s "$test_log" ]]; then
        # Missing or empty log: the test run produced nothing to judge.
        log "❌ AGENT TDD FAILURE: $agent_name - no test output generated"
        has_test_failures=true
    else
        # Explicit failure markers first. A literal "0 failed" means success,
        # but it must not be preceded by another digit - otherwise "10 failed"
        # or "20 failed" would be mistaken for zero failures.
        if grep -iq "failed\|error\|✗\|×" "$test_log" && ! grep -iqE "(^|[^0-9])0 failed" "$test_log"; then
            log "❌ AGENT TDD FAILURE: $agent_name - test failures detected in output"
            has_test_failures=true
        # Vitest-style success summaries.
        elif grep -iqE "✓.*test|Tests.*[0-9]+.*passed.*\([0-9]+\)|Test Files.*[0-9]+.*passed|[0-9]+ passed \([0-9]+\)" "$test_log"; then
            log "✅ AGENT TDD OUTPUT: $agent_name - tests show passing results"
        # A "Duration" line indicates the run completed.
        elif grep -iq "Duration.*[0-9]" "$test_log"; then
            log "✅ AGENT TDD OUTPUT: $agent_name - test execution completed successfully"
        else
            log "❌ AGENT TDD FAILURE: $agent_name - no passing tests detected in output"
            has_test_failures=true
        fi
    fi

    # Final verdict: fail on a bad exit code OR on failures found in the output.
    if [[ $exit_code -ne 0 ]] || [[ "$has_test_failures" == "true" ]]; then
        log "❌ AGENT TDD FAILURE: $agent_name tests failing (exit_code=$exit_code, output_issues=$has_test_failures)"

        # Extract specific test failures for actionable feedback.
        local test_failures
        test_failures=$(extract_test_failures "$test_log")

        echo "❌ AGENT TDD CHECKPOINT FAILED: Tests not passing for $agent_name" >&2
        echo " 🔍 Exit Code: $exit_code" >&2
        echo " 🔍 Output Analysis: $has_test_failures" >&2
        echo "" >&2
        echo "🔍 SPECIFIC TEST FAILURES IDENTIFIED:" >&2
        echo "$test_failures" >&2
        echo "" >&2
        echo "📋 REMEDIATION REQUIRED: Fix the above failing tests before handoff allowed" >&2
        echo "📄 Full test log: /tmp/agent-test-$agent_name.log" >&2
        return 1
    fi

    # Quick build validation (only when the current directory is an npm project).
    if [[ -f "package.json" ]]; then
        if ! timeout 30 npm run build > "$build_log" 2>&1; then
            log "❌ AGENT TDD FAILURE: $agent_name build failing"
            echo "❌ AGENT TDD CHECKPOINT FAILED: Build not passing for $agent_name" >&2
            echo "📋 REMEDIATION REQUIRED: Fix build errors before handoff allowed" >&2
            echo "📄 Build log: /tmp/agent-build-$agent_name.log" >&2
            return 1
        fi
    else
        log "Build validation skipped - no package.json found"
    fi

    log "✅ AGENT TDD CHECKPOINT PASSED: $agent_name"
    echo "✅ AGENT TDD CHECKPOINT PASSED: $agent_name tests and build successful" >&2
    return 0
}
|
|
294
|
+
|
|
295
|
+
# Execute TDD validation using built-in keyword heuristics on the agent output.
# Arguments: $1 - agent output text
#            $2 - agent name (selects the research / implementation checks)
# Outputs:   a result report on stderr (stdout is left untouched so it cannot
#            interfere with the hook's JSON response); log entries
# Returns:   0 when every mandatory check passes, 1 otherwise. Handoff clarity
#            and quality indicators are advisory and never fail validation.
execute_tdd_validation() {
    local agent_output="$1"
    local agent_name="$2"

    log "Executing TDD validation for agent: $agent_name"

    local validation_passed=true
    local validation_messages=()
    local message    # loop variable; declared local so it does not leak globally

    # 1. Evidence of completed work (mandatory).
    if ! echo "$agent_output" | grep -qi -E "(complete|done|finished|implemented|created|generated|delivered)"; then
        validation_passed=false
        validation_messages+=("❌ No completion evidence found")
        log "TDD FAIL: No completion evidence"
    else
        validation_messages+=("✅ Work completion evidence found")
        log "TDD PASS: Completion evidence found"
    fi

    # 2. Research agents must mention research deliverables (mandatory).
    if [[ "$agent_name" == *"research"* ]]; then
        if echo "$agent_output" | grep -qi -E "(research|analysis|findings|documentation|Context7|library)"; then
            validation_messages+=("✅ Research deliverables validated")
            log "TDD PASS: Research deliverables found"
        else
            validation_passed=false
            validation_messages+=("❌ Missing research deliverables")
            log "TDD FAIL: No research evidence"
        fi
    fi

    # 3. Implementation-type agents must mention code/file evidence (mandatory).
    if [[ "$agent_name" == *"implementation"* || "$agent_name" == *"component"* || "$agent_name" == *"feature"* ]]; then
        if echo "$agent_output" | grep -qi -E "(file|code|component|function|test|npm|build)"; then
            validation_messages+=("✅ Implementation deliverables validated")
            log "TDD PASS: Implementation evidence found"
        else
            validation_passed=false
            validation_messages+=("❌ Missing implementation deliverables")
            log "TDD FAIL: No implementation evidence"
        fi
    fi

    # 4. Handoff instruction clarity (advisory). Agent ids may contain digits,
    #    matching what detect_handoff accepts.
    if echo "$agent_output" | grep -qi -E "Use the [a-z0-9-]+ (subagent|agent) to"; then
        validation_messages+=("✅ Clear handoff instruction provided")
        log "TDD PASS: Clear handoff instruction"
    else
        # Don't fail on this, just warn.
        validation_messages+=("⚠️ Handoff instruction could be clearer")
        log "TDD WARN: Handoff instruction unclear"
    fi

    # 5. Quality indicators (advisory): one point per category mentioned.
    local quality_score=0
    if echo "$agent_output" | grep -qi -E "(test|validation|quality|error.handling)"; then
        quality_score=$((quality_score + 1))
    fi
    if echo "$agent_output" | grep -qi -E "(documentation|readme|comment)"; then
        quality_score=$((quality_score + 1))
    fi
    if echo "$agent_output" | grep -qi -E "(security|performance|accessibility)"; then
        quality_score=$((quality_score + 1))
    fi

    if [[ $quality_score -gt 0 ]]; then
        validation_messages+=("✅ Quality indicators present (score: $quality_score/3)")
        log "TDD PASS: Quality score $quality_score/3"
    fi

    # Report to stderr so stdout stays free for the handoff response.
    echo "🧪 TDD VALIDATION RESULTS for $agent_name:" >&2
    for message in "${validation_messages[@]}"; do
        echo " $message" >&2
    done

    if [[ "$validation_passed" == "true" ]]; then
        log "TDD validation PASSED for agent: $agent_name"
        echo "✅ TDD VALIDATION PASSED: Agent handoff validated successfully" >&2
        return 0
    else
        log "TDD validation FAILED for agent: $agent_name"
        echo "❌ TDD VALIDATION FAILED: Agent handoff validation failed" >&2
        return 1
    fi
}
|
|
383
|
+
|
|
384
|
+
# CHECKPOINT 2: Orchestrator phase completion detection.
# Arguments: $1 - agent output text
#            $2 - agent name; only names containing "orchestrator" are examined
# Outputs:   when a completed phase is detected: routing hints on stderr and a
#            {"decision": "block", ...} JSON object on stdout; log entries
# Returns:   0 when completion was detected and routed, 1 in every other case
detect_orchestrator_phase_completion() {
    local output="$1"
    local agent_name="$2"

    # Guard: this checkpoint applies to orchestrator agents only.
    [[ "$agent_name" == *"orchestrator"* ]] || return 1

    # Guard: require an EXPLICIT completion marker (any one of the three).
    local completion_markers="(ORCHESTRATION STATUS.*COMPLETED|ALL TASKS SUCCESSFULLY COMPLETED|PHASE.*HAS BEEN COMPLETED)"
    if ! echo "$output" | grep -qi -E "$completion_markers"; then
        return 1
    fi

    # Guard: deployment language means the orchestrator is not finished yet.
    if echo "$output" | grep -qi -E "(DEPLOYING|Active Deployments|DEPLOYMENT PLAN|about to deploy)"; then
        log "Orchestrator is still deploying agents, not triggering validation"
        return 1
    fi

    log "🎯 ORCHESTRATOR PHASE COMPLETION DETECTED"

    # First "Task N" / "Phase N" / "N task(s)" mention, if any.
    local phase_info=$(echo "$output" | grep -i -o -E "(Task [0-9]+|Phase [0-9]+|[0-9]+ tasks?)" | head -1)

    echo "ROUTE TO: tdd-validation-agent" >&2
    echo "Context: Comprehensive TDD validation for phase completion: $phase_info" >&2

    # Blocking decision consumed by the hook runner (heredoc terminator must
    # stay at column 0).
    cat <<EOF
{
"decision": "block",
"reason": "WORKFLOW AUTOMATION: Orchestrator phase completion detected. Phase: $phase_info. Execute next: Use the tdd-validation-agent subagent to perform comprehensive TDD validation for phase completion."
}
EOF

    log "Phase completion validation routed to tdd-validation-agent: $phase_info"
    return 0
}
|
|
428
|
+
|
|
429
|
+
# Extract specific test failures with actionable information from a vitest log.
# Arguments: $1 - path to the test log file
# Outputs:   on stdout, one "❌ <test name>" line per failed test (lines that
#            start with "×" and end in "<n>ms"), each followed by its
#            "🔹 <message>" lines (lines that start with "→"). When no such
#            lines exist, up to five log lines mentioning a failure are
#            printed instead, or a generic message when there are none.
# Returns:   0
extract_test_failures() {
    local log_file="$1"

    if [[ ! -f "$log_file" ]]; then
        echo "❌ No test log available"
        return 0
    fi

    local failures=""
    local current_test=""
    local error_lines=()
    # Declared local so the loop variables do not leak into the caller.
    local line error error_msg summary

    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Failed test header: "× <name> <n>ms"
        if [[ "$line" =~ ^[[:space:]]*×[[:space:]](.+)[[:space:]]+[0-9]+ms$ ]]; then
            # Flush the previous test, if any, before starting a new one.
            if [[ -n "$current_test" ]]; then
                failures+="❌ $current_test"$'\n'
                for error in "${error_lines[@]}"; do
                    failures+=" 🔹 $error"$'\n'
                done
                failures+=""$'\n'
            fi

            # Strip the leading marker and the trailing duration.
            current_test=$(echo "$line" | sed 's/^[[:space:]]*×[[:space:]]//' | sed 's/[[:space:]]*[0-9]*ms$//')
            error_lines=()

        # Error detail line: "→ <message>"
        elif [[ "$line" =~ ^[[:space:]]*→[[:space:]](.+)$ ]]; then
            error_msg=$(echo "$line" | sed 's/^[[:space:]]*→[[:space:]]*//')
            error_lines+=("$error_msg")
        fi
    done < "$log_file"

    # Flush the last test collected by the loop.
    if [[ -n "$current_test" ]]; then
        failures+="❌ $current_test"$'\n'
        for error in "${error_lines[@]}"; do
            failures+=" 🔹 $error"$'\n'
        done
    fi

    # Nothing parsed: fall back to the first few failure-looking lines.
    if [[ -z "$failures" ]]; then
        summary=$(grep -E "(Failed|Error|✗|failed)" "$log_file" | head -5 | sed 's/^/❌ /')
        if [[ -n "$summary" ]]; then
            failures="$summary"
        else
            failures="❌ Tests failed but specific failures could not be parsed. Check full log."
        fi
    fi

    echo "$failures"
}
|
|
487
|
+
|
|
488
|
+
# Detect a handoff directive of the form "Use the <id> subagent to ..." and
# extract the id of the next agent.
# Arguments: $1 - agent output text
# Outputs:   the agent id on stdout when a directive is found; log entries
# Returns:   0 when a directive was found, 1 otherwise
detect_handoff() {
    local output="$1"

    # Normalize: map Unicode dashes (en dash, em dash, non-breaking hyphen,
    # minus sign) to ASCII "-" and collapse all whitespace runs - including
    # newlines - into single spaces, so the text becomes one line. Each dash
    # is replaced as a literal string rather than through a bracket
    # expression, which would act on individual bytes in a non-UTF-8 locale.
    local normalized_output
    normalized_output=$(echo "$output" \
        | sed -e 's/–/-/g' -e 's/—/-/g' -e 's/‑/-/g' -e 's/−/-/g' \
        | tr -s '[:space:]' ' ')

    log "HANDOFF DETECTION: Normalized output (first 300 chars): $(echo "$normalized_output" | head -c 300)..."

    # Take the first directive anywhere in the text (case-insensitive). The
    # text is a single line at this point, so one search covers directives at
    # the start, in the middle and at the end. The match always has the shape
    # "Use the <id> subagent to" with single spaces, so the id is the third
    # whitespace-separated field whatever the letter case of "Use the" is.
    local next_agent
    next_agent=$(echo "$normalized_output" \
        | grep -i -o 'Use the [a-z0-9-]* subagent to' \
        | awk '{ print $3; exit }')

    if [[ -n "$next_agent" ]]; then
        log "HANDOFF FOUND: '$next_agent'"
        echo "$next_agent"
        return 0
    fi

    log "HANDOFF NOT FOUND: No 'Use the X subagent to' pattern detected"
    return 1
}
|
|
519
|
+
|
|
520
|
+
# Main logic - handoff detection first, then TDD validation
|
|
521
|
+
main() {
|
|
522
|
+
log "Starting handoff detection and TDD validation"
|
|
523
|
+
log "Agent: $SUBAGENT_NAME, Event: $EVENT"
|
|
524
|
+
log "Agent output length: ${#AGENT_OUTPUT} chars"
|
|
525
|
+
|
|
526
|
+
# Only process SubagentStop events (allow PostToolUse for safety net)
|
|
527
|
+
if [[ "$EVENT" != "SubagentStop" && "$EVENT" != "PostToolUse" ]]; then
|
|
528
|
+
log "Skipping - not a SubagentStop or PostToolUse event (Event: '$EVENT')"
|
|
529
|
+
return 0
|
|
530
|
+
fi
|
|
531
|
+
|
|
532
|
+
# For PostToolUse, only process if SubagentStop wasn't already handled
|
|
533
|
+
if [[ "$EVENT" == "PostToolUse" ]]; then
|
|
534
|
+
# Check if we're dealing with a Task tool call (subagent execution)
|
|
535
|
+
local tool_name=$(echo "$INPUT_JSON" | jq -r '.tool_name' 2>/dev/null)
|
|
536
|
+
if [[ -z "$tool_name" || "$tool_name" == "null" ]]; then
|
|
537
|
+
tool_name=$(echo "$INPUT_JSON" | grep -o '"tool_name":"[^"]*"' | cut -d'"' -f4)
|
|
538
|
+
fi
|
|
539
|
+
if [[ "$tool_name" != "Task" ]]; then
|
|
540
|
+
log "PostToolUse: Not a Task tool, skipping"
|
|
541
|
+
return 0
|
|
542
|
+
fi
|
|
543
|
+
fi
|
|
544
|
+
|
|
545
|
+
# Check if agent output exists
|
|
546
|
+
if [[ -z "$AGENT_OUTPUT" ]]; then
|
|
547
|
+
log "No agent output to process"
|
|
548
|
+
return 0
|
|
549
|
+
fi
|
|
550
|
+
|
|
551
|
+
# CHECKPOINT 1: Agent-level TDD validation BEFORE handoff
|
|
552
|
+
local completion_detected=false
|
|
553
|
+
# Only trigger TDD checkpoint when agent explicitly signals handoff readiness
|
|
554
|
+
if echo "$AGENT_OUTPUT" | grep -q "COLLECTIVE_HANDOFF_READY"; then
|
|
555
|
+
completion_detected=true
|
|
556
|
+
log "Completion detected for $SUBAGENT_NAME - running TDD checkpoint"
|
|
557
|
+
|
|
558
|
+
# Run agent TDD checkpoint - BLOCKS handoff if fails (EXCEPT for tdd-validation-agent)
|
|
559
|
+
# tdd-validation-agent is ALLOWED to hand off even with failing tests (its job is to identify failures)
|
|
560
|
+
if [[ "$SUBAGENT_NAME" != "tdd-validation-agent" ]] && ! agent_tdd_checkpoint "$SUBAGENT_NAME" "$(echo "$AGENT_OUTPUT" | head -c 100)"; then
|
|
561
|
+
log "AGENT TDD CHECKPOINT FAILED: Blocking handoff for $SUBAGENT_NAME"
|
|
562
|
+
# Extract specific failure details for actionable feedback
|
|
563
|
+
local failure_summary=""
|
|
564
|
+
if [[ -f "/tmp/agent-test-$SUBAGENT_NAME.log" ]]; then
|
|
565
|
+
# Get a concise summary of failures without special JSON-breaking characters
|
|
566
|
+
local fail_count=$(grep -c "^[[:space:]]*×" "/tmp/agent-test-$SUBAGENT_NAME.log" 2>/dev/null || echo "0")
|
|
567
|
+
local failed_suites=$(grep -E "^[[:space:]]*×.*>" "/tmp/agent-test-$SUBAGENT_NAME.log" | sed 's/^[[:space:]]*×[[:space:]]*//' | sed 's/[[:space:]]*[0-9]*ms$//' | head -3 | tr '\n' '; ' | sed 's/[^a-zA-Z0-9 ;>-]//g')
|
|
568
|
+
|
|
569
|
+
if [[ "$fail_count" -gt 0 ]]; then
|
|
570
|
+
failure_summary="$fail_count failing tests including: $failed_suites"
|
|
571
|
+
else
|
|
572
|
+
failure_summary="Test execution failed - check logs for compilation or runtime errors"
|
|
573
|
+
fi
|
|
574
|
+
else
|
|
575
|
+
failure_summary="Test failures detected but log file not available"
|
|
576
|
+
fi
|
|
577
|
+
|
|
578
|
+
cat <<EOF
|
|
579
|
+
{
|
|
580
|
+
"decision": "block",
|
|
581
|
+
"reason": "TDD VALIDATION FAILED: Agent $SUBAGENT_NAME has failing tests. SPECIFIC FAILURES: $failure_summary. REQUIRED ACTION: Fix these specific test failures before handoff allowed. Detailed error log: /tmp/agent-test-$SUBAGENT_NAME.log. Execute next: Use the $SUBAGENT_NAME subagent to fix the failing tests and continue the original task."
|
|
582
|
+
}
|
|
583
|
+
EOF
|
|
584
|
+
return 1
|
|
585
|
+
fi
|
|
586
|
+
fi
|
|
587
|
+
|
|
588
|
+
# CHECKPOINT 2: Orchestrator phase completion detection
|
|
589
|
+
if detect_orchestrator_phase_completion "$AGENT_OUTPUT" "$SUBAGENT_NAME"; then
|
|
590
|
+
log "Orchestrator phase completion handled - routed to tdd-validation-agent"
|
|
591
|
+
return 0
|
|
592
|
+
fi
|
|
593
|
+
|
|
594
|
+
# STEP 1: Detect handoff directive OR inject hub return for implementation agents
|
|
595
|
+
local next_agent
|
|
596
|
+
log "Checking for handoff directive in output (first 200 chars): $(echo "$AGENT_OUTPUT" | head -c 200)..."
|
|
597
|
+
if next_agent=$(detect_handoff "$AGENT_OUTPUT"); then
|
|
598
|
+
log "✅ HANDOFF DETECTED: $SUBAGENT_NAME -> $next_agent"
|
|
599
|
+
|
|
600
|
+
# BREAKTHROUGH: Use Claude Code's BLOCK mechanism to force continuation
|
|
601
|
+
cat <<EOF
|
|
602
|
+
{
|
|
603
|
+
"decision": "block",
|
|
604
|
+
"reason": "WORKFLOW AUTOMATION: Agent handoff detected. $SUBAGENT_NAME completed and handed off to $next_agent. Execute next: Use the $next_agent subagent to continue the workflow."
|
|
605
|
+
}
|
|
606
|
+
EOF
|
|
607
|
+
|
|
608
|
+
log "✅ DECISION.md auto-delegation triggered for: $next_agent"
|
|
609
|
+
else
|
|
610
|
+
log "No handoff directive detected in agent output"
|
|
611
|
+
|
|
612
|
+
# CREATIVE SOLUTION: Response Stream Hijacking for Hub-and-Spoke Architecture
|
|
613
|
+
# If this is an implementation agent that completed work, inject hub return
|
|
614
|
+
if [[ "$SUBAGENT_NAME" == *"implementation"* ]] && echo "$AGENT_OUTPUT" | grep -qi -E "(complete|done|finished|delivered)"; then
|
|
615
|
+
log "RESPONSE HIJACK: Implementation agent completed without handoff - injecting hub return"
|
|
616
|
+
|
|
617
|
+
cat <<EOF
|
|
618
|
+
{
|
|
619
|
+
"decision": "block",
|
|
620
|
+
"reason": "WORKFLOW AUTOMATION: Implementation agent $SUBAGENT_NAME completed successfully. Hub return required. Execute next: Use the task-orchestrator subagent to coordinate the next phase - $SUBAGENT_NAME complete and validated."
|
|
621
|
+
}
|
|
622
|
+
EOF
|
|
623
|
+
|
|
624
|
+
log "Hub return auto-injected for completed implementation agent: $SUBAGENT_NAME"
|
|
625
|
+
# PROGRESSIVE DETECTION: Implementation agent stopped with incomplete work
|
|
626
|
+
elif [[ "$SUBAGENT_NAME" == *"implementation"* ]] && echo "$AGENT_OUTPUT" | grep -qi -E "([0-9]+%.*complete|in progress|refactor phase|next steps|partially completed|ready to proceed|remaining work)"; then
|
|
627
|
+
log "INCOMPLETE WORK DETECTED: Implementation agent stopped with incomplete work"
|
|
628
|
+
|
|
629
|
+
cat <<EOF
|
|
630
|
+
{
|
|
631
|
+
"decision": "block",
|
|
632
|
+
"reason": "WORKFLOW AUTOMATION: Agent $SUBAGENT_NAME stopped with incomplete work (progress update detected). Execute next: Use the $SUBAGENT_NAME subagent to complete all remaining work and provide proper completion handoff using the required template format."
|
|
633
|
+
}
|
|
634
|
+
EOF
|
|
635
|
+
|
|
636
|
+
log "Incomplete work auto-continuation triggered for: $SUBAGENT_NAME"
|
|
637
|
+
fi
|
|
638
|
+
fi
|
|
639
|
+
|
|
640
|
+
# STEP 2: Run TDD validation separately (output to stderr and logs only)
|
|
641
|
+
log "Running TDD validation for $SUBAGENT_NAME"
|
|
642
|
+
if execute_tdd_validation "$AGENT_OUTPUT" "$SUBAGENT_NAME" >&2; then
|
|
643
|
+
log "TDD validation PASSED for $SUBAGENT_NAME"
|
|
644
|
+
else
|
|
645
|
+
log "TDD validation FAILED for $SUBAGENT_NAME"
|
|
646
|
+
# Don't exit - let handoff proceed even if TDD fails
|
|
647
|
+
fi
|
|
648
|
+
|
|
649
|
+
return 0
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
# Script entry point: call main, forwarding all of the script's positional parameters unchanged ("$@" keeps each argument as a separate word).
|
|
653
|
+
main "$@"
|