agentic-loop 3.18.2 → 3.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/tour.md +11 -7
- package/.claude/commands/vibe-help.md +5 -2
- package/.claude/commands/vibe-list.md +17 -2
- package/.claude/skills/prd/SKILL.md +21 -6
- package/.claude/skills/setup-review/SKILL.md +56 -0
- package/.claude/skills/tour/SKILL.md +11 -7
- package/.claude/skills/vibe-help/SKILL.md +2 -1
- package/.claude/skills/vibe-list/SKILL.md +5 -2
- package/.pre-commit-hooks.yaml +8 -0
- package/README.md +4 -0
- package/bin/agentic-loop.sh +7 -0
- package/bin/ralph.sh +35 -0
- package/dist/checks/check-signs-secrets.d.ts +9 -0
- package/dist/checks/check-signs-secrets.d.ts.map +1 -0
- package/dist/checks/check-signs-secrets.js +57 -0
- package/dist/checks/check-signs-secrets.js.map +1 -0
- package/dist/checks/index.d.ts +2 -5
- package/dist/checks/index.d.ts.map +1 -1
- package/dist/checks/index.js +4 -9
- package/dist/checks/index.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/ralph/hooks/common.sh +47 -0
- package/ralph/hooks/warn-debug.sh +12 -26
- package/ralph/hooks/warn-empty-catch.sh +21 -34
- package/ralph/hooks/warn-secrets.sh +39 -52
- package/ralph/hooks/warn-urls.sh +25 -45
- package/ralph/init.sh +60 -82
- package/ralph/loop.sh +533 -53
- package/ralph/prd-check.sh +177 -236
- package/ralph/prd.sh +5 -2
- package/ralph/setup/quick-setup.sh +2 -16
- package/ralph/setup.sh +68 -80
- package/ralph/signs.sh +8 -0
- package/ralph/uat.sh +2015 -0
- package/ralph/utils.sh +198 -69
- package/ralph/verify/tests.sh +65 -10
- package/templates/PROMPT.md +10 -4
- package/templates/UAT-PROMPT.md +197 -0
- package/templates/config/elixir.json +0 -2
- package/templates/config/fastmcp.json +0 -2
- package/templates/config/fullstack.json +2 -4
- package/templates/config/go.json +0 -2
- package/templates/config/minimal.json +0 -2
- package/templates/config/node.json +0 -2
- package/templates/config/python.json +0 -2
- package/templates/config/rust.json +0 -2
- package/templates/prd-example.json +6 -8
package/ralph/uat.sh
ADDED
|
@@ -0,0 +1,2015 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# shellcheck shell=bash
# uat.sh - UAT + Chaos Agent: Autonomous Testing Loops
#
# ============================================================================
# OVERVIEW
# ============================================================================
# Two commands share this file:
#   uat         — Acceptance testing team. "Does this work correctly?"
#   chaos-agent — Chaos Agent red team. "Can we break it?"
#
# Both use Agent Teams for coordinated discovery, then strict TDD per test case:
#   RED:   Claude writes the test only (no app changes)
#   GREEN: Claude fixes the app only (no test changes)
#
# 3-Phase Flow:
#   Phase 1: DISCOVER + PLAN — Agent team explores app, generates plan
#   Phase 2: EXECUTE LOOP    — Per test case: RED (test) → GREEN (fix)
#   Phase 3: REPORT          — Summary of findings
#
# ============================================================================
# DEPENDENCIES: Requires utils.sh sourced first (get_config, print_*, etc.)
# ============================================================================

# UAT-specific directory variables (initialized by _init_uat_dirs).
# Shared by both the uat and chaos-agent modes; _init_uat_dirs points them
# at the directory tree for whichever mode is running.
UAT_MODE_DIR=""        # mode root under $RALPH_DIR (e.g. .ralph/uat or .ralph/chaos)
UAT_PLAN_FILE=""       # $UAT_MODE_DIR/plan.json — generated test plan
UAT_PROGRESS_FILE=""   # $UAT_MODE_DIR/progress.txt
UAT_FAILURE_FILE=""    # $UAT_MODE_DIR/last_failure.txt
UAT_SCREENSHOTS_DIR="" # $UAT_MODE_DIR/screenshots
UAT_MODE_LABEL=""      # display label, e.g. "UAT" or "Chaos Agent"
UAT_CONFIG_NS=""       # config namespace: "uat" or "chaos"
UAT_CMD_NAME=""        # CLI command name: "uat" or "chaos-agent"

# TDD phases
readonly UAT_PHASE_RED="RED"
readonly UAT_PHASE_GREEN="GREEN"

# Defaults (overridable via config)
readonly DEFAULT_UAT_MAX_ITERATIONS=20
readonly DEFAULT_UAT_MAX_SESSION_SECONDS=600
readonly DEFAULT_UAT_MAX_CASE_RETRIES=5

# Team mode timeouts (longer — Claude coordinates parallel agents)
readonly DEFAULT_UAT_SESSION_SECONDS=1800
readonly DEFAULT_CHAOS_SESSION_SECONDS=1800
|
47
|
+
|
|
48
|
+
# ============================================================================
# DIRECTORY INIT
# ============================================================================

#######################################
# Point the UAT_* globals at the directory tree for one mode.
# Arguments:
#   $1 - mode subdirectory under $RALPH_DIR (default: "uat")
#   $2 - human-facing label (default: "UAT")
#   $3 - CLI command name (default: same as $1)
# Globals written:
#   UAT_MODE_DIR, UAT_PLAN_FILE, UAT_PROGRESS_FILE, UAT_FAILURE_FILE,
#   UAT_SCREENSHOTS_DIR, UAT_MODE_LABEL, UAT_CONFIG_NS, UAT_CMD_NAME
#######################################
_init_uat_dirs() {
  local mode_subdir="${1:-uat}"
  local mode_label="${2:-UAT}"
  local cli_name="${3:-$mode_subdir}"

  UAT_MODE_DIR="$RALPH_DIR/$mode_subdir"

  # Well-known files inside the mode directory.
  UAT_PLAN_FILE="$UAT_MODE_DIR/plan.json"
  UAT_PROGRESS_FILE="$UAT_MODE_DIR/progress.txt"
  UAT_FAILURE_FILE="$UAT_MODE_DIR/last_failure.txt"
  UAT_SCREENSHOTS_DIR="$UAT_MODE_DIR/screenshots"

  # Mode identity: display label, config namespace, CLI command name.
  UAT_MODE_LABEL="$mode_label"
  UAT_CONFIG_NS="$mode_subdir"
  UAT_CMD_NAME="$cli_name"
}
|
|
65
|
+
|
|
66
|
+
# ============================================================================
# SHARED ARG PARSING
# ============================================================================

# Sets: _ARG_FOCUS, _ARG_PLAN_ONLY, _ARG_FORCE_REVIEW, _ARG_NO_FIX,
#       _ARG_MAX_ITERATIONS, _ARG_QUIET_MODE
#
# Parses the CLI flags shared by `uat` and `chaos-agent`. Unknown arguments
# are silently ignored (unchanged behavior). Flags that take a value
# (--focus, --max) are guarded: previously a trailing `--focus` made
# `shift 2` fail without consuming anything, spinning the loop forever on
# the same flag (and dereferencing an unset $2 under `set -u`).
_parse_uat_args() {
  _ARG_FOCUS=""
  _ARG_PLAN_ONLY=false
  _ARG_FORCE_REVIEW=false
  _ARG_NO_FIX=false
  _ARG_MAX_ITERATIONS=""
  _ARG_QUIET_MODE=$(get_config '.quiet' "false")

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --focus)
        if [[ $# -ge 2 ]]; then
          _ARG_FOCUS="$2"
          shift 2
        else
          print_warning "--focus requires a value; ignoring"
          shift
        fi
        ;;
      --plan-only)
        _ARG_PLAN_ONLY=true
        shift
        ;;
      --review)
        _ARG_FORCE_REVIEW=true
        shift
        ;;
      --no-fix)
        _ARG_NO_FIX=true
        shift
        ;;
      --max)
        if [[ $# -ge 2 ]]; then
          _ARG_MAX_ITERATIONS="$2"
          shift 2
        else
          print_warning "--max requires a value; ignoring"
          shift
        fi
        ;;
      --quiet)
        _ARG_QUIET_MODE=true
        shift
        ;;
      *)
        # Unknown/positional args are ignored by design.
        shift
        ;;
    esac
  done
}
|
|
112
|
+
|
|
113
|
+
# ============================================================================
# ENTRY POINT
# ============================================================================

#######################################
# Entry point for `npx agentic-loop uat`: discover + plan, review, execute,
# report. Argument flags are parsed by _parse_uat_args.
# Returns: the execute-loop's exit status (0 also for plan-only / cancelled).
#######################################
run_uat() {
  _parse_uat_args "$@"

  local focus="$_ARG_FOCUS"
  local plan_only="$_ARG_PLAN_ONLY"
  local force_review="$_ARG_FORCE_REVIEW"
  local no_fix="$_ARG_NO_FIX"
  local max_iterations="$_ARG_MAX_ITERATIONS"
  local quiet_mode="$_ARG_QUIET_MODE"

  # Initialize directories for UAT mode
  _init_uat_dirs "uat" "UAT"

  # Validate prerequisites
  check_dependencies

  # Concurrent execution guard
  _acquire_uat_lock

  # Ensure directory structure
  mkdir -p "$UAT_MODE_DIR" "$UAT_SCREENSHOTS_DIR"

  # Banner
  _print_uat_banner

  # Phase 1: Discover + Plan — runs when no plan exists yet, or when the
  # user explicitly asked to re-review or re-plan.
  if [[ ! -f "$UAT_PLAN_FILE" ]] || [[ "$force_review" == "true" ]] || [[ "$plan_only" == "true" ]]; then
    if [[ -f "$UAT_PLAN_FILE" ]] && [[ "$force_review" == "true" ]]; then
      print_info "Re-reviewing existing plan..."
    else
      echo ""
      print_info "Phase 1: Exploring your app and building a test plan"
      echo ""
      if ! _discover_and_plan "$quiet_mode" "uat"; then
        print_error "Something went wrong while exploring your app. See the progress log for details."
        return 1
      fi
    fi

    # Review the plan
    if ! _review_plan; then
      print_info "Plan review cancelled. No changes were made."
      return 0
    fi

    if [[ "$plan_only" == "true" ]]; then
      print_success "Plan generated. Run 'npx agentic-loop uat' to execute."
      return 0
    fi
  else
    local remaining
    remaining=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
    print_info "Picking up where we left off ($remaining tests still to go)"
  fi

  # Phase 2: Execute Loop
  echo ""
  print_info "Phase 2: Running tests and fixing issues"
  echo ""
  # Capture the loop status with `|| …` so a non-zero return cannot abort
  # this function before the report runs if the caller enabled `set -e`.
  local loop_exit=0
  _run_uat_loop "$focus" "$no_fix" "$max_iterations" "$quiet_mode" || loop_exit=$?

  # Phase 3: Report
  _print_report

  return $loop_exit
}
|
|
184
|
+
|
|
185
|
+
# ============================================================================
# CHAOS AGENT ENTRY POINT
# ============================================================================

#######################################
# Entry point for `npx agentic-loop chaos-agent`: adversarial discovery +
# plan, review, execute, report. Same flow as run_uat but in "chaos" mode.
# Returns: the execute-loop's exit status (0 also for plan-only / cancelled).
#######################################
run_chaos() {
  _parse_uat_args "$@"

  local focus="$_ARG_FOCUS"
  local plan_only="$_ARG_PLAN_ONLY"
  local force_review="$_ARG_FORCE_REVIEW"
  local no_fix="$_ARG_NO_FIX"
  local max_iterations="$_ARG_MAX_ITERATIONS"
  local quiet_mode="$_ARG_QUIET_MODE"

  # Initialize directories for chaos mode
  _init_uat_dirs "chaos" "Chaos Agent" "chaos-agent"

  # Validate prerequisites
  check_dependencies

  # Concurrent execution guard
  _acquire_uat_lock

  # Ensure directory structure
  mkdir -p "$UAT_MODE_DIR" "$UAT_SCREENSHOTS_DIR"

  # Banner
  _print_chaos_banner

  # Phase 1: Adversarial Discovery + Plan
  if [[ ! -f "$UAT_PLAN_FILE" ]] || [[ "$force_review" == "true" ]] || [[ "$plan_only" == "true" ]]; then
    if [[ -f "$UAT_PLAN_FILE" ]] && [[ "$force_review" == "true" ]]; then
      print_info "Re-reviewing existing plan..."
    else
      echo ""
      print_info "Phase 1: Red team exploring your app for vulnerabilities"
      echo ""
      if ! _discover_and_plan "$quiet_mode" "chaos"; then
        print_error "Something went wrong during red team exploration. See the progress log for details."
        return 1
      fi
    fi

    # Review the plan
    if ! _review_plan; then
      print_info "Plan review cancelled. No changes were made."
      return 0
    fi

    if [[ "$plan_only" == "true" ]]; then
      print_success "Plan generated. Run 'npx agentic-loop chaos-agent' to execute."
      return 0
    fi
  else
    local remaining
    remaining=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
    print_info "Picking up where we left off ($remaining tests still to go)"
  fi

  # Phase 2: Testing for vulnerabilities and fixing issues
  echo ""
  print_info "Phase 2: Running attack tests and fixing issues"
  echo ""
  # Capture the loop status with `|| …` so a non-zero return cannot abort
  # this function before the report runs if the caller enabled `set -e`.
  local loop_exit=0
  _run_uat_loop "$focus" "$no_fix" "$max_iterations" "$quiet_mode" || loop_exit=$?

  # Phase 3: Report
  _print_report

  return $loop_exit
}
|
|
256
|
+
|
|
257
|
+
# ============================================================================
# CONCURRENT EXECUTION GUARD
# ============================================================================

#######################################
# Take the session lock, refusing to run if a live session already holds it.
# A lockfile that is empty, unreadable, or doesn't contain a pid is treated
# as stale (previously `kill -0` was run on whatever the file contained).
# Globals: RALPH_DIR (read), UAT_MODE_LABEL (read)
# Exits:   1 when another live session holds the lock
# NOTE(review): the check-then-write is not atomic; two sessions started in
# the same instant could both pass. Acceptable for an interactive CLI guard.
#######################################
_acquire_uat_lock() {
  local lockfile="$RALPH_DIR/.lock"
  if [[ -f "$lockfile" ]]; then
    local pid
    pid=$(cat "$lockfile" 2>/dev/null)
    # Only probe the pid if the lockfile actually holds one.
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      print_error "Another $UAT_MODE_LABEL session is already running. Stop it first with 'npx agentic-loop stop'."
      exit 1
    fi
    rm -f "$lockfile" # Stale lock
  fi
  echo $$ > "$lockfile"
  # Chain cleanup: lock removal + kill child processes
  # This replaces the trap from ralph.sh, so we handle both concerns
  trap '_uat_cleanup' EXIT
  trap '_uat_interrupt' INT TERM
}
|
|
278
|
+
|
|
279
|
+
# EXIT trap handler: release the session lock taken by _acquire_uat_lock.
_uat_cleanup() {
  rm -f "$RALPH_DIR/.lock"
}
|
|
282
|
+
|
|
283
|
+
#######################################
# INT/TERM trap handler: release the lock, tear down all child processes
# (Claude sessions, test runners), and exit with the conventional 130.
# Fix: `kill 0` signals the entire process group INCLUDING this process,
# and INT/TERM were still trapped to this handler — so the group kill could
# re-enter the handler, and cleanup ran only after the kill. Now we clean
# up first, ignore further INT/TERM in this process, drop the EXIT trap
# (cleanup already done), then kill the group.
#######################################
_uat_interrupt() {
  echo ""
  print_warning "Interrupted. Wrapping up $UAT_MODE_LABEL..."
  # Release the lock before signalling anything.
  _uat_cleanup
  trap - EXIT     # cleanup already performed above
  trap '' INT TERM # don't re-enter this handler from the group-wide kill
  # Kill all child processes (Claude sessions, test runners)
  kill 0 2>/dev/null || true
  exit 130
}
|
|
291
|
+
|
|
292
|
+
# ============================================================================
# PHASE 1: DISCOVER + PLAN
# ============================================================================

#######################################
# Run the Claude discovery session and validate the resulting plan.
# Arguments:
#   $1 - quiet mode ("true"/"false", default "false")
#   $2 - mode ("uat" or "chaos", default "uat")
# Globals: UAT_PLAN_FILE, UAT_MODE_DIR, UAT_CMD_NAME (read)
# Returns: 0 when a valid plan file exists afterwards, 1 otherwise.
#######################################
_discover_and_plan() {
  local quiet="${1:-false}"
  local mode="${2:-uat}"
  local prompt_file output_file
  prompt_file=$(create_temp_file ".uat-discover-prompt.md")
  output_file=$(create_temp_file ".uat-discover-output.log")

  local timeout
  if [[ "$mode" == "chaos" ]]; then
    timeout=$(get_config '.chaos.sessionSeconds' "$DEFAULT_CHAOS_SESSION_SECONDS")
    _build_chaos_agent_prompt "$prompt_file"
    _log_uat "DISCOVER" "Starting Chaos Agent discovery (timeout: ${timeout}s)"
  else
    timeout=$(get_config '.uat.sessionSeconds' "$DEFAULT_UAT_SESSION_SECONDS")
    _build_uat_team_prompt "$prompt_file"
    _log_uat "DISCOVER" "Starting UAT team discovery (timeout: ${timeout}s)"
  fi

  # Run Claude with MCP exploration. The pipeline runs in a background
  # subshell so the parent waits on a single pid; pipefail makes the status
  # reflect the claude invocation rather than just the last stage. The
  # prompt is fed via redirection instead of `cat |` (one fewer process).
  local claude_exit=0
  (
    set -o pipefail
    run_with_timeout "$timeout" claude -p \
      --dangerously-skip-permissions \
      --verbose \
      --output-format stream-json \
      < "$prompt_file" 2>&1 | tee "$output_file" | _parse_uat_activity "$quiet"
  ) &
  local pipeline_pid=$!
  wait "$pipeline_pid" || claude_exit=$?

  if [[ $claude_exit -ne 0 ]]; then
    _log_uat "DISCOVER" "Claude session failed (exit $claude_exit)"
    print_error "App exploration session failed"
    if [[ -f "$output_file" ]]; then
      echo " Last output:"
      tail -10 "$output_file" | sed 's/^/ /'
    fi
    return 1
  fi

  # Validate plan was generated
  if [[ ! -f "$UAT_PLAN_FILE" ]]; then
    print_error "No test plan was created"
    echo ""
    echo " The exploration finished but didn't produce a plan."
    echo " Check the output above for what went wrong."
    return 1
  fi

  if ! _validate_plan; then
    print_error "The generated plan has errors and can't be used"
    return 1
  fi

  # Check if project-specific prompt was generated
  if [[ ! -f "$UAT_MODE_DIR/UAT-PROMPT.md" ]]; then
    print_warning "No project-specific test instructions were created."
    echo " Tests will use generic patterns instead."
    echo " For better results, re-run with 'npx agentic-loop $UAT_CMD_NAME --plan-only'."
  fi

  # Mark plan as generated
  update_json "$UAT_PLAN_FILE" '.testSuite.status = "planned"'

  local case_count
  # Fallback to 0 on jq failure, matching the other jq calls in this file.
  case_count=$(jq '.testCases | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
  _log_uat "DISCOVER" "Plan generated with $case_count test cases"
  print_success "Plan generated: $case_count test cases"

  return 0
}
|
|
368
|
+
|
|
369
|
+
#######################################
# Write the UAT team-lead discovery prompt into a file.
# Arguments: $1 - path of the prompt file to (over)write
# Globals:   RALPH_TEMPLATES (read)
# Note: the heredoc delimiter is quoted ('PROMPT_SECTION'), so nothing in
# the appended text is expanded — backticks and $ reach Claude literally.
#######################################
_build_uat_team_prompt() {
  local prompt_file="$1"

  # Start with UAT prompt template
  cat "$RALPH_TEMPLATES/UAT-PROMPT.md" > "$prompt_file"

  # Append the team-discovery phase instructions (literal, no expansion).
  cat >> "$prompt_file" << 'PROMPT_SECTION'

---

## Phase: UAT Team Discovery + Plan Generation

You are the **team lead** of an acceptance testing team. Your job is to coordinate a team of
agents that explore a live app, verify features work correctly, and produce a comprehensive
UAT plan.

### Step 1: Recon (~60 seconds)

Before spawning anyone, do a quick recon yourself:

1. **Read `.ralph/config.json`** for URLs, auth config, and directories
2. **Read `.ralph/prd.json`** if it exists — completed stories tell you what was built
3. **Navigate the app** using Playwright MCP — click through nav, find pages, note the tech stack
4. **Take 2-3 screenshots** of key pages (save to `.ralph/uat/screenshots/`)
5. **Map the feature areas** — what exists? (auth, forms, API, navigation, etc.)

Don't go deep. Just map what's there. ~60 seconds max.

### Step 2: Assemble the UAT Team

Create a team and spawn teammates:

```
TeamCreate: "uat-team"
```

Spawn these teammates using the Task tool with `team_name: "uat-team"`:

1. **"recon"** (`subagent_type: "general-purpose"`) — Deep recon. Maps all routes/endpoints,
catalogs forms with selectors, identifies tech stack and auth. Shares intel with teammates
via SendMessage.

2. **"happy-path-{area}"** (`subagent_type: "general-purpose"`) — One per feature area.
Completes primary user journeys, records correct behavior as ground truth assertions
(exact text, redirects, success messages).

3. **"edge-cases"** (`subagent_type: "general-purpose"`) — Tests boundary conditions across
all areas. Empty fields, long input, required-field validation, back button after submit,
refresh mid-flow. Focus: does the app handle these gracefully?

**Only spawn agents for areas that exist.** If there are no forms, don't spawn a forms specialist.
If there's no auth, skip auth testing.

Mindset: **"Verify the app works correctly for real users."**

### Agent Instructions Template

Every agent prompt MUST include:

1. **Their role and focus area** (from above)
2. **The recon intel** — pages, URLs, tech stack you discovered in Step 1
3. **Browser tab isolation** — "Open your own browser tab via `browser_tabs(action: 'new')`
before navigating. Do NOT use the existing tab."
4. **Communication** — "Share important discoveries with teammates via SendMessage.
Examples: 'Login redirects to /dashboard after success', 'Registration form has 4 required fields',
'Profile page shows user email and name'. Read messages from teammates and adapt your testing."
5. **Output format** — "When done, send your findings to the team lead via SendMessage.
Format each finding as a test case with: title, category, testFile path, targetFiles,
assertions (input/expected/strategy), and edgeCases."

### Step 3: Coordinate

While your team works:

- **Monitor messages** from teammates as they report findings
- **Redirect effort** if needed — if recon discovers something important, message the
relevant specialist
- **Create tasks** in the shared task list for any new areas discovered

### Step 4: Collect + Merge + Write Plan

After all teammates finish:

1. Collect findings from all agent messages
2. Dedup by test file path (keep the case with more assertions)
3. Assign sequential IDs: `UAT-001`, `UAT-002`, ...
4. Write `.ralph/uat/plan.json` (schema below)
5. Write `.ralph/uat/UAT-PROMPT.md` (schema below)
6. Shut down all teammates via SendMessage with `type: "shutdown_request"`
7. Clean up with TeamDelete

### plan.json Schema

Write `.ralph/uat/plan.json`:

```json
{
  "testSuite": {
    "name": "UAT Loop",
    "generatedAt": "<ISO timestamp>",
    "status": "pending",
    "discoveryMethod": "uat-team"
  },
  "testCases": [
    {
      "id": "UAT-001",
      "title": "Feature area — what the test checks",
      "category": "auth|forms|navigation|api|ui|data",
      "type": "e2e|integration",
      "userStory": "As a user, I...",
      "testApproach": "What to test and how",
      "testFile": "tests/e2e/feature/test-name.spec.ts",
      "targetFiles": ["src/pages/feature.tsx"],
      "edgeCases": ["Edge case 1", "Edge case 2"],
      "assertions": [
        {
          "input": "Fill name='John', submit form",
          "expected": "Shows 'Welcome, John'",
          "strategy": "keyword"
        }
      ],
      "passes": false,
      "retryCount": 0,
      "source": "uat-team:agent-name"
    }
  ]
}
```

**Every test case MUST have at least 3 assertions** with concrete input/expected pairs:
1. One happy-path assertion (correct input → correct output)
2. One edge-case assertion (bad input → proper error handling)
3. One content assertion (page shows the RIGHT data, not just that it loads)

### UAT-PROMPT.md Schema

Write `.ralph/uat/UAT-PROMPT.md` — a project-specific testing guide based on what the
team ACTUALLY FOUND. Include:

```markdown
# UAT Guide — [Project Name]

## App Overview
- What the app does (1-2 sentences)
- Tech stack observed (framework, API patterns, auth method)
- Base URLs (frontend, API if applicable)

## Pages & Routes Discovered
For each page:
- URL pattern and what it shows
- Key interactive elements (forms, buttons, links)
- Selectors that work (data-testid, roles, labels)

## Auth Flow
- How login works (form fields, redirect after login)
- Test credentials if available (from config or .env)
- What pages require auth vs. public

## Known Forms & Inputs
For each form:
- Fields with their labels/names/selectors
- Required vs optional fields
- Validation behavior observed

## What "Correct" Looks Like
For each feature area:
- Expected behavior observed
- Specific text/numbers that should appear

## Console & Network Observations
- Any existing console errors/warnings
- API endpoints observed
- Response patterns (JSON structure, status codes)
```

This is NOT a copy of the template — it's ground truth from the team's exploration.

### Rules

- Test auth flows FIRST (they gate everything else)
- One test case per feature area (not per edge case)
- Include edge cases as a list within each test case
- **Every test case MUST have assertions with input/expected pairs**
- `type: "e2e"` for anything involving browser interaction
- `type: "integration"` for API-only tests
- `targetFiles` should list the app source files the test covers
- `testFile` path should use the project's test directory conventions
- Aim for 5-15 test cases depending on app complexity
- Always clean up: shutdown teammates and delete team when done
PROMPT_SECTION

  _inject_prompt_context "$prompt_file"
}
|
|
562
|
+
|
|
563
|
+
#######################################
# Sanity-check the generated plan file.
# Globals: UAT_PLAN_FILE (read), UAT_CMD_NAME (read)
# Returns: 0 if the plan is usable, 1 on structural errors. Missing
#          assertions are a warning only (Claude may add them during
#          execution), not a hard failure.
#######################################
_validate_plan() {
  # Check JSON is valid
  if ! jq -e '.' "$UAT_PLAN_FILE" >/dev/null 2>&1; then
    print_error "Test plan file is corrupted (not valid JSON)"
    return 1
  fi

  # Check required structure
  if ! jq -e '.testSuite and .testCases' "$UAT_PLAN_FILE" >/dev/null 2>&1; then
    print_error "Test plan is incomplete — missing required sections"
    return 1
  fi

  # Check test cases have required fields. Default to 0 when jq fails so
  # the numeric comparison below never sees an empty string.
  local invalid_cases
  invalid_cases=$(jq '[.testCases[] | select(.id == null or .title == null or .testFile == null)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo 0)
  if [[ "${invalid_cases:-0}" -gt 0 ]]; then
    print_error "$invalid_cases test case(s) are incomplete — each needs an ID, title, and test file"
    return 1
  fi

  # Check test cases have assertions (the eval contract)
  local missing_assertions
  missing_assertions=$(jq '[.testCases[] | select((.assertions // []) | length < 1)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo 0)
  if [[ "${missing_assertions:-0}" -gt 0 ]]; then
    print_warning "$missing_assertions test case(s) have no expected results defined — tests may not catch real issues"
    echo " Each test case should describe what to check (input and expected outcome)."
    echo " Run 'npx agentic-loop $UAT_CMD_NAME --review' to edit the plan and add them."
    # Warning only, not a hard failure — Claude may add assertions during execution
  fi

  return 0
}
|
|
596
|
+
|
|
597
|
+
# ============================================================================
# PLAN REVIEW
# ============================================================================

# Stamp the plan as reviewed with the current timestamp (ISO-8601 when the
# platform's date supports -I, a portable fallback otherwise).
_mark_plan_reviewed() {
  update_json "$UAT_PLAN_FILE" \
    --arg ts "$(date -Iseconds 2>/dev/null || date +%Y-%m-%dT%H:%M:%S)" \
    '.testSuite.reviewedAt = $ts'
}

#######################################
# Show the plan summary and ask the user to confirm / edit / cancel.
# Globals: UAT_PLAN_FILE, UAT_MODE_LABEL (read)
# Returns: 0 to proceed (plan stamped as reviewed), 1 when the user
#          declined or their edits left the plan invalid.
#######################################
_review_plan() {
  echo ""
  echo " ┌──────────────────────────────────────────────────────┐"
  printf " │ %-54s│\n" "$UAT_MODE_LABEL Test Plan"
  echo " └──────────────────────────────────────────────────────┘"
  echo ""

  local total_cases
  total_cases=$(jq '.testCases | length' "$UAT_PLAN_FILE")

  # Print one summary row per test case.
  while IFS=$'\t' read -r id title category tc_type edge_count assert_count; do
    local type_icon=""
    case "$tc_type" in
      e2e) type_icon="🌐" ;;
      integration) type_icon="🔌" ;;
      *) type_icon="📝" ;;
    esac

    # Truncate title
    local display_title="$title"
    [[ ${#display_title} -gt 40 ]] && display_title="${display_title:0:37}..."

    printf " %s %-10s %-40s [%s edge cases, %s checks]\n" "$type_icon" "$id" "$display_title" "$edge_count" "$assert_count"
  done < <(jq -r '.testCases[] | [.id, .title, .category, .type, (.edgeCases | length | tostring), ((.assertions // []) | length | tostring)] | @tsv' "$UAT_PLAN_FILE" 2>/dev/null)

  echo ""
  echo " Total: $total_cases test cases"
  echo ""

  # Prompt for review. Default to empty (= accept) when stdin is closed so
  # a non-interactive run can't abort on read's non-zero status or leave
  # $response unset under `set -u`.
  local response=""
  read -r -p " Execute this plan? [Y/n/e(dit)] " response || response=""

  case "$response" in
    [Nn])
      return 1
      ;;
    [Ee])
      local editor="${EDITOR:-vi}"
      "$editor" "$UAT_PLAN_FILE"
      # Re-validate after edit
      if ! _validate_plan; then
        print_error "Your edits made the plan invalid. Please fix and try again."
        return 1
      fi
      _mark_plan_reviewed
      ;;
    *)
      _mark_plan_reviewed
      ;;
  esac

  return 0
}
|
|
664
|
+
|
|
665
|
+
# ============================================================================
|
|
666
|
+
# PHASE 2: EXECUTE LOOP
|
|
667
|
+
# ============================================================================
|
|
668
|
+
|
|
669
|
+
# Drive the UAT TDD loop: repeatedly pick the next failing case from
# plan.json and run its RED (write test) or GREEN (fix app) phase until all
# cases pass, the iteration budget is spent, or a stop file appears.
#
# Arguments:
#   $1 - focus filter: a case ID (e.g. UAT-003) or a category name; empty = all
#   $2 - "true" to document bugs without fixing them (--no-fix)
#   $3 - max-iterations override (empty = read from config / default)
#   $4 - quiet flag, forwarded to the per-phase activity parser
# Globals (written): UAT_TESTS_WRITTEN, UAT_BUGS_FOUND, UAT_BUGS_FIXED,
#   UAT_CASES_PASSED/FAILED/SKIPPED, UAT_RED_ONLY_PASSED, UAT_GREEN_ATTEMPTS,
#   UAT_FILES_FIXED, UAT_NEEDS_HUMAN — consumed later by the report.
# Returns: 0 when every case passes, 1 otherwise
_run_uat_loop() {
  local focus="$1"
  local no_fix="$2"
  local max_iterations_arg="$3"
  local quiet="$4"

  local max_iterations
  max_iterations="${max_iterations_arg:-$(get_config ".$UAT_CONFIG_NS.maxIterations" "$DEFAULT_UAT_MAX_ITERATIONS")}"
  local max_case_retries
  max_case_retries=$(get_config ".$UAT_CONFIG_NS.maxCaseRetries" "$DEFAULT_UAT_MAX_CASE_RETRIES")
  local timeout
  timeout=$(get_config ".$UAT_CONFIG_NS.maxSessionSeconds" "$DEFAULT_UAT_MAX_SESSION_SECONDS")

  local iteration=0

  # Track results for report
  UAT_TESTS_WRITTEN=0
  UAT_BUGS_FOUND=0
  UAT_BUGS_FIXED=0
  UAT_CASES_PASSED=0
  UAT_CASES_FAILED=0
  UAT_CASES_SKIPPED=0
  UAT_RED_ONLY_PASSED=0
  UAT_GREEN_ATTEMPTS=0
  UAT_FILES_FIXED=()
  UAT_NEEDS_HUMAN=()

  while [[ $iteration -lt $max_iterations ]]; do
    # Check for stop signal (touch $RALPH_DIR/.stop to interrupt gracefully)
    if [[ -f "$RALPH_DIR/.stop" ]]; then
      rm -f "$RALPH_DIR/.stop"
      print_warning "Stop requested. Finishing up..."
      break
    fi

    iteration=$((iteration + 1))

    # Pick next incomplete test case (with optional focus filter)
    local case_id
    if [[ -n "$focus" ]]; then
      # Focus can be a case ID (UAT-003) or category (auth)
      case_id=$(jq -r --arg f "$focus" '
        .testCases[] |
        select(.passes==false) |
        select(.id==$f or .category==$f) |
        .id
      ' "$UAT_PLAN_FILE" | head -1)
    else
      case_id=$(jq -r '.testCases[] | select(.passes==false) | .id' "$UAT_PLAN_FILE" | head -1)
    fi

    # All done?
    if [[ -z "$case_id" ]]; then
      break
    fi

    # Get case details
    local case_json case_title case_type
    case_json=$(jq --arg id "$case_id" '.testCases[] | select(.id==$id)' "$UAT_PLAN_FILE")
    case_title=$(echo "$case_json" | jq -r '.title')
    case_type=$(echo "$case_json" | jq -r '.type // "e2e"')

    # Read TDD phase state (null = start RED, "red" = resume GREEN)
    local phase
    phase=$(echo "$case_json" | jq -r '.phase // "null"')

    # Compute per-phase retry counts (default 0 for old plan.json files)
    local red_retries green_retries
    red_retries=$(echo "$case_json" | jq -r '.redRetries // 0')
    green_retries=$(echo "$case_json" | jq -r '.greenRetries // 0')

    # Circuit breaker: combined red + green retries
    local total_retries=$((red_retries + green_retries))
    if [[ $total_retries -ge $max_case_retries ]]; then
      print_warning "$case_id tried $max_case_retries times without success — skipping (needs manual review)"
      _flag_for_human "$case_id" "Tried $max_case_retries times without success"
      UAT_CASES_SKIPPED=$((UAT_CASES_SKIPPED + 1))
      # Skipped cases are marked passes:true (plus skipped:true) so the picker
      # above stops selecting them; the skipped flag keeps them identifiable.
      update_json "$UAT_PLAN_FILE" \
        --arg id "$case_id" '(.testCases[] | select(.id==$id)) |= . + {passes: true, skipped: true}'
      continue
    fi

    # Determine current phase
    local current_phase="$UAT_PHASE_RED"
    if [[ "$phase" == "red" ]]; then
      current_phase="$UAT_PHASE_GREEN"
    fi

    # Display case banner with phase
    local display_title="$case_title"
    [[ ${#display_title} -gt 50 ]] && display_title="${display_title:0:47}..."

    echo ""
    echo "┌──────────────────────────────────────────────────────────┐"
    printf "│ %-10s %-45s│\n" "$case_id" "$display_title"
    local phase_label="Writing test"
    [[ "$current_phase" == "$UAT_PHASE_GREEN" ]] && phase_label="Fixing app"
    printf "│ %-14s Type: %-6s Attempt: %-3s │\n" "$phase_label" "$case_type" "$((total_retries + 1))"
    echo "└──────────────────────────────────────────────────────────┘"
    echo ""

    # Git snapshot for rollback
    _git_snapshot "$case_id"

    local test_file
    test_file=$(jq -r --arg id "$case_id" '.testCases[] | select(.id==$id) | .testFile' "$UAT_PLAN_FILE")

    if [[ "$current_phase" == "$UAT_PHASE_RED" ]]; then
      _run_red_phase "$case_id" "$case_type" "$test_file" "$no_fix" "$timeout" "$quiet"
    else
      _run_green_phase "$case_id" "$case_type" "$test_file" "$timeout" "$quiet"
    fi

    # Brief pause between iterations
    sleep 1
  done

  # Update suite status.
  # NB: all_passed is actually the COUNT of cases still failing — 0 means done.
  local all_passed
  all_passed=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null)
  if [[ "$all_passed" -eq 0 ]]; then
    update_json "$UAT_PLAN_FILE" '.testSuite.status = "complete"'
  else
    update_json "$UAT_PLAN_FILE" '.testSuite.status = "partial"'
  fi

  [[ "$all_passed" -eq 0 ]] && return 0
  return 1
}
|
|
798
|
+
|
|
799
|
+
# ============================================================================
|
|
800
|
+
# TDD PHASES: RED (test-only) and GREEN (fix-only)
|
|
801
|
+
# ============================================================================
|
|
802
|
+
|
|
803
|
+
# RED phase of TDD for one case: run a test-writing-only Claude session,
# enforce that no app code changed, validate test quality, then run the test.
# Outcomes:
#   - test passes        -> app already correct; mark passed and commit
#   - test bug           -> save feedback, bump redRetries, retry later
#   - app bug            -> commit the failing test, set phase=red (-> GREEN)
#   - app bug + --no-fix -> commit the failing test as a documented bug
# Arguments: $1 case id, $2 case type, $3 test file path,
#            $4 no-fix flag ("true"/other), $5 session timeout (s), $6 quiet flag
_run_red_phase() {
  local case_id="$1"
  local case_type="$2"
  local test_file="$3"
  local no_fix="$4"
  local timeout="$5"
  local quiet="$6"

  local prompt_file output_file
  prompt_file=$(create_temp_file ".uat-red-prompt.md")
  output_file=$(create_temp_file ".uat-red-output.log")

  _build_red_prompt "$case_id" "$prompt_file"

  _log_uat "$case_id" "RED: Starting test-only session"

  # Run the session in a subshell so pipefail is scoped to this pipeline;
  # the wait/|| pattern captures its exit status without tripping set -e.
  local claude_exit=0
  (
    set -o pipefail
    cat "$prompt_file" | run_with_timeout "$timeout" claude -p \
      --dangerously-skip-permissions \
      --verbose \
      --output-format stream-json \
      2>&1 | tee "$output_file" | _parse_uat_activity "$quiet"
  ) &
  local pipeline_pid=$!
  wait "$pipeline_pid" || claude_exit=$?

  rm -f "$prompt_file"

  # Exit 124 (timeout's conventional status) is tolerated: a timed-out
  # session may still have written a usable test file.
  if [[ $claude_exit -ne 0 ]] && [[ $claude_exit -ne 124 ]]; then
    print_warning "Test-writing session ended unexpectedly — will retry"
    _log_uat "$case_id" "RED: Session failed (exit $claude_exit)"
    _increment_red_retry "$case_id"
    rm -f "$output_file"
    return
  fi

  # Check if test file was created
  if [[ ! -f "$test_file" ]]; then
    print_warning "$case_id: Test file was not created — will retry"
    _log_uat "$case_id" "RED: Test file not created"
    _increment_red_retry "$case_id"
    rm -f "$output_file"
    return
  fi

  # Enforce RED constraint: no app changes allowed
  if _has_app_changes "$test_file"; then
    print_warning "$case_id: App code was changed during test-writing (not allowed) — undoing changes"
    _log_uat "$case_id" "RED: App changes detected — rollback"
    _rollback_to_snapshot "$case_id"
    _save_red_violation_feedback "$case_id"
    _increment_red_retry "$case_id"
    rm -f "$output_file"
    return
  fi

  UAT_TESTS_WRITTEN=$((UAT_TESTS_WRITTEN + 1))

  # Validate test quality — reject shallow tests
  if ! _validate_test_quality "$test_file" "$case_id"; then
    print_warning "$case_id: Test doesn't check enough — will retry with better guidance"
    _save_shallow_test_feedback "$case_id" "$test_file"
    _increment_red_retry "$case_id"
    rm -f "$output_file"
    return
  fi

  # Run the test
  if _run_test "$test_file" "$case_type"; then
    # PASS in RED — app already correct, no fix needed
    print_success "$case_id: Test passes — app already works correctly"
    _mark_passed "$case_id"
    _commit_result "$case_id" "$test_file"
    UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
    UAT_RED_ONLY_PASSED=$((UAT_RED_ONLY_PASSED + 1))
    _log_uat "$case_id" "RED: PASSED (app already correct)"
  else
    # FAIL — classify: test bug or app bug?
    local failure_type
    failure_type=$(_classify_red_failure "$test_file" "$case_id")

    if [[ "$failure_type" == "test_bug" ]]; then
      print_warning "$case_id: Test has errors — will retry"
      _save_failure_context "$case_id" "$output_file"
      _increment_red_retry "$case_id"
    else
      # App bug found — commit the RED test, transition to GREEN
      print_info "$case_id: Found an app bug — now fixing it"
      UAT_BUGS_FOUND=$((UAT_BUGS_FOUND + 1))

      if [[ "$no_fix" == "true" ]]; then
        # --no-fix mode: commit failing test as documented bug
        print_info "$case_id: Saving test as a documented bug (fix skipped with --no-fix)"
        _commit_red_test "$case_id" "$test_file"
        _mark_passed "$case_id"
        UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
        _log_uat "$case_id" "RED: Documented bug (--no-fix mode)"
      else
        # Commit the RED test and transition to GREEN
        _commit_red_test "$case_id" "$test_file"
        _mark_phase "$case_id" "red"
        _save_failure_context "$case_id" "$output_file"
        _log_uat "$case_id" "RED: App bug found — transitioning to GREEN"
      fi
    fi
  fi

  rm -f "$output_file"
}
|
|
914
|
+
|
|
915
|
+
# GREEN phase of TDD for one case: the failing test is already committed; run
# a fix-only Claude session, forbid edits to the test file, re-run the test,
# and verify the rest of the suite still passes before committing the fix.
# Arguments: $1 case id, $2 case type, $3 test file path,
#            $4 session timeout (s), $5 quiet flag
_run_green_phase() {
  local case_id="$1"
  local case_type="$2"
  local test_file="$3"
  local timeout="$4"
  local quiet="$5"

  UAT_GREEN_ATTEMPTS=$((UAT_GREEN_ATTEMPTS + 1))

  local prompt_file output_file
  prompt_file=$(create_temp_file ".uat-green-prompt.md")
  output_file=$(create_temp_file ".uat-green-output.log")

  _build_green_prompt "$case_id" "$test_file" "$prompt_file"

  _log_uat "$case_id" "GREEN: Starting fix-only session"

  # Subshell scopes pipefail to this pipeline; wait/|| captures the exit
  # status without tripping set -e.
  local claude_exit=0
  (
    set -o pipefail
    cat "$prompt_file" | run_with_timeout "$timeout" claude -p \
      --dangerously-skip-permissions \
      --verbose \
      --output-format stream-json \
      2>&1 | tee "$output_file" | _parse_uat_activity "$quiet"
  ) &
  local pipeline_pid=$!
  wait "$pipeline_pid" || claude_exit=$?

  rm -f "$prompt_file"

  # Exit 124 (timeout's conventional status) is tolerated: a timed-out
  # session may still have produced a usable fix.
  if [[ $claude_exit -ne 0 ]] && [[ $claude_exit -ne 124 ]]; then
    print_warning "Fix session ended unexpectedly — will retry"
    _log_uat "$case_id" "GREEN: Session failed (exit $claude_exit)"
    _increment_green_retry "$case_id"
    rm -f "$output_file"
    return
  fi

  # Enforce GREEN constraint: no test file modifications
  if _test_file_modified "$test_file"; then
    print_warning "$case_id: Test file was changed during fix (not allowed) — restoring original"
    _restore_test_file "$test_file" "$case_id"
    _log_uat "$case_id" "GREEN: Test file restored after modification"
  fi

  # Run the test
  if _run_test "$test_file" "$case_type"; then
    # PASS — check for regressions before committing
    if _check_regressions; then
      print_success "$case_id: Fixed! Test passes and nothing else broke"
      _mark_passed "$case_id"
      _track_fixed_files "$case_id"
      UAT_BUGS_FIXED=$((UAT_BUGS_FIXED + 1))
      _commit_result "$case_id" "$test_file"
      UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
      _log_uat "$case_id" "GREEN: PASSED"
    else
      # Regression detected — rollback
      print_error "$case_id: Fix broke other tests — undoing the change"
      _rollback_to_snapshot "$case_id"
      _flag_for_human "$case_id" "Fix broke other tests"
      _increment_green_retry "$case_id"
      _log_uat "$case_id" "GREEN: ROLLBACK — fix caused regression"
    fi
  else
    # FAIL — retry GREEN
    print_warning "$case_id: Fix didn't work — test still fails, will retry"
    _save_failure_context "$case_id" "$output_file"
    _increment_green_retry "$case_id"
  fi

  rm -f "$output_file"
}
|
|
989
|
+
|
|
990
|
+
# ============================================================================
|
|
991
|
+
# TEST EXECUTION
|
|
992
|
+
# ============================================================================
|
|
993
|
+
|
|
994
|
+
# Execute one test file with the appropriate runner; pass/fail via exit status.
# On failure, prints the last 30 lines of output and preserves the full log at
# $UAT_MODE_DIR/last_test_output.log for failure classification and feedback.
# Arguments:
#   $1 - test file path
#   $2 - test type: "e2e" runs Playwright, anything else is integration
# Returns: the test command's exit status (via safe_exec)
_run_test() {
  local test_file="$1"
  local test_type="$2"
  local log_file
  log_file=$(create_temp_file ".uat-test.log")

  local test_cmd=""

  if [[ "$test_type" == "e2e" ]]; then
    # Playwright discovers playwright.config.{ts,js} on its own when present.
    # BUG FIX: the old code passed --config=playwright.config.ts precisely
    # when NO config existed, pointing at a file known to be absent and
    # failing immediately. Run without the flag in both cases.
    test_cmd="npx playwright test $test_file"
  else
    # Integration — detect test runner from project marker files
    if [[ -f "vitest.config.ts" ]] || [[ -f "vitest.config.js" ]] || [[ -f "vite.config.ts" ]]; then
      test_cmd="npx vitest run $test_file"
    elif [[ -f "jest.config.ts" ]] || [[ -f "jest.config.js" ]] || grep -q '"jest"' package.json 2>/dev/null; then
      test_cmd="npx jest $test_file"
    elif [[ -f "pytest.ini" ]] || [[ -f "pyproject.toml" ]]; then
      local py_runner
      py_runner=$(detect_python_runner ".")
      test_cmd="${py_runner}${py_runner:+ }pytest $test_file -v"
    else
      # Unknown stack — default to vitest
      test_cmd="npx vitest run $test_file"
    fi
  fi

  echo " Running: $test_cmd"

  if safe_exec "$test_cmd" "$log_file"; then
    rm -f "$log_file"
    return 0
  else
    echo ""
    echo " Test output (last 30 lines):"
    tail -30 "$log_file" | sed 's/^/ /'
    # Keep the full log for _classify_red_failure / _save_failure_context
    cp "$log_file" "$UAT_MODE_DIR/last_test_output.log"
    rm -f "$log_file"
    return 1
  fi
}
|
|
1038
|
+
|
|
1039
|
+
# ============================================================================
|
|
1040
|
+
# TEST QUALITY VALIDATION
|
|
1041
|
+
# ============================================================================
|
|
1042
|
+
|
|
1043
|
+
# Reject tests that only check structure (page loads) without verifying content.
|
|
1044
|
+
# A test that asserts "page has URL /dashboard" proves nothing about correctness.
|
|
1045
|
+
# A test that asserts "page shows 'Welcome, John'" proves the right data rendered.
|
|
1046
|
+
# Reject tests that only check structure (page loads) without verifying content.
# A test that asserts "page has URL /dashboard" proves nothing about correctness.
# A test that asserts "page shows 'Welcome, John'" proves the right data rendered.
#
# Heuristics (grep-based, counted per occurrence):
#   1. at least 2 assertion calls overall
#   2. at least 1 content assertion (right data, not just right structure)
#   3. browser-driving tests must pair an interaction (fill/type/press/click)
#      with a content assertion; non-browser API tests are exempt
# Returns 0 when the test looks substantive, 1 when it is shallow.
_validate_test_quality() {
  local test_file="$1"
  local case_id="$2"

  # Count total assertion calls.
  # BUG FIX: grep -c counts matching LINES, so several assertions on one line
  # were undercounted; grep -o | wc -l counts every occurrence.
  local assertion_count
  assertion_count=$(grep -oE 'expect\(|assert\(|\.should\(' "$test_file" 2>/dev/null | wc -l | tr -d ' ')
  assertion_count=${assertion_count:-0}

  if [[ "$assertion_count" -lt 2 ]]; then
    _log_uat "$case_id" "SHALLOW: only $assertion_count assertion(s)"
    return 1
  fi

  # Count content assertions — these verify the RIGHT data, not just structure
  # Includes: toContain, toHaveText, toBe, toEqual, toMatch, textContent, innerText
  local content_assertions
  content_assertions=$(grep -oE 'toContain\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(|textContent|innerText|toHaveValue\(' "$test_file" 2>/dev/null | wc -l | tr -d ' ')
  content_assertions=${content_assertions:-0}

  if [[ "$content_assertions" -eq 0 ]]; then
    _log_uat "$case_id" "SHALLOW: no content assertions (only structural checks)"
    return 1
  fi

  # Check for input→output test pattern: test fills data and checks the result
  # Look for fill/type followed by expect — proves the test verifies a response to input
  local has_input_output=false
  if grep -qE 'fill\(|type\(|press\(|click\(' "$test_file" 2>/dev/null; then
    if grep -qE 'toContain\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(' "$test_file" 2>/dev/null; then
      has_input_output=true
    fi
  fi

  # For e2e tests, require at least one input→output pattern
  if [[ "$has_input_output" == "false" ]]; then
    # Check if it's an API/integration test (no browser interaction expected)
    if grep -qE 'page\.|browser\.|playwright' "$test_file" 2>/dev/null; then
      _log_uat "$case_id" "SHALLOW: e2e test has no input→output assertions"
      return 1
    fi
  fi

  _log_uat "$case_id" "Quality OK: $assertion_count assertions ($content_assertions content)"
  return 0
}
|
|
1090
|
+
|
|
1091
|
+
# Save feedback about shallow tests so Claude gets specific guidance on retry
|
|
1092
|
+
# Append concrete guidance to the failure-feedback file after a test was
# rejected as shallow, so the next RED retry gets specific instructions.
# Arguments: $1 - case id, $2 - rejected test file
_save_shallow_test_feedback() {
  local case_id="$1"
  local test_file="$2"

  # Recompute the same line-based counts _validate_test_quality used.
  local total content
  total=$(grep -cE 'expect\(|assert\(|\.should\(' "$test_file" 2>/dev/null || true)
  content=$(grep -cE 'toContain\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(|textContent|innerText|toHaveValue\(' "$test_file" 2>/dev/null || true)

  {
    printf '%s\n' \
      "" \
      "=== Test quality check failed for $case_id ===" \
      "" \
      "Your test is too shallow. It checks structure but not correctness." \
      "" \
      "Stats: $total total assertions, $content content assertions" \
      "" \
      "What's wrong:"
    if [[ "$total" -lt 2 ]]; then
      printf '%s\n' " - Only $total assertion(s). Every test needs at least 2."
    fi
    if [[ "$content" -eq 0 ]]; then
      printf '%s\n' \
        " - ZERO content assertions. You're only checking that pages load," \
        " not that they show the RIGHT content." \
        "" \
        " Bad: await expect(page).toHaveURL('/dashboard');" \
        " Good: await expect(page.getByText('Welcome, John')).toBeVisible();" \
        "" \
        " Bad: await expect(form).toBeVisible();" \
        " Good: await expect(page.getByText('Email is required')).toBeVisible();"
    fi
    printf '%s\n' \
      "" \
      "Fix: Read the assertions in .ralph/$UAT_CONFIG_NS/plan.json for this test case." \
      "Each assertion has an 'input' and 'expected' — encode THOSE as expect() calls." \
      "---"
  } >> "$UAT_FAILURE_FILE"
}
|
|
1128
|
+
|
|
1129
|
+
# ============================================================================
|
|
1130
|
+
# FAILURE HANDLING
|
|
1131
|
+
# ============================================================================
|
|
1132
|
+
|
|
1133
|
+
# Append the latest failed attempt (header + tail of the test output) to the
# failure-feedback file, then trim the file to its last 200 lines so the
# context fed back into prompts stays bounded.
# Arguments: $1 - case id, $2 - session output file (currently unused here)
_save_failure_context() {
  local case_id="$1"
  local output_file="$2"

  local prior
  prior=$(jq -r --arg id "$case_id" '.testCases[] | select(.id==$id) | .retryCount // 0' "$UAT_PLAN_FILE")

  {
    printf '%s\n' "" "=== Attempt $((prior + 1)) failed for $case_id ===" ""
    if [[ -f "$UAT_MODE_DIR/last_test_output.log" ]]; then
      printf '%s\n' "--- Test Output ---"
      tail -50 "$UAT_MODE_DIR/last_test_output.log"
      printf '%s\n' ""
    fi
    printf '%s\n' "---"
  } >> "$UAT_FAILURE_FILE"

  # Cap at 200 lines
  if [[ -f "$UAT_FAILURE_FILE" ]]; then
    local total
    total=$(wc -l < "$UAT_FAILURE_FILE" | tr -d ' ')
    if [[ $total -gt 200 ]]; then
      tail -200 "$UAT_FAILURE_FILE" > "$UAT_FAILURE_FILE.tmp" && mv "$UAT_FAILURE_FILE.tmp" "$UAT_FAILURE_FILE"
    fi
  fi
}
|
|
1161
|
+
|
|
1162
|
+
# Record a failed RED attempt: bump redRetries in plan.json and recompute the
# combined retryCount (red + green) used by the circuit breaker and reports.
_increment_red_retry() {
  local case_id="$1"
  update_json "$UAT_PLAN_FILE" \
    --arg id "$case_id" \
    '(.testCases[] | select(.id==$id)) |= . + {
      redRetries: ((.redRetries // 0) + 1),
      retryCount: ((.redRetries // 0) + 1 + (.greenRetries // 0))
    }'
}
|
|
1171
|
+
|
|
1172
|
+
# Record a failed GREEN attempt: bump greenRetries in plan.json and recompute
# the combined retryCount (red + green) used by the circuit breaker and reports.
_increment_green_retry() {
  local case_id="$1"
  update_json "$UAT_PLAN_FILE" \
    --arg id "$case_id" \
    '(.testCases[] | select(.id==$id)) |= . + {
      greenRetries: ((.greenRetries // 0) + 1),
      retryCount: ((.redRetries // 0) + (.greenRetries // 0) + 1)
    }'
}
|
|
1181
|
+
|
|
1182
|
+
# Persist a case's TDD phase marker in plan.json.
# "red" means the failing test is committed and a GREEN fix is pending;
# null means fresh / completed.
_mark_phase() {
  local case_id="$1"
  local phase="$2" # "red" or null
  if [[ "$phase" != "null" ]]; then
    update_json "$UAT_PLAN_FILE" \
      --arg id "$case_id" \
      --arg phase "$phase" \
      '(.testCases[] | select(.id==$id)) |= . + {phase: $phase}'
  else
    # A real JSON null must be literal in the filter — --arg would store the
    # string "null" instead.
    update_json "$UAT_PLAN_FILE" \
      --arg id "$case_id" \
      '(.testCases[] | select(.id==$id)) |= . + {phase: null}'
  fi
}
|
|
1196
|
+
|
|
1197
|
+
# Mark a case as passed in plan.json and reset all retry/phase bookkeeping.
_mark_passed() {
  local case_id="$1"
  update_json "$UAT_PLAN_FILE" \
    --arg id "$case_id" \
    '(.testCases[] | select(.id==$id)) |= . + {passes: true, retryCount: 0, phase: null, redRetries: 0, greenRetries: 0}'
  # Clear failure context for this case.
  # NOTE(review): this deletes the whole feedback file, not just this case's
  # entries — confirm other cases' pending context is meant to be discarded.
  rm -f "$UAT_FAILURE_FILE"
}
|
|
1205
|
+
|
|
1206
|
+
# Commit ONLY the failing (RED) test so the bug is documented in history
# before a GREEN session starts changing app code.
# Pre-commit hooks that auto-fix files abort the commit and report
# "files were modified by this hook"; we re-stage and retry up to 3 times,
# then fall back to --no-verify so a formatter can never wedge the loop.
_commit_red_test() {
  local case_id="$1"
  local test_file="$2"

  # No git available / not a repo — nothing to record, not an error.
  if ! command -v git &>/dev/null || [[ ! -d ".git" ]]; then
    return 0
  fi

  git add "$test_file" 2>/dev/null || true

  # Nothing staged (file unchanged) — skip the commit.
  if git diff --cached --quiet 2>/dev/null; then
    return 0
  fi

  local commit_log
  commit_log=$(mktemp)
  local success=false

  for attempt in 1 2 3; do
    if git commit -m "test($case_id): TDD red -- failing test identifies bug" > "$commit_log" 2>&1; then
      success=true
      break
    fi
    # A hook rewrote the file: stage the new content and try again.
    if grep -q "files were modified by this hook" "$commit_log" 2>/dev/null; then
      git add "$test_file"
      continue
    fi
    # Any other failure: stop retrying and use the fallback below.
    break
  done

  if [[ "$success" != "true" ]]; then
    git add "$test_file"
    git commit -m "test($case_id): TDD red -- failing test identifies bug" --no-verify > "$commit_log" 2>&1 || true
  fi

  rm -f "$commit_log"
}
|
|
1243
|
+
|
|
1244
|
+
# Decide why a freshly written RED test failed and print the verdict.
# "test_bug": the output shows the test itself is broken (syntax errors,
# missing imports, undefined names) — the test should be rewritten.
# "app_bug": anything else (assertion failures, element timeouts) — the test
# is presumed correct and the application misbehaves.
_classify_red_failure() {
  local test_file="$1"
  local case_id="$2"

  local log="$UAT_MODE_DIR/last_test_output.log"
  local verdict="app_bug"

  if [[ -f "$log" ]] && grep -qiE 'SyntaxError|Cannot find module|ModuleNotFoundError|ImportError|TypeError:.*is not a function|ReferenceError:.*is not defined|unexpected token' "$log" 2>/dev/null; then
    verdict="test_bug"
  fi

  if [[ "$verdict" == "test_bug" ]]; then
    _log_uat "$case_id" "RED classify: test_bug (syntax/import error)"
  else
    _log_uat "$case_id" "RED classify: app_bug (assertion failure)"
  fi
  echo "$verdict"
}
|
|
1263
|
+
|
|
1264
|
+
# True (exit 0) when the test file has uncommitted changes relative to HEAD;
# false when it is clean or no git repo is available (without git we cannot
# tell, so report "not modified").
_test_file_modified() {
  local test_file="$1"
  command -v git &>/dev/null && [[ -d ".git" ]] || return 1
  ! git diff --quiet HEAD -- "$test_file" 2>/dev/null
}
|
|
1273
|
+
|
|
1274
|
+
# Revert the test file to its committed state after a GREEN session modified
# it (which the GREEN phase forbids). No-op outside a git repo.
# Arguments: $1 - test file, $2 - case id for logging (defaults to "GREEN")
_restore_test_file() {
  local test_file="$1"
  local case_id="${2:-GREEN}"
  command -v git &>/dev/null && [[ -d ".git" ]] || return 0
  git checkout HEAD -- "$test_file" 2>/dev/null || true
  _log_uat "$case_id" "GREEN: Restored test file: $test_file"
}
|
|
1282
|
+
|
|
1283
|
+
# Append a RED-phase violation notice to the failure-feedback file so the
# retried test-writing session is told not to touch application code.
_save_red_violation_feedback() {
  local case_id="$1"
  cat >> "$UAT_FAILURE_FILE" << EOF

=== RED PHASE VIOLATION for $case_id ===

You modified application source files during the RED phase.
In the RED phase, you must ONLY write the test file.

DO NOT modify any files in src/, api/, app/, lib/, or similar directories.
Write ONLY the test file specified in plan.json.

If the app has a bug, let the test FAIL. A separate GREEN session will fix the app.
---
EOF
}
|
|
1299
|
+
|
|
1300
|
+
# Queue a case for manual review; entries surface in the final report.
# Arguments: $1 - case id, $2 - human-readable reason
_flag_for_human() {
  local case_id="$1"
  local reason="$2"
  UAT_NEEDS_HUMAN+=("${case_id}: ${reason}")
  _log_uat "$case_id" "NEEDS_HUMAN: $reason"
}
|
|
1306
|
+
|
|
1307
|
+
# ============================================================================
|
|
1308
|
+
# GIT OPERATIONS
|
|
1309
|
+
# ============================================================================
|
|
1310
|
+
|
|
1311
|
+
# Create a rollback point before working on a case. Tags point at commits,
# not the working tree, so any dirty working-tree/index state is committed
# first (--no-verify: this is machine bookkeeping; hooks would only get in
# the way). The tag is force-moved so retries of the same case reuse it.
_git_snapshot() {
  local case_id="$1"
  if command -v git &>/dev/null && [[ -d ".git" ]]; then
    # Commit any pending changes so the tag captures a clean state
    # (tags point at commits, not the working tree)
    if ! git diff --quiet HEAD 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
      git add -A 2>/dev/null || true
      git commit -m "$UAT_CONFIG_NS: snapshot before $case_id" --no-verify 2>/dev/null || true
    fi
    git tag -f "uat-snapshot-${case_id}" 2>/dev/null || true
  fi
}
|
|
1323
|
+
|
|
1324
|
+
# Hard-reset the repo to the case's snapshot tag, discarding every staged and
# committed change made since _git_snapshot ran. No-op outside a git repo or
# when the tag does not exist.
_rollback_to_snapshot() {
  local case_id="$1"
  command -v git &>/dev/null && [[ -d ".git" ]] || return 0
  local tag="uat-snapshot-${case_id}"
  git rev-parse "$tag" >/dev/null 2>&1 || return 0
  # Reset to the snapshot commit — undoes both staged and committed changes since
  git reset --hard "$tag" 2>/dev/null || true
  print_info "Reverted changes for $case_id"
}
|
|
1335
|
+
|
|
1336
|
+
# True (exit 0) when any tracked file OTHER than the test file (and .ralph/
# state) has uncommitted changes relative to HEAD. Used to enforce the RED
# "test-only" rule and to choose the GREEN commit message.
# NOTE(review): `git diff --name-only HEAD` does not list untracked files, so
# a brand-new app file created during RED is not detected here — confirm
# whether untracked paths should also count as app changes.
_has_app_changes() {
  local test_file="$1"
  if command -v git &>/dev/null && [[ -d ".git" ]]; then
    # Check if any files OTHER than the test file were modified
    local changed_files
    changed_files=$(git diff --name-only HEAD 2>/dev/null | grep -Fxv "$test_file" | grep -v '\.ralph/' || true)
    [[ -n "$changed_files" ]]
  else
    # No git — cannot tell; report "no app changes".
    return 1
  fi
}
|
|
1347
|
+
|
|
1348
|
+
# Run the project's full test suite to verify a GREEN fix broke nothing else.
# Uses .checks.testCommand from config when set; otherwise auto-detects the
# stack from marker files. Returns 0 when the suite passes — or when no
# runner could be detected (regressions are then unverifiable, assume OK).
_check_regressions() {
  echo " Making sure other tests still pass..."

  local cmd
  cmd=$(get_config '.checks.testCommand' "")

  if [[ -z "$cmd" ]]; then
    # Auto-detect by project marker files
    if [[ -f "package.json" ]] && grep -q '"test"' package.json; then
      cmd="npm test"
    elif [[ -f "pytest.ini" ]] || [[ -f "pyproject.toml" ]]; then
      local runner
      runner=$(detect_python_runner ".")
      cmd="${runner}${runner:+ }pytest"
    elif [[ -f "Cargo.toml" ]]; then
      cmd="cargo test"
    elif [[ -f "go.mod" ]]; then
      cmd="go test ./..."
    else
      # No test command — can't check regressions, assume ok
      return 0
    fi
  fi

  local log
  log=$(create_temp_file ".uat-regression.log")

  if safe_exec "$cmd" "$log"; then
    print_success " All other tests still pass"
    rm -f "$log"
    return 0
  fi

  print_error " Some other tests broke!"
  echo " Output (last 20 lines):"
  tail -20 "$log" | sed 's/^/ /'
  rm -f "$log"
  return 1
}
|
|
1388
|
+
|
|
1389
|
+
# Commit the passing test plus any accompanying app fix, then remove the
# case's snapshot tag. The commit message distinguishes test-only commits
# from test+fix commits. Pre-commit hooks that auto-fix files abort with
# "files were modified by this hook"; we re-stage and retry up to 3 times,
# then fall back to --no-verify so a formatter can never wedge the loop.
_commit_result() {
  local case_id="$1"
  local test_file="$2"

  # No git available / not a repo — nothing to record, not an error.
  if ! command -v git &>/dev/null || [[ ! -d ".git" ]]; then
    return 0
  fi

  # Stage the test file and any app fixes
  git add "$test_file" 2>/dev/null || true
  git add -A 2>/dev/null || true

  # Check if there's anything to commit
  if git diff --cached --quiet 2>/dev/null; then
    return 0
  fi

  # Pick the message: did the fix touch app code beyond the test file?
  local commit_msg
  if _has_app_changes "$test_file"; then
    commit_msg="test+fix($case_id): TDD green -- test + app fix"
  else
    commit_msg="test($case_id): $UAT_CONFIG_NS test"
  fi

  # Try commit with retries for auto-fix hooks
  local commit_log
  commit_log=$(mktemp)
  local success=false

  for attempt in 1 2 3; do
    if git commit -m "$commit_msg" > "$commit_log" 2>&1; then
      success=true
      break
    fi
    # A hook rewrote files: stage the new content and try again.
    if grep -q "files were modified by this hook" "$commit_log" 2>/dev/null; then
      git add -A
      continue
    fi
    # Any other failure: stop retrying and use the fallback below.
    break
  done

  if [[ "$success" != "true" ]]; then
    # Try with --no-verify as last resort
    git add -A
    git commit -m "$commit_msg" --no-verify > "$commit_log" 2>&1 || true
  fi

  rm -f "$commit_log"

  # Clean up snapshot tag
  git tag -d "uat-snapshot-${case_id}" 2>/dev/null || true
}
|
|
1441
|
+
|
|
1442
|
+
# Record which non-test files were changed by the fix for this case, for the
# final report. No-op outside a git repo.
# NOTE(review): this runs before _commit_result, so HEAD~1 here predates the
# fix commit — verify that is the intended diff range.
_track_fixed_files() {
  local case_id="$1"
  command -v git &>/dev/null && [[ -d ".git" ]] || return 0
  local changed
  changed=$(git diff --name-only HEAD~1 2>/dev/null | grep -v 'test' | grep -v '\.ralph/' || true)
  while IFS= read -r path; do
    [[ -n "$path" ]] && UAT_FILES_FIXED+=("$path ($case_id)")
  done <<< "$changed"
}
|
|
1452
|
+
|
|
1453
|
+
# ============================================================================
|
|
1454
|
+
# PROMPT BUILDING
|
|
1455
|
+
# ============================================================================
|
|
1456
|
+
|
|
1457
|
+
#######################################
# Build the RED-phase (write-test-only) prompt for one test case.
# Starts from a UAT prompt template, appends the RED-phase instructions,
# then retry/failure context, a config pointer, and signs.
# Globals:   RALPH_TEMPLATES, UAT_MODE_DIR, UAT_CONFIG_NS,
#            UAT_FAILURE_FILE (all read)
# Arguments: $1 - test case ID; $2 - prompt file to (over)write
#######################################
_build_red_prompt() {
    local case_id="$1"
    local prompt_file="$2"

    # Prefer project-specific UAT prompt (generated during discovery),
    # fall back to the universal template
    local uat_prompt="$RALPH_TEMPLATES/UAT-PROMPT.md"
    if [[ -f "$UAT_MODE_DIR/UAT-PROMPT.md" ]]; then
        uat_prompt="$UAT_MODE_DIR/UAT-PROMPT.md"
    fi
    cat "$uat_prompt" > "$prompt_file"

    # Unquoted delimiter: $case_id and $UAT_CONFIG_NS expand here;
    # backticks are escaped (\`) so they reach the prompt literally.
    cat >> "$prompt_file" << PROMPT_SECTION

---

## Phase: RED — Write Test Only

You are in the **RED phase** of TDD. Your ONLY job is to write the test.

**CRITICAL: DO NOT modify any application source files. Test files ONLY.**

Your tasks:

1. **Read the test case** from \`.ralph/$UAT_CONFIG_NS/plan.json\` (case ID: $case_id)
2. **Explore the feature** using Playwright MCP — navigate to the relevant pages, interact with the UI
3. **Write the test file** at the path specified in the test case
4. **Encode every assertion** from the test case as an actual expect() call
5. **Include edge cases** listed in the test case

### Rules

- DO NOT modify any application source files (src/, api/, app/, etc.)
- Write the test to verify CORRECT behavior based on the plan's assertions
- If the app has a bug, the test WILL fail — that is the expected and correct outcome
- Ralph will detect and reject any app code changes in this phase

### Assertions are mandatory

The test case in plan.json has an \`assertions\` array. Each assertion has:
- \`input\`: what to do (fill form, click button, navigate to URL)
- \`expected\`: what should happen (text appears, redirect occurs, error shown)
- \`strategy\`: how to verify (keyword, structural, navigation, security, llm-judge)

**Every assertion MUST become an expect() call in your test.** This is how we verify
correctness, not just that the page loads. Ralph will reject tests that only check
structure without verifying content.

Example — assertion in plan.json:
\`\`\`json
{"input": "Fill name='John', submit", "expected": "Shows 'Welcome, John'", "strategy": "keyword"}
\`\`\`

Becomes in the test:
\`\`\`typescript
await page.getByLabel('Name').fill('John');
await page.getByRole('button', { name: 'Submit' }).click();
await expect(page.getByText('Welcome, John')).toBeVisible();
\`\`\`
PROMPT_SECTION

    # Inject failure context if retrying — last 50 lines of the previous
    # attempt's failure output, fenced as a code block.
    if [[ -f "$UAT_FAILURE_FILE" ]]; then
        echo "" >> "$prompt_file"
        echo "### Previous RED Attempt Failed" >> "$prompt_file"
        echo "" >> "$prompt_file"
        echo "Your previous test attempt had issues. Fix them:" >> "$prompt_file"
        echo '```' >> "$prompt_file"
        tail -50 "$UAT_FAILURE_FILE" >> "$prompt_file"
        echo '```' >> "$prompt_file"
    fi

    # Inject config context (pointer only — the agent reads the file itself)
    echo "" >> "$prompt_file"
    echo "### Config" >> "$prompt_file"
    echo "" >> "$prompt_file"
    echo "Read \`.ralph/config.json\` for URLs and directories." >> "$prompt_file"

    # Inject signs (helper defined elsewhere in this file)
    _inject_signs >> "$prompt_file"
}
|
|
1538
|
+
|
|
1539
|
+
#######################################
# Build the GREEN-phase (fix-the-app) prompt for one test case.
# Unlike RED, this does not start from the UAT template — it is a short,
# focused prompt plus the failure output that guides the fix.
# Globals:   UAT_CONFIG_NS, UAT_FAILURE_FILE, UAT_MODE_DIR (all read)
# Arguments: $1 - test case ID; $2 - path of the committed test file;
#            $3 - prompt file to (over)write
#######################################
_build_green_prompt() {
    local case_id="$1"
    local test_file="$2"
    local prompt_file="$3"

    # GREEN prompt is focused — no UAT-PROMPT.md preamble needed.
    # Unquoted delimiter: $case_id/$test_file/$UAT_CONFIG_NS expand;
    # backticks are escaped (\`) so they reach the prompt literally.
    cat > "$prompt_file" << PROMPT_SECTION
# GREEN Phase — Fix Application Code

A test has been written that correctly identifies a bug. Your job is to fix the
APPLICATION CODE so the test passes.

**CRITICAL: DO NOT modify the test file (\`$test_file\`). Fix the app, not the test.**

## Case: $case_id

1. **Read the test file** at \`$test_file\` to understand what it checks
2. **Read the test case** from \`.ralph/$UAT_CONFIG_NS/plan.json\` (case ID: $case_id) for context
3. **Read the failure output** below to understand what went wrong
4. **Fix the APPLICATION CODE** — make the minimum change needed to pass the test
5. **DO NOT modify the test file** — Ralph will restore it if you do

### Rules

- Make the MINIMUM change needed to fix the bug
- Do NOT modify the test file — it has been validated and committed
- Do NOT add workarounds or hacks — fix the actual bug
- Read .ralph/config.json for project URLs and directories
PROMPT_SECTION

    # Inject failure context (critical for GREEN — this is what guides the fix)
    if [[ -f "$UAT_FAILURE_FILE" ]]; then
        echo "" >> "$prompt_file"
        echo "## Failure Output" >> "$prompt_file"
        echo "" >> "$prompt_file"
        echo '```' >> "$prompt_file"
        tail -80 "$UAT_FAILURE_FILE" >> "$prompt_file"
        echo '```' >> "$prompt_file"
    fi

    # Also include last test output if available (separate from the
    # failure file; written by the test-runner step)
    if [[ -f "$UAT_MODE_DIR/last_test_output.log" ]]; then
        echo "" >> "$prompt_file"
        echo "## Last Test Output" >> "$prompt_file"
        echo "" >> "$prompt_file"
        echo '```' >> "$prompt_file"
        tail -80 "$UAT_MODE_DIR/last_test_output.log" >> "$prompt_file"
        echo '```' >> "$prompt_file"
    fi

    # Inject signs (helper defined elsewhere in this file)
    _inject_signs >> "$prompt_file"
}
|
|
1592
|
+
|
|
1593
|
+
# ============================================================================
|
|
1594
|
+
# ACTIVITY FEED (reuses pattern from loop.sh)
|
|
1595
|
+
# ============================================================================
|
|
1596
|
+
|
|
1597
|
+
#######################################
# Render a live activity feed from the agent's stream-JSON output on stdin.
# Non-JSON lines pass through untouched; "assistant" messages become
# per-tool activity lines; the final "result" message becomes a cost/time
# summary. With quiet=true, JSON lines are swallowed entirely.
# Globals:   PWD (read, to relativize file paths)
# Arguments: $1 - "true" to suppress the formatted feed (default "false")
# Outputs:   formatted feed on stdout (uses ANSI escapes)
#######################################
_parse_uat_activity() {
    local quiet="${1:-false}"
    # ANSI codes: dim, green, reset — embedded directly in printf format
    # strings below (safe: values are fixed escapes, not user data).
    local dim=$'\033[2m' green=$'\033[0;32m' nc=$'\033[0m'
    local line
    while IFS= read -r line; do
        # Non-JSON lines — always pass through
        if [[ "$line" != "{"* ]]; then
            echo "$line"
            continue
        fi

        [[ "$quiet" == "true" ]] && continue

        # Cheap substring pre-filter before paying for a jq invocation.
        if [[ "$line" != *'"assistant"'* && "$line" != *'"result"'* ]]; then
            continue
        fi

        local msg_type
        msg_type=$(jq -r '.type // empty' <<< "$line" 2>/dev/null) || continue

        if [[ "$msg_type" == "assistant" ]]; then
            # One "name<TAB>input-json" line per tool_use entry.
            local tool_entries
            tool_entries=$(jq -r '
                .message.content[]?
                | select(.type == "tool_use")
                | .name + "\t" + (.input | tostring)
            ' <<< "$line" 2>/dev/null) || continue

            while IFS=$'\t' read -r tool_name tool_input; do
                [[ -z "$tool_name" ]] && continue
                local label="" detail=""
                case "$tool_name" in
                    Read)
                        label="Reading"
                        detail=$(jq -r '.file_path // empty' <<< "$tool_input" 2>/dev/null)
                        # Show paths relative to the project root.
                        detail="${detail#"$PWD/"}"
                        ;;
                    Edit)
                        label="Editing"
                        detail=$(jq -r '.file_path // empty' <<< "$tool_input" 2>/dev/null)
                        detail="${detail#"$PWD/"}"
                        ;;
                    Write)
                        label="Creating"
                        detail=$(jq -r '.file_path // empty' <<< "$tool_input" 2>/dev/null)
                        detail="${detail#"$PWD/"}"
                        ;;
                    Bash)
                        label="Running"
                        # Prefer the human-readable description; fall back
                        # to the raw command, truncated for the feed.
                        detail=$(jq -r '.description // .command // empty' <<< "$tool_input" 2>/dev/null)
                        detail="${detail:0:60}"
                        ;;
                    mcp__playwright__*)
                        label="Browser"
                        # NOTE(review): strips the "browser_" prefix; tools
                        # matching mcp__playwright__* without that prefix
                        # show their full name — presumably acceptable.
                        local action="${tool_name#mcp__playwright__browser_}"
                        detail="$action"
                        ;;
                    *)
                        label="$tool_name"
                        ;;
                esac
                printf "  ${dim}⟳${nc} %-10s %s\n" "$label" "$detail"
            done <<< "$tool_entries"

        elif [[ "$msg_type" == "result" ]]; then
            # Final summary: cost in USD and wall-clock duration.
            local cost duration_ms
            cost=$(jq -r '.total_cost_usd // empty' <<< "$line" 2>/dev/null)
            duration_ms=$(jq -r '.duration_ms // empty' <<< "$line" 2>/dev/null)
            local cost_str="" dur_str=""
            [[ -n "$cost" ]] && cost_str=$(printf '$%.2f' "$cost")
            if [[ -n "$duration_ms" ]]; then
                local total_secs=$(( duration_ms / 1000 ))
                if [[ $total_secs -ge 60 ]]; then
                    dur_str="$((total_secs / 60))m $((total_secs % 60))s"
                else
                    dur_str="${total_secs}s"
                fi
            fi
            echo ""
            if [[ -n "$cost_str" && -n "$dur_str" ]]; then
                echo -e "  ${green}✓ Done${nc} ${dim}(${cost_str}, ${dur_str})${nc}"
            elif [[ -n "$cost_str" ]]; then
                echo -e "  ${green}✓ Done${nc} ${dim}(${cost_str})${nc}"
            fi
        fi
    done
}
|
|
1684
|
+
|
|
1685
|
+
# ============================================================================
|
|
1686
|
+
# PHASE 3: REPORT
|
|
1687
|
+
# ============================================================================
|
|
1688
|
+
|
|
1689
|
+
#######################################
# Print the final UAT results box (counts, test files, fixed app files,
# items needing human attention) and fire a desktop/system notification.
# Globals:   UAT_PLAN_FILE, UAT_MODE_LABEL, UAT_BUGS_FOUND, UAT_BUGS_FIXED,
#            UAT_RED_ONLY_PASSED, UAT_GREEN_ATTEMPTS, UAT_TESTS_WRITTEN,
#            UAT_FILES_FIXED[], UAT_NEEDS_HUMAN[] (all read)
# Outputs:   report box on stdout
#######################################
_print_report() {
    local total_cases passed_cases failed_cases skipped_cases
    # Each count falls back to "0" if the plan file is missing/unparsable.
    total_cases=$(jq '.testCases | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
    passed_cases=$(jq '[.testCases[] | select(.passes==true and .skipped!=true)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
    # NOTE(review): this selects passes==false regardless of skipped, so a
    # skipped case with passes==false is counted in both "failed" and
    # "skipped" tallies — confirm whether that is intended.
    failed_cases=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
    skipped_cases=$(jq '[.testCases[] | select(.skipped==true)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")

    echo ""
    echo "╔══════════════════════════════════════════════════════════╗"
    printf "║ %-14s Results ║\n" "$UAT_MODE_LABEL"
    echo "╠══════════════════════════════════════════════════════════╣"
    printf "║ Test cases: %-3s total, %-3s passed, %-3s failed, %-3s skipped ║\n" \
        "$total_cases" "$passed_cases" "$failed_cases" "$skipped_cases"
    printf "║ App bugs found: %-3s Fixed: %-3s ║\n" \
        "$UAT_BUGS_FOUND" "$UAT_BUGS_FIXED"
    printf "║ Already working: %-3s Needed fixing: %-3s ║\n" \
        "$UAT_RED_ONLY_PASSED" "$UAT_GREEN_ATTEMPTS"
    echo "║ ║"

    # List test files (passed first with ✅, then failed with ❌).
    # The while loops only print, so running them in pipe subshells is fine.
    if [[ $UAT_TESTS_WRITTEN -gt 0 ]]; then
        echo "║ New test files: ║"
        jq -r '.testCases[] | select(.passes==true and .skipped!=true) | " " + .testFile + " ✅"' "$UAT_PLAN_FILE" 2>/dev/null | while IFS= read -r line; do
            printf "║ %-56s║\n" "$line"
        done
        jq -r '.testCases[] | select(.passes==false) | " " + .testFile + " ❌"' "$UAT_PLAN_FILE" 2>/dev/null | while IFS= read -r line; do
            printf "║ %-56s║\n" "$line"
        done
    fi

    # List fixed app files, truncated to keep the box edges aligned.
    if [[ ${#UAT_FILES_FIXED[@]} -gt 0 ]]; then
        echo "║ ║"
        echo "║ App files fixed: ║"
        for f in "${UAT_FILES_FIXED[@]}"; do
            local display="$f"
            [[ ${#display} -gt 54 ]] && display="${display:0:51}..."
            printf "║ %-54s║\n" "$display"
        done
    fi

    # List items needing human attention (same truncation rule).
    if [[ ${#UAT_NEEDS_HUMAN[@]} -gt 0 ]]; then
        echo "║ ║"
        echo "║ Needs your attention: ║"
        for item in "${UAT_NEEDS_HUMAN[@]}"; do
            local display="$item"
            [[ ${#display} -gt 54 ]] && display="${display:0:51}..."
            printf "║ %-54s║\n" "$display"
        done
    fi

    echo "╚══════════════════════════════════════════════════════════╝"
    echo ""

    # Send notification (helper defined elsewhere in this file)
    send_notification "$UAT_MODE_LABEL: $passed_cases/$total_cases passed, $UAT_BUGS_FIXED bugs fixed"
}
|
|
1747
|
+
|
|
1748
|
+
# ============================================================================
|
|
1749
|
+
# BANNER
|
|
1750
|
+
# ============================================================================
|
|
1751
|
+
|
|
1752
|
+
#######################################
# Print the "UAT LOOP" ASCII-art banner, padded with a blank line on
# either side.
# Outputs:   banner on stdout
#######################################
_print_uat_banner() {
    printf '\n'
    printf '%s\n' " _ _ _ _____ _ "
    printf '%s\n' " | | | | / \\|_ _| | | ___ ___ _ __"
    printf '%s\n' " | | | |/ _ \\ | | | | / _ \\ / _ \\| '_ \\"
    printf '%s\n' " | |_| / ___ \\| | | |__| (_) | (_) | |_) |"
    printf '%s\n' " \\___/_/ \\_\\_| |_____\\___/ \\___/| .__/"
    printf '%s\n' " |_|"
    printf '\n'
}
|
|
1762
|
+
|
|
1763
|
+
#######################################
# Print the "ChaosAgent" ASCII-art banner with its red-team tagline.
# Outputs:   banner on stdout
#######################################
_print_chaos_banner() {
    printf '\n'
    printf '%s\n' " ____ _ _ _ "
    printf '%s\n' " / ___| |__ __ _ ___ ___ / \\ __ _ ___ _ __ | |_ "
    printf '%s\n' " | | | '_ \\ / _\` |/ _ \\/ __|| _ \\ / _\` |/ _ \\ '_ \\| __|"
    printf '%s\n' " | |___| | | | (_| | (_) \\__ \\/ ___ \\ (_| | __/ | | | |_ "
    printf '%s\n' " \\____|_| |_|\\__,_|\\___/|___/_/ \\_\\__, |\\___|_| |_|\\__|"
    printf '%s\n' " |___/ "
    printf '%s\n' " Red team mode — trying to break things"
    printf '\n'
}
|
|
1774
|
+
|
|
1775
|
+
# ============================================================================
|
|
1776
|
+
# CHAOS AGENT PROMPT
|
|
1777
|
+
# ============================================================================
|
|
1778
|
+
|
|
1779
|
+
#######################################
# Build the chaos-agent (red team) discovery prompt: UAT template,
# then the full red-team playbook (recon, team assembly, coordination,
# plan.json / UAT-PROMPT.md schemas), then shared project context.
# Globals:   RALPH_TEMPLATES (read)
# Arguments: $1 - prompt file to (over)write
#######################################
_build_chaos_agent_prompt() {
    local prompt_file="$1"

    # Start with UAT prompt template
    cat "$RALPH_TEMPLATES/UAT-PROMPT.md" > "$prompt_file"

    # Quoted delimiter ('PROMPT_SECTION'): everything below is literal —
    # no expansion, so backticks/fences need no escaping.
    cat >> "$prompt_file" << 'PROMPT_SECTION'

---

## Phase: Chaos Agent Red Team Discovery

You are the **team lead** of a red team. Your job is to coordinate a team of adversarial
agents that attack a live app, share intel, and produce a battle-tested plan of
vulnerabilities to fix.

**Mindset: "You are a red team. Coordinate to find every vulnerability."**

### Step 1: Recon (~60 seconds)

Before spawning anyone, do a quick recon yourself:

1. **Read `.ralph/config.json`** for URLs, auth config, and directories
2. **Read `.ralph/prd.json`** if it exists — completed stories tell you what was built
3. **Navigate the app** using Playwright MCP — click through nav, find pages, note the tech stack
4. **Take 2-3 screenshots** of key pages (save to `.ralph/chaos/screenshots/`)
5. **Map the attack surface** — what feature areas exist? (auth, forms, API, navigation, etc.)

Don't go deep. Just map what's there. ~60 seconds max.

### Step 2: Assemble the Red Team

Create a team and spawn teammates:

```
TeamCreate: "chaos-agent"
```

Spawn these teammates using the Task tool with `team_name: "chaos-agent"`:

1. **"recon"** (`subagent_type: "general-purpose"`) — Attack surface mapping. Catalogs every
input, form, API endpoint, auth mechanism. Shares intel with team: "login uses JWT in
localStorage", "admin panel at /admin has no auth check".

2. **"chaos"** (`subagent_type: "general-purpose"`) — Chaos testing. For every input: empty
strings, 10000-char payloads, special characters (`<>&"'/\`), unicode/emoji, null bytes.
For every form: double-submit, missing fields, back button after submit. Rapid-fire
interactions.

3. **"security"** (`subagent_type: "general-purpose"`) — Security testing. XSS in every
input (`<script>alert(1)</script>`), SQL injection (`'; DROP TABLE users; --`), auth bypass
via direct URL, IDOR via ID manipulation, sensitive data in localStorage/console/page source,
missing CSRF tokens.

**Only spawn agents for areas that exist.** If there are no forms, don't spawn a forms specialist.
If there's no auth, skip auth testing.

Agents communicate via SendMessage — recon shares discoveries, security acts on them.

### Agent Instructions Template

Every agent prompt MUST include:

1. **Their role and focus area** (from above)
2. **The recon intel** — pages, URLs, tech stack you discovered in Step 1
3. **Browser tab isolation** — "Open your own browser tab via `browser_tabs(action: 'new')`
before navigating. Do NOT use the existing tab."
4. **Communication** — "Share important discoveries with teammates via SendMessage.
Examples: 'Auth uses JWT in localStorage', 'Found unprotected admin route at /admin',
'Form at /profile has no CSRF token'. Read messages from teammates and adapt your testing."
5. **Output format** — "When done, send your findings to the team lead via SendMessage.
Format each finding as a test case with: title, category, testFile path, targetFiles,
assertions (input/expected/strategy), and edgeCases."

### Step 3: Coordinate

While your team works:

- **Monitor messages** from teammates as they report findings
- **Redirect effort** if needed — if recon discovers something important, message the
relevant specialist ("recon found an admin panel at /admin — security, check it for auth bypass")
- **Create tasks** in the shared task list for any new areas discovered

### Step 4: Collect + Merge + Write Plan

After all teammates finish:

1. Collect findings from all agent messages
2. Dedup by test file path (keep the case with more assertions)
3. Assign sequential IDs: `UAT-001`, `UAT-002`, ...
4. Write `.ralph/chaos/plan.json` (schema below)
5. Write `.ralph/chaos/UAT-PROMPT.md` (schema below)
6. Shut down all teammates via SendMessage with `type: "shutdown_request"`
7. Clean up with TeamDelete

### plan.json Schema

Write `.ralph/chaos/plan.json`:

```json
{
  "testSuite": {
    "name": "Chaos Agent",
    "generatedAt": "<ISO timestamp>",
    "status": "pending",
    "discoveryMethod": "chaos-agent"
  },
  "testCases": [
    {
      "id": "UAT-001",
      "title": "Feature area — what the test checks",
      "category": "auth|forms|navigation|api|ui|data|security",
      "type": "e2e|integration",
      "userStory": "As a user, I...",
      "testApproach": "What to test and how",
      "testFile": "tests/e2e/feature/test-name.spec.ts",
      "targetFiles": ["src/pages/feature.tsx"],
      "edgeCases": ["Edge case 1", "Edge case 2"],
      "assertions": [
        {
          "input": "Fill name='<script>alert(1)</script>', submit form",
          "expected": "Name displayed as literal text, no script execution",
          "strategy": "security"
        }
      ],
      "passes": false,
      "retryCount": 0,
      "source": "chaos-agent:agent-name"
    }
  ]
}
```

**Every test case MUST have at least 3 assertions** with concrete input/expected pairs:
1. One happy-path assertion (correct input → correct output)
2. One edge-case assertion (bad input → proper error handling)
3. One content assertion (page shows the RIGHT data, not just that it loads)

### UAT-PROMPT.md Schema

Write `.ralph/chaos/UAT-PROMPT.md` — a project-specific testing guide based on what the
red team ACTUALLY FOUND. Include:

```markdown
# Chaos Agent Guide — [Project Name]

## App Overview
- What the app does (1-2 sentences)
- Tech stack observed (framework, API patterns, auth method)
- Base URLs (frontend, API if applicable)

## Pages & Routes Discovered
For each page:
- URL pattern and what it shows
- Key interactive elements (forms, buttons, links)
- Selectors that work (data-testid, roles, labels)

## Auth Flow
- How login works (form fields, redirect after login)
- Test credentials if available (from config or .env)
- What pages require auth vs. public

## Known Forms & Inputs
For each form:
- Fields with their labels/names/selectors
- Required vs optional fields
- Validation behavior observed

## What "Correct" Looks Like
For each feature area:
- Expected behavior observed
- Specific text/numbers that should appear

## Console & Network Observations
- Any existing console errors/warnings
- API endpoints observed
- Response patterns (JSON structure, status codes)

## Red Team Findings
- Vulnerabilities discovered (XSS, injection, auth bypass, etc.)
- Edge cases that broke the app
- Areas that need hardening
```

This is NOT a copy of the template — it's ground truth from the red team's exploration.

### Rules

- Test auth flows FIRST (they gate everything else)
- One test case per feature area per attack vector
- `type: "e2e"` for anything involving browser interaction
- `targetFiles` should list the app source files the test covers
- `testFile` path should use the project's test directory conventions
- Always clean up: shutdown teammates and delete team when done
PROMPT_SECTION

    # Append shared context (PRD stories, config pointer, signs)
    _inject_prompt_context "$prompt_file"
}
|
|
1977
|
+
|
|
1978
|
+
# ============================================================================
|
|
1979
|
+
# HELPERS
|
|
1980
|
+
# ============================================================================
|
|
1981
|
+
|
|
1982
|
+
#######################################
# Append shared prompt context to a prompt file: completed PRD stories
# (if a PRD exists), a pointer to the project config, and signs.
# Globals:   RALPH_DIR (read)
# Arguments: $1 - path of the prompt file to append to
#######################################
_inject_prompt_context() {
    local prompt_file="$1"

    # Inject PRD context if available — completed stories tell the agent
    # what has been built and should be testable.
    if [[ -f "$RALPH_DIR/prd.json" ]]; then
        {
            echo ""
            echo "### Completed Stories (from PRD)"
            echo ""
            echo "These features have been built and should be testable:"
            echo '```json'
            # Fall back to an empty array when jq fails (e.g. malformed
            # prd.json) so the fenced block is never left empty/invalid.
            jq '[.stories[] | select(.passes==true) | {id, title, type, testUrl: .testUrl}]' \
                "$RALPH_DIR/prd.json" 2>/dev/null || echo '[]'
            echo '```'
        } >> "$prompt_file"
    fi

    # Inject config context — pointer only; the agent reads the file itself.
    if [[ -f "$RALPH_DIR/config.json" ]]; then
        {
            echo ""
            echo "### Project Config"
            echo ""
            echo "Read \`.ralph/config.json\` for URLs and directories."
        } >> "$prompt_file"
    fi

    # Inject signs (helper defined elsewhere in this file)
    _inject_signs >> "$prompt_file"
}
|
|
2008
|
+
|
|
2009
|
+
#######################################
# Append one timestamped progress line for a test case to the UAT log.
# Globals:   UAT_PROGRESS_FILE (appended)
# Arguments: $1 - case ID; $2 - message text
#######################################
_log_uat() {
    local case_id="$1"
    local message="$2"
    local stamp
    # `date -Iseconds` is not universal; fall back to an explicit format.
    stamp=$(date -Iseconds 2>/dev/null || date +%Y-%m-%dT%H:%M:%S)
    printf '[%s] %s %s\n' "$stamp" "$case_id" "$message" >> "$UAT_PROGRESS_FILE"
}
|