agentic-loop 3.19.0 → 3.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.claude/commands/tour.md +11 -7
  2. package/.claude/commands/vibe-help.md +5 -2
  3. package/.claude/commands/vibe-list.md +17 -2
  4. package/.claude/skills/prd/SKILL.md +21 -6
  5. package/.claude/skills/setup-review/SKILL.md +56 -0
  6. package/.claude/skills/tour/SKILL.md +11 -7
  7. package/.claude/skills/vibe-help/SKILL.md +2 -1
  8. package/.claude/skills/vibe-list/SKILL.md +5 -2
  9. package/.pre-commit-hooks.yaml +8 -0
  10. package/README.md +4 -0
  11. package/bin/agentic-loop.sh +7 -0
  12. package/bin/ralph.sh +29 -0
  13. package/dist/checks/check-signs-secrets.d.ts +9 -0
  14. package/dist/checks/check-signs-secrets.d.ts.map +1 -0
  15. package/dist/checks/check-signs-secrets.js +57 -0
  16. package/dist/checks/check-signs-secrets.js.map +1 -0
  17. package/dist/checks/index.d.ts +2 -5
  18. package/dist/checks/index.d.ts.map +1 -1
  19. package/dist/checks/index.js +4 -9
  20. package/dist/checks/index.js.map +1 -1
  21. package/dist/index.d.ts +1 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +1 -1
  24. package/dist/index.js.map +1 -1
  25. package/package.json +2 -1
  26. package/ralph/hooks/common.sh +47 -0
  27. package/ralph/hooks/warn-debug.sh +12 -26
  28. package/ralph/hooks/warn-empty-catch.sh +21 -34
  29. package/ralph/hooks/warn-secrets.sh +39 -52
  30. package/ralph/hooks/warn-urls.sh +25 -45
  31. package/ralph/init.sh +58 -82
  32. package/ralph/loop.sh +506 -53
  33. package/ralph/prd-check.sh +177 -236
  34. package/ralph/prd.sh +5 -2
  35. package/ralph/setup/quick-setup.sh +2 -16
  36. package/ralph/setup.sh +68 -80
  37. package/ralph/signs.sh +8 -0
  38. package/ralph/uat.sh +2664 -0
  39. package/ralph/utils.sh +213 -70
  40. package/ralph/verify/tests.sh +65 -10
  41. package/templates/PROMPT.md +10 -4
  42. package/templates/UAT-PROMPT.md +197 -0
  43. package/templates/config/elixir.json +0 -2
  44. package/templates/config/fastmcp.json +0 -2
  45. package/templates/config/fullstack.json +2 -4
  46. package/templates/config/go.json +0 -2
  47. package/templates/config/minimal.json +0 -2
  48. package/templates/config/node.json +0 -2
  49. package/templates/config/python.json +0 -2
  50. package/templates/config/rust.json +0 -2
  51. package/templates/prd-example.json +6 -8
package/ralph/uat.sh ADDED
@@ -0,0 +1,2664 @@
1
+ #!/usr/bin/env bash
2
+ # shellcheck shell=bash
3
+ # uat.sh - UAT + Chaos Agent: Autonomous Testing Loops
4
+ #
5
+ # ============================================================================
6
+ # OVERVIEW
7
+ # ============================================================================
8
+ # Two commands share this file:
9
+ # uat — Acceptance testing team. "Does this work correctly?"
10
+ # chaos-agent — Chaos Agent red team. "Can we break it?"
11
+ #
12
+ # Both use Agent Teams for coordinated discovery, then strict TDD per test case:
13
+ # RED: Claude writes the test only (no app changes)
14
+ # GREEN: Claude fixes the app only (no test changes)
15
+ #
16
+ # 3-Phase Flow:
17
+ # Phase 1: DISCOVER + PLAN — Agent team explores app, generates plan
18
+ # Phase 2: EXECUTE LOOP — Per test case: RED (test) → GREEN (fix)
19
+ # Phase 3: REPORT — Summary of findings
20
+ #
21
+ # ============================================================================
22
+ # DEPENDENCIES: Requires utils.sh sourced first (get_config, print_*, etc.)
23
+ # ============================================================================
24
+
25
# Per-mode working paths and labels. They start out empty and are filled in
# by _init_uat_dirs once the command being run is known.
UAT_MODE_DIR=''
UAT_PLAN_FILE=''
UAT_PROGRESS_FILE=''
UAT_FAILURE_FILE=''
UAT_SCREENSHOTS_DIR=''
UAT_MODE_LABEL=''
UAT_CONFIG_NS=''  # config namespace: "uat" or "chaos"
UAT_CMD_NAME=''   # CLI command name: "uat" or "chaos-agent"

# Docker isolation state (set by _should_use_docker_isolation / _chaos_docker_up).
CHAOS_ISOLATION_RESULT=''
CHAOS_FRONTEND_URL=''
CHAOS_API_URL=''
CHAOS_OVERRIDE_FILE=''
CHAOS_COMPOSE_FILE=''
CHAOS_COMPOSE_CMD=''

# Labels for the two TDD phases.
readonly UAT_PHASE_RED="RED" UAT_PHASE_GREEN="GREEN"

# Built-in defaults (overridable via config).
readonly DEFAULT_UAT_MAX_ITERATIONS=20 \
         DEFAULT_UAT_MAX_SESSION_SECONDS=600 \
         DEFAULT_UAT_MAX_CASE_RETRIES=5

# Team mode timeouts (longer — Claude coordinates parallel agents).
readonly DEFAULT_UAT_SESSION_SECONDS=1800 DEFAULT_CHAOS_SESSION_SECONDS=1800

# How many archived plans to retain.
readonly MAX_UAT_ARCHIVE_COUNT=20
58
+
59
+ # ============================================================================
60
+ # DIRECTORY INIT
61
+ # ============================================================================
62
+
63
# Point the UAT_* globals at the working directory of the selected mode.
#
# Arguments:
#   $1 - subdirectory under $RALPH_DIR; also stored as the config namespace
#        (default: "uat")
#   $2 - human-readable mode label (default: "UAT")
#   $3 - CLI command name (default: same as $1)
# Globals read:    RALPH_DIR
# Globals written: UAT_MODE_DIR, UAT_PLAN_FILE, UAT_PROGRESS_FILE,
#                  UAT_FAILURE_FILE, UAT_SCREENSHOTS_DIR, UAT_MODE_LABEL,
#                  UAT_CONFIG_NS, UAT_CMD_NAME
_init_uat_dirs() {
    local mode_subdir="${1:-uat}"
    local mode_label="${2:-UAT}"
    local cli_name="${3:-$mode_subdir}"

    # Identity of the active mode.
    UAT_CONFIG_NS="${mode_subdir}"
    UAT_MODE_LABEL="${mode_label}"
    UAT_CMD_NAME="${cli_name}"

    # Every per-mode file lives under the mode directory.
    UAT_MODE_DIR="${RALPH_DIR}/${mode_subdir}"
    UAT_SCREENSHOTS_DIR="${UAT_MODE_DIR}/screenshots"
    UAT_PLAN_FILE="${UAT_MODE_DIR}/plan.json"
    UAT_PROGRESS_FILE="${UAT_MODE_DIR}/progress.txt"
    UAT_FAILURE_FILE="${UAT_MODE_DIR}/last_failure.txt"
}
76
+
77
+ # ============================================================================
78
+ # SHARED ARG PARSING
79
+ # ============================================================================
80
+
81
# Parse the command-line flags shared by the uat and chaos-agent commands.
#
# Sets: _ARG_FOCUS, _ARG_PLAN_ONLY, _ARG_FORCE_REVIEW, _ARG_NO_FIX,
#       _ARG_MAX_ITERATIONS, _ARG_QUIET_MODE
#
# Recognised flags:
#   --focus <value>  restrict the run (a case ID or a category)
#   --plan-only      sets _ARG_PLAN_ONLY=true
#   --review         sets _ARG_FORCE_REVIEW=true
#   --no-fix         sets _ARG_NO_FIX=true
#   --max <n>        iteration cap; must be a non-negative integer
#   --quiet          sets _ARG_QUIET_MODE=true (default comes from config '.quiet')
# Any other argument is silently ignored.
#
# A value-taking flag with a missing value, or a non-numeric --max, is reported
# on stderr and ignored so the corresponding _ARG_* keeps its empty default.
_parse_uat_args() {
    _ARG_FOCUS=""
    _ARG_PLAN_ONLY=false
    _ARG_FORCE_REVIEW=false
    _ARG_NO_FIX=false
    _ARG_MAX_ITERATIONS=""
    _ARG_QUIET_MODE=$(get_config '.quiet' "false")

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --focus)
                # `shift 2` with a single remaining argument fails WITHOUT
                # shifting, which would leave this loop spinning forever.
                if [[ $# -ge 2 ]]; then
                    _ARG_FOCUS="$2"
                    shift 2
                else
                    printf 'Warning: %s requires a value; ignoring it\n' "$1" >&2
                    shift
                fi
                ;;
            --plan-only)
                _ARG_PLAN_ONLY=true
                shift
                ;;
            --review)
                _ARG_FORCE_REVIEW=true
                shift
                ;;
            --no-fix)
                _ARG_NO_FIX=true
                shift
                ;;
            --max)
                if [[ $# -lt 2 ]]; then
                    printf 'Warning: %s requires a value; ignoring it\n' "$1" >&2
                    shift
                elif [[ "$2" =~ ^[0-9]+$ ]]; then
                    _ARG_MAX_ITERATIONS="$2"
                    shift 2
                else
                    # The value is later used in an integer comparison, so a
                    # non-numeric cap is dropped here instead of failing there.
                    printf 'Warning: --max expects a number, got "%s"; ignoring it\n' "$2" >&2
                    shift 2
                fi
                ;;
            --quiet)
                _ARG_QUIET_MODE=true
                shift
                ;;
            *)
                shift
                ;;
        esac
    done
}
123
+
124
+ # ============================================================================
125
+ # ENTRY POINT
126
+ # ============================================================================
127
+
128
# Entry point for the `uat` command (acceptance testing).
#
# Flow: parse flags, initialise the UAT directories, take the session lock,
# then Phase 1 (discover + review a plan, unless one already exists and no
# re-plan/review was requested), Phase 2 (execute loop) and Phase 3 (report).
# When at least one test was written (UAT_TESTS_WRITTEN > 0) the plan is
# archived and removed so the next run starts fresh.
#
# Arguments: the CLI flags understood by _parse_uat_args.
# Returns:   1 if discovery fails; 0 if the plan review is declined or only a
#            plan was requested; otherwise the exit status of _run_uat_loop.
run_uat() {
    _parse_uat_args "$@"

    local focus plan_only force_review no_fix max_iterations quiet_mode
    focus="$_ARG_FOCUS"
    plan_only="$_ARG_PLAN_ONLY"
    force_review="$_ARG_FORCE_REVIEW"
    no_fix="$_ARG_NO_FIX"
    max_iterations="$_ARG_MAX_ITERATIONS"
    quiet_mode="$_ARG_QUIET_MODE"

    # Set up UAT mode, check prerequisites, and guard against concurrent runs.
    _init_uat_dirs "uat" "UAT"
    check_dependencies
    _acquire_uat_lock

    mkdir -p "$UAT_MODE_DIR" "$UAT_SCREENSHOTS_DIR"
    _print_uat_banner

    # An existing plan is resumed unless the user asked to review or re-plan.
    if [[ -f "$UAT_PLAN_FILE" && "$force_review" != "true" && "$plan_only" != "true" ]]; then
        local remaining
        remaining=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
        print_info "Picking up where we left off ($remaining tests still to go)"
    else
        # Phase 1: Discover + Plan
        if [[ -f "$UAT_PLAN_FILE" && "$force_review" == "true" ]]; then
            print_info "Re-reviewing existing plan..."
        else
            echo ""
            print_info "Phase 1: Exploring your app and building a test plan"
            echo ""
            if ! _discover_and_plan "$quiet_mode" "uat"; then
                _print_discovery_failure_help
                return 1
            fi
        fi

        # The user gets the final say before anything is executed.
        if ! _review_plan; then
            print_info "Plan review cancelled. No changes were made."
            return 0
        fi

        if [[ "$plan_only" == "true" ]]; then
            print_success "Plan generated. Run 'npx agentic-loop uat' to execute."
            return 0
        fi
    fi

    # Phase 2: Execute Loop
    echo ""
    print_info "Phase 2: Running tests and fixing issues"
    echo ""
    _run_uat_loop "$focus" "$no_fix" "$max_iterations" "$quiet_mode"
    local loop_exit=$?

    # Phase 3: Report
    _print_report

    # Archive and reset for next run
    if [[ "$UAT_TESTS_WRITTEN" -gt 0 ]]; then
        _archive_plan
        rm -f "$UAT_PLAN_FILE"
    fi

    return $loop_exit
}
201
+
202
+ # ============================================================================
203
+ # CHAOS AGENT ENTRY POINT
204
+ # ============================================================================
205
+
206
# Entry point for the `chaos-agent` command (red-team testing).
#
# Same three-phase flow as run_uat, run in the "chaos" mode directory, with one
# addition: when _should_use_docker_isolation sets CHAOS_ISOLATION_RESULT to
# "true", an isolated Docker environment is started first and torn down on
# every exit path of this function.
#
# Arguments: the CLI flags understood by _parse_uat_args.
# Returns:   1 if discovery fails; 0 if the plan review is declined or only a
#            plan was requested; otherwise the exit status of _run_uat_loop.
run_chaos() {
    _parse_uat_args "$@"

    local focus plan_only force_review no_fix max_iterations quiet_mode
    focus="$_ARG_FOCUS"
    plan_only="$_ARG_PLAN_ONLY"
    force_review="$_ARG_FORCE_REVIEW"
    no_fix="$_ARG_NO_FIX"
    max_iterations="$_ARG_MAX_ITERATIONS"
    quiet_mode="$_ARG_QUIET_MODE"

    # Set up chaos mode, check prerequisites, and guard against concurrent runs.
    _init_uat_dirs "chaos" "Chaos Agent" "chaos-agent"
    check_dependencies
    _acquire_uat_lock

    mkdir -p "$UAT_MODE_DIR" "$UAT_SCREENSHOTS_DIR"
    _print_chaos_banner

    # Isolation: spin up a Docker copy for chaos to attack.
    # Called directly (not in a $() subshell) so the globals it sets survive.
    local use_docker=false
    _should_use_docker_isolation
    if [[ "$CHAOS_ISOLATION_RESULT" == "true" ]]; then
        print_info "Starting isolated Docker environment..."
        if _chaos_docker_up; then
            use_docker=true
        else
            print_warning "Docker isolation failed — testing against live app"
            print_warning "Non-destructive guardrails are active"
        fi
    fi

    # Helper for early exits: tears down Docker if it was started, then
    # returns the status it was given. It reads use_docker from this
    # function's scope.
    _chaos_early_exit() {
        local code="$1"
        if [[ "$use_docker" == "true" ]]; then
            print_info "Tearing down isolated environment..."
            _chaos_docker_down
        fi
        return "$code"
    }

    # An existing plan is resumed unless the user asked to review or re-plan.
    if [[ -f "$UAT_PLAN_FILE" && "$force_review" != "true" && "$plan_only" != "true" ]]; then
        local remaining
        remaining=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
        print_info "Picking up where we left off ($remaining tests still to go)"
    else
        # Phase 1: Adversarial Discovery + Plan
        if [[ -f "$UAT_PLAN_FILE" && "$force_review" == "true" ]]; then
            print_info "Re-reviewing existing plan..."
        else
            echo ""
            print_info "Phase 1: Red team exploring your app for vulnerabilities"
            echo ""
            if ! _discover_and_plan "$quiet_mode" "chaos"; then
                _print_discovery_failure_help
                _chaos_early_exit 1
                return 1
            fi
        fi

        # The user gets the final say before anything is executed.
        if ! _review_plan; then
            print_info "Plan review cancelled. No changes were made."
            _chaos_early_exit 0
            return 0
        fi

        if [[ "$plan_only" == "true" ]]; then
            print_success "Plan generated. Run 'npx agentic-loop chaos-agent' to execute."
            _chaos_early_exit 0
            return 0
        fi
    fi

    # Phase 2: Testing for vulnerabilities and fixing issues
    echo ""
    print_info "Phase 2: Running attack tests and fixing issues"
    echo ""
    _run_uat_loop "$focus" "$no_fix" "$max_iterations" "$quiet_mode"
    local loop_exit=$?

    # Phase 3: Report
    _print_report

    # Archive and reset for next run
    if [[ "$UAT_TESTS_WRITTEN" -gt 0 ]]; then
        _archive_plan
        rm -f "$UAT_PLAN_FILE"
    fi

    # Isolation: tear down Docker environment
    if [[ "$use_docker" == "true" ]]; then
        print_info "Tearing down isolated environment..."
        _chaos_docker_down
    fi

    return $loop_exit
}
312
+
313
+ # ============================================================================
314
+ # CONCURRENT EXECUTION GUARD
315
+ # ============================================================================
316
+
317
# Take the single-session lock at $RALPH_DIR/.lock and install cleanup traps.
#
# The lock file holds the PID of the owning shell. An existing lock counts as
# held only if it names a positive PID that `kill -0` can signal; anything else
# (dead PID, empty file, "0", garbage) is treated as stale and removed.
#
# The lock is created with `noclobber`, so two sessions starting at the same
# moment cannot both pass the check and both write the file.
#
# Globals read: RALPH_DIR, UAT_MODE_LABEL
# Exits:        with status 1 if another live session holds the lock.
# Side effects: writes the lock file; sets EXIT and INT/TERM traps.
_acquire_uat_lock() {
    local lockfile="$RALPH_DIR/.lock"
    local busy_msg="Another $UAT_MODE_LABEL session is already running. Stop it first with 'npx agentic-loop stop'."

    if [[ -f "$lockfile" ]]; then
        local pid=""
        pid=$(cat "$lockfile" 2>/dev/null) || pid=""
        # `kill -0 0` targets our own process group and always succeeds, so
        # only a strictly positive integer may be probed as a holder.
        if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
            print_error "$busy_msg"
            exit 1
        fi
        rm -f "$lockfile"  # Stale lock
    fi

    # With noclobber the redirect fails if the file already exists, which
    # makes creation atomic. The subshell keeps the option from leaking;
    # $$ still expands to the parent shell's PID in there.
    if ! ( set -o noclobber; echo $$ > "$lockfile" ) 2>/dev/null; then
        if [[ -e "$lockfile" ]]; then
            # Someone else created the lock between our check and our write.
            print_error "$busy_msg"
            exit 1
        fi
        # Not a lost race (e.g. directory missing or unwritable): retry
        # plainly so the real error is shown, then carry on as before.
        echo $$ > "$lockfile"
    fi

    # Chain cleanup: lock removal + kill child processes
    # This replaces the trap from ralph.sh, so we handle both concerns
    trap '_uat_cleanup' EXIT
    trap '_uat_interrupt' INT TERM
}
334
+
335
# EXIT-trap handler: release the session lock and, as a safety net, tear down
# the Docker environment if CHAOS_OVERRIDE_FILE is still set.
_uat_cleanup() {
    local lock_path="$RALPH_DIR/.lock"
    rm -f "$lock_path"

    # Nothing more to do when no Docker override file is recorded.
    if [[ -z "${CHAOS_OVERRIDE_FILE:-}" ]]; then
        return 0
    fi

    _chaos_docker_down 2>/dev/null
}
342
+
343
# INT/TERM-trap handler: announce the interruption, tear down the Docker
# environment if CHAOS_OVERRIDE_FILE is set, signal the whole process group,
# release the lock and exit with status 130.
_uat_interrupt() {
    echo ""
    print_warning "Interrupted. Wrapping up $UAT_MODE_LABEL..."
    if [[ -n "${CHAOS_OVERRIDE_FILE:-}" ]]; then
        print_info "Tearing down isolated Docker environment..."
        _chaos_docker_down
    fi

    # `kill 0` delivers SIGTERM to every process in our process group, and that
    # includes this shell. Ignore INT/TERM first so the broadcast cannot
    # re-enter this handler and repeat the teardown.
    trap '' INT TERM

    # Kill all child processes (Claude sessions, test runners)
    kill 0 2>/dev/null || true
    _uat_cleanup
    exit 130
}
355
+
356
+ # ============================================================================
357
+ # DISCOVERY FAILURE RECOVERY
358
+ # ============================================================================
359
+
360
# Print recovery advice after a failed discovery phase.
#
# Checks, in order:
#   1. config.json names an app URL that does not answer within 3 seconds
#   2. $RALPH_DIR/config.json does not exist
# and prints the matching advice. If neither applies it prints the last lines
# of the progress log (when present) plus generic retry instructions.
#
# Globals read: RALPH_DIR, UAT_CMD_NAME, UAT_CONFIG_NS, UAT_PROGRESS_FILE,
#               DEFAULT_UAT_SESSION_SECONDS
_print_discovery_failure_help() {
    printf '%s\n' \
        "" \
        " ┌──────────────────────────────────────────────────────┐" \
        " │ Discovery failed — here's how to recover │" \
        " └──────────────────────────────────────────────────────┘" \
        ""

    # Gather what we can from the config file, if there is one.
    local config_path="$RALPH_DIR/config.json"
    local config_found=false
    local target_url=""
    if [[ -f "$config_path" ]]; then
        config_found=true
        target_url=$(jq -r '.frontendUrl // .url // empty' "$config_path" 2>/dev/null)
    fi

    # Cause 1: the configured app does not respond.
    if [[ -n "$target_url" ]] && ! curl -s --max-time 3 "$target_url" > /dev/null 2>&1; then
        printf '%s\n' \
            " Likely cause: Your app at $target_url is not responding." \
            "" \
            " Fix: Start your app first, then retry:" \
            " npm run dev # or whatever starts your app" \
            " npx agentic-loop $UAT_CMD_NAME" \
            ""
        return
    fi

    # Cause 2: the project was never initialised.
    if [[ "$config_found" != "true" ]]; then
        printf '%s\n' \
            " Likely cause: No .ralph/config.json found." \
            "" \
            " Fix: Run 'npx agentic-loop init' to create one." \
            ""
        return
    fi

    # Generic recovery: show the tail of the progress log and suggest a retry.
    printf '%s\n' " What happened:"
    if [[ -f "$UAT_PROGRESS_FILE" ]]; then
        printf '\n'
        tail -5 "$UAT_PROGRESS_FILE" | sed 's/^/ /'
        printf '\n'
    fi

    printf '%s\n' \
        " To retry:" \
        " npx agentic-loop $UAT_CMD_NAME" \
        "" \
        " To retry with more time (default: ${DEFAULT_UAT_SESSION_SECONDS}s):" \
        " Set $UAT_CONFIG_NS.sessionSeconds in .ralph/config.json" \
        "" \
        " Full log: $UAT_PROGRESS_FILE"
}
412
+
413
+ # ============================================================================
414
+ # PHASE 1: DISCOVER + PLAN
415
+ # ============================================================================
416
+
417
# Phase 1: run a Claude discovery session and validate the plan it writes.
#
# Arguments:
#   $1 - quiet flag handed to _parse_uat_activity (default: "false")
#   $2 - mode: "chaos" selects the Chaos Agent prompt and timeout, anything
#        else the UAT team prompt and timeout (default: "uat")
# Returns:
#   0 when a plan file exists and passes _validate_plan;
#   1 when the session fails or times out, no plan was written, or the plan
#     is invalid.
_discover_and_plan() {
    local quiet_flag="${1:-false}"
    local run_mode="${2:-uat}"

    local prompt_file output_file
    prompt_file=$(create_temp_file ".uat-discover-prompt.md")
    output_file=$(create_temp_file ".uat-discover-output.log")

    # Pick the timeout and the prompt builder that match the mode.
    local timeout
    case "$run_mode" in
        chaos)
            timeout=$(get_config '.chaos.sessionSeconds' "$DEFAULT_CHAOS_SESSION_SECONDS")
            _build_chaos_agent_prompt "$prompt_file"
            _log_uat "DISCOVER" "Starting Chaos Agent discovery (timeout: ${timeout}s)"
            ;;
        *)
            timeout=$(get_config '.uat.sessionSeconds' "$DEFAULT_UAT_SESSION_SECONDS")
            _build_uat_team_prompt "$prompt_file"
            _log_uat "DISCOVER" "Starting UAT team discovery (timeout: ${timeout}s)"
            ;;
    esac

    # Run Claude with MCP exploration. The pipeline runs in a background
    # subshell with pipefail enabled, and its status is collected via wait.
    local claude_exit=0
    (
        set -o pipefail
        cat "$prompt_file" | run_with_timeout "$timeout" claude -p \
            --dangerously-skip-permissions \
            --verbose \
            --output-format stream-json \
            2>&1 | tee "$output_file" | _parse_uat_activity "$quiet_flag"
    ) &
    local pipeline_pid=$!
    wait "$pipeline_pid" || claude_exit=$?

    if (( claude_exit != 0 )); then
        _log_uat "DISCOVER" "Claude session failed (exit $claude_exit)"
        case "$claude_exit" in
            124)
                # Exit status 124 is reported to the user as a timeout.
                print_error "Discovery timed out after ${timeout}s"
                echo " The exploration ran out of time before finishing."
                echo " Increase timeout: set $UAT_CONFIG_NS.sessionSeconds in .ralph/config.json"
                ;;
            *)
                print_error "Discovery session crashed (exit code $claude_exit)"
                if [[ -f "$output_file" ]]; then
                    echo " Last output:"
                    tail -5 "$output_file" | sed 's/^/ /'
                fi
                ;;
        esac
        return 1
    fi

    # The session succeeded; make sure it actually produced a plan.
    if [[ ! -f "$UAT_PLAN_FILE" ]]; then
        print_error "Discovery finished but no test plan was written"
        echo ""
        echo " Claude explored the app but didn't write .ralph/$UAT_CONFIG_NS/plan.json."
        echo " This usually means the app wasn't reachable or had no testable features."
        return 1
    fi

    if ! _validate_plan; then
        print_error "The generated plan has errors and can't be used"
        return 1
    fi

    # A missing project-specific prompt is only worth a warning.
    if [[ ! -f "$UAT_MODE_DIR/UAT-PROMPT.md" ]]; then
        print_warning "No project-specific test instructions were created."
        echo " Tests will use generic patterns instead."
        echo " For better results, re-run with 'npx agentic-loop $UAT_CMD_NAME --plan-only'."
    fi

    # Mark plan as generated
    update_json "$UAT_PLAN_FILE" '.testSuite.status = "planned"'

    local case_count
    case_count=$(jq '.testCases | length' "$UAT_PLAN_FILE")
    _log_uat "DISCOVER" "Plan generated with $case_count test cases"
    print_success "Plan generated: $case_count test cases"

    return 0
}
495
+
496
# Assemble the discovery prompt for the UAT team into the given file.
#
# The prompt is built in three steps:
#   1. the file is (over)written with $RALPH_TEMPLATES/UAT-PROMPT.md
#   2. the fixed "UAT Team Discovery + Plan Generation" section below is appended
#   3. _inject_prompt_context is called on the file
#
# Arguments:
#   $1 - path of the prompt file to write
_build_uat_team_prompt() {
    local prompt_file="$1"

    # Start with UAT prompt template
    cat "$RALPH_TEMPLATES/UAT-PROMPT.md" > "$prompt_file"

    # The heredoc delimiter is quoted, so the text below is appended literally:
    # no variable or command substitution happens inside it.
    cat >> "$prompt_file" << 'PROMPT_SECTION'

---

## Phase: UAT Team Discovery + Plan Generation

You are the **team lead** of an acceptance testing team. Your job is to coordinate a team of
agents that explore a live app, verify features work correctly, and produce a comprehensive
UAT plan.

### Step 1: Recon (~60 seconds)

Before spawning anyone, do a quick recon yourself:

1. **Read `.ralph/config.json`** for URLs, auth config, and directories
2. **Read `.ralph/prd.json`** if it exists — completed stories tell you what was built
3. **Navigate the app** using Playwright MCP — click through nav, find pages, note the tech stack
4. **Take 2-3 screenshots** of key pages (save to `.ralph/uat/screenshots/`)
5. **Map the feature areas** — what exists? (auth, forms, API, navigation, etc.)

Don't go deep. Just map what's there. ~60 seconds max.

### Step 2: Assemble the UAT Team

Create a team and spawn teammates:

```
TeamCreate: "uat-team"
```

Spawn these teammates using the Task tool with `team_name: "uat-team"`:

1. **"recon"** (`subagent_type: "general-purpose"`) — Deep recon. Maps all routes/endpoints,
catalogs forms with selectors, identifies tech stack and auth. Shares intel with teammates
via SendMessage.

2. **"happy-path-{area}"** (`subagent_type: "general-purpose"`) — One per feature area.
Completes primary user journeys, records correct behavior as ground truth assertions
(exact text, redirects, success messages).

3. **"edge-cases"** (`subagent_type: "general-purpose"`) — Tests boundary conditions across
all areas. Empty fields, long input, required-field validation, back button after submit,
refresh mid-flow. Focus: does the app handle these gracefully?

**Only spawn agents for areas that exist.** If there are no forms, don't spawn a forms specialist.
If there's no auth, skip auth testing.

Mindset: **"Verify the app works correctly for real users."**

### Agent Instructions Template

Every agent prompt MUST include:

1. **Their role and focus area** (from above)
2. **The recon intel** — pages, URLs, tech stack you discovered in Step 1
3. **Browser tab isolation** — "Open your own browser tab via `browser_tabs(action: 'new')`
before navigating. Do NOT use the existing tab."
4. **Communication** — "Share important discoveries with teammates via SendMessage.
Examples: 'Login redirects to /dashboard after success', 'Registration form has 4 required fields',
'Profile page shows user email and name'. Read messages from teammates and adapt your testing."
5. **Output format** — "When done, send your findings to the team lead via SendMessage.
Format each finding as a test case with: title, category, testFile path, targetFiles,
assertions (input/expected/strategy), and edgeCases."

### Step 3: Coordinate

While your team works:

- **Monitor messages** from teammates as they report findings
- **Redirect effort** if needed — if recon discovers something important, message the
relevant specialist
- **Create tasks** in the shared task list for any new areas discovered

### Step 4: Collect + Merge + Write Plan

After all teammates finish:

1. Collect findings from all agent messages
2. Dedup by test file path (keep the case with more assertions)
3. Assign sequential IDs: `UAT-001`, `UAT-002`, ...
4. Write `.ralph/uat/plan.json` (schema below)
5. Write `.ralph/uat/UAT-PROMPT.md` (schema below)
6. Shut down all teammates via SendMessage with `type: "shutdown_request"`
7. Clean up with TeamDelete

### plan.json Schema

Write `.ralph/uat/plan.json`:

```json
{
"testSuite": {
"name": "UAT Loop",
"generatedAt": "<ISO timestamp>",
"status": "pending",
"discoveryMethod": "uat-team"
},
"testCases": [
{
"id": "UAT-001",
"title": "Feature area — what the test checks",
"category": "auth|forms|navigation|api|ui|data",
"type": "e2e|integration",
"userStory": "As a user, I...",
"testApproach": "What to test and how",
"testFile": "tests/e2e/feature/test-name.spec.ts",
"targetFiles": ["src/pages/feature.tsx"],
"edgeCases": ["Edge case 1", "Edge case 2"],
"assertions": [
{
"input": "Fill name='John', submit form",
"expected": "Shows 'Welcome, John'",
"strategy": "keyword"
}
],
"passes": false,
"retryCount": 0,
"source": "uat-team:agent-name"
}
]
}
```

**Every test case MUST have at least 3 assertions** with concrete input/expected pairs:
1. One happy-path assertion (correct input → correct output)
2. One edge-case assertion (bad input → proper error handling)
3. One content assertion (page shows the RIGHT data, not just that it loads)

### UAT-PROMPT.md Schema

Write `.ralph/uat/UAT-PROMPT.md` — a project-specific testing guide based on what the
team ACTUALLY FOUND. Include:

```markdown
# UAT Guide — [Project Name]

## App Overview
- What the app does (1-2 sentences)
- Tech stack observed (framework, API patterns, auth method)
- Base URLs (frontend, API if applicable)

## Pages & Routes Discovered
For each page:
- URL pattern and what it shows
- Key interactive elements (forms, buttons, links)
- Selectors that work (data-testid, roles, labels)

## Auth Flow
- How login works (form fields, redirect after login)
- Test credentials if available (from config or .env)
- What pages require auth vs. public

## Known Forms & Inputs
For each form:
- Fields with their labels/names/selectors
- Required vs optional fields
- Validation behavior observed

## What "Correct" Looks Like
For each feature area:
- Expected behavior observed
- Specific text/numbers that should appear

## Console & Network Observations
- Any existing console errors/warnings
- API endpoints observed
- Response patterns (JSON structure, status codes)
```

This is NOT a copy of the template — it's ground truth from the team's exploration.

### Rules

- Test auth flows FIRST (they gate everything else)
- One test case per feature area (not per edge case)
- Include edge cases as a list within each test case
- **Every test case MUST have assertions with input/expected pairs**
- `type: "e2e"` for anything involving browser interaction
- `type: "integration"` for API-only tests
- `targetFiles` should list the app source files the test covers
- `testFile` path should use the project's test directory conventions
- Aim for 5-15 test cases depending on app complexity
- Always clean up: shutdown teammates and delete team when done
PROMPT_SECTION

    # Hand the assembled file to the shared context-injection helper.
    _inject_prompt_context "$prompt_file"
}
689
+
690
# Validate the structure of the plan at $UAT_PLAN_FILE.
#
# Hard failures (return 1):
#   - the file is not valid JSON
#   - `.testSuite` is missing or `.testCases` is not an array
#   - any test case is not an object or lacks `id`, `title` or `testFile`
#   - the per-case check itself cannot be evaluated (fails closed)
# Soft warning (still returns 0):
#   - test cases without any assertions
#
# Globals read: UAT_PLAN_FILE, UAT_CMD_NAME
_validate_plan() {
    # Check JSON is valid
    if ! jq -e '.' "$UAT_PLAN_FILE" >/dev/null 2>&1; then
        print_error "Test plan file is corrupted (not valid JSON)"
        return 1
    fi

    # Check required structure. testCases must really be an array: every
    # later query iterates it with `.testCases[]`.
    if ! jq -e '.testSuite and (.testCases | type == "array")' "$UAT_PLAN_FILE" >/dev/null 2>&1; then
        print_error "Test plan is incomplete — missing required sections"
        return 1
    fi

    # Check test cases have required fields. Non-object entries count as
    # invalid instead of making jq abort.
    local invalid_cases
    invalid_cases=$(jq '[.testCases[]
        | select(if type != "object" then true
                 else (.id == null or .title == null or .testFile == null) end)]
        | length' "$UAT_PLAN_FILE" 2>/dev/null) || invalid_cases=""
    if [[ ! "$invalid_cases" =~ ^[0-9]+$ ]]; then
        # An empty or non-numeric count means the check did not run; an
        # unchecked plan must not be reported as valid.
        print_error "Test plan is incomplete — missing required sections"
        return 1
    fi
    if [[ "$invalid_cases" -gt 0 ]]; then
        print_error "$invalid_cases test case(s) are incomplete — each needs an ID, title, and test file"
        return 1
    fi

    # Check test cases have assertions (the eval contract)
    local missing_assertions
    missing_assertions=$(jq '[.testCases[] | select((.assertions // []) | length < 1)] | length' "$UAT_PLAN_FILE" 2>/dev/null) || missing_assertions=0
    if [[ "${missing_assertions:-0}" -gt 0 ]]; then
        print_warning "$missing_assertions test case(s) have no expected results defined — tests may not catch real issues"
        echo " Each test case should describe what to check (input and expected outcome)."
        echo " Run 'npx agentic-loop $UAT_CMD_NAME --review' to edit the plan and add them."
        # Warning only, not a hard failure — Claude may add assertions during execution
    fi

    return 0
}
723
+
724
+ # ============================================================================
725
+ # PLAN REVIEW
726
+ # ============================================================================
727
+
728
# Show a summary of the plan at $UAT_PLAN_FILE and ask whether to run it.
#
# Answers (case-insensitive):
#   n / no    decline
#   e / edit  open the plan in $EDITOR (default: vi), then re-validate it
#   anything else, including an empty answer: accept
# On acceptance, `.testSuite.reviewedAt` is stamped with the current time.
#
# Returns: 0 if the plan was accepted (possibly after editing);
#          1 if it was declined or the edited plan failed validation.
# Globals read: UAT_PLAN_FILE, UAT_MODE_LABEL, EDITOR
_review_plan() {
    echo ""
    echo " ┌──────────────────────────────────────────────────────┐"
    printf " │ %-54s│\n" "$UAT_MODE_LABEL Test Plan"
    echo " └──────────────────────────────────────────────────────┘"
    echo ""

    local total_cases
    total_cases=$(jq '.testCases | length' "$UAT_PLAN_FILE")

    # Print summary table.
    # Tab is an IFS *whitespace* character, so `read` merges consecutive tabs
    # and an empty field would shift every later column to the left. The jq
    # program therefore substitutes "-" for null/empty cells so that each row
    # always carries six non-empty fields.
    local id title category tc_type edge_count assert_count
    local type_icon display_title
    while IFS=$'\t' read -r id title category tc_type edge_count assert_count; do
        case "$tc_type" in
            e2e) type_icon="🌐" ;;
            integration) type_icon="🔌" ;;
            *) type_icon="📝" ;;
        esac

        # Truncate title
        display_title="$title"
        [[ ${#display_title} -gt 40 ]] && display_title="${display_title:0:37}..."

        printf " %s %-10s %-40s [%s edge cases, %s checks]\n" "$type_icon" "$id" "$display_title" "$edge_count" "$assert_count"
    done < <(jq -r '
        def cell: if . == null or . == "" then "-" else tostring end;
        .testCases[]
        | [ (.id | cell),
            (.title | cell),
            (.category | cell),
            (.type | cell),
            (.edgeCases | length | tostring),
            ((.assertions // []) | length | tostring) ]
        | @tsv' "$UAT_PLAN_FILE" 2>/dev/null)

    echo ""
    echo " Total: $total_cases test cases"
    echo ""

    # Prompt for review
    local response
    read -r -p " Execute this plan? [Y/n/e(dit)] " response

    case "$response" in
        [Nn]|[Nn][Oo])
            return 1
            ;;
        [Ee]|[Ee][Dd][Ii][Tt])
            # The prompt advertises "e(dit)", so the full word is accepted too.
            local editor="${EDITOR:-vi}"
            "$editor" "$UAT_PLAN_FILE"
            # Re-validate after edit
            if ! _validate_plan; then
                print_error "Your edits made the plan invalid. Please fix and try again."
                return 1
            fi
            ;;
    esac

    # Mark as reviewed (both after a successful edit and on plain acceptance).
    update_json "$UAT_PLAN_FILE" \
        --arg ts "$(date -Iseconds 2>/dev/null || date +%Y-%m-%dT%H:%M:%S)" \
        '.testSuite.reviewedAt = $ts'

    return 0
}
791
+
792
+ # ============================================================================
793
+ # PHASE 2: EXECUTE LOOP
794
+ # ============================================================================
795
+
796
# Phase 2 driver: repeatedly picks the first test case with passes==false from
# $UAT_PLAN_FILE and runs its RED (write test) or GREEN (fix app) phase.
#
# Arguments:
#   $1 - focus: optional case ID or category used to filter candidate cases
#   $2 - no_fix: forwarded to _run_red_phase
#   $3 - max iterations override (else config value, else the default)
#   $4 - quiet: forwarded to the phase runners
# Globals written: the UAT_* counters/arrays reset below.
# Returns: 0 when no case is left with passes==false; 1 otherwise, including
#          when the plan file cannot be read.
_run_uat_loop() {
    local focus="$1"
    local no_fix="$2"
    local max_iterations_arg="$3"
    local quiet="$4"

    local max_iterations
    max_iterations="${max_iterations_arg:-$(get_config ".$UAT_CONFIG_NS.maxIterations" "$DEFAULT_UAT_MAX_ITERATIONS")}"
    local max_case_retries
    max_case_retries=$(get_config ".$UAT_CONFIG_NS.maxCaseRetries" "$DEFAULT_UAT_MAX_CASE_RETRIES")
    local timeout
    timeout=$(get_config ".$UAT_CONFIG_NS.maxSessionSeconds" "$DEFAULT_UAT_MAX_SESSION_SECONDS")

    local iteration=0

    # Track results for report
    UAT_TESTS_WRITTEN=0
    UAT_BUGS_FOUND=0
    UAT_BUGS_FIXED=0
    UAT_CASES_PASSED=0
    UAT_CASES_FAILED=0
    UAT_CASES_SKIPPED=0
    UAT_RED_ONLY_PASSED=0
    UAT_GREEN_ATTEMPTS=0
    UAT_FILES_FIXED=()
    UAT_NEEDS_HUMAN=()

    while [[ $iteration -lt $max_iterations ]]; do
        # Check for stop signal
        if [[ -f "$RALPH_DIR/.stop" ]]; then
            rm -f "$RALPH_DIR/.stop"
            print_warning "Stop requested. Finishing up..."
            break
        fi

        iteration=$((iteration + 1))

        # Pick next incomplete test case (with optional focus filter)
        local case_id
        if [[ -n "$focus" ]]; then
            # Focus can be a case ID (UAT-003) or category (auth)
            case_id=$(jq -r --arg f "$focus" '
                .testCases[] |
                select(.passes==false) |
                select(.id==$f or .category==$f) |
                .id
            ' "$UAT_PLAN_FILE" | head -1)
        else
            case_id=$(jq -r '.testCases[] | select(.passes==false) | .id' "$UAT_PLAN_FILE" | head -1)
        fi

        # All done?
        if [[ -z "$case_id" ]]; then
            break
        fi

        # Get case details
        local case_json case_title case_type
        case_json=$(jq --arg id "$case_id" '.testCases[] | select(.id==$id)' "$UAT_PLAN_FILE")
        case_title=$(echo "$case_json" | jq -r '.title')
        case_type=$(echo "$case_json" | jq -r '.type // "e2e"')

        # Read TDD phase state (null = start RED, "red" = resume GREEN)
        local phase
        phase=$(echo "$case_json" | jq -r '.phase // "null"')

        # Compute per-phase retry counts (default 0 for old plan.json files)
        local red_retries green_retries
        red_retries=$(echo "$case_json" | jq -r '.redRetries // 0')
        green_retries=$(echo "$case_json" | jq -r '.greenRetries // 0')

        # Circuit breaker: combined red + green retries.
        # The case is stored with passes=true plus skipped=true so the
        # selection query above does not pick it again.
        local total_retries=$((red_retries + green_retries))
        if [[ $total_retries -ge $max_case_retries ]]; then
            print_warning "$case_id tried $max_case_retries times without success — skipping (needs manual review)"
            _flag_for_human "$case_id" "Tried $max_case_retries times without success"
            UAT_CASES_SKIPPED=$((UAT_CASES_SKIPPED + 1))
            update_json "$UAT_PLAN_FILE" \
                --arg id "$case_id" '(.testCases[] | select(.id==$id)) |= . + {passes: true, skipped: true}'
            continue
        fi

        # Determine current phase
        local current_phase="$UAT_PHASE_RED"
        if [[ "$phase" == "red" ]]; then
            current_phase="$UAT_PHASE_GREEN"
        fi

        # Display case banner with phase. The box interior is 58 columns wide:
        # the title is clipped to its 45-column field and both rows are padded
        # to the same width as the border so the frame lines up.
        local display_title="$case_title"
        if [[ ${#display_title} -gt 45 ]]; then
            display_title="${display_title:0:42}..."
        fi

        local phase_label="Writing test"
        if [[ "$current_phase" == "$UAT_PHASE_GREEN" ]]; then
            phase_label="Fixing app"
        fi

        local box_rule status_row
        printf -v box_rule '%58s' ''
        box_rule="${box_rule// /─}"
        printf -v status_row ' %-14s Type: %-6s Attempt: %-3s' \
            "$phase_label" "$case_type" "$((total_retries + 1))"

        echo ""
        echo "┌${box_rule}┐"
        printf "│ %-10s %-45s │\n" "$case_id" "$display_title"
        printf "│%-58s│\n" "$status_row"
        echo "└${box_rule}┘"
        echo ""

        # Git snapshot for rollback
        _git_snapshot "$case_id"

        local test_file
        test_file=$(jq -r --arg id "$case_id" '.testCases[] | select(.id==$id) | .testFile' "$UAT_PLAN_FILE")

        if [[ "$current_phase" == "$UAT_PHASE_RED" ]]; then
            _run_red_phase "$case_id" "$case_type" "$test_file" "$no_fix" "$timeout" "$quiet"
        else
            _run_green_phase "$case_id" "$case_type" "$test_file" "$timeout" "$quiet"
        fi

        # Brief pause between iterations
        sleep 1
    done

    # Update suite status. An empty or non-numeric count means jq could not
    # read the plan; that must not be mistaken for "zero cases remaining".
    local remaining
    remaining=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null) || remaining=""
    if [[ ! "$remaining" =~ ^[0-9]+$ ]]; then
        print_error "Could not read $UAT_PLAN_FILE — unable to determine suite status"
        return 1
    fi

    if [[ "$remaining" -eq 0 ]]; then
        update_json "$UAT_PLAN_FILE" '.testSuite.status = "complete"'
        return 0
    fi

    update_json "$UAT_PLAN_FILE" '.testSuite.status = "partial"'
    return 1
}
925
+
926
+ # ============================================================================
927
+ # TDD PHASES: RED (test-only) and GREEN (fix-only)
928
+ # ============================================================================
929
+
930
# RED phase for one case: run a Claude session that should only write the test
# file, then vet the outcome step by step. Every rejected outcome bumps the
# case's red retry counter and returns so the main loop can try again.
#
# Arguments:
#   $1 case_id   $2 case_type   $3 test_file
#   $4 no_fix    ("true": keep the failing test as a documented bug, no fix)
#   $5 timeout   (handed to run_with_timeout)
#   $6 quiet     (handed to _parse_uat_activity)
# Globals written: UAT_TESTS_WRITTEN, UAT_CASES_PASSED, UAT_RED_ONLY_PASSED,
#                  UAT_BUGS_FOUND
_run_red_phase() {
    local case_id="$1" case_type="$2" test_file="$3"
    local no_fix="$4" timeout="$5" quiet="$6"

    local prompt_file output_file
    prompt_file=$(create_temp_file ".uat-red-prompt.md")
    output_file=$(create_temp_file ".uat-red-output.log")

    _build_red_prompt "$case_id" "$prompt_file"
    _log_uat "$case_id" "RED: Starting test-only session"

    # The session pipeline runs in a background subshell; its combined
    # stdout/stderr is kept in $output_file and streamed to the activity parser.
    local claude_exit=0 pipeline_pid
    (
        set -o pipefail
        cat "$prompt_file" \
            | run_with_timeout "$timeout" claude -p \
                --dangerously-skip-permissions \
                --verbose \
                --output-format stream-json \
                2>&1 \
            | tee "$output_file" \
            | _parse_uat_activity "$quiet"
    ) &
    pipeline_pid=$!
    wait "$pipeline_pid" || claude_exit=$?

    rm -f "$prompt_file"

    # Exit codes 0 and 124 both continue to the checks below.
    case "$claude_exit" in
        0|124) ;;
        *)
            print_warning "Test-writing session ended unexpectedly — will retry"
            _log_uat "$case_id" "RED: Session failed (exit $claude_exit)"
            _increment_red_retry "$case_id"
            rm -f "$output_file"
            return
            ;;
    esac

    # The session must have produced the test file.
    if [[ ! -f "$test_file" ]]; then
        print_warning "$case_id: Test file was not created — will retry"
        _log_uat "$case_id" "RED: Test file not created"
        _increment_red_retry "$case_id"
        rm -f "$output_file"
        return
    fi

    # RED constraint: nothing but the test file may have changed.
    if _has_app_changes "$test_file"; then
        print_warning "$case_id: App code was changed during test-writing (not allowed) — undoing changes"
        _log_uat "$case_id" "RED: App changes detected — rollback"
        _rollback_to_snapshot "$case_id"
        _save_red_violation_feedback "$case_id"
        _increment_red_retry "$case_id"
        rm -f "$output_file"
        return
    fi

    UAT_TESTS_WRITTEN=$((UAT_TESTS_WRITTEN + 1))

    # Shallow tests are rejected with targeted feedback for the retry.
    if ! _validate_test_quality "$test_file" "$case_id"; then
        print_warning "$case_id: Test doesn't check enough — will retry with better guidance"
        _save_shallow_test_feedback "$case_id" "$test_file"
        _increment_red_retry "$case_id"
        rm -f "$output_file"
        return
    fi

    # Run the test. A pass here means the app already behaves correctly.
    if _run_test "$test_file" "$case_type"; then
        print_success "$case_id: Test passes — app already works correctly"
        _mark_passed "$case_id"
        _commit_result "$case_id" "$test_file"
        UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
        UAT_RED_ONLY_PASSED=$((UAT_RED_ONLY_PASSED + 1))
        _log_uat "$case_id" "RED: PASSED (app already correct)"
        rm -f "$output_file"
        return
    fi

    # The test failed: decide whether the test or the app is at fault.
    local verdict
    verdict=$(_classify_red_failure "$test_file" "$case_id")

    case "$verdict" in
        test_bug)
            print_warning "$case_id: Test has errors — will retry"
            _save_failure_context "$case_id" "$output_file"
            _increment_red_retry "$case_id"
            ;;
        *)
            # App bug found — commit the RED test, transition to GREEN
            print_info "$case_id: Found an app bug — now fixing it"
            UAT_BUGS_FOUND=$((UAT_BUGS_FOUND + 1))

            if [[ "$no_fix" == "true" ]]; then
                # --no-fix mode: commit failing test as documented bug
                print_info "$case_id: Saving test as a documented bug (fix skipped with --no-fix)"
                _commit_red_test "$case_id" "$test_file"
                _mark_passed "$case_id"
                UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
                _log_uat "$case_id" "RED: Documented bug (--no-fix mode)"
            else
                # Persist phase "red" so the next iteration resumes in GREEN.
                _commit_red_test "$case_id" "$test_file"
                _mark_phase "$case_id" "red"
                _save_failure_context "$case_id" "$output_file"
                _log_uat "$case_id" "RED: App bug found — transitioning to GREEN"
            fi
            ;;
    esac

    rm -f "$output_file"
}
1041
+
1042
# GREEN phase for one case: run a Claude session that should fix application
# code only, then re-run the committed test and the regression check.
#
# Arguments:
#   $1 case_id   $2 case_type   $3 test_file
#   $4 timeout   (handed to run_with_timeout)
#   $5 quiet     (handed to _parse_uat_activity)
# Globals written: UAT_GREEN_ATTEMPTS, UAT_BUGS_FIXED, UAT_CASES_PASSED
_run_green_phase() {
    local case_id="$1" case_type="$2" test_file="$3"
    local timeout="$4" quiet="$5"

    UAT_GREEN_ATTEMPTS=$((UAT_GREEN_ATTEMPTS + 1))

    local prompt_file output_file
    prompt_file=$(create_temp_file ".uat-green-prompt.md")
    output_file=$(create_temp_file ".uat-green-output.log")

    _build_green_prompt "$case_id" "$test_file" "$prompt_file"
    _log_uat "$case_id" "GREEN: Starting fix-only session"

    # Same session pipeline shape as the RED phase: background subshell,
    # output captured in $output_file and streamed to the activity parser.
    local claude_exit=0 pipeline_pid
    (
        set -o pipefail
        cat "$prompt_file" \
            | run_with_timeout "$timeout" claude -p \
                --dangerously-skip-permissions \
                --verbose \
                --output-format stream-json \
                2>&1 \
            | tee "$output_file" \
            | _parse_uat_activity "$quiet"
    ) &
    pipeline_pid=$!
    wait "$pipeline_pid" || claude_exit=$?

    rm -f "$prompt_file"

    # Exit codes 0 and 124 both continue to the checks below.
    case "$claude_exit" in
        0|124) ;;
        *)
            print_warning "Fix session ended unexpectedly — will retry"
            _log_uat "$case_id" "GREEN: Session failed (exit $claude_exit)"
            _increment_green_retry "$case_id"
            rm -f "$output_file"
            return
            ;;
    esac

    # GREEN constraint: the test file must stay as committed.
    if _test_file_modified "$test_file"; then
        print_warning "$case_id: Test file was changed during fix (not allowed) — restoring original"
        _restore_test_file "$test_file" "$case_id"
        _log_uat "$case_id" "GREEN: Test file restored after modification"
    fi

    # Test still failing: keep the failure context and retry GREEN later.
    if ! _run_test "$test_file" "$case_type"; then
        print_warning "$case_id: Fix didn't work — test still fails, will retry"
        _save_failure_context "$case_id" "$output_file"
        _increment_green_retry "$case_id"
        rm -f "$output_file"
        return
    fi

    # Test passes: only accept the fix if nothing else broke.
    if _check_regressions; then
        print_success "$case_id: Fixed! Test passes and nothing else broke"
        _mark_passed "$case_id"
        _track_fixed_files "$case_id"
        _auto_sign_from_case "$case_id"
        UAT_BUGS_FIXED=$((UAT_BUGS_FIXED + 1))
        _commit_result "$case_id" "$test_file"
        UAT_CASES_PASSED=$((UAT_CASES_PASSED + 1))
        _log_uat "$case_id" "GREEN: PASSED"
    else
        # Regression detected — rollback
        print_error "$case_id: Fix broke other tests — undoing the change"
        _rollback_to_snapshot "$case_id"
        _flag_for_human "$case_id" "Fix broke other tests"
        _increment_green_retry "$case_id"
        _log_uat "$case_id" "GREEN: ROLLBACK — fix caused regression"
    fi

    rm -f "$output_file"
}
1117
+
1118
+ # ============================================================================
1119
+ # TEST EXECUTION
1120
+ # ============================================================================
1121
+
1122
# Runs one test file with the runner that matches the case type and project.
#
# Arguments:
#   $1 - test_file: path handed to the test runner
#   $2 - test_type: "e2e" selects Playwright; anything else goes through
#        runner detection (vitest, jest, pytest; vitest as the fallback)
# Outputs: the command being run and, on failure, the last 30 log lines.
# Side effects: on failure the full log is copied to
#               $UAT_MODE_DIR/last_test_output.log.
# Returns: 0 when safe_exec reports success, 1 otherwise.
_run_test() {
    local test_file="$1"
    local test_type="$2"
    local log_file
    log_file=$(create_temp_file ".uat-test.log")

    local test_cmd=""

    if [[ "$test_type" == "e2e" ]]; then
        # Playwright. No --config flag is passed: the previous fallback pointed
        # --config at playwright.config.ts precisely when that file had been
        # found missing. Config discovery is left to Playwright itself.
        test_cmd="npx playwright test $test_file"
    else
        # Integration — detect test runner
        if [[ -f "vitest.config.ts" ]] || [[ -f "vitest.config.js" ]] || [[ -f "vite.config.ts" ]]; then
            test_cmd="npx vitest run $test_file"
        elif [[ -f "jest.config.ts" ]] || [[ -f "jest.config.js" ]] || grep -q '"jest"' package.json 2>/dev/null; then
            test_cmd="npx jest $test_file"
        elif [[ -f "pytest.ini" ]] || [[ -f "pyproject.toml" ]]; then
            local py_runner
            py_runner=$(detect_python_runner ".")
            # Prefix pytest with the detected runner only when one was reported.
            test_cmd="${py_runner}${py_runner:+ }pytest $test_file -v"
        else
            test_cmd="npx vitest run $test_file"
        fi
    fi

    echo " Running: $test_cmd"

    if safe_exec "$test_cmd" "$log_file"; then
        rm -f "$log_file"
        return 0
    else
        echo ""
        echo " Test output (last 30 lines):"
        tail -30 "$log_file" | sed 's/^/ /'
        # Keep the full log: failure classification and prompts read it later.
        cp "$log_file" "$UAT_MODE_DIR/last_test_output.log"
        rm -f "$log_file"
        return 1
    fi
}
1166
+
1167
+ # ============================================================================
1168
+ # TEST QUALITY VALIDATION
1169
+ # ============================================================================
1170
+
1171
# Reject tests that only check structure (page loads) without verifying content.
# A test that asserts "page has URL /dashboard" proves nothing about correctness.
# A test that asserts "page shows 'Welcome, John'" proves the right data rendered.
#
# Arguments:
#   $1 - test_file: test source to inspect
#   $2 - case_id: used only for log messages
# Returns: 0 when the test looks substantive, 1 when it is judged shallow.
# Note: counts come from grep -c, i.e. matching lines, not individual calls.
_validate_test_quality() {
    local test_file="$1"
    local case_id="$2"

    # Content checks. toContainText( and getByText( are included so that the
    # style shown in the RED prompt's own example
    # (expect(page.getByText('...')).toBeVisible()) counts as a content check.
    local content_re='toContain\(|toContainText\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(|textContent|innerText|toHaveValue\(|getByText\('
    local result_re='toContain\(|toContainText\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(|getByText\('

    # Count total assertion calls
    local assertion_count
    assertion_count=$(grep -cE 'expect\(|assert\(|\.should\(' "$test_file" 2>/dev/null || true)

    if [[ "$assertion_count" -lt 2 ]]; then
        _log_uat "$case_id" "SHALLOW: only $assertion_count assertion(s)"
        return 1
    fi

    # Count content assertions — these verify the RIGHT data, not just structure
    local content_assertions
    content_assertions=$(grep -cE "$content_re" "$test_file" 2>/dev/null || true)

    if [[ "$content_assertions" -eq 0 ]]; then
        _log_uat "$case_id" "SHALLOW: no content assertions (only structural checks)"
        return 1
    fi

    # Check for input→output test pattern: the file both drives the UI
    # (fill/type/press/click) and asserts on resulting content.
    local has_input_output=false
    if grep -qE 'fill\(|type\(|press\(|click\(' "$test_file" 2>/dev/null; then
        if grep -qE "$result_re" "$test_file" 2>/dev/null; then
            has_input_output=true
        fi
    fi

    # Only browser-driven tests are required to show the input→output pattern;
    # files without page./browser./playwright references are let through.
    if [[ "$has_input_output" == "false" ]]; then
        if grep -qE 'page\.|browser\.|playwright' "$test_file" 2>/dev/null; then
            _log_uat "$case_id" "SHALLOW: e2e test has no input→output assertions"
            return 1
        fi
    fi

    _log_uat "$case_id" "Quality OK: $assertion_count assertions ($content_assertions content)"
    return 0
}
1218
+
1219
# Save feedback about shallow tests so Claude gets specific guidance on retry.
# Appends a section to $UAT_FAILURE_FILE explaining which check was not met.
#
# Arguments:
#   $1 - case_id
#   $2 - test_file: the rejected test source
_save_shallow_test_feedback() {
    local case_id="$1"
    local test_file="$2"

    # toContainText( and getByText( are counted as content checks so the stats
    # agree with the "Good:" examples printed below.
    local content_re='toContain\(|toContainText\(|toHaveText\(|toBe\(|toEqual\(|toMatch\(|textContent|innerText|toHaveValue\(|getByText\('

    local assertion_count content_assertions
    assertion_count=$(grep -cE 'expect\(|assert\(|\.should\(' "$test_file" 2>/dev/null || true)
    content_assertions=$(grep -cE "$content_re" "$test_file" 2>/dev/null || true)

    {
        echo ""
        echo "=== Test quality check failed for $case_id ==="
        echo ""
        echo "Your test is too shallow. It checks structure but not correctness."
        echo ""
        echo "Stats: $assertion_count total assertions, $content_assertions content assertions"
        echo ""
        echo "What's wrong:"
        if [[ "$assertion_count" -lt 2 ]]; then
            echo " - Only $assertion_count assertion(s). Every test needs at least 2."
        fi
        if [[ "$content_assertions" -eq 0 ]]; then
            echo " - ZERO content assertions. You're only checking that pages load,"
            echo " not that they show the RIGHT content."
            echo ""
            echo " Bad: await expect(page).toHaveURL('/dashboard');"
            echo " Good: await expect(page.getByText('Welcome, John')).toBeVisible();"
            echo ""
            echo " Bad: await expect(form).toBeVisible();"
            echo " Good: await expect(page.getByText('Email is required')).toBeVisible();"
        fi
        # Both counts are fine, so the rejection came from the remaining
        # check: a browser test that never pairs an interaction with a
        # content assertion. Say so instead of leaving the list empty.
        if [[ "$assertion_count" -ge 2 && "$content_assertions" -gt 0 ]]; then
            echo " - No input→output check. Interact with the page (fill, type, press, click)"
            echo " and then assert on the content that appears as a result."
        fi
        echo ""
        echo "Fix: Read the assertions in .ralph/$UAT_CONFIG_NS/plan.json for this test case."
        echo "Each assertion has an 'input' and 'expected' — encode THOSE as expect() calls."
        echo "---"
    } >> "$UAT_FAILURE_FILE"
}
1256
+
1257
+ # ============================================================================
1258
+ # FAILURE HANDLING
1259
+ # ============================================================================
1260
+
1261
# Appends a failure section for the given case to $UAT_FAILURE_FILE, including
# the tail of the last test output when that log exists, then trims the file
# to its newest 200 lines.
#
# Arguments:
#   $1 - case_id
#   $2 - output_file: accepted from callers but not read by this function
_save_failure_context() {
    local case_id="$1"
    local output_file="$2"
    local last_output="$UAT_MODE_DIR/last_test_output.log"

    # The attempt number shown is the stored retryCount plus one.
    local prior_attempts
    prior_attempts=$(jq -r --arg id "$case_id" '.testCases[] | select(.id==$id) | .retryCount // 0' "$UAT_PLAN_FILE")

    {
        printf '\n=== Attempt %s failed for %s ===\n\n' "$((prior_attempts + 1))" "$case_id"
        if [[ -f "$last_output" ]]; then
            printf '%s\n' "--- Test Output ---"
            tail -50 "$last_output"
            printf '\n'
        fi
        printf '%s\n' "---"
    } >> "$UAT_FAILURE_FILE"

    # Cap at 200 lines
    [[ -f "$UAT_FAILURE_FILE" ]] || return 0

    local total_lines
    total_lines=$(wc -l < "$UAT_FAILURE_FILE" | tr -d ' ')
    if (( total_lines > 200 )); then
        tail -200 "$UAT_FAILURE_FILE" > "$UAT_FAILURE_FILE.tmp" && mv "$UAT_FAILURE_FILE.tmp" "$UAT_FAILURE_FILE"
    fi
}
1289
+
1290
+ _increment_red_retry() {
1291
+ local case_id="$1"
1292
+ update_json "$UAT_PLAN_FILE" \
1293
+ --arg id "$case_id" \
1294
+ '(.testCases[] | select(.id==$id)) |= . + {
1295
+ redRetries: ((.redRetries // 0) + 1),
1296
+ retryCount: ((.redRetries // 0) + 1 + (.greenRetries // 0))
1297
+ }'
1298
+ }
1299
+
1300
+ _increment_green_retry() {
1301
+ local case_id="$1"
1302
+ update_json "$UAT_PLAN_FILE" \
1303
+ --arg id "$case_id" \
1304
+ '(.testCases[] | select(.id==$id)) |= . + {
1305
+ greenRetries: ((.greenRetries // 0) + 1),
1306
+ retryCount: ((.redRetries // 0) + (.greenRetries // 0) + 1)
1307
+ }'
1308
+ }
1309
+
1310
# Records the TDD phase marker on a case in $UAT_PLAN_FILE.
#
# Arguments:
#   $1 - case_id
#   $2 - phase: the literal string "null" stores a JSON null; any other value
#        is stored as a JSON string (callers in view pass "red")
_mark_phase() {
    local case_id="$1"
    local phase="$2"
    local -a jq_args=(--arg id "$case_id")

    case "$phase" in
        null)
            jq_args+=('(.testCases[] | select(.id==$id)) |= . + {phase: null}')
            ;;
        *)
            jq_args+=(--arg phase "$phase" '(.testCases[] | select(.id==$id)) |= . + {phase: $phase}')
            ;;
    esac

    update_json "$UAT_PLAN_FILE" "${jq_args[@]}"
}
1324
+
1325
+ _mark_passed() {
1326
+ local case_id="$1"
1327
+ update_json "$UAT_PLAN_FILE" \
1328
+ --arg id "$case_id" \
1329
+ '(.testCases[] | select(.id==$id)) |= . + {passes: true, retryCount: 0, phase: null, redRetries: 0, greenRetries: 0}'
1330
+ # Clear failure context for this case
1331
+ rm -f "$UAT_FAILURE_FILE"
1332
+ }
1333
+
1334
# Commits the failing RED test on its own.
#
# Arguments:
#   $1 - case_id: used in the commit message
#   $2 - test_file: the only path this function stages
# Behavior: no-op without git or a .git directory, or when nothing is staged.
#           Tries a normal commit up to three times, re-staging the test file
#           when the hook output says files were modified, then falls back to
#           --no-verify.
# Returns: 0 in all cases (commit failures are tolerated).
_commit_red_test() {
    local case_id="$1"
    local test_file="$2"

    if ! command -v git &>/dev/null || [[ ! -d ".git" ]]; then
        return 0
    fi

    # "--" keeps a path that starts with "-" from being read as an option.
    git add -- "$test_file" 2>/dev/null || true

    if git diff --cached --quiet 2>/dev/null; then
        return 0
    fi

    local commit_msg="test($case_id): TDD red -- failing test identifies bug"

    # Without a log file the hook message cannot be inspected, but the commit
    # itself should still be attempted, so fall back to /dev/null.
    local commit_log
    commit_log=$(mktemp 2>/dev/null) || commit_log="/dev/null"

    local success=false
    local attempt   # keep the loop variable out of the global scope

    for attempt in 1 2 3; do
        if git commit -m "$commit_msg" > "$commit_log" 2>&1; then
            success=true
            break
        fi
        # A hook rewrote files: stage the test file again and retry.
        if grep -q "files were modified by this hook" "$commit_log" 2>/dev/null; then
            git add -- "$test_file"
            continue
        fi
        break
    done

    if [[ "$success" != "true" ]]; then
        git add -- "$test_file"
        git commit -m "$commit_msg" --no-verify > "$commit_log" 2>&1 || true
    fi

    if [[ "$commit_log" != "/dev/null" ]]; then
        rm -f "$commit_log"
    fi
    return 0
}
1371
+
1372
# Classifies a failed RED test run by scanning the last saved test output.
#
# Arguments:
#   $1 - test_file (not consulted; the saved output log is what gets scanned)
#   $2 - case_id: used for logging
# Outputs: "test_bug" when the log shows syntax/import style errors,
#          otherwise "app_bug".
_classify_red_failure() {
    local test_file="$1"
    local case_id="$2"
    local last_log="$UAT_MODE_DIR/last_test_output.log"
    # Syntax errors, import failures, module not found = test bug
    local test_bug_re='SyntaxError|Cannot find module|ModuleNotFoundError|ImportError|TypeError:.*is not a function|ReferenceError:.*is not defined|unexpected token'

    if [[ -f "$last_log" ]] && grep -qiE "$test_bug_re" "$last_log" 2>/dev/null; then
        _log_uat "$case_id" "RED classify: test_bug (syntax/import error)"
        echo "test_bug"
        return
    fi

    # Everything else (assertion failures, timeouts waiting for an element)
    # is treated as the app being wrong.
    _log_uat "$case_id" "RED classify: app_bug (assertion failure)"
    echo "app_bug"
}
1391
+
1392
# Reports whether the test file differs from HEAD.
#
# Arguments:
#   $1 - test_file
# Returns: 0 when `git diff --quiet HEAD -- <file>` exits non-zero,
#          1 when it exits zero or when git / a .git directory is unavailable.
_test_file_modified() {
    local test_file="$1"

    if ! command -v git &>/dev/null || [[ ! -d ".git" ]]; then
        return 1
    fi

    # Check if test file has uncommitted changes (modified since last commit)
    if git diff --quiet HEAD -- "$test_file" 2>/dev/null; then
        return 1
    fi
    return 0
}
1401
+
1402
# Restores the test file to its HEAD version and logs the restore.
#
# Arguments:
#   $1 - test_file
#   $2 - case_id for the log entry (defaults to "GREEN")
# No-op when git or a .git directory is unavailable.
_restore_test_file() {
    local test_file="$1"
    local case_id="${2:-GREEN}"

    command -v git &>/dev/null || return 0
    [[ -d ".git" ]] || return 0

    # A failed checkout is tolerated; the log entry is written either way.
    git checkout HEAD -- "$test_file" 2>/dev/null || true
    _log_uat "$case_id" "GREEN: Restored test file: $test_file"
}
1410
+
1411
# Appends a fixed "RED phase violation" notice for the case to
# $UAT_FAILURE_FILE so the next RED prompt can include it.
#
# Arguments:
#   $1 - case_id
_save_red_violation_feedback() {
    local case_id="$1"

    # Unquoted delimiter: only $case_id is expanded in the text below.
    cat >> "$UAT_FAILURE_FILE" << VIOLATION_NOTICE

=== RED PHASE VIOLATION for $case_id ===

You modified application source files during the RED phase.
In the RED phase, you must ONLY write the test file.

DO NOT modify any files in src/, api/, app/, lib/, or similar directories.
Write ONLY the test file specified in plan.json.

If the app has a bug, let the test FAIL. A separate GREEN session will fix the app.
---
VIOLATION_NOTICE
}
1427
+
1428
# Records that a case needs manual attention: appends "<case>: <reason>" to
# the UAT_NEEDS_HUMAN array and writes a NEEDS_HUMAN log entry.
#
# Arguments:
#   $1 - case_id
#   $2 - reason
_flag_for_human() {
    local case_id="$1" reason="$2"
    local entry="$case_id: $reason"

    UAT_NEEDS_HUMAN+=("$entry")
    _log_uat "$case_id" "NEEDS_HUMAN: $reason"
}
1434
+
1435
+ # ============================================================================
1436
+ # GIT OPERATIONS
1437
+ # ============================================================================
1438
+
1439
# Creates (or moves) the tag uat-snapshot-<case_id> as a rollback point.
# Tracked or staged changes are committed first, because a tag points at a
# commit rather than at the working tree.
#
# Arguments:
#   $1 - case_id
# No-op when git or a .git directory is unavailable; git failures are ignored.
_git_snapshot() {
    local case_id="$1"

    command -v git &>/dev/null || return 0
    [[ -d ".git" ]] || return 0

    # Dirty = working tree differs from HEAD, or the index has staged changes.
    local dirty=false
    if ! git diff --quiet HEAD 2>/dev/null; then
        dirty=true
    elif ! git diff --cached --quiet 2>/dev/null; then
        dirty=true
    fi

    if [[ "$dirty" == "true" ]]; then
        git add -A 2>/dev/null || true
        git commit -m "$UAT_CONFIG_NS: snapshot before $case_id" --no-verify 2>/dev/null || true
    fi

    git tag -f "uat-snapshot-${case_id}" 2>/dev/null || true
}
1451
+
1452
# Hard-resets the repository to the tag uat-snapshot-<case_id>, if it exists.
#
# Arguments:
#   $1 - case_id
# No-op when git, a .git directory, or the tag is unavailable.
_rollback_to_snapshot() {
    local case_id="$1"
    local snapshot_tag="uat-snapshot-${case_id}"

    command -v git &>/dev/null || return 0
    [[ -d ".git" ]] || return 0
    git rev-parse "$snapshot_tag" >/dev/null 2>&1 || return 0

    # Reset to the snapshot commit — undoes both staged and committed changes since
    git reset --hard "$snapshot_tag" 2>/dev/null || true
    print_info "Reverted changes for $case_id"
}
1463
+
1464
# Reports whether any file other than the test file (and anything under
# .ralph/) differs from HEAD.
#
# Arguments:
#   $1 - test_file: path to exclude from the comparison
# Returns: 0 when such changes exist; 1 when there are none or when git / a
#          .git directory is unavailable.
# Note: this relies on `git diff --name-only HEAD`, so files git is not yet
#       tracking are not considered.
_has_app_changes() {
    local test_file="$1"
    if command -v git &>/dev/null && [[ -d ".git" ]]; then
        # git prints repo-relative paths with no leading "./", so strip one
        # from the plan's path; otherwise "./tests/x" never matches and the
        # test file itself is reported as an app change.
        local rel_test_file="${test_file#./}"

        # Check if any files OTHER than the test file were modified.
        # -e keeps a path that starts with "-" from being read as an option.
        local changed_files
        changed_files=$(git diff --name-only HEAD 2>/dev/null | grep -Fxv -e "$rel_test_file" | grep -v '\.ralph/' || true)
        [[ -n "$changed_files" ]]
    else
        return 1
    fi
}
1475
+
1476
# Runs the project's existing test command to make sure a fix broke nothing.
# The command comes from config key .checks.testCommand; when that is empty it
# is guessed from marker files in the current directory.
#
# Returns: 0 when the command succeeds or when no command can be determined,
#          1 when the command fails (the last 20 log lines are printed).
_check_regressions() {
    echo " Making sure other tests still pass..."

    # Run existing unit tests
    local regression_cmd
    regression_cmd=$(get_config '.checks.testCommand' "")

    # Auto-detect when nothing is configured
    if [[ -z "$regression_cmd" ]]; then
        if [[ -f "package.json" ]] && grep -q '"test"' package.json; then
            regression_cmd="npm test"
        elif [[ -f "pytest.ini" || -f "pyproject.toml" ]]; then
            local py_runner
            py_runner=$(detect_python_runner ".")
            regression_cmd="${py_runner}${py_runner:+ }pytest"
        elif [[ -f "Cargo.toml" ]]; then
            regression_cmd="cargo test"
        elif [[ -f "go.mod" ]]; then
            regression_cmd="go test ./..."
        fi
    fi

    # No test command — can't check regressions, assume ok
    [[ -n "$regression_cmd" ]] || return 0

    local regression_log
    regression_log=$(create_temp_file ".uat-regression.log")

    if ! safe_exec "$regression_cmd" "$regression_log"; then
        print_error " Some other tests broke!"
        echo " Output (last 20 lines):"
        tail -20 "$regression_log" | sed 's/^/ /'
        rm -f "$regression_log"
        return 1
    fi

    print_success " All other tests still pass"
    rm -f "$regression_log"
    return 0
}
1516
+
1517
# Commits the outcome of a passed case (test file plus any app fix) and
# removes the case's snapshot tag.
#
# Arguments:
#   $1 - case_id: used in the commit message and the tag name
#   $2 - test_file
# Behavior: no-op without git or a .git directory. Stages everything, picks
#           the message based on _has_app_changes, tries a normal commit up to
#           three times (re-staging when the hook output says files were
#           modified), then falls back to --no-verify.
# Returns: 0 in all cases (commit failures are tolerated).
_commit_result() {
    local case_id="$1"
    local test_file="$2"

    if ! command -v git &>/dev/null || [[ ! -d ".git" ]]; then
        return 0
    fi

    local snapshot_tag="uat-snapshot-${case_id}"

    # Stage the test file and any app fixes
    git add -- "$test_file" 2>/dev/null || true
    git add -A 2>/dev/null || true

    # Nothing to commit: the case is still finished, so the snapshot tag is
    # removed here too instead of being left behind.
    if git diff --cached --quiet 2>/dev/null; then
        git tag -d "$snapshot_tag" >/dev/null 2>&1 || true
        return 0
    fi

    local commit_msg
    if _has_app_changes "$test_file"; then
        commit_msg="test+fix($case_id): TDD green -- test + app fix"
    else
        commit_msg="test($case_id): $UAT_CONFIG_NS test"
    fi

    # Without a log file the hook message cannot be inspected, but the commit
    # itself should still be attempted, so fall back to /dev/null.
    local commit_log
    commit_log=$(mktemp 2>/dev/null) || commit_log="/dev/null"

    local success=false
    local attempt   # keep the loop variable out of the global scope

    # Try commit with retries for auto-fix hooks
    for attempt in 1 2 3; do
        if git commit -m "$commit_msg" > "$commit_log" 2>&1; then
            success=true
            break
        fi
        if grep -q "files were modified by this hook" "$commit_log" 2>/dev/null; then
            git add -A
            continue
        fi
        break
    done

    if [[ "$success" != "true" ]]; then
        # Try with --no-verify as last resort
        git add -A
        git commit -m "$commit_msg" --no-verify > "$commit_log" 2>&1 || true
    fi

    if [[ "$commit_log" != "/dev/null" ]]; then
        rm -f "$commit_log"
    fi

    # Clean up snapshot tag
    git tag -d "$snapshot_tag" >/dev/null 2>&1 || true
    return 0
}
1569
+
1570
# Appends "<path> (<case_id>)" to UAT_FILES_FIXED for each path reported by
# `git diff --name-only HEAD~1`, skipping paths that contain "test" or
# ".ralph/".
#
# Arguments:
#   $1 - case_id
# Returns: 0 always, including when there is nothing to record.
_track_fixed_files() {
    local case_id="$1"
    if command -v git &>/dev/null && [[ -d ".git" ]]; then
        local fixed
        fixed=$(git diff --name-only HEAD~1 2>/dev/null | grep -v 'test' | grep -v '\.ralph/' || true)

        local f
        while IFS= read -r f; do
            # An explicit `if` rather than `[[ ... ]] && ...`: an empty final
            # line would otherwise become the function's exit status (1) and
            # make a successful "nothing to track" look like a failure.
            if [[ -n "$f" ]]; then
                UAT_FILES_FIXED+=("$f ($case_id)")
            fi
        done <<< "$fixed"
    fi
    return 0
}
1580
+
1581
+ # ============================================================================
1582
+ # PROMPT BUILDING
1583
+ # ============================================================================
1584
+
1585
# Writes the RED-phase prompt for one case to the given file: the base UAT
# prompt, the RED instructions, previous failure feedback (if any), a config
# pointer, and whatever _inject_signs prints.
#
# Arguments:
#   $1 - case_id: interpolated into the instructions
#   $2 - prompt_file: destination (overwritten)
_build_red_prompt() {
    local case_id="$1"
    local prompt_file="$2"

    # A project-specific prompt in $UAT_MODE_DIR wins over the shared template.
    local base_prompt="$UAT_MODE_DIR/UAT-PROMPT.md"
    [[ -f "$base_prompt" ]] || base_prompt="$RALPH_TEMPLATES/UAT-PROMPT.md"
    cat "$base_prompt" > "$prompt_file"

    # Unquoted delimiter: $UAT_CONFIG_NS and $case_id expand, and the
    # backslash-escaped backticks come out as literal backticks.
    cat >> "$prompt_file" << PROMPT_SECTION

---

## Phase: RED — Write Test Only

You are in the **RED phase** of TDD. Your ONLY job is to write the test.

**CRITICAL: DO NOT modify any application source files. Test files ONLY.**

Your tasks:

1. **Read the test case** from \`.ralph/$UAT_CONFIG_NS/plan.json\` (case ID: $case_id)
2. **Explore the feature** using Playwright MCP — navigate to the relevant pages, interact with the UI
3. **Write the test file** at the path specified in the test case
4. **Encode every assertion** from the test case as an actual expect() call
5. **Include edge cases** listed in the test case

### Rules

- DO NOT modify any application source files (src/, api/, app/, etc.)
- Write the test to verify CORRECT behavior based on the plan's assertions
- If the app has a bug, the test WILL fail — that is the expected and correct outcome
- Ralph will detect and reject any app code changes in this phase

### Assertions are mandatory

The test case in plan.json has an \`assertions\` array. Each assertion has:
- \`input\`: what to do (fill form, click button, navigate to URL)
- \`expected\`: what should happen (text appears, redirect occurs, error shown)
- \`strategy\`: how to verify (keyword, structural, navigation, security, llm-judge)

**Every assertion MUST become an expect() call in your test.** This is how we verify
correctness, not just that the page loads. Ralph will reject tests that only check
structure without verifying content.

Example — assertion in plan.json:
\`\`\`json
{"input": "Fill name='John', submit", "expected": "Shows 'Welcome, John'", "strategy": "keyword"}
\`\`\`

Becomes in the test:
\`\`\`typescript
await page.getByLabel('Name').fill('John');
await page.getByRole('button', { name: 'Submit' }).click();
await expect(page.getByText('Welcome, John')).toBeVisible();
\`\`\`
PROMPT_SECTION

    # Inject failure context if retrying
    if [[ -f "$UAT_FAILURE_FILE" ]]; then
        {
            printf '\n%s\n\n%s\n' \
                "### Previous RED Attempt Failed" \
                "Your previous test attempt had issues. Fix them:"
            printf '%s\n' '```'
            tail -50 "$UAT_FAILURE_FILE"
            printf '%s\n' '```'
        } >> "$prompt_file"
    fi

    # Inject config context
    printf '\n%s\n\n%s\n' \
        "### Config" \
        'Read `.ralph/config.json` for URLs and directories.' >> "$prompt_file"

    # Inject signs
    _inject_signs >> "$prompt_file"
}
1666
+
1667
# Writes the GREEN-phase prompt for one case to the given file: the fix-only
# instructions, the accumulated failure notes and the last test output (each
# only when its file exists), and whatever _inject_signs prints.
#
# Arguments:
#   $1 - case_id
#   $2 - test_file: named in the instructions as the file not to modify
#   $3 - prompt_file: destination (overwritten)
_build_green_prompt() {
    local case_id="$1"
    local test_file="$2"
    local prompt_file="$3"
    local last_output="$UAT_MODE_DIR/last_test_output.log"

    # GREEN prompt is focused — no UAT-PROMPT.md preamble needed.
    # Unquoted delimiter: $test_file, $case_id and $UAT_CONFIG_NS expand, and
    # the backslash-escaped backticks come out as literal backticks.
    cat > "$prompt_file" << PROMPT_SECTION
# GREEN Phase — Fix Application Code

A test has been written that correctly identifies a bug. Your job is to fix the
APPLICATION CODE so the test passes.

**CRITICAL: DO NOT modify the test file (\`$test_file\`). Fix the app, not the test.**

## Case: $case_id

1. **Read the test file** at \`$test_file\` to understand what it checks
2. **Read the test case** from \`.ralph/$UAT_CONFIG_NS/plan.json\` (case ID: $case_id) for context
3. **Read the failure output** below to understand what went wrong
4. **Fix the APPLICATION CODE** — make the minimum change needed to pass the test
5. **DO NOT modify the test file** — Ralph will restore it if you do

### Rules

- Make the MINIMUM change needed to fix the bug
- Do NOT modify the test file — it has been validated and committed
- Do NOT add workarounds or hacks — fix the actual bug
- Read .ralph/config.json for project URLs and directories
PROMPT_SECTION

    # Inject failure context (critical for GREEN — this is what guides the fix)
    if [[ -f "$UAT_FAILURE_FILE" ]]; then
        {
            printf '\n%s\n\n' "## Failure Output"
            printf '%s\n' '```'
            tail -80 "$UAT_FAILURE_FILE"
            printf '%s\n' '```'
        } >> "$prompt_file"
    fi

    # Also include last test output if available
    if [[ -f "$last_output" ]]; then
        {
            printf '\n%s\n\n' "## Last Test Output"
            printf '%s\n' '```'
            tail -80 "$last_output"
            printf '%s\n' '```'
        } >> "$prompt_file"
    fi

    # Inject signs
    _inject_signs >> "$prompt_file"
}
1720
+
1721
+ # ============================================================================
1722
+ # ACTIVITY FEED (reuses pattern from loop.sh)
1723
+ # ============================================================================
1724
+
1725
# Turn a stream of agent output (stdin) into a compact activity feed (stdout).
# Arguments:
#   $1 - "true" to suppress everything derived from JSON lines (default "false")
# Behaviour:
#   - lines that do not start with "{" are echoed unchanged, in either mode
#   - JSON lines of type "assistant" print one line per tool_use entry
#   - JSON lines of type "result" print a "Done" line with cost and duration
# Every working variable is local, so nothing leaks into the caller's scope.
_parse_uat_activity() {
  local quiet="${1:-false}"
  local dim=$'\033[2m' green=$'\033[0;32m' nc=$'\033[0m'
  local line msg_type tool_entries tool_name tool_input label detail
  local cost duration_ms cost_str dur_str total_secs

  while IFS= read -r line; do
    # Non-JSON lines — always pass through
    if [[ "$line" != "{"* ]]; then
      echo "$line"
      continue
    fi

    [[ "$quiet" == "true" ]] && continue

    # Cheap substring pre-filter so jq only runs on lines that can matter
    if [[ "$line" != *'"assistant"'* && "$line" != *'"result"'* ]]; then
      continue
    fi

    msg_type=$(jq -r '.type // empty' <<< "$line" 2>/dev/null) || continue

    if [[ "$msg_type" == "assistant" ]]; then
      # One "name<TAB>input-as-json" record per tool_use entry
      tool_entries=$(jq -r '
        .message.content[]?
        | select(.type == "tool_use")
        | .name + "\t" + (.input | tostring)
      ' <<< "$line" 2>/dev/null) || continue

      while IFS=$'\t' read -r tool_name tool_input; do
        [[ -z "$tool_name" ]] && continue
        label=""
        detail=""
        case "$tool_name" in
          Read | Edit | Write)
            case "$tool_name" in
              Read) label="Reading" ;;
              Edit) label="Editing" ;;
              Write) label="Creating" ;;
            esac
            detail=$(jq -r '.file_path // empty' <<< "$tool_input" 2>/dev/null)
            # Show paths relative to the working directory
            detail="${detail#"$PWD/"}"
            ;;
          Bash)
            label="Running"
            detail=$(jq -r '.description // .command // empty' <<< "$tool_input" 2>/dev/null)
            detail="${detail:0:60}"
            ;;
          mcp__playwright__*)
            label="Browser"
            detail="${tool_name#mcp__playwright__browser_}"
            ;;
          *)
            label="$tool_name"
            ;;
        esac
        # Colours are passed as arguments, never as part of the format string
        printf ' %s⟳%s %-10s %s\n' "$dim" "$nc" "$label" "$detail"
      done <<< "$tool_entries"

    elif [[ "$msg_type" == "result" ]]; then
      cost=$(jq -r '.total_cost_usd // empty' <<< "$line" 2>/dev/null)
      duration_ms=$(jq -r '.duration_ms // empty' <<< "$line" 2>/dev/null)
      cost_str=""
      dur_str=""
      [[ -n "$cost" ]] && cost_str=$(printf '$%.2f' "$cost")

      # Integer arithmetic only: drop any fractional part and force base 10 so
      # an unexpected value cannot abort the loop with an arithmetic error.
      duration_ms="${duration_ms%%.*}"
      if [[ "$duration_ms" =~ ^[0-9]+$ ]]; then
        total_secs=$((10#$duration_ms / 1000))
        if [[ $total_secs -ge 60 ]]; then
          dur_str="$((total_secs / 60))m $((total_secs % 60))s"
        else
          dur_str="${total_secs}s"
        fi
      fi

      echo ""
      if [[ -n "$cost_str" && -n "$dur_str" ]]; then
        echo -e " ${green}✓ Done${nc} ${dim}(${cost_str}, ${dur_str})${nc}"
      elif [[ -n "$cost_str" ]]; then
        echo -e " ${green}✓ Done${nc} ${dim}(${cost_str})${nc}"
      fi
    fi
  done
}
1812
+
1813
+ # ============================================================================
1814
+ # PHASE 3: REPORT
1815
+ # ============================================================================
1816
+
1817
# Print the end-of-run results box and send a summary notification.
# Case counts are read from the plan file with jq; each count falls back to
# "0" when jq fails (for example when the plan file is missing).
# Globals read: UAT_PLAN_FILE, UAT_MODE_LABEL, UAT_BUGS_FOUND, UAT_BUGS_FIXED,
#               UAT_RED_ONLY_PASSED, UAT_GREEN_ATTEMPTS, UAT_TESTS_WRITTEN,
#               UAT_FILES_FIXED (array), UAT_NEEDS_HUMAN (array)
# Loop variables are local so the caller's variables are never overwritten.
_print_report() {
  local total_cases passed_cases failed_cases skipped_cases
  local f item display line
  total_cases=$(jq '.testCases | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
  passed_cases=$(jq '[.testCases[] | select(.passes==true and .skipped!=true)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
  failed_cases=$(jq '[.testCases[] | select(.passes==false)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")
  skipped_cases=$(jq '[.testCases[] | select(.skipped==true)] | length' "$UAT_PLAN_FILE" 2>/dev/null || echo "0")

  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  printf "║ %-14s Results ║\n" "$UAT_MODE_LABEL"
  echo "╠══════════════════════════════════════════════════════════╣"
  printf "║ Test cases: %-3s total, %-3s passed, %-3s failed, %-3s skipped ║\n" \
    "$total_cases" "$passed_cases" "$failed_cases" "$skipped_cases"
  printf "║ App bugs found: %-3s Fixed: %-3s ║\n" \
    "$UAT_BUGS_FOUND" "$UAT_BUGS_FIXED"
  printf "║ Already working: %-3s Needed fixing: %-3s ║\n" \
    "$UAT_RED_ONLY_PASSED" "$UAT_GREEN_ATTEMPTS"
  echo "║ ║"

  # List test files: passing ones first, then failing ones
  if [[ $UAT_TESTS_WRITTEN -gt 0 ]]; then
    echo "║ New test files: ║"
    jq -r '.testCases[] | select(.passes==true and .skipped!=true) | " " + .testFile + " ✅"' "$UAT_PLAN_FILE" 2>/dev/null | while IFS= read -r line; do
      printf "║ %-56s║\n" "$line"
    done
    jq -r '.testCases[] | select(.passes==false) | " " + .testFile + " ❌"' "$UAT_PLAN_FILE" 2>/dev/null | while IFS= read -r line; do
      printf "║ %-56s║\n" "$line"
    done
  fi

  # List fixed app files (entries longer than 54 chars are truncated with "...")
  if [[ ${#UAT_FILES_FIXED[@]} -gt 0 ]]; then
    echo "║ ║"
    echo "║ App files fixed: ║"
    for f in "${UAT_FILES_FIXED[@]}"; do
      display="$f"
      [[ ${#display} -gt 54 ]] && display="${display:0:51}..."
      printf "║ %-54s║\n" "$display"
    done
  fi

  # List items needing human attention (same truncation rule)
  if [[ ${#UAT_NEEDS_HUMAN[@]} -gt 0 ]]; then
    echo "║ ║"
    echo "║ Needs your attention: ║"
    for item in "${UAT_NEEDS_HUMAN[@]}"; do
      display="$item"
      [[ ${#display} -gt 54 ]] && display="${display:0:51}..."
      printf "║ %-54s║\n" "$display"
    done
  fi

  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""

  # Send notification
  send_notification "$UAT_MODE_LABEL: $passed_cases/$total_cases passed, $UAT_BUGS_FIXED bugs fixed"
}
1875
+
1876
+ # ============================================================================
1877
+ # BANNER
1878
+ # ============================================================================
1879
+
1880
# Print the UAT loop banner: a blank line, the ASCII-art title, the tagline,
# and a trailing blank line — one printf argument per output line.
_print_uat_banner() {
  printf '%s\n' \
    "" \
    " _ _ _ _____ _ " \
    " | | | | / \\|_ _| | | ___ ___ _ __" \
    " | | | |/ _ \\ | | | | / _ \\ / _ \\| '_ \\" \
    " | |_| / ___ \\| | | |__| (_) | (_) | |_) |" \
    " \\___/_/ \\_\\_| |_____\\___/ \\___/| .__/" \
    " |_|" \
    " Acceptance testing loop — verifying things work" \
    ""
}
1891
+
1892
# Print the chaos-agent banner: a blank line, the ASCII-art title, the tagline,
# and a trailing blank line — one printf argument per output line.
_print_chaos_banner() {
  printf '%s\n' \
    "" \
    " ____ _ _ _ " \
    " / ___| |__ __ _ ___ ___ / \\ __ _ ___ _ __ | |_ " \
    " | | | '_ \\ / _\` |/ _ \\/ __|| _ \\ / _\` |/ _ \\ '_ \\| __|" \
    " | |___| | | | (_| | (_) \\__ \\/ ___ \\ (_| | __/ | | | |_ " \
    " \\____|_| |_|\\__,_|\\___/|___/_/ \\_\\__, |\\___|_| |_|\\__|" \
    " |___/ " \
    " Red team loop — trying to break things" \
    ""
}
1903
+
1904
+ # ============================================================================
1905
+ # CHAOS AGENT PROMPT
1906
+ # ============================================================================
1907
+
1908
# Assemble the chaos-agent (red team) discovery prompt.
# Arguments:
#   $1 - prompt file to create/overwrite
# Globals read: RALPH_TEMPLATES, CHAOS_FRONTEND_URL, CHAOS_API_URL
# Layout of the generated file:
#   1. the UAT-PROMPT.md template, copied verbatim
#   2. the static red-team instructions (literal heredoc, nothing expands)
#   3. either the Docker-isolation section (when CHAOS_FRONTEND_URL is set)
#      or the non-destructive guardrails
#   4. whatever _inject_prompt_context appends to the file
_build_chaos_agent_prompt() {
  local prompt_file="$1"

  # Start with UAT prompt template
  cat "$RALPH_TEMPLATES/UAT-PROMPT.md" > "$prompt_file"

  cat >> "$prompt_file" << 'PROMPT_SECTION'

---

## Phase: Chaos Agent Red Team Discovery

You are the **team lead** of a red team. Your job is to coordinate a team of adversarial
agents that attack a live app, share intel, and produce a battle-tested plan of
vulnerabilities to fix.

**Mindset: "You are a red team. Coordinate to find every vulnerability."**

### Step 1: Recon (~60 seconds)

Before spawning anyone, do a quick recon yourself:

1. **Read `.ralph/config.json`** for URLs, auth config, and directories
2. **Read `.ralph/prd.json`** if it exists — completed stories tell you what was built
3. **Navigate the app** using Playwright MCP — click through nav, find pages, note the tech stack
4. **Take 2-3 screenshots** of key pages (save to `.ralph/chaos/screenshots/`)
5. **Map the attack surface** — what feature areas exist? (auth, forms, API, navigation, etc.)

Don't go deep. Just map what's there. ~60 seconds max.

### Step 2: Assemble the Red Team

Create a team and spawn teammates:

```
TeamCreate: "chaos-agent"
```

Spawn these teammates using the Task tool with `team_name: "chaos-agent"`:

1. **"recon"** (`subagent_type: "general-purpose"`) — Attack surface mapping. Catalogs every
input, form, API endpoint, auth mechanism. Shares intel with team: "login uses JWT in
localStorage", "admin panel at /admin has no auth check".

2. **"chaos"** (`subagent_type: "general-purpose"`) — Chaos testing. For every input: empty
strings, 10000-char payloads, special characters (`<>&"'/\`), unicode/emoji, null bytes.
For every form: double-submit, missing fields, back button after submit. Rapid-fire
interactions.

3. **"security"** (`subagent_type: "general-purpose"`) — Security testing. XSS in every
input (`<script>alert(1)</script>`), SQL injection (`'; DROP TABLE users; --`), auth bypass
via direct URL, IDOR via ID manipulation, sensitive data in localStorage/console/page source,
missing CSRF tokens.

**Only spawn agents for areas that exist.** If there are no forms, don't spawn a forms specialist.
If there's no auth, skip auth testing.

Agents communicate via SendMessage — recon shares discoveries, security acts on them.

### Agent Instructions Template

Every agent prompt MUST include:

1. **Their role and focus area** (from above)
2. **The recon intel** — pages, URLs, tech stack you discovered in Step 1
3. **Browser tab isolation** — "Open your own browser tab via `browser_tabs(action: 'new')`
before navigating. Do NOT use the existing tab."
4. **Communication** — "Share important discoveries with teammates via SendMessage.
Examples: 'Auth uses JWT in localStorage', 'Found unprotected admin route at /admin',
'Form at /profile has no CSRF token'. Read messages from teammates and adapt your testing."
5. **Output format** — "When done, send your findings to the team lead via SendMessage.
Format each finding as a test case with: title, category, testFile path, targetFiles,
assertions (input/expected/strategy), and edgeCases."

### Step 3: Coordinate

While your team works:

- **Monitor messages** from teammates as they report findings
- **Redirect effort** if needed — if recon discovers something important, message the
relevant specialist ("recon found an admin panel at /admin — security, check it for auth bypass")
- **Create tasks** in the shared task list for any new areas discovered

### Step 4: Collect + Merge + Write Plan

After all teammates finish:

1. Collect findings from all agent messages
2. Dedup by test file path (keep the case with more assertions)
3. Assign sequential IDs: `UAT-001`, `UAT-002`, ...
4. Write `.ralph/chaos/plan.json` (schema below)
5. Write `.ralph/chaos/UAT-PROMPT.md` (schema below)
6. Shut down all teammates via SendMessage with `type: "shutdown_request"`
7. Clean up with TeamDelete

### plan.json Schema

Write `.ralph/chaos/plan.json`:

```json
{
"testSuite": {
"name": "Chaos Agent",
"generatedAt": "<ISO timestamp>",
"status": "pending",
"discoveryMethod": "chaos-agent"
},
"testCases": [
{
"id": "UAT-001",
"title": "Feature area — what the test checks",
"category": "auth|forms|navigation|api|ui|data|security",
"type": "e2e|integration",
"userStory": "As a user, I...",
"testApproach": "What to test and how",
"testFile": "tests/e2e/feature/test-name.spec.ts",
"targetFiles": ["src/pages/feature.tsx"],
"edgeCases": ["Edge case 1", "Edge case 2"],
"assertions": [
{
"input": "Fill name='<script>alert(1)</script>', submit form",
"expected": "Name displayed as literal text, no script execution",
"strategy": "security"
}
],
"passes": false,
"retryCount": 0,
"source": "chaos-agent:agent-name"
}
]
}
```

**Every test case MUST have at least 3 assertions** with concrete input/expected pairs:
1. One happy-path assertion (correct input → correct output)
2. One edge-case assertion (bad input → proper error handling)
3. One content assertion (page shows the RIGHT data, not just that it loads)

### UAT-PROMPT.md Schema

Write `.ralph/chaos/UAT-PROMPT.md` — a project-specific testing guide based on what the
red team ACTUALLY FOUND. Include:

```markdown
# Chaos Agent Guide — [Project Name]

## App Overview
- What the app does (1-2 sentences)
- Tech stack observed (framework, API patterns, auth method)
- Base URLs (frontend, API if applicable)

## Pages & Routes Discovered
For each page:
- URL pattern and what it shows
- Key interactive elements (forms, buttons, links)
- Selectors that work (data-testid, roles, labels)

## Auth Flow
- How login works (form fields, redirect after login)
- Test credentials if available (from config or .env)
- What pages require auth vs. public

## Known Forms & Inputs
For each form:
- Fields with their labels/names/selectors
- Required vs optional fields
- Validation behavior observed

## What "Correct" Looks Like
For each feature area:
- Expected behavior observed
- Specific text/numbers that should appear

## Console & Network Observations
- Any existing console errors/warnings
- API endpoints observed
- Response patterns (JSON structure, status codes)

## Red Team Findings
- Vulnerabilities discovered (XSS, injection, auth bypass, etc.)
- Edge cases that broke the app
- Areas that need hardening
```

This is NOT a copy of the template — it's ground truth from the red team's exploration.

### Rules

- Test auth flows FIRST (they gate everything else)
- One test case per feature area per attack vector
- `type: "e2e"` for anything involving browser interaction
- `targetFiles` should list the app source files the test covers
- `testFile` path should use the project's test directory conventions
- Always clean up: shutdown teammates and delete team when done

PROMPT_SECTION

  # Conditional section: Docker isolation vs non-destructive guardrails
  if [[ -n "${CHAOS_FRONTEND_URL:-}" ]]; then
    # Unquoted delimiter: the URLs expand. The API URL may legitimately be
    # unset (only the frontend URL gates this branch), so it gets a default
    # instead of aborting under `set -u` or rendering as an empty value.
    cat >> "$prompt_file" << PROMPT_DOCKER
### ISOLATED ENVIRONMENT (Docker)

You are attacking an ISOLATED Docker copy of the application.
The developer's live server is NOT affected. Go deeper and harder.

- Frontend: ${CHAOS_FRONTEND_URL}
- API: ${CHAOS_API_URL:-none}

Use THESE URLs for all testing. Ignore URLs in .ralph/config.json.
You CAN test destructive operations (DELETE endpoints, data mutations, etc.)
since this environment is disposable.
PROMPT_DOCKER
  else
    cat >> "$prompt_file" << 'PROMPT_SAFE'
### Non-Destructive Testing (CRITICAL)

The developer is actively running this app. Your testing MUST NOT corrupt application state:

- **OBSERVE, don't destroy** — read data, don't delete it. Test inputs, don't wipe databases.
- **NO destructive API calls** — do NOT call DELETE endpoints, DROP tables, or clear/reset data
- **NO mass mutations** — don't create thousands of records, flood queues, or exhaust rate limits
- **Prefer GET over POST/PUT/DELETE** for reconnaissance
- **Test XSS/injection via form inputs**, not direct database manipulation
- **If you find a destructive vulnerability**, DOCUMENT IT in the plan — don't exploit it live
- **Leave the app in a usable state** after each agent finishes
- **If the app crashes or becomes unresponsive**, stop testing and report what caused it
PROMPT_SAFE
  fi

  _inject_prompt_context "$prompt_file"
}
2139
+
2140
+ # ============================================================================
2141
+ # ISOLATION: DOCKER-BASED CHAOS ENVIRONMENT
2142
+ # ============================================================================
2143
+
2144
# Decide whether chaos-agent runs should use Docker isolation.
# Must be called directly (not in a $() subshell) so the globals survive.
# Globals read: RALPH_DIR
# Globals set:
#   CHAOS_ISOLATION_RESULT - "true" or "false"
#   CHAOS_COMPOSE_CMD      - output of _detect_compose_cmd (once isolation is
#                            not disabled by config)
#   CHAOS_COMPOSE_FILE     - compose file to use (only when the result is "true")
# Isolation is used only when chaos.isolate is not disabled, a compose command
# is detected, and a compose file is found.
_should_use_docker_isolation() {
  CHAOS_ISOLATION_RESULT="false"

  # Read chaos.isolate directly — get_config uses `// empty` which treats
  # boolean false as falsy and falls through to the default
  local isolate="true"
  local config="$RALPH_DIR/config.json"
  local raw compose_file candidate
  if [[ -f "$config" ]]; then
    raw=$(jq -r 'if .chaos.isolate == false then "false" elif .chaos.isolate then .chaos.isolate else "unset" end' "$config" 2>/dev/null)
    [[ "$raw" != "unset" && "$raw" != "null" && -n "$raw" ]] && isolate="$raw"
  fi
  if [[ "$isolate" != "true" ]]; then
    print_info "Docker isolation disabled (chaos.isolate=false)"
    return 0
  fi

  CHAOS_COMPOSE_CMD=$(_detect_compose_cmd)
  if [[ -z "$CHAOS_COMPOSE_CMD" ]]; then
    print_info "Docker not available — skipping isolation"
    return 0
  fi

  # Find compose file: config override first, then the standard names
  compose_file=$(get_config '.docker.composeFile' "")
  if [[ -n "$compose_file" && -f "$compose_file" ]]; then
    CHAOS_COMPOSE_FILE="$compose_file"
    CHAOS_ISOLATION_RESULT="true"
    return 0
  fi

  # `candidate` is local so scanning never overwrites a caller's variable
  for candidate in "docker-compose.yml" "docker-compose.yaml" "compose.yml" "compose.yaml"; do
    if [[ -f "$candidate" ]]; then
      CHAOS_COMPOSE_FILE="$candidate"
      CHAOS_ISOLATION_RESULT="true"
      return 0
    fi
  done

  print_info "No compose file found — skipping Docker isolation"
}
2190
+
2191
# Parse the compose file for published ports and generate an override file
# whose host ports are shifted by chaos.docker.portOffset (default: 10000).
# Globals read: CHAOS_COMPOSE_FILE
# Globals set:  CHAOS_OVERRIDE_FILE (path of the generated override)
# Returns: 1 when the offset is not a non-negative integer, when the compose
#          file uses network_mode: host, or when a shifted port exceeds 65535.
# Recognised mappings (optionally quoted):
#   HOST:CONTAINER and IP:HOST:CONTAINER, each with an optional /protocol
#   suffix that is carried over to the override.
# Other forms (ranges, long syntax, ${VAR} ports) are ignored.
#
# NOTE(review): Compose documents `ports` as a multi-value option that is
# concatenated when files are merged, so the base file's original host ports
# may still be published next to the shifted ones — verify against the
# Compose versions this tool supports.
_generate_chaos_override() {
  local port_offset
  port_offset=$(get_config '.chaos.docker.portOffset' "10000")
  if ! [[ "$port_offset" =~ ^[0-9]+$ ]]; then
    print_error "chaos.docker.portOffset must be a non-negative integer (got: ${port_offset})"
    return 1
  fi

  # Check for network_mode: host (at service-level indentation, 4+ spaces)
  if grep -qE '^[[:space:]]{4,}network_mode:[[:space:]]*"?host"?' "$CHAOS_COMPOSE_FILE" 2>/dev/null; then
    print_error "Compose file uses network_mode: host — cannot isolate ports"
    return 1
  fi

  local override_file
  override_file=$(create_temp_file ".chaos-override.yml")

  # Patterns live in variables so bash treats them as regexes, not literals.
  # Service key: exactly two spaces of indent, optional trailing comment.
  local service_re='^[[:space:]]{2}([a-zA-Z0-9._-]+):[[:space:]]*(#.*)?$'
  # ports: key nested under a service (4+ spaces).
  local ports_re='^[[:space:]]{4,}ports:[[:space:]]*(#.*)?$'
  # List item "[IP:]HOST:CONTAINER[/proto]" with an optional opening quote.
  local mapping_re='^[[:space:]]*-[[:space:]]*["'\'']?(([0-9.]+):)?([0-9]+):([0-9]+)(/[a-zA-Z]+)?'

  local current_service=""
  local in_ports=false
  local service_has_ports=false
  local line bind_ip host_port container_port proto new_port

  # Build override YAML
  echo "services:" > "$override_file"

  # `|| [[ -n "$line" ]]` keeps the last line when the file lacks a final newline
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $service_re ]]; then
      current_service="${BASH_REMATCH[1]}"
      in_ports=false
      service_has_ports=false
    fi

    if [[ "$line" =~ $ports_re ]]; then
      in_ports=true
      continue
    fi

    [[ "$in_ports" == "true" ]] || continue

    if [[ "$line" =~ $mapping_re ]]; then
      bind_ip="${BASH_REMATCH[2]}"
      host_port="${BASH_REMATCH[3]}"
      container_port="${BASH_REMATCH[4]}"
      proto="${BASH_REMATCH[5]}"
      # 10# forces base 10 so zero-padded values are not read as octal
      new_port=$((10#$host_port + 10#$port_offset))

      if [[ "$new_port" -gt 65535 ]]; then
        print_error "Port ${host_port}+${port_offset}=${new_port} exceeds 65535"
        print_error "Reduce chaos.docker.portOffset in .ralph/config.json"
        return 1
      fi

      # Write the service header once, on the service's first mapping
      if [[ "$service_has_ports" == "false" ]]; then
        printf '  %s:\n    ports:\n' "$current_service" >> "$override_file"
        service_has_ports=true
      fi

      printf '      - "%s%s:%s%s"\n' \
        "${bind_ip:+${bind_ip}:}" "$new_port" "$container_port" "$proto" \
        >> "$override_file"
    elif [[ ! "$line" =~ ^[[:space:]]*- ]] && [[ ! "$line" =~ ^[[:space:]]*$ ]] && [[ ! "$line" =~ ^[[:space:]]*# ]]; then
      # Non-list, non-blank, non-comment line means we exited the ports section
      in_ports=false
    fi
  done < "$CHAOS_COMPOSE_FILE"

  CHAOS_OVERRIDE_FILE="$override_file"
}
2280
+
2281
# Bring up the isolated Docker stack used by chaos-agent.
# Globals read: RALPH_DIR, CHAOS_COMPOSE_CMD, CHAOS_COMPOSE_FILE,
#               CHAOS_OVERRIDE_FILE (as set by _generate_chaos_override)
# Globals set:  CHAOS_FRONTEND_URL, CHAOS_API_URL — each only when the
#               corresponding configured URL contains an explicit port
# Returns: 1 when the override cannot be generated, the stack fails to start,
#          or the fallback health poll fails; 0 otherwise.
_chaos_docker_up() {
  # Sweep away containers left behind by an interrupted run
  _chaos_docker_down 2>/dev/null

  # Run in this shell (no command substitution) so CHAOS_OVERRIDE_FILE persists
  _generate_chaos_override || return 1

  local port_offset health_timeout
  port_offset=$(get_config '.chaos.docker.portOffset' "10000")
  health_timeout=$(get_config '.chaos.docker.healthTimeout' "120")

  # chaos.docker.build is read with jq directly because get_config treats a
  # boolean false as falsy and would substitute the default
  local should_build="true"
  local config="$RALPH_DIR/config.json"
  if [[ -f "$config" ]]; then
    local raw_build
    raw_build=$(jq -r 'if .chaos.docker.build == false then "false" elif .chaos.docker.build then .chaos.docker.build else "unset" end' "$config" 2>/dev/null)
    if [[ "$raw_build" != "unset" && "$raw_build" != "null" && -n "$raw_build" ]]; then
      should_build="$raw_build"
    fi
  fi

  local build_flag=""
  if [[ "$should_build" == "true" ]]; then
    build_flag="--build"
  fi

  # Use --wait only when this compose command advertises it
  local wait_flag=""
  if $CHAOS_COMPOSE_CMD up --help 2>&1 | grep -q '\-\-wait'; then
    wait_flag="--wait --wait-timeout $health_timeout"
  fi

  _log_uat "ISOLATE" "Starting Docker stack: $CHAOS_COMPOSE_CMD -p ralph-chaos up -d $build_flag $wait_flag"

  # The flag strings are intentionally word-split into separate arguments
  # shellcheck disable=SC2086
  if ! $CHAOS_COMPOSE_CMD -f "$CHAOS_COMPOSE_FILE" -f "$CHAOS_OVERRIDE_FILE" \
    -p ralph-chaos up -d $build_flag $wait_flag 2>&1; then
    print_error "Docker stack failed to start"
    _log_uat "ISOLATE" "Docker stack failed"
    _chaos_docker_down 2>/dev/null
    return 1
  fi

  # Without --wait, fall back to polling for readiness
  if [[ -z "$wait_flag" ]] && ! _chaos_poll_health "$port_offset" "$health_timeout"; then
    print_error "Health check timed out after ${health_timeout}s"
    _log_uat "ISOLATE" "Health check timeout"
    _chaos_docker_down 2>/dev/null
    return 1
  fi

  # Derive the isolated URLs: take the last ":<digits>" of each configured URL
  # (handles http://host:PORT/path) and add the offset
  local frontend_port api_port
  frontend_port=$(get_config '.urls.frontend' "http://localhost:5173" | grep -oE ':[0-9]+' | tail -1 | tr -d ':')
  api_port=$(get_config '.urls.api' "" | grep -oE ':[0-9]+' | tail -1 | tr -d ':')

  if [[ -n "$frontend_port" ]]; then
    CHAOS_FRONTEND_URL="http://localhost:$((frontend_port + port_offset))"
  fi
  if [[ -n "$api_port" ]]; then
    CHAOS_API_URL="http://localhost:$((api_port + port_offset))"
  fi

  _log_uat "ISOLATE" "Docker stack ready (frontend: ${CHAOS_FRONTEND_URL:-none}, api: ${CHAOS_API_URL:-none})"
  print_info "Isolated environment ready (frontend: ${CHAOS_FRONTEND_URL:-none}, api: ${CHAOS_API_URL:-none})"
  return 0
}
2350
+
2351
# Fallback readiness check for when `compose up --wait` is unavailable.
# Arguments:
#   $1 - port offset added to the configured API port
#   $2 - timeout in seconds
# Probes every 3 seconds until the timeout elapses:
#   - when .urls.api has a port: curl the health endpoint on the offset port
#   - otherwise: look for "running" in `compose ps --format json` output
# Globals read: CHAOS_COMPOSE_CMD
# Returns: 0 as soon as a probe succeeds, 1 on timeout.
_chaos_poll_health() {
  local port_offset="$1"
  local timeout="$2"
  local health_endpoint api_port url="" start_time now running

  health_endpoint=$(get_config '.api.healthEndpoint' "/health")
  api_port=$(get_config '.urls.api' "" | grep -oE ':[0-9]+' | tail -1 | tr -d ':')

  start_time=$(date +%s)

  if [[ -n "$api_port" ]]; then
    url="http://localhost:$((api_port + port_offset))${health_endpoint}"
    print_info "Waiting for health check at $url..."
  else
    # No API URL — just wait for containers to be running
    print_info "Waiting for containers to be running..."
  fi

  while true; do
    now=$(date +%s)
    [[ $((now - start_time)) -ge "$timeout" ]] && break

    if [[ -n "$url" ]]; then
      if curl -sf --max-time 5 "$url" > /dev/null 2>&1; then
        return 0
      fi
    else
      # grep -c prints the count even when it is 0, but then exits 1. Reset
      # the variable on failure rather than echoing a second "0", which
      # would turn the value into "0<newline>0" and break the numeric test.
      # shellcheck disable=SC2086
      running=$($CHAOS_COMPOSE_CMD -p ralph-chaos ps --format json 2>/dev/null |
        grep -c '"running"' 2>/dev/null) || running=0
      if [[ "$running" -gt 0 ]]; then
        return 0
      fi
    fi

    sleep 3
  done

  return 1
}
2397
+
2398
# Tear down the isolated chaos Docker stack (project "ralph-chaos") and clear
# the URL/override globals. Returns immediately, touching nothing, when no
# compose command or compose file has been recorded.
# Globals read:  CHAOS_COMPOSE_CMD, CHAOS_COMPOSE_FILE, CHAOS_OVERRIDE_FILE
# Globals reset: CHAOS_FRONTEND_URL, CHAOS_API_URL, CHAOS_OVERRIDE_FILE
_chaos_docker_down() {
  [[ -n "${CHAOS_COMPOSE_CMD:-}" && -n "${CHAOS_COMPOSE_FILE:-}" ]] || return 0

  # Always pass the base file; add the override only if it still exists on disk
  local -a compose_files=(-f "$CHAOS_COMPOSE_FILE")
  if [[ -n "${CHAOS_OVERRIDE_FILE:-}" && -f "${CHAOS_OVERRIDE_FILE:-}" ]]; then
    compose_files+=(-f "$CHAOS_OVERRIDE_FILE")
  fi

  # CHAOS_COMPOSE_CMD is expanded unquoted on purpose — it may hold more than one word
  $CHAOS_COMPOSE_CMD "${compose_files[@]}" \
    -p ralph-chaos down -v --timeout 10 2>/dev/null

  CHAOS_FRONTEND_URL=""
  CHAOS_API_URL=""
  CHAOS_OVERRIDE_FILE=""
}
2416
+
2417
+ # ============================================================================
2418
+ # SELF-LEARNING: ARCHIVE, AUTO-SIGN, HISTORY
2419
+ # ============================================================================
2420
+
2421
# Record a "security" sign for a case that chaos-agent has just fixed.
# Arguments:
#   $1 - test case ID to look up in the plan file
# Globals read: UAT_CONFIG_NS, UAT_PLAN_FILE
# Does nothing (returns 0) outside the "chaos" namespace, or when the case or
# its title cannot be found. The sign text is "title -- testApproach" (or the
# title alone), capped at 200 characters; duplicates are skipped.
_auto_sign_from_case() {
  local case_id="$1"

  # Signs are only generated for chaos-agent runs
  if [[ "$UAT_CONFIG_NS" != "chaos" ]]; then
    return 0
  fi

  local case_json title test_approach pattern

  # Pull the matching case out of the plan
  case_json=$(jq --arg id "$case_id" '.testCases[] | select(.id==$id)' "$UAT_PLAN_FILE" 2>/dev/null)
  if [[ -z "$case_json" ]]; then
    return 0
  fi

  title=$(jq -r '.title // empty' <<< "$case_json")
  if [[ -z "$title" ]]; then
    return 0
  fi

  test_approach=$(jq -r '.testApproach // empty' <<< "$case_json")

  # "title -- testApproach" when an approach exists, otherwise just the title
  pattern="${title}${test_approach:+ -- ${test_approach}}"

  # Keep at most the first 200 characters
  pattern="${pattern:0:200}"

  if _sign_is_duplicate "$pattern"; then
    _log_uat "$case_id" "AUTO_SIGN: Skipped duplicate — $pattern"
    return 0
  fi

  # ralph_sign's own output is discarded; the outcome is logged instead
  if ! ralph_sign "$pattern" "security" "true" "$case_id" > /dev/null 2>&1; then
    _log_uat "$case_id" "AUTO_SIGN: Failed to add sign"
    return
  fi

  _log_uat "$case_id" "AUTO_SIGN: Added [security] $pattern"
  print_info "Learned: [security] $pattern"
}
2463
+
2464
# Archive the current plan as <UAT_MODE_DIR>/archive/plan-<timestamp>.json.
# When git is available and ./.git is a directory, the current HEAD hash is
# stored in the copy as .testSuite.gitHash; if that jq rewrite fails, the plan
# is copied unmodified instead of leaving an empty archive behind.
# Globals read: UAT_MODE_DIR, UAT_PLAN_FILE
# Archiving is best-effort: failures are logged and the function returns 0 so
# a run is never aborted by the archive step.
_archive_plan() {
  local archive_dir="$UAT_MODE_DIR/archive"
  if ! mkdir -p "$archive_dir"; then
    _log_uat "ARCHIVE" "Failed to create archive directory: $archive_dir"
    return 0
  fi

  local timestamp
  timestamp=$(date +%Y%m%d-%H%M%S)

  local archive_file="$archive_dir/plan-${timestamp}.json"

  # Record current git hash in the archived plan
  local git_hash=""
  if command -v git &>/dev/null && [[ -d ".git" ]]; then
    git_hash=$(git rev-parse HEAD 2>/dev/null || echo "")
  fi

  local archived="false"
  if [[ -n "$git_hash" ]] &&
    jq --arg hash "$git_hash" '.testSuite.gitHash = $hash' "$UAT_PLAN_FILE" > "$archive_file" 2>/dev/null; then
    archived="true"
  elif cp "$UAT_PLAN_FILE" "$archive_file"; then
    # Either no hash to record, or jq failed: keep a plain copy of the plan
    archived="true"
  fi

  if [[ "$archived" != "true" ]]; then
    # Never leave a partial archive that a later run could mistake for a plan
    rm -f -- "$archive_file"
    _log_uat "ARCHIVE" "Failed to archive plan: $UAT_PLAN_FILE"
    return 0
  fi

  _prune_archives
  _log_uat "ARCHIVE" "Plan archived: $archive_file"
  print_info "Plan archived for future reference"
}
2490
+
2491
# Remove the oldest archived plans beyond the retention limit.
# Globals read: UAT_MODE_DIR, MAX_UAT_ARCHIVE_COUNT
# "Oldest" is decided by file name: archives are named plan-<timestamp>.json,
# so a plain byte-wise sort is chronological. This matches how
# _get_last_run_git_hash picks the latest archive and is unaffected by mtime
# changes from copies, restores or touches.
_prune_archives() {
  local archive_dir="$UAT_MODE_DIR/archive"
  [[ ! -d "$archive_dir" ]] && return 0

  # Collect archives oldest-first
  local -a archives=()
  local f
  while IFS= read -r f; do
    archives+=("$f")
  done < <(find "$archive_dir" -name 'plan-*.json' -type f 2>/dev/null | LC_ALL=C sort)

  local count=${#archives[@]}

  if [[ "$count" -gt "$MAX_UAT_ARCHIVE_COUNT" ]]; then
    local to_remove=$((count - MAX_UAT_ARCHIVE_COUNT))
    # The first $to_remove entries are the oldest; delete them in one call
    rm -f -- "${archives[@]:0:to_remove}"
    _log_uat "ARCHIVE" "Pruned $to_remove old archive(s)"
  fi
}
2511
+
# Print the git hash stored in the newest archived plan.
#
# "Newest" is decided by file name: archive names embed a
# YYYYMMDD-HHMMSS timestamp, so the lexically greatest name is the latest.
#
# Globals: UAT_MODE_DIR (read)
# Outputs: the value of .testSuite.gitHash on stdout
# Returns: 1 if there is no archive directory, no archive, or no hash.
_get_last_run_git_hash() {
  local dir="$UAT_MODE_DIR/archive"
  if [[ ! -d "$dir" ]]; then
    return 1
  fi

  # Ascending sort + last line == descending sort + first line.
  local newest
  newest=$(find "$dir" -name 'plan-*.json' -type f 2>/dev/null | sort | tail -n 1)
  if [[ -z "$newest" ]]; then
    return 1
  fi

  local recorded
  recorded=$(jq -r '.testSuite.gitHash // empty' "$newest" 2>/dev/null)
  if [[ -n "$recorded" ]]; then
    echo "$recorded"
  else
    return 1
  fi
}
2529
+
# List files changed since last run (excluding .ralph/).
#
# Prints one path per line for every file that differs between the commit
# recorded in the most recent archived plan and HEAD. Paths inside a
# directory named exactly `.ralph` (at any depth) are filtered out; paths
# that merely contain the text ".ralph/" (e.g. `notes.ralph/x`) are kept.
#
# Outputs: changed paths on stdout; nothing if there is no prior run, git is
#          unavailable, the current directory has no .git directory, or the
#          recorded commit no longer exists.
# Returns: always 0.
_get_changed_files_since_last_run() {
  command -v git &>/dev/null || return 0
  [[ -d ".git" ]] || return 0

  local last_hash
  last_hash=$(_get_last_run_git_hash) || return 0

  # Verify the hash still names a commit in this repository (it may be gone
  # after a force push). ^{commit} rejects values that resolve to some other
  # object type.
  if ! git rev-parse --verify --quiet "${last_hash}^{commit}" &>/dev/null; then
    return 0
  fi

  # grep exits 1 when every path is filtered out; that is not an error here.
  git diff --name-only "${last_hash}..HEAD" 2>/dev/null \
    | grep -Ev '(^|/)\.ralph/' || true
}
2546
+
# Build markdown summary of the last 5 archived plans.
#
# Emits a "Prior Run History" heading (with the total number of archives)
# followed by one "**Run N** (<timestamp>):" section per archive, newest
# first, each listing the archive's test cases as formatted by jq.
#
# Globals: UAT_MODE_DIR (read)
# Outputs: markdown on stdout; nothing if no archive exists.
_build_archive_summary() {
  local dir="$UAT_MODE_DIR/archive"
  if [[ ! -d "$dir" ]]; then
    return 0
  fi

  local recent
  recent=$(find "$dir" -name 'plan-*.json' -type f 2>/dev/null | sort -r | head -5)
  if [[ -z "$recent" ]]; then
    return 0
  fi

  local total
  total=$(find "$dir" -name 'plan-*.json' -type f 2>/dev/null | wc -l | tr -d ' ')

  # Pluralise "run" for anything other than exactly one archive.
  local plural=""
  if [ "$total" -ne 1 ]; then
    plural="s"
  fi

  echo ""
  echo "### Prior Run History ($total previous run$plural)"
  echo ""
  echo "These tests have ALREADY been run. Do NOT repeat them."
  echo ""

  local index=0
  local plan stamp
  while IFS= read -r plan; do
    if [[ -z "$plan" ]]; then
      continue
    fi
    index=$((index + 1))

    # plan-YYYYMMDD-HHMMSS.json -> YYYYMMDD-HHMMSS
    stamp=${plan##*/}
    stamp=${stamp%.json}
    stamp=${stamp#plan-}

    echo "**Run $index** ($stamp):"

    # One line per test case; an unreadable archive just yields no lines.
    jq -r '.testCases[] | " \(.id) [\(.category // "general")] \(.title) — \(if .passes then (if .skipped then "SKIPPED" else "PASSED" end) else "FAILED" end)"' \
      "$plan" 2>/dev/null || true

    echo ""
  done <<< "$recent"
}
2583
+
# Build markdown section listing files changed since last run.
#
# Outputs: a "Files Changed Since Last Run" heading with the file count,
#          a prioritisation hint, then the paths reported by
#          _get_changed_files_since_last_run; nothing when that list is empty.
_build_changed_files_section() {
  local changed
  changed=$(_get_changed_files_since_last_run)
  if [[ -z "$changed" ]]; then
    return 0
  fi

  local total
  total=$(echo "$changed" | wc -l | tr -d ' ')

  # Pluralise "file" for anything other than exactly one path.
  local plural=""
  if [ "$total" -ne 1 ]; then
    plural="s"
  fi

  printf '%s\n' \
    "" \
    "### Files Changed Since Last Run ($total file$plural)" \
    "" \
    "PRIORITIZE testing these files — they are most likely to have new vulnerabilities." \
    ""
  echo "$changed"
}
2600
+
2601
+ # ============================================================================
2602
+ # HELPERS
2603
+ # ============================================================================
2604
+
# Append project context to a prompt file.
#
# Sections are appended in this order, each only when applicable:
#   1. completed PRD stories (when $RALPH_DIR/prd.json exists)
#   2. a pointer to the project config (when $RALPH_DIR/config.json exists)
#   3. prior run history (_build_archive_summary)
#   4. files changed since the last run (_build_changed_files_section)
#   5. "Focus: New Ground Only" rules (when at least one archive exists)
#   6. signs (_inject_signs)
#
# Globals:   RALPH_DIR (read), UAT_MODE_DIR (read)
# Arguments: $1 - path of the prompt file to append to
_inject_prompt_context() {
  local prompt_file="$1"
  local prd_file="$RALPH_DIR/prd.json"
  local archive_dir="$UAT_MODE_DIR/archive"

  # Completed stories from the PRD, as a fenced JSON block
  if [[ -f "$prd_file" ]]; then
    {
      echo ""
      echo "### Completed Stories (from PRD)"
      echo ""
      echo "These features have been built and should be testable:"
      echo '```json'
      jq '[.stories[] | select(.passes==true) | {id, title, type, testUrl: .testUrl}]' \
        "$prd_file" 2>/dev/null
      echo '```'
    } >> "$prompt_file"
  fi

  # Pointer to the project config
  if [[ -f "$RALPH_DIR/config.json" ]]; then
    {
      echo ""
      echo "### Project Config"
      echo ""
      echo 'Read `.ralph/config.json` for URLs and directories.'
    } >> "$prompt_file"
  fi

  # What was already tested, then what to focus on
  _build_archive_summary >> "$prompt_file"
  _build_changed_files_section >> "$prompt_file"

  # "Do Not Repeat" rules only make sense once there is prior history
  if [[ -d "$archive_dir" \
    && -n "$(find "$archive_dir" -name 'plan-*.json' -type f 2>/dev/null | head -1)" ]]; then
    cat >> "$prompt_file" << 'FOCUS_RULES'

### Focus: New Ground Only

You have access to prior run history above. Follow these rules:
- Do NOT repeat tests that already passed in prior runs
- PRIORITIZE files changed since the last run
- Go DEEPER — find new attack vectors, edge cases, and cross-feature interactions
- If prior runs tested a feature superficially, test it more thoroughly
- Focus on interactions BETWEEN features (e.g., auth + forms, navigation + data)
FOCUS_RULES
  fi

  # Signs go last
  _inject_signs >> "$prompt_file"
}
2657
+
# Append one timestamped entry to the UAT progress log.
#
# Globals:   UAT_PROGRESS_FILE (appended to)
# Arguments: $1 - identifier the entry belongs to (e.g. a case id or "ARCHIVE")
#            $2 - message text
# Entry format: [<timestamp>] <id> <message>
_log_uat() {
  local entry_id="$1"
  local text="$2"

  # Prefer `date -Iseconds`; where that option is not supported, fall back
  # to an explicit format string.
  local now
  if ! now=$(date -Iseconds 2>/dev/null); then
    now=$(date +%Y-%m-%dT%H:%M:%S)
  fi

  echo "[$now] $entry_id $text" >> "$UAT_PROGRESS_FILE"
}