wiggum-cli 0.17.2 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +8 -2
  2. package/dist/agent/orchestrator.d.ts +1 -1
  3. package/dist/agent/orchestrator.js +19 -4
  4. package/dist/agent/tools/backlog.js +8 -4
  5. package/dist/agent/tools/execution.js +1 -1
  6. package/dist/agent/tools/introspection.js +26 -4
  7. package/dist/commands/config.js +96 -2
  8. package/dist/commands/run.d.ts +2 -0
  9. package/dist/commands/run.js +47 -2
  10. package/dist/generator/config.js +13 -2
  11. package/dist/index.js +7 -1
  12. package/dist/repl/command-parser.d.ts +1 -1
  13. package/dist/repl/command-parser.js +1 -1
  14. package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
  15. package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  16. package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  17. package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  18. package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  19. package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  20. package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  21. package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  22. package/dist/templates/root/README.md.tmpl +2 -3
  23. package/dist/templates/scripts/feature-loop.sh.tmpl +777 -90
  24. package/dist/templates/scripts/loop.sh.tmpl +5 -1
  25. package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
  26. package/dist/tui/app.d.ts +5 -1
  27. package/dist/tui/app.js +12 -2
  28. package/dist/tui/hooks/useAgentOrchestrator.js +16 -7
  29. package/dist/tui/hooks/useInit.d.ts +5 -1
  30. package/dist/tui/hooks/useInit.js +20 -2
  31. package/dist/tui/screens/InitScreen.js +12 -1
  32. package/dist/tui/screens/MainShell.js +70 -6
  33. package/dist/tui/screens/RunScreen.d.ts +6 -2
  34. package/dist/tui/screens/RunScreen.js +48 -6
  35. package/dist/tui/utils/loop-status.d.ts +15 -0
  36. package/dist/tui/utils/loop-status.js +89 -27
  37. package/dist/utils/config.d.ts +7 -0
  38. package/dist/utils/config.js +14 -0
  39. package/package.json +1 -1
  40. package/src/templates/config/ralph.config.cjs.tmpl +9 -2
  41. package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  42. package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  43. package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  44. package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  45. package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  46. package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  47. package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  48. package/src/templates/root/README.md.tmpl +2 -3
  49. package/src/templates/scripts/feature-loop.sh.tmpl +777 -90
  50. package/src/templates/scripts/loop.sh.tmpl +5 -1
  51. package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2
@@ -1,12 +1,14 @@
1
1
  #!/bin/bash
2
2
  # feature-loop.sh - Full feature workflow: branch -> implement -> E2E test -> PR -> review -> merge
3
3
  # Generated by ralph-cli for {{projectName}}
4
- # Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--review-mode MODE]
4
+ # Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]
5
5
  #
6
6
  # Options:
7
7
  # --worktree Use git worktree for isolation (enables parallel execution)
8
8
  # --resume Resume an interrupted loop (reuses existing branch/worktree)
9
- # --model MODEL Claude model to use (e.g., opus, sonnet, claude-sonnet-4-6)
9
+ # --model MODEL Model to use for coding/review CLI
10
+ # --cli CLI Implementation CLI: 'claude' | 'codex'
11
+ # --review-cli CLI Review CLI: 'claude' | 'codex'
10
12
  # --review-mode MODE Review mode: 'manual' (stop at PR), 'auto' (review, no merge), or 'merge' (review + merge). Default: 'manual'
11
13
 
12
14
  set -e
@@ -23,6 +25,13 @@ if [ -f "$SCRIPT_DIR/../ralph.config.cjs" ]; then
23
25
  PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
24
26
  DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
25
27
  PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
28
+ DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
29
+ DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
30
+ DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
31
+ CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
32
+ CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
33
+ CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
34
+ DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
26
35
  DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
27
36
  DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
28
37
  TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
@@ -34,6 +43,13 @@ elif [ -f "$SCRIPT_DIR/../../ralph.config.cjs" ]; then
34
43
  PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
35
44
  DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
36
45
  PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
46
+ DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
47
+ DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
48
+ DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
49
+ CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
50
+ CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
51
+ CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
52
+ DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
37
53
  DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
38
54
  DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
39
55
  TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
@@ -45,6 +61,13 @@ else
45
61
  PROMPTS_DIR=".ralph/prompts"
46
62
  DEFAULT_MODEL="sonnet"
47
63
  PLANNING_MODEL="opus"
64
+ DEFAULT_CODEX_MODEL="gpt-5.3-codex"
65
+ DEFAULT_CODING_CLI="claude"
66
+ DEFAULT_REVIEW_CLI="claude"
67
+ CLAUDE_PERMISSION_MODE="default"
68
+ CODEX_SANDBOX="workspace-write"
69
+ CODEX_APPROVAL_POLICY="never"
70
+ DISABLE_MCP_IN_AUTOMATED="true"
48
71
  DEFAULT_MAX_ITERATIONS="10"
49
72
  DEFAULT_MAX_E2E="5"
50
73
  TEST_COMMAND="npm test"
@@ -59,6 +82,8 @@ USE_WORKTREE=false
59
82
  RESUME=false
60
83
  MODEL=""
61
84
  REVIEW_MODE=""
85
+ CLI_OVERRIDE=""
86
+ REVIEW_CLI_OVERRIDE=""
62
87
  POSITIONAL=()
63
88
  while [[ $# -gt 0 ]]; do
64
89
  case $1 in
@@ -74,6 +99,14 @@ while [[ $# -gt 0 ]]; do
74
99
  MODEL="$2"
75
100
  shift 2
76
101
  ;;
102
+ --cli)
103
+ CLI_OVERRIDE="$2"
104
+ shift 2
105
+ ;;
106
+ --review-cli)
107
+ REVIEW_CLI_OVERRIDE="$2"
108
+ shift 2
109
+ ;;
77
110
  --review-mode)
78
111
  REVIEW_MODE="$2"
79
112
  shift 2
@@ -117,9 +150,163 @@ if [ "$REVIEW_MODE" != "manual" ] && [ "$REVIEW_MODE" != "auto" ] && [ "$REVIEW_
117
150
  exit 1
118
151
  fi
119
152
 
120
- # Build claude commands
121
- CLAUDE_CMD_OPUS="claude -p --output-format json --dangerously-skip-permissions --model ${PLANNING_MODEL}"
122
- CLAUDE_CMD_IMPL="claude -p --output-format json --dangerously-skip-permissions --model ${MODEL:-$DEFAULT_MODEL}"
153
+ # Resolve coding/review CLI from CLI > config > default
154
+ CODING_CLI="${CLI_OVERRIDE:-$DEFAULT_CODING_CLI}"
155
+ REVIEW_CLI="${REVIEW_CLI_OVERRIDE:-${DEFAULT_REVIEW_CLI:-$CODING_CLI}}"
156
+ DISABLE_MCP_IN_AUTOMATED_NORM=$(echo "$DISABLE_MCP_IN_AUTOMATED" | tr '[:upper:]' '[:lower:]')
157
+
158
+ # Validate CLI values
159
+ if [ "$CODING_CLI" != "claude" ] && [ "$CODING_CLI" != "codex" ]; then
160
+ echo "ERROR: Invalid --cli value '$CODING_CLI'. Allowed values are 'claude' or 'codex'." >&2
161
+ exit 1
162
+ fi
163
+
164
+ if [ "$REVIEW_CLI" != "claude" ] && [ "$REVIEW_CLI" != "codex" ]; then
165
+ echo "ERROR: Invalid --review-cli value '$REVIEW_CLI'. Allowed values are 'claude' or 'codex'." >&2
166
+ exit 1
167
+ fi
168
+
169
# Check whether a value is an accepted loop.claudePermissionMode setting.
# Arguments: $1 - candidate permission mode
# Returns:   0 if $1 is exactly one of the accepted modes, 1 otherwise
is_valid_claude_permission_mode() {
  local mode="$1"
  local accepted
  for accepted in acceptEdits bypassPermissions default dontAsk plan auto; do
    if [[ "$mode" == "$accepted" ]]; then
      return 0
    fi
  done
  return 1
}
175
+
176
# Check whether a value is an accepted loop.codexSandbox setting.
# Arguments: $1 - candidate sandbox mode
# Returns:   0 for read-only, workspace-write or danger-full-access; 1 otherwise
is_valid_codex_sandbox() {
  local sandbox="$1"
  if [[ "$sandbox" == "read-only" ]]; then
    return 0
  elif [[ "$sandbox" == "workspace-write" ]]; then
    return 0
  elif [[ "$sandbox" == "danger-full-access" ]]; then
    return 0
  fi
  return 1
}
182
+
183
# Check whether a value is an accepted loop.codexApprovalPolicy setting.
# Arguments: $1 - candidate approval policy
# Returns:   0 for untrusted, on-failure, on-request or never; 1 otherwise
is_valid_codex_approval_policy() {
  local policy="$1"
  [[ "$policy" == "untrusted" \
    || "$policy" == "on-failure" \
    || "$policy" == "on-request" \
    || "$policy" == "never" ]]
}
189
+
190
+ if ! is_valid_claude_permission_mode "$CLAUDE_PERMISSION_MODE"; then
191
+ echo "ERROR: Invalid loop.claudePermissionMode '$CLAUDE_PERMISSION_MODE' in ralph.config.cjs." >&2
192
+ exit 1
193
+ fi
194
+
195
+ if ! is_valid_codex_sandbox "$CODEX_SANDBOX"; then
196
+ echo "ERROR: Invalid loop.codexSandbox '$CODEX_SANDBOX' in ralph.config.cjs." >&2
197
+ exit 1
198
+ fi
199
+
200
+ if ! is_valid_codex_approval_policy "$CODEX_APPROVAL_POLICY"; then
201
+ echo "ERROR: Invalid loop.codexApprovalPolicy '$CODEX_APPROVAL_POLICY' in ralph.config.cjs." >&2
202
+ exit 1
203
+ fi
204
+
205
+ case "$DISABLE_MCP_IN_AUTOMATED_NORM" in
206
+ true|false) ;;
207
+ *)
208
+ echo "ERROR: Invalid loop.disableMcpInAutomatedRuns '$DISABLE_MCP_IN_AUTOMATED' in ralph.config.cjs. Use true or false." >&2
209
+ exit 1
210
+ ;;
211
+ esac
212
+
213
# Tell whether a model name is treated as Claude-specific.
# Arguments: $1 - model name
# Returns:   0 for the aliases sonnet/opus/haiku or any name starting with
#            "claude-"; 1 otherwise
is_claude_only_model() {
  local candidate="$1"
  if [[ "$candidate" == claude-* ]]; then
    return 0
  fi
  [[ "$candidate" == "sonnet" || "$candidate" == "opus" || "$candidate" == "haiku" ]]
}
220
+
221
# Print the model name to use for Codex phases.
# Globals:   MODEL (read), DEFAULT_CODEX_MODEL (read)
# Outputs:   MODEL when it is set and not Claude-specific, otherwise
#            DEFAULT_CODEX_MODEL
resolve_codex_model() {
  local chosen="${MODEL:-$DEFAULT_CODEX_MODEL}"
  if ! is_claude_only_model "$chosen"; then
    echo "$chosen"
    return
  fi
  # A Claude-specific name cannot be passed to Codex; fall back to the default.
  echo "$DEFAULT_CODEX_MODEL"
}
229
+
230
+ if [ -n "$MODEL" ] && { [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; }; then
231
+ if is_claude_only_model "$MODEL"; then
232
+ echo "WARNING: --model '$MODEL' is Claude-specific. Codex phases will use '$DEFAULT_CODEX_MODEL'." >&2
233
+ fi
234
+ fi
235
+
236
# Print the base command line for one CLI/model pair.
# Globals:   CLAUDE_PERMISSION_MODE, CODEX_APPROVAL_POLICY, CODEX_SANDBOX,
#            APP_DIR, RALPH_AUTOMATED, DISABLE_MCP_IN_AUTOMATED_NORM (all read)
# Arguments: $1 - CLI name ('claude' or 'codex'); $2 - model name
# Outputs:   the command string on stdout; an error on stderr for other CLIs
# Returns:   1 when $1 is neither 'claude' nor 'codex'
build_cli_cmd() {
  local cli="$1"
  local model="$2"

  if [[ "$cli" == "claude" ]]; then
    echo "claude -p --output-format json --permission-mode ${CLAUDE_PERMISSION_MODE} --model ${model}"
    return
  fi

  if [[ "$cli" == "codex" ]]; then
    local mcp_override=""
    # Avoid MCP startup deadlocks in unattended loop runs.
    if [[ "${RALPH_AUTOMATED:-}" == "1" && "$DISABLE_MCP_IN_AUTOMATED_NORM" == "true" ]]; then
      mcp_override=" -c 'mcp_servers={}'"
    fi
    # The embedded double quotes are part of the emitted string.
    echo "codex --ask-for-approval \"$CODEX_APPROVAL_POLICY\" --sandbox \"$CODEX_SANDBOX\" exec -C \"$APP_DIR\" --model \"${model}\"${mcp_override}"
    return
  fi

  echo "ERROR: Unsupported CLI '$cli'" >&2
  return 1
}
257
+
258
# Print the CLI assigned to a loop phase.
# Globals:   REVIEW_CLI (read), CODING_CLI (read)
# Arguments: $1 - phase name
# Outputs:   REVIEW_CLI for the 'review' phase, CODING_CLI for any other phase
get_phase_cli() {
  local phase="$1"
  if [[ "$phase" == "review" ]]; then
    echo "$REVIEW_CLI"
  else
    echo "$CODING_CLI"
  fi
}
269
+
270
# Print the model name to use for a loop phase.
# Globals:   PLANNING_MODEL, MODEL, DEFAULT_MODEL (all read)
# Arguments: $1 - phase name
# Outputs:   for a phase served by codex, whatever resolve_codex_model prints;
#            otherwise PLANNING_MODEL for 'planning'/'review' and
#            MODEL (falling back to DEFAULT_MODEL) for every other phase
get_phase_model() {
  local phase="$1"
  local phase_cli
  phase_cli=$(get_phase_cli "$phase")

  if [[ "$phase_cli" == "codex" ]]; then
    resolve_codex_model
  elif [[ "$phase" == "planning" || "$phase" == "review" ]]; then
    echo "$PLANNING_MODEL"
  else
    echo "${MODEL:-$DEFAULT_MODEL}"
  fi
}
288
+
289
# Print the full base command for a loop phase by combining the phase's CLI
# and model.
# Arguments: $1 - phase name
# Outputs:   whatever build_cli_cmd prints for the resolved CLI/model pair
get_phase_cmd() {
  local phase="$1"
  local phase_cli phase_model

  # Assignments stay separate from the declaration so a failing lookup is
  # not masked by 'local'.
  phase_cli=$(get_phase_cli "$phase")
  phase_model=$(get_phase_model "$phase")

  build_cli_cmd "$phase_cli" "$phase_model"
}
297
+
298
# Abort the script when a required CLI is not available.
# Arguments: $1 - command name to look up ('claude' and 'codex' have install hints)
# Outputs:   an error message on stderr when the command is not found
# Returns:   0 when `command -v` finds $1; otherwise calls `exit 1`, which
#            terminates the calling shell rather than returning.
check_cli_binary() {
  local cli="$1"
  local install_hint=""

  case "$cli" in
    claude) install_hint="npm install -g @anthropic-ai/claude-code" ;;
    codex) install_hint="npm install -g @openai/codex" ;;
    *) install_hint="" ;;
  esac

  if command -v "$cli" >/dev/null 2>&1; then
    return 0
  fi

  # Only mention an install command when one is known; otherwise the message
  # would end in a dangling "Install with: ".
  if [ -n "$install_hint" ]; then
    echo "ERROR: ${cli} CLI not found. Install with: ${install_hint}" >&2
  else
    echo "ERROR: ${cli} CLI not found." >&2
  fi
  exit 1
}
123
310
 
124
311
  # Automation footer appended to every prompt in automated mode.
125
312
  # Prevents interactive skill prompts from blocking headless sessions.
@@ -141,22 +328,59 @@ This is a fully automated session with no human operator. You MUST:
141
328
  '
142
329
  fi
143
330
 
144
# Helper: pipe a prompt file plus the automation footer to the selected CLI.
# Globals:   AUTOMATION_FOOTER, APP_DIR, LAST_MESSAGE_FILE (read);
#            LAST_RUN_CLI (written: 'codex' or 'claude')
# Arguments: $1 - prompt file, passed through envsubst before being sent
#            $2 - base command string; one starting with "codex" is eval'd
#                 from APP_DIR with JSON/last-message flags appended, any
#                 other value is executed via plain word splitting
# Outputs:   whatever the invoked CLI writes
run_claude_prompt() {
  local prompt_file="$1"
  local claude_cmd="$2"

  case "$claude_cmd" in
    codex*)
      LAST_RUN_CLI="codex"
      {
        cat "$prompt_file" | envsubst
        echo "$AUTOMATION_FOOTER"
      } | (
        cd "$APP_DIR" &&
          eval "$claude_cmd --json --output-last-message \"$LAST_MESSAGE_FILE\" -"
      )
      ;;
    *)
      LAST_RUN_CLI="claude"
      # Word splitting of $claude_cmd is intentional: it holds a whole command line.
      {
        cat "$prompt_file" | envsubst
        echo "$AUTOMATION_FOOTER"
      } | $claude_cmd
      ;;
  esac
}
343
+
344
# Helper: resume an existing session with a short continuation prompt.
# Globals:   AUTOMATION_FOOTER, APP_DIR, LAST_MESSAGE_FILE (read);
#            LAST_RUN_CLI (written: 'codex' or 'claude')
# Arguments: $1 - session id to resume
#            $2 - continuation prompt sent on stdin, followed by the footer
#            $3 - base command string for the CLI
# Outputs:   whatever the invoked CLI writes; a warning on stderr when the
#            resume arguments cannot be spliced into the command
# Returns:   1 when the command has no " exec " (codex) or " -p " (claude)
#            segment; otherwise the pipeline's status
run_claude_resume() {
  local session_id="$1"
  local continuation_prompt="$2"
  local claude_cmd="$3"

  if [[ "$claude_cmd" == codex* ]]; then
    LAST_RUN_CLI="codex"
    local resume_cmd="${claude_cmd/ exec / exec resume }"
    if [ "$resume_cmd" = "$claude_cmd" ]; then
      echo "WARNING: codex resume injection failed, exec segment not found in command" >&2
      return 1
    fi
    # codex exec resume does not accept -C/--cd; resume from APP_DIR instead.
    resume_cmd="${resume_cmd/ -C \"$APP_DIR\"/}"
    resume_cmd="${resume_cmd/ --cd \"$APP_DIR\"/}"
    { echo "$continuation_prompt"; echo "$AUTOMATION_FOOTER"; } | (cd "$APP_DIR" && eval "$resume_cmd \"$session_id\" - --json --output-last-message \"$LAST_MESSAGE_FILE\"")
  else
    LAST_RUN_CLI="claude"
    # Split the command around its first " -p " so --resume <id> can be
    # inserted right before the -p flag.
    local before_p="${claude_cmd%% -p *}"
    local after_p="${claude_cmd#* -p }"
    if [ "$before_p" = "$claude_cmd" ]; then
      echo "WARNING: --resume injection failed, -p flag not found in command" >&2
      return 1
    fi
    # This command is run by word splitting, not eval, so quote characters
    # embedded in the string would reach the CLI literally. Pass the session
    # id as its own quoted argument instead.
    # shellcheck disable=SC2086 # before_p/after_p hold whole argument lists
    { echo "$continuation_prompt"; echo "$AUTOMATION_FOOTER"; } | $before_p --resume "$session_id" -p $after_p
  fi
}
150
371
 
151
372
  # Token tracking
152
373
  TOKENS_FILE="/tmp/ralph-loop-${1}.tokens"
153
374
  CLAUDE_OUTPUT="/tmp/ralph-loop-${1}.output"
375
+ LAST_MESSAGE_FILE="/tmp/ralph-loop-${1}.last-message"
154
376
  STATUS_FILE="/tmp/ralph-loop-${1}.status"
155
377
  FINAL_STATUS_FILE="/tmp/ralph-loop-${1}.final"
156
378
  PHASES_FILE="/tmp/ralph-loop-${1}.phases"
157
379
  BASELINE_FILE="/tmp/ralph-loop-${1}.baseline"
380
+ PRE_RUN_DIRTY_FILE="/tmp/ralph-loop-${1}.dirty"
158
381
  SESSIONS_FILE="/tmp/ralph-loop-${1}.sessions"
159
382
  LOG_FILE="/tmp/ralph-loop-${1}.log"
383
+ LAST_RUN_CLI=""
160
384
 
161
385
  # Initialize token tracking (4-field format: input|output|cache_create|cache_read)
162
386
  init_tokens() {
@@ -165,15 +389,85 @@ init_tokens() {
165
389
  > "$LOG_FILE"
166
390
  }
167
391
 
168
- # Extract session result from JSON output.
392
+ # Extract session result from command output.
169
393
  # Writes human-readable result text to the .log file and captures session_id.
170
- # Usage: extract_session_result <raw_json_file>
394
+ # Usage: extract_session_result <raw_file> [cli]
171
395
  # Sets: LAST_SESSION_ID variable
172
396
  extract_session_result() {
173
397
  local raw_file="$1"
398
+ local cli="${2:-$LAST_RUN_CLI}"
174
399
  LAST_SESSION_ID=""
175
400
  if [ ! -f "$raw_file" ]; then return; fi
176
401
 
402
+ if [ "$cli" = "codex" ]; then
403
+ local result
404
+ result=$(python3 -c "
405
+ import json, sys
406
+ session = ''
407
+ for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
408
+ line = line.strip()
409
+ if not line:
410
+ continue
411
+ try:
412
+ obj = json.loads(line)
413
+ except Exception:
414
+ continue
415
+ stack = [obj]
416
+ while stack:
417
+ cur = stack.pop()
418
+ if isinstance(cur, dict):
419
+ for key in ('session_id', 'sessionId', 'conversation_id', 'conversationId', 'thread_id', 'threadId', 'response_id', 'responseId', 'run_id', 'runId'):
420
+ val = cur.get(key)
421
+ if isinstance(val, str) and val:
422
+ session = val
423
+ # Newer Codex JSON can nest thread/session identifiers under typed objects.
424
+ node_type = cur.get('type')
425
+ if node_type in ('thread.started', 'session.started'):
426
+ val = cur.get('id')
427
+ if isinstance(val, str) and val:
428
+ session = val
429
+ thread_obj = cur.get('thread')
430
+ if isinstance(thread_obj, dict):
431
+ val = thread_obj.get('id')
432
+ if isinstance(val, str) and val:
433
+ session = val
434
+ for val in cur.values():
435
+ if isinstance(val, (dict, list)):
436
+ stack.append(val)
437
+ elif isinstance(cur, list):
438
+ for val in cur:
439
+ if isinstance(val, (dict, list)):
440
+ stack.append(val)
441
+ print(session)
442
+ " "$raw_file" 2>/dev/null) || true
443
+
444
+ LAST_SESSION_ID="$result"
445
+ if [ -n "$LAST_SESSION_ID" ]; then
446
+ echo "$LAST_SESSION_ID" >> "$SESSIONS_FILE"
447
+ fi
448
+
449
+ if [ -f "$LAST_MESSAGE_FILE" ]; then
450
+ cat "$LAST_MESSAGE_FILE" >> "$LOG_FILE" 2>/dev/null || true
451
+ echo "" >> "$LOG_FILE"
452
+ else
453
+ python3 -c "
454
+ import json, sys
455
+ for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
456
+ try:
457
+ obj = json.loads(line)
458
+ except Exception:
459
+ continue
460
+ if not isinstance(obj, dict):
461
+ continue
462
+ for key in ('output_text', 'text', 'content'):
463
+ val = obj.get(key)
464
+ if isinstance(val, str) and val.strip():
465
+ print(val.strip())
466
+ " "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
467
+ fi
468
+ return
469
+ fi
470
+
177
471
  local result
178
472
  result=$(python3 -c "
179
473
  import json, sys
@@ -210,29 +504,109 @@ except Exception:
210
504
  " "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
211
505
  }
212
506
 
213
- # Accumulate tokens from a session JSONL file into the .tokens file.
214
- # Usage: accumulate_tokens_from_session <session_id>
507
+ # Accumulate tokens into the .tokens file.
508
+ # Usage: accumulate_tokens_from_session <session_id> [raw_file] [cli]
215
509
  accumulate_tokens_from_session() {
216
510
  local session_id="$1"
217
- if [ -z "$session_id" ]; then return; fi
511
+ local raw_file="${2:-}"
512
+ local cli="${3:-$LAST_RUN_CLI}"
218
513
 
219
- # Find the JSONL file for this session
220
- local jsonl_file=""
221
- for f in ~/.claude/projects/*/"${session_id}.jsonl"; do
222
- if [ -f "$f" ]; then
223
- jsonl_file="$f"
224
- break
514
+ local s_input=0
515
+ local s_output=0
516
+ local s_cache_create=0
517
+ local s_cache_read=0
518
+
519
+ if [ "$cli" = "codex" ]; then
520
+ if [ -z "$raw_file" ] || [ ! -f "$raw_file" ]; then
521
+ return
225
522
  fi
226
- done
227
523
 
228
- if [ -z "$jsonl_file" ]; then
229
- echo "WARNING: Could not find JSONL for session $session_id" >&2
230
- return
231
- fi
524
+ local session_tokens
525
+ session_tokens=$(python3 -c "
526
+ import json, sys
527
+
528
+ def to_int(v):
529
+ try:
530
+ return int(v)
531
+ except Exception:
532
+ return 0
533
+
534
+ def usage_pair(usage):
535
+ input_tokens = (
536
+ to_int(usage.get('input_tokens'))
537
+ or to_int(usage.get('inputTokens'))
538
+ or to_int(usage.get('prompt_tokens'))
539
+ or to_int(usage.get('promptTokens'))
540
+ )
541
+ output_tokens = (
542
+ to_int(usage.get('output_tokens'))
543
+ or to_int(usage.get('outputTokens'))
544
+ or to_int(usage.get('completion_tokens'))
545
+ or to_int(usage.get('completionTokens'))
546
+ )
547
+ return input_tokens, output_tokens
548
+
549
+ # Codex JSONL often contains repeated/cumulative usage in multiple events.
550
+ # Use the highest observed values from a single run to avoid overcounting.
551
+ max_input = 0
552
+ max_output = 0
553
+
554
+ for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
555
+ line = line.strip()
556
+ if not line:
557
+ continue
558
+ try:
559
+ obj = json.loads(line)
560
+ except Exception:
561
+ continue
562
+ stack = [obj]
563
+ while stack:
564
+ cur = stack.pop()
565
+ if isinstance(cur, dict):
566
+ if 'usage' in cur and isinstance(cur['usage'], dict):
567
+ usage = cur['usage']
568
+ u_in, u_out = usage_pair(usage)
569
+ if u_in > max_input:
570
+ max_input = u_in
571
+ if u_out > max_output:
572
+ max_output = u_out
573
+ for val in cur.values():
574
+ if isinstance(val, (dict, list)):
575
+ stack.append(val)
576
+ elif isinstance(cur, list):
577
+ for val in cur:
578
+ if isinstance(val, (dict, list)):
579
+ stack.append(val)
580
+
581
+ print(f\"{max_input}|{max_output}|0|0\")
582
+ " "$raw_file" 2>/dev/null) || true
583
+
584
+ if [ -n "$session_tokens" ]; then
585
+ s_input=$(echo "$session_tokens" | cut -d'|' -f1)
586
+ s_output=$(echo "$session_tokens" | cut -d'|' -f2)
587
+ s_cache_create=0
588
+ s_cache_read=0
589
+ fi
590
+ else
591
+ if [ -z "$session_id" ]; then return; fi
592
+
593
+ # Find the JSONL file for this session
594
+ local jsonl_file=""
595
+ for f in ~/.claude/projects/*/"${session_id}.jsonl"; do
596
+ if [ -f "$f" ]; then
597
+ jsonl_file="$f"
598
+ break
599
+ fi
600
+ done
232
601
 
233
- # Extract and sum token usage from all assistant messages
234
- local session_tokens
235
- session_tokens=$(python3 -c "
602
+ if [ -z "$jsonl_file" ]; then
603
+ echo "WARNING: Could not find JSONL for session $session_id" >&2
604
+ return
605
+ fi
606
+
607
+ # Extract and sum token usage from all assistant messages
608
+ local session_tokens
609
+ session_tokens=$(python3 -c "
236
610
  import json, sys
237
611
  totals = {'input': 0, 'output': 0, 'cache_create': 0, 'cache_read': 0}
238
612
  for line in open(sys.argv[1]):
@@ -252,14 +626,19 @@ for line in open(sys.argv[1]):
252
626
  print(f\"{totals['input']}|{totals['output']}|{totals['cache_create']}|{totals['cache_read']}\")
253
627
  " "$jsonl_file" 2>/dev/null) || true
254
628
 
255
- if [ -z "$session_tokens" ]; then return; fi
629
+ if [ -z "$session_tokens" ]; then return; fi
630
+
631
+ # Parse session tokens
632
+ s_input=$(echo "$session_tokens" | cut -d'|' -f1)
633
+ s_output=$(echo "$session_tokens" | cut -d'|' -f2)
634
+ s_cache_create=$(echo "$session_tokens" | cut -d'|' -f3)
635
+ s_cache_read=$(echo "$session_tokens" | cut -d'|' -f4)
636
+ fi
256
637
 
257
- # Parse session tokens
258
- local s_input s_output s_cache_create s_cache_read
259
- s_input=$(echo "$session_tokens" | cut -d'|' -f1)
260
- s_output=$(echo "$session_tokens" | cut -d'|' -f2)
261
- s_cache_create=$(echo "$session_tokens" | cut -d'|' -f3)
262
- s_cache_read=$(echo "$session_tokens" | cut -d'|' -f4)
638
+ [[ "$s_input" =~ ^[0-9]+$ ]] || s_input=0
639
+ [[ "$s_output" =~ ^[0-9]+$ ]] || s_output=0
640
+ [[ "$s_cache_create" =~ ^[0-9]+$ ]] || s_cache_create=0
641
+ [[ "$s_cache_read" =~ ^[0-9]+$ ]] || s_cache_read=0
263
642
 
264
643
  # Read current totals
265
644
  local current c_input c_output c_cache_create c_cache_read
@@ -396,7 +775,51 @@ detect_plan_format() {
396
775
  echo "unknown"
397
776
  }
398
777
 
399
- # Check if any tracked files were changed since baseline.
778
# Snapshot tracked dirty files before loop execution starts.
# Each entry stores <path>\t<working-tree blob hash or __MISSING__ marker>.
# Globals:   PRE_RUN_DIRTY_FILE (read; the file is truncated, then rewritten)
# Arguments: $1 - baseline commit; when empty only the truncation happens
# Notes:     best-effort - python/git errors are silenced and never fail the caller
capture_pre_run_dirty_snapshot() {
  local baseline="$1"

  : > "$PRE_RUN_DIRTY_FILE"
  [ -n "$baseline" ] || return 0

  python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || true
import os
import subprocess
import sys

MISSING = "__MISSING__"
HASH_ERROR = "__HASH_ERROR__"


def list_changed(baseline):
    """Return paths that `git diff --name-only <baseline>` reports, as str."""
    try:
        raw_lines = subprocess.check_output(
            ["git", "diff", "--name-only", baseline],
            stderr=subprocess.DEVNULL,
        ).splitlines()
    except Exception:
        return []
    return [
        raw.decode("utf-8", errors="surrogateescape")
        for raw in raw_lines
        if raw
    ]


def marker_for(path):
    """Return the blob hash of the working-tree file, or a sentinel marker."""
    if not os.path.exists(path):
        return MISSING
    try:
        return subprocess.check_output(
            ["git", "hash-object", "--", path],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except Exception:
        return HASH_ERROR


baseline, output_path = sys.argv[1], sys.argv[2]
changed = list_changed(baseline)

with open(output_path, "w", encoding="utf-8", errors="surrogateescape") as fh:
    for path in changed:
        fh.write(f"{path}\t{marker_for(path)}\n")
PY
}
820
+
821
+ # Check if any tracked files were changed since baseline (excluding unchanged
822
+ # tracked edits that already existed before the loop started).
400
823
  # Counts ALL file types (code, docs, config) — not just source code.
401
824
  count_file_changes() {
402
825
  local baseline="$1"
@@ -405,14 +828,90 @@ count_file_changes() {
405
828
  return
406
829
  fi
407
830
  local count
408
- count=$(git diff --name-only "${baseline}..HEAD" 2>/dev/null | wc -l | tr -d ' ') || count=0
831
+ # Compare baseline commit against working tree; subtract pre-run dirty files
832
+ # unless that specific file changed again after loop start.
833
+ count=$(python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || echo "0"
834
+ import os
835
+ import subprocess
836
+ import sys
837
+
838
+ baseline = sys.argv[1]
839
+ snapshot_path = sys.argv[2]
840
+ snapshot = {}
841
+
842
+ if os.path.exists(snapshot_path):
843
+ with open(snapshot_path, "r", encoding="utf-8", errors="surrogateescape") as fh:
844
+ for line in fh:
845
+ line = line.rstrip("\n")
846
+ if not line or "\t" not in line:
847
+ continue
848
+ path, marker = line.split("\t", 1)
849
+ snapshot[path] = marker
850
+
851
+ try:
852
+ changed = subprocess.check_output(
853
+ ["git", "diff", "--name-only", baseline],
854
+ stderr=subprocess.DEVNULL,
855
+ ).splitlines()
856
+ except Exception:
857
+ print("0")
858
+ raise SystemExit(0)
859
+
860
+ count = 0
861
+ for raw_path in changed:
862
+ if not raw_path:
863
+ continue
864
+ path = raw_path.decode("utf-8", errors="surrogateescape")
865
+ start_marker = snapshot.get(path)
866
+ if start_marker is None:
867
+ count += 1
868
+ continue
869
+
870
+ current_marker = "__MISSING__"
871
+ if os.path.exists(path):
872
+ try:
873
+ current_marker = subprocess.check_output(
874
+ ["git", "hash-object", "--", path],
875
+ stderr=subprocess.DEVNULL,
876
+ text=True,
877
+ ).strip()
878
+ except Exception:
879
+ current_marker = "__HASH_ERROR__"
880
+ if current_marker != start_marker:
881
+ count += 1
882
+
883
+ print(count)
884
+ PY
885
+ )
409
886
  echo "$count"
410
887
  }
411
888
 
412
- # Extract review findings text from a Claude JSON output file.
889
+ # Extract review findings text from command output.
413
890
  # Returns the result text from the last result entry.
414
891
  extract_review_findings() {
415
892
  local raw_file="$1"
893
+ local cli="${2:-$LAST_RUN_CLI}"
894
+ if [ "$cli" = "codex" ]; then
895
+ if [ -f "$LAST_MESSAGE_FILE" ]; then
896
+ cat "$LAST_MESSAGE_FILE" 2>/dev/null || echo "No review output available"
897
+ return
898
+ fi
899
+ python3 -c "
900
+ import json, sys
901
+ for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
902
+ try:
903
+ obj = json.loads(line)
904
+ except Exception:
905
+ continue
906
+ if isinstance(obj, dict):
907
+ for key in ('output_text', 'text', 'content'):
908
+ val = obj.get(key)
909
+ if isinstance(val, str) and val.strip():
910
+ print(val.strip())
911
+ " "$raw_file" 2>/dev/null || echo "No review output available"
912
+ return
913
+ fi
914
+
416
915
  python3 -c "
417
916
  import json, sys
418
917
  try:
@@ -428,11 +927,19 @@ except Exception:
428
927
  }
429
928
 
430
929
  # Run a fix iteration based on code review findings.
431
- # Pipes the review output into Claude for targeted fixes.
930
+ # Pipes the review output into the implementation CLI for targeted fixes.
432
931
  run_review_fix() {
433
932
  local findings
933
+ local impl_cli="$CODING_CLI"
934
+ local impl_cmd="$IMPL_CMD"
434
935
  findings=$(extract_review_findings "${CLAUDE_OUTPUT}.raw")
435
- cat <<FIXEOF | $CLAUDE_CMD_IMPL 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
936
+ if [ "$impl_cli" = "codex" ]; then
937
+ LAST_RUN_CLI="codex"
938
+ impl_cmd="$IMPL_CMD --json --output-last-message \"$LAST_MESSAGE_FILE\""
939
+ else
940
+ LAST_RUN_CLI="claude"
941
+ fi
942
+ cat <<FIXEOF | eval "$impl_cmd" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
436
943
  ## Code Review Findings
437
944
 
438
945
  The following issues were found during code review:
@@ -447,8 +954,8 @@ Fix each issue listed above. Run git diff $DEFAULT_BRANCH to see the current cha
447
954
  3. Commit and push the fixes
448
955
  Do NOT propose completion options or ask interactive questions. Just fix, test, commit, push.
449
956
  FIXEOF
450
- extract_session_result "${CLAUDE_OUTPUT}.raw"
451
- accumulate_tokens_from_session "$LAST_SESSION_ID"
957
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$impl_cli"
958
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$impl_cli"
452
959
  }
453
960
 
454
961
  # Normalize test failure lines: extract test name, strip timing, deduplicate.
@@ -523,7 +1030,7 @@ write_phase_end() {
523
1030
  # Initialize phase tracking
524
1031
  > "$PHASES_FILE"
525
1032
 
526
- FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL]}"
1033
+ FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]}"
527
1034
  # Sanitize feature name to prevent path traversal and shell injection when used in temp file paths
528
1035
  if [[ ! "$FEATURE" =~ ^[a-zA-Z0-9][a-zA-Z0-9_-]*$ ]]; then
529
1036
  echo "ERROR: Feature name must start with alphanumeric and contain only letters, numbers, hyphens, and underscores." >&2
@@ -538,6 +1045,15 @@ SPEC_FILE="$SPEC_DIR/${FEATURE}.md"
538
1045
  PLAN_FILE="$SPEC_DIR/${FEATURE}-implementation-plan.md"
539
1046
  BRANCH="feat/${FEATURE}"
540
1047
  APP_DIR="$(pwd)"
1048
+ PLANNING_CMD=$(get_phase_cmd "planning")
1049
+ IMPL_CMD=$(get_phase_cmd "implementation")
1050
+ REVIEW_CMD=$(get_phase_cmd "review")
1051
+
1052
+ # Fail fast if required CLIs are not installed.
1053
+ check_cli_binary "$CODING_CLI"
1054
+ if [ "$REVIEW_CLI" != "$CODING_CLI" ]; then
1055
+ check_cli_binary "$REVIEW_CLI"
1056
+ fi
541
1057
 
542
1058
  echo "=========================================="
543
1059
  echo "Ralph Loop: $FEATURE"
@@ -547,9 +1063,26 @@ echo "Branch: $BRANCH"
547
1063
  echo "App dir: $APP_DIR"
548
1064
  echo "Worktree mode: $USE_WORKTREE"
549
1065
  echo "Resume mode: $RESUME"
1066
+ echo "Coding CLI (impl/e2e): $CODING_CLI"
1067
+ echo "Review CLI: $REVIEW_CLI"
550
1068
  echo "Review mode: $REVIEW_MODE"
551
- echo "Model (planning): $PLANNING_MODEL"
552
- echo "Model (impl): ${MODEL:-$DEFAULT_MODEL}"
1069
+ echo "Claude permission mode: $CLAUDE_PERMISSION_MODE"
1070
+ echo "Codex sandbox: $CODEX_SANDBOX"
1071
+ echo "Codex approval policy: $CODEX_APPROVAL_POLICY"
1072
+ if [ "${RALPH_AUTOMATED:-}" = "1" ]; then
1073
+ echo "Disable MCP in automated runs: $DISABLE_MCP_IN_AUTOMATED_NORM"
1074
+ fi
1075
+ if [ "$CODING_CLI" = "codex" ] && [ "$REVIEW_CLI" = "codex" ]; then
1076
+ echo "Model (all phases): $(resolve_codex_model)"
1077
+ else
1078
+ if [ "$CODING_CLI" = "claude" ] || [ "$REVIEW_CLI" = "claude" ]; then
1079
+ echo "Model (Claude planning/review): $PLANNING_MODEL"
1080
+ echo "Model (Claude impl/e2e): ${MODEL:-$DEFAULT_MODEL}"
1081
+ fi
1082
+ if [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; then
1083
+ echo "Model (Codex phases): $(resolve_codex_model)"
1084
+ fi
1085
+ fi
553
1086
  echo "Max iterations: $MAX_ITERATIONS"
554
1087
  echo "Max E2E attempts: $MAX_E2E_ATTEMPTS"
555
1088
  echo "=========================================="
@@ -588,7 +1121,7 @@ fi
588
1121
  # tasks to be checked — the checkboxes may be stale if the work shipped under a
589
1122
  # different branch name that never updated this plan file.
590
1123
  if [ -f "$PLAN_FILE" ]; then
591
- _DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
1124
+ _DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
592
1125
  if [ -z "$_DIFF_STAT" ]; then
593
1126
  echo "Plan exists but branch has no diff to $DEFAULT_BRANCH — work already merged."
594
1127
  > "$PHASES_FILE"
@@ -612,6 +1145,7 @@ if git rev-parse --git-dir > /dev/null 2>&1; then
612
1145
  if [ -n "$BASELINE_COMMIT" ]; then
613
1146
  echo "$BASELINE_COMMIT" > "$BASELINE_FILE"
614
1147
  echo "Baseline commit: $BASELINE_COMMIT"
1148
+ capture_pre_run_dirty_snapshot "$BASELINE_COMMIT"
615
1149
  fi
616
1150
  fi
617
1151
 
@@ -634,14 +1168,14 @@ echo "0|$MAX_ITERATIONS|$(date +%s)" > "$STATUS_FILE"
634
1168
  if [ ! -f "$PLAN_FILE" ]; then
635
1169
  echo "======================== PLANNING PHASE ========================"
636
1170
  write_phase_start "planning"
637
- export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
638
- run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || {
1171
+ export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
1172
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$PLANNING_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || {
639
1173
  echo "ERROR: Planning phase failed"
640
1174
  write_phase_end "planning" "failed"
641
1175
  exit 1
642
1176
  }
643
- extract_session_result "${CLAUDE_OUTPUT}.raw"
644
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1177
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1178
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
645
1179
  write_phase_end "planning" "success"
646
1180
  else
647
1181
  echo "Plan file exists, skipping planning phase"
@@ -693,10 +1227,30 @@ while true; do
693
1227
  TASKS_BEFORE=$(count_pending_tasks "$PLAN_FILE")
694
1228
  echo "Legacy plan format — relying on source-file gate for completion."
695
1229
  fi
696
- export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
697
- run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$CLAUDE_CMD_IMPL" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
698
- extract_session_result "${CLAUDE_OUTPUT}.raw"
699
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1230
+ export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
1231
+ # Continuation prompt for implementation loop iterations 2+
1232
+ CONTINUATION_PROMPT="Continue implementing the remaining tasks in the implementation plan at $SPEC_DIR/${FEATURE}-implementation-plan.md.
1233
+ Check off completed tasks as you go. Skip any E2E testing tasks.
1234
+ Run validation (lint, typecheck, test) after completing tasks."
1235
+ if [ $ITERATION -eq 1 ] || [ -z "$LAST_SESSION_ID" ]; then
1236
+ echo "Mode: fresh"
1237
+ run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1238
+ else
1239
+ echo "Mode: resume (session: $LAST_SESSION_ID)"
1240
+ RESUME_EXIT=0
1241
+ run_claude_resume "$LAST_SESSION_ID" "$CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || RESUME_EXIT=$?
1242
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1243
+ if [ $RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
1244
+ if [ $RESUME_EXIT -ne 0 ]; then
1245
+ echo "Resume failed (resume_exit_nonzero: exit=$RESUME_EXIT). Fallback: using fresh prompt"
1246
+ else
1247
+ echo "Resume failed (resume_no_session_id). Fallback: using fresh prompt"
1248
+ fi
1249
+ run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1250
+ fi
1251
+ fi
1252
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1253
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
700
1254
 
701
1255
  # Check if any progress was made
702
1256
  TASKS_AFTER=$(count_pending_tasks "$PLAN_FILE")
@@ -752,6 +1306,20 @@ if [ "$IMPL_SUCCESS" = true ]; then
752
1306
  fi
753
1307
  fi
754
1308
 
1309
+ # Stop early when implementation failed to avoid wasting E2E/review cycles.
1310
+ if [ "$IMPL_SUCCESS" != true ]; then
1311
+ echo "Implementation phase failed. Skipping remaining phases."
1312
+ write_phase_start "e2e_testing"
1313
+ write_phase_end "e2e_testing" "skipped"
1314
+ write_phase_start "verification"
1315
+ write_phase_end "verification" "skipped"
1316
+ write_phase_start "pr_review"
1317
+ write_phase_end "pr_review" "skipped"
1318
+ echo "$ITERATION|$MAX_ITERATIONS|$(date +%s)|failed" > "$FINAL_STATUS_FILE"
1319
+ rm -f "$STATUS_FILE" 2>/dev/null || true
1320
+ exit 1
1321
+ fi
1322
+
755
1323
  # Phase 5: E2E Testing
756
1324
  echo "======================== E2E TESTING PHASE ========================"
757
1325
  E2E_TOTAL=$({ grep "^- \[.\].*E2E:" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
@@ -763,14 +1331,32 @@ else
763
1331
  write_phase_start "e2e_testing"
764
1332
  E2E_SUCCESS=false
765
1333
  E2E_ATTEMPT=0
1334
+ E2E_SESSION_ID=""
1335
+ E2E_CONTINUATION_PROMPT="Continue remaining E2E scenarios. Check the implementation plan for unchecked \`- [ ] E2E:\` entries and implement/run those tests. Run validation after completing each scenario."
766
1336
  while [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; do
767
1337
  E2E_ATTEMPT=$((E2E_ATTEMPT + 1))
1338
+ echo "$E2E_ATTEMPT|$MAX_E2E_ATTEMPTS|$(date +%s)" > "$STATUS_FILE"
768
1339
  echo "------------------------ E2E Attempt $E2E_ATTEMPT of $MAX_E2E_ATTEMPTS ------------------------"
769
1340
 
770
- export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
771
- run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$CLAUDE_CMD_IMPL" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
772
- extract_session_result "${CLAUDE_OUTPUT}.raw"
773
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1341
+ export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
1342
+ if [ $E2E_ATTEMPT -eq 1 ] || [ -z "$E2E_SESSION_ID" ]; then
1343
+ echo "E2E attempt $E2E_ATTEMPT: using full prompt"
1344
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1345
+ else
1346
+ echo "E2E attempt $E2E_ATTEMPT: using resume session $E2E_SESSION_ID"
1347
+ E2E_RESUME_EXIT=0
1348
+ run_claude_resume "$E2E_SESSION_ID" "$E2E_CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_RESUME_EXIT=$?
1349
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1350
+ if [ $E2E_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
1351
+ echo "E2E attempt $E2E_ATTEMPT: resume unavailable, using full prompt"
1352
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1353
+ fi
1354
+ fi
1355
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1356
+ if [ -n "$LAST_SESSION_ID" ]; then
1357
+ E2E_SESSION_ID="$LAST_SESSION_ID"
1358
+ fi
1359
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
774
1360
 
775
1361
  # Check if all E2E tests passed
776
1362
  E2E_FAILED=$({ grep "^- \[ \].*E2E:.*FAILED" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
@@ -784,9 +1370,24 @@ else
784
1370
 
785
1371
  if [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; then
786
1372
  echo "E2E tests have failures. Running fix iteration..."
787
- run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$CLAUDE_CMD_IMPL" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
788
- extract_session_result "${CLAUDE_OUTPUT}.raw"
789
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1373
+ if [ -n "$E2E_SESSION_ID" ]; then
1374
+ echo "E2E fix: using resume session $E2E_SESSION_ID"
1375
+ E2E_FIX_EXIT=0
1376
+ run_claude_resume "$E2E_SESSION_ID" "Fix the failing E2E tests identified above. Run validation after fixing." "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_FIX_EXIT=$?
1377
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1378
+ if [ $E2E_FIX_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
1379
+ echo "E2E fix: resume unavailable, using full prompt"
1380
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1381
+ fi
1382
+ else
1383
+ echo "E2E fix: resume unavailable, using full prompt"
1384
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1385
+ fi
1386
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1387
+ if [ -n "$LAST_SESSION_ID" ]; then
1388
+ E2E_SESSION_ID="$LAST_SESSION_ID"
1389
+ fi
1390
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
790
1391
  fi
791
1392
  done
792
1393
 
@@ -797,21 +1398,16 @@ else
797
1398
  fi
798
1399
  fi
799
1400
 
800
- # Phase 6: Spec Verification
801
- echo "======================== SPEC VERIFICATION PHASE ========================"
1401
+ # Phase 6: Spec Verification (merged into review phase)
1402
+ # Verification responsibilities (spec status, acceptance criteria, README updates)
1403
+ # are now handled in Step 0 of the review prompt templates.
1404
+ # This no-op marker preserves backward compatibility for TUI phase tracking.
802
1405
  write_phase_start "verification"
803
- export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
804
- VERIFY_STATUS="success"
805
- if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_verify.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
806
- VERIFY_STATUS="failed"
807
- fi
808
- extract_session_result "${CLAUDE_OUTPUT}.raw"
809
- accumulate_tokens_from_session "$LAST_SESSION_ID"
810
- write_phase_end "verification" "$VERIFY_STATUS"
1406
+ write_phase_end "verification" "skipped"
811
1407
 
812
1408
  # Guard B: Skip PR phase if branch has no diff to default branch
813
1409
  # Safety net for cases where implementation ran but produced no net diff.
814
- _PR_DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
1410
+ _PR_DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
815
1411
  if [ -z "$_PR_DIFF_STAT" ]; then
816
1412
  echo "No diff between $BRANCH and $DEFAULT_BRANCH — skipping PR phase."
817
1413
  write_phase_start "pr_review"
@@ -824,12 +1420,14 @@ if [ -z "$_PR_DIFF_STAT" ]; then
824
1420
  exit 0
825
1421
  fi
826
1422
 
827
- # Phase 7: PR and Review
1423
+ # Phase 7: PR and Review (includes spec verification via Step 0 in review prompts)
828
1424
  echo "======================== PR & REVIEW PHASE ========================"
829
1425
  write_phase_start "pr_review"
830
- export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
1426
+ export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
831
1427
  PR_STATUS="success"
832
1428
  MAX_REVIEW_ATTEMPTS=3
1429
+ REVIEW_SESSION_ID=""
1430
+ REVIEW_CONTINUATION_PROMPT="The issues from the previous review have been fixed. Re-run the code review, checking only for remaining issues. Report your verdict."
833
1431
 
834
1432
  # Short-circuit: skip review if no files exist in diff
835
1433
  _REVIEW_FILE_CHANGES=$(count_file_changes "$BASELINE_COMMIT")
@@ -860,14 +1458,7 @@ check_review_approved() {
860
1458
  fi
861
1459
  fi
862
1460
 
863
- # Secondary: check if PR was already merged (Claude may merge before verdict is captured)
864
- local pr_state
865
- pr_state=$(gh pr view "$BRANCH" --json state --jq '.state' 2>/dev/null || echo "")
866
- if [ "$pr_state" = "MERGED" ]; then
867
- return 0
868
- fi
869
-
870
- # Tertiary: check the latest PR comment for approval signal
1461
+ # Secondary: check the latest PR comment for approval signal
871
1462
  local latest_comment
872
1463
  latest_comment=$(gh pr view "$BRANCH" --json comments --jq '.comments[-1].body' 2>/dev/null || echo "")
873
1464
  if echo "$latest_comment" | grep -qi "VERDICT:.*APPROVED" 2>/dev/null; then
@@ -879,12 +1470,60 @@ check_review_approved() {
879
1470
  return 1
880
1471
  }
881
1472
 
1473
+ # Wait for CI checks to finish and pass.
1474
+ # Returns 0 when checks pass (or no checks exist), 1 on failure.
1475
+ wait_for_ci_checks() {
1476
+ local pr_ref="$1"
1477
+ echo "Waiting for CI checks on $pr_ref..."
1478
+
1479
+ local checks_output=""
1480
+ checks_output=$(gh pr checks "$pr_ref" --watch --interval 10 2>&1)
1481
+ local checks_exit=$?
1482
+
1483
+ echo "$checks_output"
1484
+
1485
+ if [ $checks_exit -eq 0 ]; then
1486
+ echo "CI checks passed."
1487
+ return 0
1488
+ fi
1489
+
1490
+ # Some repos have no checks configured for certain PRs.
1491
+ if echo "$checks_output" | grep -qiE "no checks|no status checks"; then
1492
+ echo "No CI checks found for $pr_ref. Continuing."
1493
+ return 0
1494
+ fi
1495
+
1496
+ echo "ERROR: CI checks failed or did not complete successfully." >&2
1497
+ return 1
1498
+ }
1499
+
1500
+ # Merge PR after all gates are green.
1501
+ # Returns 0 on success, 1 on failure.
1502
+ merge_pr_after_ci_gate() {
1503
+ local pr_ref="$1"
1504
+ local pr_state
1505
+ pr_state=$(gh pr view "$pr_ref" --json state --jq '.state' 2>/dev/null || echo "")
1506
+
1507
+ if [ "$pr_state" = "MERGED" ]; then
1508
+ echo "PR already merged."
1509
+ return 0
1510
+ fi
1511
+
1512
+ echo "Merging PR after CI gate..."
1513
+ if gh pr merge "$pr_ref" --squash --delete-branch; then
1514
+ return 0
1515
+ fi
1516
+
1517
+ echo "ERROR: Failed to merge PR $pr_ref after CI gate." >&2
1518
+ return 1
1519
+ }
1520
+
882
1521
  if [ "$REVIEW_MODE" = "manual" ]; then
883
- if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
1522
+ if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
884
1523
  PR_STATUS="failed"
885
1524
  fi
886
- extract_session_result "${CLAUDE_OUTPUT}.raw"
887
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1525
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1526
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
888
1527
 
889
1528
  elif [ "$REVIEW_MODE" = "merge" ]; then
890
1529
  # Merge mode: create PR, iterate review+fixes until approved, then merge
@@ -893,22 +1532,53 @@ elif [ "$REVIEW_MODE" = "merge" ]; then
893
1532
  while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
894
1533
  REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
895
1534
  echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
896
- run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
897
- extract_session_result "${CLAUDE_OUTPUT}.raw"
898
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1535
+ if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
1536
+ echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
1537
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1538
+ else
1539
+ echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
1540
+ REVIEW_RESUME_EXIT=0
1541
+ run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
1542
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1543
+ if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
1544
+ echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
1545
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1546
+ fi
1547
+ fi
1548
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1549
+ if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
1550
+ REVIEW_SESSION_ID="$LAST_SESSION_ID"
1551
+ fi
1552
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
899
1553
 
900
1554
  # Check stdout and PR comment for approval
901
1555
  if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
902
1556
  echo "Review approved! Running post-approval test gate..."
903
1557
  if check_tests_pass_or_baseline; then
904
1558
  echo "Post-approval test gate passed."
1559
+ if ! wait_for_ci_checks "$BRANCH"; then
1560
+ PR_STATUS="failed"
1561
+ break
1562
+ fi
1563
+ if ! merge_pr_after_ci_gate "$BRANCH"; then
1564
+ PR_STATUS="failed"
1565
+ break
1566
+ fi
905
1567
  REVIEW_APPROVED=true
906
1568
  break
907
1569
  else
908
1570
  echo "WARNING: Tests failing after review approval. Running fix iteration..."
909
1571
  run_review_fix
910
1572
  if check_tests_pass_or_baseline; then
911
- echo "Tests pass after fix. Proceeding with merge."
1573
+ echo "Tests pass after fix. Running CI and merge gates."
1574
+ if ! wait_for_ci_checks "$BRANCH"; then
1575
+ PR_STATUS="failed"
1576
+ break
1577
+ fi
1578
+ if ! merge_pr_after_ci_gate "$BRANCH"; then
1579
+ PR_STATUS="failed"
1580
+ break
1581
+ fi
912
1582
  REVIEW_APPROVED=true
913
1583
  break
914
1584
  else
@@ -936,9 +1606,24 @@ else
936
1606
  while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
937
1607
  REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
938
1608
  echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
939
- run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
940
- extract_session_result "${CLAUDE_OUTPUT}.raw"
941
- accumulate_tokens_from_session "$LAST_SESSION_ID"
1609
+ if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
1610
+ echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
1611
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1612
+ else
1613
+ echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
1614
+ REVIEW_RESUME_EXIT=0
1615
+ run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
1616
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1617
+ if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
1618
+ echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
1619
+ run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
1620
+ fi
1621
+ fi
1622
+ extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
1623
+ if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
1624
+ REVIEW_SESSION_ID="$LAST_SESSION_ID"
1625
+ fi
1626
+ accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
942
1627
 
943
1628
  # Check stdout and PR comment for approval
944
1629
  if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
@@ -1016,6 +1701,8 @@ fi
1016
1701
  rm -f "$STATUS_FILE" 2>/dev/null || true
1017
1702
  rm -f "/tmp/ralph-loop-${FEATURE}.output" 2>/dev/null || true
1018
1703
  rm -f "/tmp/ralph-loop-${FEATURE}.output.raw" 2>/dev/null || true
1704
+ rm -f "/tmp/ralph-loop-${FEATURE}.last-message" 2>/dev/null || true
1705
+ rm -f "$PRE_RUN_DIRTY_FILE" 2>/dev/null || true
1019
1706
 
1020
1707
  # Print final token usage
1021
1708
  if [ -f "/tmp/ralph-loop-${FEATURE}.tokens" ]; then