wiggum-cli 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/agent/orchestrator.d.ts +1 -1
- package/dist/agent/orchestrator.js +19 -4
- package/dist/agent/tools/backlog.js +8 -4
- package/dist/agent/tools/execution.js +1 -1
- package/dist/agent/tools/introspection.js +26 -4
- package/dist/commands/config.js +96 -2
- package/dist/commands/run.d.ts +2 -0
- package/dist/commands/run.js +47 -2
- package/dist/generator/config.js +13 -2
- package/dist/index.js +7 -1
- package/dist/repl/command-parser.d.ts +1 -1
- package/dist/repl/command-parser.js +1 -1
- package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
- package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
- package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
- package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
- package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
- package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
- package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
- package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
- package/dist/templates/root/README.md.tmpl +2 -3
- package/dist/templates/scripts/feature-loop.sh.tmpl +777 -90
- package/dist/templates/scripts/loop.sh.tmpl +5 -1
- package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
- package/dist/tui/app.d.ts +5 -1
- package/dist/tui/app.js +12 -2
- package/dist/tui/hooks/useAgentOrchestrator.js +16 -7
- package/dist/tui/hooks/useInit.d.ts +5 -1
- package/dist/tui/hooks/useInit.js +20 -2
- package/dist/tui/screens/InitScreen.js +12 -1
- package/dist/tui/screens/MainShell.js +70 -6
- package/dist/tui/screens/RunScreen.d.ts +6 -2
- package/dist/tui/screens/RunScreen.js +48 -6
- package/dist/tui/utils/loop-status.d.ts +15 -0
- package/dist/tui/utils/loop-status.js +89 -27
- package/dist/utils/config.d.ts +7 -0
- package/dist/utils/config.js +14 -0
- package/package.json +1 -1
- package/src/templates/config/ralph.config.cjs.tmpl +9 -2
- package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
- package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
- package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
- package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
- package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
- package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
- package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
- package/src/templates/root/README.md.tmpl +2 -3
- package/src/templates/scripts/feature-loop.sh.tmpl +777 -90
- package/src/templates/scripts/loop.sh.tmpl +5 -1
- package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# feature-loop.sh - Full feature workflow: branch -> implement -> E2E test -> PR -> review -> merge
|
|
3
3
|
# Generated by ralph-cli for {{projectName}}
|
|
4
|
-
# Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--review-mode MODE]
|
|
4
|
+
# Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]
|
|
5
5
|
#
|
|
6
6
|
# Options:
|
|
7
7
|
# --worktree Use git worktree for isolation (enables parallel execution)
|
|
8
8
|
# --resume Resume an interrupted loop (reuses existing branch/worktree)
|
|
9
|
-
# --model MODEL
|
|
9
|
+
# --model MODEL Model to use for coding/review CLI
|
|
10
|
+
# --cli CLI Implementation CLI: 'claude' | 'codex'
|
|
11
|
+
# --review-cli CLI Review CLI: 'claude' | 'codex'
|
|
10
12
|
# --review-mode MODE Review mode: 'manual' (stop at PR), 'auto' (review, no merge), or 'merge' (review + merge). Default: 'manual'
|
|
11
13
|
|
|
12
14
|
set -e
|
|
@@ -23,6 +25,13 @@ if [ -f "$SCRIPT_DIR/../ralph.config.cjs" ]; then
|
|
|
23
25
|
PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
|
|
24
26
|
DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
|
|
25
27
|
PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
|
|
28
|
+
DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
|
|
29
|
+
DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
30
|
+
DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
31
|
+
CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
|
|
32
|
+
CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
|
|
33
|
+
CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
|
|
34
|
+
DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
|
|
26
35
|
DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
|
|
27
36
|
DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
|
|
28
37
|
TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
|
|
@@ -34,6 +43,13 @@ elif [ -f "$SCRIPT_DIR/../../ralph.config.cjs" ]; then
|
|
|
34
43
|
PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
|
|
35
44
|
DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
|
|
36
45
|
PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
|
|
46
|
+
DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
|
|
47
|
+
DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
48
|
+
DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
49
|
+
CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
|
|
50
|
+
CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
|
|
51
|
+
CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
|
|
52
|
+
DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
|
|
37
53
|
DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
|
|
38
54
|
DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
|
|
39
55
|
TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
|
|
@@ -45,6 +61,13 @@ else
|
|
|
45
61
|
PROMPTS_DIR=".ralph/prompts"
|
|
46
62
|
DEFAULT_MODEL="sonnet"
|
|
47
63
|
PLANNING_MODEL="opus"
|
|
64
|
+
DEFAULT_CODEX_MODEL="gpt-5.3-codex"
|
|
65
|
+
DEFAULT_CODING_CLI="claude"
|
|
66
|
+
DEFAULT_REVIEW_CLI="claude"
|
|
67
|
+
CLAUDE_PERMISSION_MODE="default"
|
|
68
|
+
CODEX_SANDBOX="workspace-write"
|
|
69
|
+
CODEX_APPROVAL_POLICY="never"
|
|
70
|
+
DISABLE_MCP_IN_AUTOMATED="true"
|
|
48
71
|
DEFAULT_MAX_ITERATIONS="10"
|
|
49
72
|
DEFAULT_MAX_E2E="5"
|
|
50
73
|
TEST_COMMAND="npm test"
|
|
@@ -59,6 +82,8 @@ USE_WORKTREE=false
|
|
|
59
82
|
RESUME=false
|
|
60
83
|
MODEL=""
|
|
61
84
|
REVIEW_MODE=""
|
|
85
|
+
CLI_OVERRIDE=""
|
|
86
|
+
REVIEW_CLI_OVERRIDE=""
|
|
62
87
|
POSITIONAL=()
|
|
63
88
|
while [[ $# -gt 0 ]]; do
|
|
64
89
|
case $1 in
|
|
@@ -74,6 +99,14 @@ while [[ $# -gt 0 ]]; do
|
|
|
74
99
|
MODEL="$2"
|
|
75
100
|
shift 2
|
|
76
101
|
;;
|
|
102
|
+
--cli)
|
|
103
|
+
CLI_OVERRIDE="$2"
|
|
104
|
+
shift 2
|
|
105
|
+
;;
|
|
106
|
+
--review-cli)
|
|
107
|
+
REVIEW_CLI_OVERRIDE="$2"
|
|
108
|
+
shift 2
|
|
109
|
+
;;
|
|
77
110
|
--review-mode)
|
|
78
111
|
REVIEW_MODE="$2"
|
|
79
112
|
shift 2
|
|
@@ -117,9 +150,163 @@ if [ "$REVIEW_MODE" != "manual" ] && [ "$REVIEW_MODE" != "auto" ] && [ "$REVIEW_
|
|
|
117
150
|
exit 1
|
|
118
151
|
fi
|
|
119
152
|
|
|
120
|
-
#
|
|
121
|
-
|
|
122
|
-
|
|
153
|
+
# Resolve the implementation and review CLIs.
# Precedence for each: command-line flag > ralph.config.cjs value > default.
CODING_CLI="${CLI_OVERRIDE:-$DEFAULT_CODING_CLI}"
# The review CLI falls back to the coding CLI when neither the flag nor the
# config-derived default is set.
REVIEW_CLI="${REVIEW_CLI_OVERRIDE:-${DEFAULT_REVIEW_CLI:-$CODING_CLI}}"
# Lower-case the MCP toggle so 'True'/'FALSE' compare equal to 'true'/'false'.
# printf (not echo) so a value such as '-n' or '-e' is passed through verbatim
# instead of being consumed as an echo option.
DISABLE_MCP_IN_AUTOMATED_NORM=$(printf '%s\n' "$DISABLE_MCP_IN_AUTOMATED" | tr '[:upper:]' '[:lower:]')

# Validate CLI values: only 'claude' and 'codex' are supported.
if [ "$CODING_CLI" != "claude" ] && [ "$CODING_CLI" != "codex" ]; then
  echo "ERROR: Invalid --cli value '$CODING_CLI'. Allowed values are 'claude' or 'codex'." >&2
  exit 1
fi

if [ "$REVIEW_CLI" != "claude" ] && [ "$REVIEW_CLI" != "codex" ]; then
  echo "ERROR: Invalid --review-cli value '$REVIEW_CLI'. Allowed values are 'claude' or 'codex'." >&2
  exit 1
fi
|
|
168
|
+
|
|
169
|
+
# Check whether a string is an accepted Claude permission mode.
# Arguments: $1 - candidate value
# Returns:   0 if $1 is one of acceptEdits, bypassPermissions, default,
#            dontAsk, plan or auto; 1 otherwise.
is_valid_claude_permission_mode() {
  case "$1" in
    acceptEdits|bypassPermissions|default|dontAsk|plan|auto) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
175
|
+
|
|
176
|
+
# Check whether a string is an accepted Codex sandbox setting.
# Arguments: $1 - candidate value
# Returns:   0 if $1 is read-only, workspace-write or danger-full-access;
#            1 otherwise.
is_valid_codex_sandbox() {
  case "$1" in
    read-only|workspace-write|danger-full-access) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
182
|
+
|
|
183
|
+
# Check whether a string is an accepted Codex approval policy.
# Arguments: $1 - candidate value
# Returns:   0 if $1 is untrusted, on-failure, on-request or never;
#            1 otherwise.
is_valid_codex_approval_policy() {
  case "$1" in
    untrusted|on-failure|on-request|never) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
189
|
+
|
|
190
|
+
# Fail fast on invalid config-derived settings, before any CLI is launched.
# Each check prints the offending value to stderr and exits with status 1.
if ! is_valid_claude_permission_mode "$CLAUDE_PERMISSION_MODE"; then
  echo "ERROR: Invalid loop.claudePermissionMode '$CLAUDE_PERMISSION_MODE' in ralph.config.cjs." >&2
  exit 1
fi

if ! is_valid_codex_sandbox "$CODEX_SANDBOX"; then
  echo "ERROR: Invalid loop.codexSandbox '$CODEX_SANDBOX' in ralph.config.cjs." >&2
  exit 1
fi

if ! is_valid_codex_approval_policy "$CODEX_APPROVAL_POLICY"; then
  echo "ERROR: Invalid loop.codexApprovalPolicy '$CODEX_APPROVAL_POLICY' in ralph.config.cjs." >&2
  exit 1
fi

# The comparison uses the lower-cased value; the message reports the value as
# originally configured.
case "$DISABLE_MCP_IN_AUTOMATED_NORM" in
  true|false) ;;
  *)
    echo "ERROR: Invalid loop.disableMcpInAutomatedRuns '$DISABLE_MCP_IN_AUTOMATED' in ralph.config.cjs. Use true or false." >&2
    exit 1
    ;;
esac
|
|
212
|
+
|
|
213
|
+
# Decide whether a model name is a Claude model name.
# Arguments: $1 - model name
# Returns:   0 for the aliases sonnet, opus, haiku and any name beginning
#            with 'claude-'; 1 otherwise.
is_claude_only_model() {
  local candidate="$1"
  case "$candidate" in
    sonnet|opus|haiku|claude-*) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
220
|
+
|
|
221
|
+
# Print the model name to use for a Codex phase.
# Globals:  MODEL (read), DEFAULT_CODEX_MODEL (read)
# Outputs:  MODEL when it is set and is not a Claude model name;
#           DEFAULT_CODEX_MODEL otherwise.
resolve_codex_model() {
  local candidate="${MODEL:-$DEFAULT_CODEX_MODEL}"
  if is_claude_only_model "$candidate"; then
    # A Claude model name cannot be passed to Codex; use the Codex default.
    printf '%s\n' "$DEFAULT_CODEX_MODEL"
  else
    printf '%s\n' "$candidate"
  fi
}
|
|
229
|
+
|
|
230
|
+
# Warn when --model names a Claude model while at least one phase runs on
# Codex: those Codex phases will use DEFAULT_CODEX_MODEL instead.
if [ -n "$MODEL" ] && { [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; }; then
  if is_claude_only_model "$MODEL"; then
    echo "WARNING: --model '$MODEL' is Claude-specific. Codex phases will use '$DEFAULT_CODEX_MODEL'." >&2
  fi
fi
|
|
235
|
+
|
|
236
|
+
# Print the base command line for one of the supported agent CLIs.
# Arguments: $1 - CLI name ('claude' or 'codex')
#            $2 - model name
# Globals:   CLAUDE_PERMISSION_MODE, CODEX_APPROVAL_POLICY, CODEX_SANDBOX,
#            APP_DIR, RALPH_AUTOMATED, DISABLE_MCP_IN_AUTOMATED_NORM (all read)
# Outputs:   the command string on stdout; an error on stderr for other CLIs
# Returns:   1 when $1 is not a supported CLI
build_cli_cmd() {
  local cli="$1"
  local model="$2"
  local codex_extra=""
  case "$cli" in
    claude)
      printf '%s\n' "claude -p --output-format json --permission-mode ${CLAUDE_PERMISSION_MODE} --model ${model}"
      ;;
    codex)
      # Avoid MCP startup deadlocks in unattended loop runs.
      if [ "${RALPH_AUTOMATED:-}" = "1" ] && [ "$DISABLE_MCP_IN_AUTOMATED_NORM" = "true" ]; then
        codex_extra=" -c 'mcp_servers={}'"
      fi
      # The codex string embeds its own quotes because the run helpers pass it
      # through eval; the exact ' -C "<dir>"' shape is also what
      # run_claude_resume strips, so it must not change.
      printf '%s\n' "codex --ask-for-approval \"$CODEX_APPROVAL_POLICY\" --sandbox \"$CODEX_SANDBOX\" exec -C \"$APP_DIR\" --model \"${model}\"${codex_extra}"
      ;;
    *)
      echo "ERROR: Unsupported CLI '$cli'" >&2
      return 1
      ;;
  esac
}
|
|
257
|
+
|
|
258
|
+
# Print the CLI assigned to a workflow phase.
# Arguments: $1 - phase name
# Globals:   REVIEW_CLI, CODING_CLI (read)
# Outputs:   REVIEW_CLI for the 'review' phase, CODING_CLI for any other phase
get_phase_cli() {
  local phase="$1"
  case "$phase" in
    review)
      echo "$REVIEW_CLI"
      ;;
    *)
      echo "$CODING_CLI"
      ;;
  esac
}
|
|
269
|
+
|
|
270
|
+
# Print the model to use for a workflow phase.
# Arguments: $1 - phase name
# Globals:   PLANNING_MODEL, MODEL, DEFAULT_MODEL (read)
# Outputs:   for a Codex phase, whatever resolve_codex_model prints;
#            otherwise PLANNING_MODEL for 'planning' and 'review', and
#            MODEL (falling back to DEFAULT_MODEL) for every other phase.
get_phase_model() {
  local phase="$1"
  local cli
  cli=$(get_phase_cli "$phase")
  if [ "$cli" = "codex" ]; then
    resolve_codex_model
    return
  fi

  case "$phase" in
    planning|review)
      echo "$PLANNING_MODEL"
      ;;
    *)
      echo "${MODEL:-$DEFAULT_MODEL}"
      ;;
  esac
}
|
|
288
|
+
|
|
289
|
+
# Print the full base command for a workflow phase.
# Arguments: $1 - phase name
# Outputs:   the output of build_cli_cmd for the CLI and model selected by
#            get_phase_cli and get_phase_model
# Returns:   the status of build_cli_cmd
get_phase_cmd() {
  local phase="$1"
  local cli
  local model
  cli=$(get_phase_cli "$phase")
  model=$(get_phase_model "$phase")
  build_cli_cmd "$cli" "$model"
}
|
|
297
|
+
|
|
298
|
+
# Abort the script when a required CLI executable is not on PATH.
# Arguments: $1 - command name to look up
# Outputs:   an error on stderr when the command is missing; the npm install
#            hint is included only for the CLIs that have one (claude, codex)
# Returns:   0 when the command is found; otherwise exits the shell with 1
check_cli_binary() {
  local cli="$1"
  local install_hint=""
  case "$cli" in
    claude) install_hint="npm install -g @anthropic-ai/claude-code" ;;
    codex) install_hint="npm install -g @openai/codex" ;;
  esac

  if command -v "$cli" >/dev/null 2>&1; then
    return 0
  fi

  if [ -n "$install_hint" ]; then
    echo "ERROR: ${cli} CLI not found. Install with: ${install_hint}" >&2
  else
    # No known install command: do not print a dangling "Install with:".
    echo "ERROR: ${cli} CLI not found." >&2
  fi
  exit 1
}
|
|
123
310
|
|
|
124
311
|
# Automation footer appended to every prompt in automated mode.
|
|
125
312
|
# Prevents interactive skill prompts from blocking headless sessions.
|
|
@@ -141,22 +328,59 @@ This is a fully automated session with no human operator. You MUST:
|
|
|
141
328
|
'
|
|
142
329
|
fi
|
|
143
330
|
|
|
144
|
-
# Helper: pipe prompt with automation footer to
|
|
331
|
+
# Helper: pipe prompt with automation footer to selected CLI command
|
|
145
332
|
# Pipe a prompt file (after envsubst) plus the automation footer into a CLI.
# Arguments: $1 - path to the prompt file
#            $2 - base command string; one starting with 'codex' takes the
#                 Codex path, anything else the Claude path
# Globals:   AUTOMATION_FOOTER, APP_DIR, LAST_MESSAGE_FILE (read);
#            LAST_RUN_CLI (written: 'codex' or 'claude')
# Outputs:   whatever the invoked CLI writes
run_claude_prompt() {
  local prompt_file="$1"
  local claude_cmd="$2"
  if [[ "$claude_cmd" == codex* ]]; then
    LAST_RUN_CLI="codex"
    # The codex command string carries embedded quotes, so it is evaluated.
    # It runs from APP_DIR and reads the prompt from stdin ('-').
    { cat "$prompt_file" | envsubst; echo "$AUTOMATION_FOOTER"; } \
      | (cd "$APP_DIR" && eval "$claude_cmd --json --output-last-message \"$LAST_MESSAGE_FILE\" -")
  else
    LAST_RUN_CLI="claude"
    # Split the command string on whitespace into an argv array. Unlike an
    # unquoted $claude_cmd expansion, this does not glob-expand any word.
    local -a claude_argv=()
    read -r -a claude_argv <<< "$claude_cmd"
    { cat "$prompt_file" | envsubst; echo "$AUTOMATION_FOOTER"; } | "${claude_argv[@]}"
  fi
}
|
|
343
|
+
|
|
344
|
+
# Helper: resume an existing session with a short continuation prompt
|
|
345
|
+
# Resume an existing CLI session with a short continuation prompt.
# Arguments: $1 - session id to resume
#            $2 - continuation prompt text
#            $3 - base command string; one starting with 'codex' takes the
#                 Codex path, anything else the Claude path
# Globals:   AUTOMATION_FOOTER, APP_DIR, LAST_MESSAGE_FILE (read);
#            LAST_RUN_CLI (written: 'codex' or 'claude')
# Returns:   1 (with a warning on stderr) when the resume arguments cannot be
#            injected into the command; otherwise the pipeline status
run_claude_resume() {
  local session_id="$1"
  local continuation_prompt="$2"
  local claude_cmd="$3"
  if [[ "$claude_cmd" == codex* ]]; then
    LAST_RUN_CLI="codex"
    local resume_cmd="${claude_cmd/ exec / exec resume }"
    if [ "$resume_cmd" = "$claude_cmd" ]; then
      echo "WARNING: codex resume injection failed, exec segment not found in command" >&2
      return 1
    fi
    # codex exec resume does not accept -C/--cd; resume from APP_DIR instead.
    # The flag text is matched as a quoted (literal) pattern so that glob
    # characters in APP_DIR such as '[' or '*' cannot stop it from matching.
    local cd_short=" -C \"${APP_DIR}\""
    local cd_long=" --cd \"${APP_DIR}\""
    resume_cmd=${resume_cmd/"$cd_short"/}
    resume_cmd=${resume_cmd/"$cd_long"/}
    { echo "$continuation_prompt"; echo "$AUTOMATION_FOOTER"; } \
      | (cd "$APP_DIR" && eval "$resume_cmd \"$session_id\" - --json --output-last-message \"$LAST_MESSAGE_FILE\"")
  else
    LAST_RUN_CLI="claude"
    # Insert --resume <session_id> before the first -p flag. The command is
    # built as an argv array: injecting '--resume "id"' into a string that is
    # later word-split would hand claude the id wrapped in literal quotes.
    local -a base_argv=()
    local -a resume_argv=()
    local word
    local injected=0
    read -r -a base_argv <<< "$claude_cmd"
    for word in "${base_argv[@]}"; do
      if [ "$injected" -eq 0 ] && [ "$word" = "-p" ]; then
        resume_argv+=(--resume "$session_id")
        injected=1
      fi
      resume_argv+=("$word")
    done
    if [ "$injected" -eq 0 ]; then
      echo "WARNING: --resume injection failed, -p flag not found in command" >&2
      return 1
    fi
    { echo "$continuation_prompt"; echo "$AUTOMATION_FOOTER"; } | "${resume_argv[@]}"
  fi
}
|
|
150
371
|
|
|
151
372
|
# Token tracking and other per-run scratch files.
# Every path shares the prefix /tmp/ralph-loop-<first positional argument>.
RALPH_TMP_PREFIX="/tmp/ralph-loop-${1}"
TOKENS_FILE="${RALPH_TMP_PREFIX}.tokens"
CLAUDE_OUTPUT="${RALPH_TMP_PREFIX}.output"
LAST_MESSAGE_FILE="${RALPH_TMP_PREFIX}.last-message"
STATUS_FILE="${RALPH_TMP_PREFIX}.status"
FINAL_STATUS_FILE="${RALPH_TMP_PREFIX}.final"
PHASES_FILE="${RALPH_TMP_PREFIX}.phases"
BASELINE_FILE="${RALPH_TMP_PREFIX}.baseline"
PRE_RUN_DIRTY_FILE="${RALPH_TMP_PREFIX}.dirty"
SESSIONS_FILE="${RALPH_TMP_PREFIX}.sessions"
LOG_FILE="${RALPH_TMP_PREFIX}.log"
# Which CLI the most recent run helper invoked; set by the run helpers and
# used as the default CLI by the result/token extraction helpers.
LAST_RUN_CLI=""
|
|
160
384
|
|
|
161
385
|
# Initialize token tracking (4-field format: input|output|cache_create|cache_read)
|
|
162
386
|
init_tokens() {
|
|
@@ -165,15 +389,85 @@ init_tokens() {
|
|
|
165
389
|
> "$LOG_FILE"
|
|
166
390
|
}
|
|
167
391
|
|
|
168
|
-
# Extract session result from
|
|
392
|
+
# Extract session result from command output.
|
|
169
393
|
# Writes human-readable result text to the .log file and captures session_id.
|
|
170
|
-
# Usage: extract_session_result <
|
|
394
|
+
# Usage: extract_session_result <raw_file> [cli]
|
|
171
395
|
# Sets: LAST_SESSION_ID variable
|
|
172
396
|
extract_session_result() {
|
|
173
397
|
local raw_file="$1"
|
|
398
|
+
local cli="${2:-$LAST_RUN_CLI}"
|
|
174
399
|
LAST_SESSION_ID=""
|
|
175
400
|
if [ ! -f "$raw_file" ]; then return; fi
|
|
176
401
|
|
|
402
|
+
if [ "$cli" = "codex" ]; then
|
|
403
|
+
local result
|
|
404
|
+
result=$(python3 -c "
|
|
405
|
+
import json, sys
|
|
406
|
+
session = ''
|
|
407
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
408
|
+
line = line.strip()
|
|
409
|
+
if not line:
|
|
410
|
+
continue
|
|
411
|
+
try:
|
|
412
|
+
obj = json.loads(line)
|
|
413
|
+
except Exception:
|
|
414
|
+
continue
|
|
415
|
+
stack = [obj]
|
|
416
|
+
while stack:
|
|
417
|
+
cur = stack.pop()
|
|
418
|
+
if isinstance(cur, dict):
|
|
419
|
+
for key in ('session_id', 'sessionId', 'conversation_id', 'conversationId', 'thread_id', 'threadId', 'response_id', 'responseId', 'run_id', 'runId'):
|
|
420
|
+
val = cur.get(key)
|
|
421
|
+
if isinstance(val, str) and val:
|
|
422
|
+
session = val
|
|
423
|
+
# Newer Codex JSON can nest thread/session identifiers under typed objects.
|
|
424
|
+
node_type = cur.get('type')
|
|
425
|
+
if node_type in ('thread.started', 'session.started'):
|
|
426
|
+
val = cur.get('id')
|
|
427
|
+
if isinstance(val, str) and val:
|
|
428
|
+
session = val
|
|
429
|
+
thread_obj = cur.get('thread')
|
|
430
|
+
if isinstance(thread_obj, dict):
|
|
431
|
+
val = thread_obj.get('id')
|
|
432
|
+
if isinstance(val, str) and val:
|
|
433
|
+
session = val
|
|
434
|
+
for val in cur.values():
|
|
435
|
+
if isinstance(val, (dict, list)):
|
|
436
|
+
stack.append(val)
|
|
437
|
+
elif isinstance(cur, list):
|
|
438
|
+
for val in cur:
|
|
439
|
+
if isinstance(val, (dict, list)):
|
|
440
|
+
stack.append(val)
|
|
441
|
+
print(session)
|
|
442
|
+
" "$raw_file" 2>/dev/null) || true
|
|
443
|
+
|
|
444
|
+
LAST_SESSION_ID="$result"
|
|
445
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
446
|
+
echo "$LAST_SESSION_ID" >> "$SESSIONS_FILE"
|
|
447
|
+
fi
|
|
448
|
+
|
|
449
|
+
if [ -f "$LAST_MESSAGE_FILE" ]; then
|
|
450
|
+
cat "$LAST_MESSAGE_FILE" >> "$LOG_FILE" 2>/dev/null || true
|
|
451
|
+
echo "" >> "$LOG_FILE"
|
|
452
|
+
else
|
|
453
|
+
python3 -c "
|
|
454
|
+
import json, sys
|
|
455
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
456
|
+
try:
|
|
457
|
+
obj = json.loads(line)
|
|
458
|
+
except Exception:
|
|
459
|
+
continue
|
|
460
|
+
if not isinstance(obj, dict):
|
|
461
|
+
continue
|
|
462
|
+
for key in ('output_text', 'text', 'content'):
|
|
463
|
+
val = obj.get(key)
|
|
464
|
+
if isinstance(val, str) and val.strip():
|
|
465
|
+
print(val.strip())
|
|
466
|
+
" "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
|
|
467
|
+
fi
|
|
468
|
+
return
|
|
469
|
+
fi
|
|
470
|
+
|
|
177
471
|
local result
|
|
178
472
|
result=$(python3 -c "
|
|
179
473
|
import json, sys
|
|
@@ -210,29 +504,109 @@ except Exception:
|
|
|
210
504
|
" "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
|
|
211
505
|
}
|
|
212
506
|
|
|
213
|
-
# Accumulate tokens
|
|
214
|
-
# Usage: accumulate_tokens_from_session <session_id>
|
|
507
|
+
# Accumulate tokens into the .tokens file.
|
|
508
|
+
# Usage: accumulate_tokens_from_session <session_id> [raw_file] [cli]
|
|
215
509
|
accumulate_tokens_from_session() {
|
|
216
510
|
local session_id="$1"
|
|
217
|
-
|
|
511
|
+
local raw_file="${2:-}"
|
|
512
|
+
local cli="${3:-$LAST_RUN_CLI}"
|
|
218
513
|
|
|
219
|
-
|
|
220
|
-
local
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
514
|
+
local s_input=0
|
|
515
|
+
local s_output=0
|
|
516
|
+
local s_cache_create=0
|
|
517
|
+
local s_cache_read=0
|
|
518
|
+
|
|
519
|
+
if [ "$cli" = "codex" ]; then
|
|
520
|
+
if [ -z "$raw_file" ] || [ ! -f "$raw_file" ]; then
|
|
521
|
+
return
|
|
225
522
|
fi
|
|
226
|
-
done
|
|
227
523
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
524
|
+
local session_tokens
|
|
525
|
+
session_tokens=$(python3 -c "
|
|
526
|
+
import json, sys
|
|
527
|
+
|
|
528
|
+
def to_int(v):
|
|
529
|
+
try:
|
|
530
|
+
return int(v)
|
|
531
|
+
except Exception:
|
|
532
|
+
return 0
|
|
533
|
+
|
|
534
|
+
def usage_pair(usage):
|
|
535
|
+
input_tokens = (
|
|
536
|
+
to_int(usage.get('input_tokens'))
|
|
537
|
+
or to_int(usage.get('inputTokens'))
|
|
538
|
+
or to_int(usage.get('prompt_tokens'))
|
|
539
|
+
or to_int(usage.get('promptTokens'))
|
|
540
|
+
)
|
|
541
|
+
output_tokens = (
|
|
542
|
+
to_int(usage.get('output_tokens'))
|
|
543
|
+
or to_int(usage.get('outputTokens'))
|
|
544
|
+
or to_int(usage.get('completion_tokens'))
|
|
545
|
+
or to_int(usage.get('completionTokens'))
|
|
546
|
+
)
|
|
547
|
+
return input_tokens, output_tokens
|
|
548
|
+
|
|
549
|
+
# Codex JSONL often contains repeated/cumulative usage in multiple events.
|
|
550
|
+
# Use the highest observed values from a single run to avoid overcounting.
|
|
551
|
+
max_input = 0
|
|
552
|
+
max_output = 0
|
|
553
|
+
|
|
554
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
555
|
+
line = line.strip()
|
|
556
|
+
if not line:
|
|
557
|
+
continue
|
|
558
|
+
try:
|
|
559
|
+
obj = json.loads(line)
|
|
560
|
+
except Exception:
|
|
561
|
+
continue
|
|
562
|
+
stack = [obj]
|
|
563
|
+
while stack:
|
|
564
|
+
cur = stack.pop()
|
|
565
|
+
if isinstance(cur, dict):
|
|
566
|
+
if 'usage' in cur and isinstance(cur['usage'], dict):
|
|
567
|
+
usage = cur['usage']
|
|
568
|
+
u_in, u_out = usage_pair(usage)
|
|
569
|
+
if u_in > max_input:
|
|
570
|
+
max_input = u_in
|
|
571
|
+
if u_out > max_output:
|
|
572
|
+
max_output = u_out
|
|
573
|
+
for val in cur.values():
|
|
574
|
+
if isinstance(val, (dict, list)):
|
|
575
|
+
stack.append(val)
|
|
576
|
+
elif isinstance(cur, list):
|
|
577
|
+
for val in cur:
|
|
578
|
+
if isinstance(val, (dict, list)):
|
|
579
|
+
stack.append(val)
|
|
580
|
+
|
|
581
|
+
print(f\"{max_input}|{max_output}|0|0\")
|
|
582
|
+
" "$raw_file" 2>/dev/null) || true
|
|
583
|
+
|
|
584
|
+
if [ -n "$session_tokens" ]; then
|
|
585
|
+
s_input=$(echo "$session_tokens" | cut -d'|' -f1)
|
|
586
|
+
s_output=$(echo "$session_tokens" | cut -d'|' -f2)
|
|
587
|
+
s_cache_create=0
|
|
588
|
+
s_cache_read=0
|
|
589
|
+
fi
|
|
590
|
+
else
|
|
591
|
+
if [ -z "$session_id" ]; then return; fi
|
|
592
|
+
|
|
593
|
+
# Find the JSONL file for this session
|
|
594
|
+
local jsonl_file=""
|
|
595
|
+
for f in ~/.claude/projects/*/"${session_id}.jsonl"; do
|
|
596
|
+
if [ -f "$f" ]; then
|
|
597
|
+
jsonl_file="$f"
|
|
598
|
+
break
|
|
599
|
+
fi
|
|
600
|
+
done
|
|
232
601
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
602
|
+
if [ -z "$jsonl_file" ]; then
|
|
603
|
+
echo "WARNING: Could not find JSONL for session $session_id" >&2
|
|
604
|
+
return
|
|
605
|
+
fi
|
|
606
|
+
|
|
607
|
+
# Extract and sum token usage from all assistant messages
|
|
608
|
+
local session_tokens
|
|
609
|
+
session_tokens=$(python3 -c "
|
|
236
610
|
import json, sys
|
|
237
611
|
totals = {'input': 0, 'output': 0, 'cache_create': 0, 'cache_read': 0}
|
|
238
612
|
for line in open(sys.argv[1]):
|
|
@@ -252,14 +626,19 @@ for line in open(sys.argv[1]):
|
|
|
252
626
|
print(f\"{totals['input']}|{totals['output']}|{totals['cache_create']}|{totals['cache_read']}\")
|
|
253
627
|
" "$jsonl_file" 2>/dev/null) || true
|
|
254
628
|
|
|
255
|
-
|
|
629
|
+
if [ -z "$session_tokens" ]; then return; fi
|
|
630
|
+
|
|
631
|
+
# Parse session tokens
|
|
632
|
+
s_input=$(echo "$session_tokens" | cut -d'|' -f1)
|
|
633
|
+
s_output=$(echo "$session_tokens" | cut -d'|' -f2)
|
|
634
|
+
s_cache_create=$(echo "$session_tokens" | cut -d'|' -f3)
|
|
635
|
+
s_cache_read=$(echo "$session_tokens" | cut -d'|' -f4)
|
|
636
|
+
fi
|
|
256
637
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
s_cache_create=$(echo "$session_tokens" | cut -d'|' -f3)
|
|
262
|
-
s_cache_read=$(echo "$session_tokens" | cut -d'|' -f4)
|
|
638
|
+
[[ "$s_input" =~ ^[0-9]+$ ]] || s_input=0
|
|
639
|
+
[[ "$s_output" =~ ^[0-9]+$ ]] || s_output=0
|
|
640
|
+
[[ "$s_cache_create" =~ ^[0-9]+$ ]] || s_cache_create=0
|
|
641
|
+
[[ "$s_cache_read" =~ ^[0-9]+$ ]] || s_cache_read=0
|
|
263
642
|
|
|
264
643
|
# Read current totals
|
|
265
644
|
local current c_input c_output c_cache_create c_cache_read
|
|
@@ -396,7 +775,51 @@ detect_plan_format() {
|
|
|
396
775
|
echo "unknown"
|
|
397
776
|
}
|
|
398
777
|
|
|
399
|
-
#
|
|
778
|
+
# Snapshot tracked dirty files before loop execution starts.
|
|
779
|
+
# Each entry stores <path>\t<working-tree blob hash or __MISSING__ marker>.
|
|
780
|
+
# Record which files already differ from the baseline before the loop runs.
# Arguments: $1 - baseline revision passed to 'git diff --name-only'
# Globals:   PRE_RUN_DIRTY_FILE (read; the file is truncated, then written)
# Outputs:   one '<path>\t<marker>' line per changed path, where <marker> is
#            the 'git hash-object' value of the working-tree file,
#            '__MISSING__' when the path does not exist, or '__HASH_ERROR__'
#            when hashing failed.
# Returns:   always 0; failures of the Python helper are deliberately ignored
#            (best-effort snapshot).
capture_pre_run_dirty_snapshot() {
  local baseline="$1"
  # Start from an empty snapshot even when there is no baseline.
  : > "$PRE_RUN_DIRTY_FILE"
  if [ -z "$baseline" ]; then
    return
  fi
  python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || true
import os
import subprocess
import sys

baseline = sys.argv[1]
output_path = sys.argv[2]

# Paths that differ between the baseline and the working tree.
try:
    changed = subprocess.check_output(
        ["git", "diff", "--name-only", baseline],
        stderr=subprocess.DEVNULL,
    ).splitlines()
except Exception:
    changed = []

with open(output_path, "w", encoding="utf-8", errors="surrogateescape") as fh:
    for raw_path in changed:
        if not raw_path:
            continue
        path = raw_path.decode("utf-8", errors="surrogateescape")
        marker = "__MISSING__"
        if os.path.exists(path):
            try:
                marker = subprocess.check_output(
                    ["git", "hash-object", "--", path],
                    stderr=subprocess.DEVNULL,
                    text=True,
                ).strip()
            except Exception:
                marker = "__HASH_ERROR__"
        fh.write(f"{path}\t{marker}\n")
PY
}
|
|
820
|
+
|
|
821
|
+
# Check if any tracked files were changed since baseline (excluding unchanged
|
|
822
|
+
# tracked edits that already existed before the loop started).
|
|
400
823
|
# Counts ALL file types (code, docs, config) — not just source code.
|
|
401
824
|
count_file_changes() {
|
|
402
825
|
local baseline="$1"
|
|
@@ -405,14 +828,90 @@ count_file_changes() {
|
|
|
405
828
|
return
|
|
406
829
|
fi
|
|
407
830
|
local count
|
|
408
|
-
|
|
831
|
+
# Compare baseline commit against working tree; subtract pre-run dirty files
|
|
832
|
+
# unless that specific file changed again after loop start.
|
|
833
|
+
count=$(python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || echo "0"
|
|
834
|
+
import os
|
|
835
|
+
import subprocess
|
|
836
|
+
import sys
|
|
837
|
+
|
|
838
|
+
baseline = sys.argv[1]
|
|
839
|
+
snapshot_path = sys.argv[2]
|
|
840
|
+
snapshot = {}
|
|
841
|
+
|
|
842
|
+
if os.path.exists(snapshot_path):
|
|
843
|
+
with open(snapshot_path, "r", encoding="utf-8", errors="surrogateescape") as fh:
|
|
844
|
+
for line in fh:
|
|
845
|
+
line = line.rstrip("\n")
|
|
846
|
+
if not line or "\t" not in line:
|
|
847
|
+
continue
|
|
848
|
+
path, marker = line.split("\t", 1)
|
|
849
|
+
snapshot[path] = marker
|
|
850
|
+
|
|
851
|
+
try:
|
|
852
|
+
changed = subprocess.check_output(
|
|
853
|
+
["git", "diff", "--name-only", baseline],
|
|
854
|
+
stderr=subprocess.DEVNULL,
|
|
855
|
+
).splitlines()
|
|
856
|
+
except Exception:
|
|
857
|
+
print("0")
|
|
858
|
+
raise SystemExit(0)
|
|
859
|
+
|
|
860
|
+
count = 0
|
|
861
|
+
for raw_path in changed:
|
|
862
|
+
if not raw_path:
|
|
863
|
+
continue
|
|
864
|
+
path = raw_path.decode("utf-8", errors="surrogateescape")
|
|
865
|
+
start_marker = snapshot.get(path)
|
|
866
|
+
if start_marker is None:
|
|
867
|
+
count += 1
|
|
868
|
+
continue
|
|
869
|
+
|
|
870
|
+
current_marker = "__MISSING__"
|
|
871
|
+
if os.path.exists(path):
|
|
872
|
+
try:
|
|
873
|
+
current_marker = subprocess.check_output(
|
|
874
|
+
["git", "hash-object", "--", path],
|
|
875
|
+
stderr=subprocess.DEVNULL,
|
|
876
|
+
text=True,
|
|
877
|
+
).strip()
|
|
878
|
+
except Exception:
|
|
879
|
+
current_marker = "__HASH_ERROR__"
|
|
880
|
+
if current_marker != start_marker:
|
|
881
|
+
count += 1
|
|
882
|
+
|
|
883
|
+
print(count)
|
|
884
|
+
PY
|
|
885
|
+
)
|
|
409
886
|
echo "$count"
|
|
410
887
|
}
|
|
411
888
|
|
|
412
|
-
# Extract review findings text from
|
|
889
|
+
# Extract review findings text from command output.
|
|
413
890
|
# Returns the result text from the last result entry.
|
|
414
891
|
extract_review_findings() {
|
|
415
892
|
local raw_file="$1"
|
|
893
|
+
local cli="${2:-$LAST_RUN_CLI}"
|
|
894
|
+
if [ "$cli" = "codex" ]; then
|
|
895
|
+
if [ -f "$LAST_MESSAGE_FILE" ]; then
|
|
896
|
+
cat "$LAST_MESSAGE_FILE" 2>/dev/null || echo "No review output available"
|
|
897
|
+
return
|
|
898
|
+
fi
|
|
899
|
+
python3 -c "
|
|
900
|
+
import json, sys
|
|
901
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
902
|
+
try:
|
|
903
|
+
obj = json.loads(line)
|
|
904
|
+
except Exception:
|
|
905
|
+
continue
|
|
906
|
+
if isinstance(obj, dict):
|
|
907
|
+
for key in ('output_text', 'text', 'content'):
|
|
908
|
+
val = obj.get(key)
|
|
909
|
+
if isinstance(val, str) and val.strip():
|
|
910
|
+
print(val.strip())
|
|
911
|
+
" "$raw_file" 2>/dev/null || echo "No review output available"
|
|
912
|
+
return
|
|
913
|
+
fi
|
|
914
|
+
|
|
416
915
|
python3 -c "
|
|
417
916
|
import json, sys
|
|
418
917
|
try:
|
|
@@ -428,11 +927,19 @@ except Exception:
|
|
|
428
927
|
}
|
|
429
928
|
|
|
430
929
|
# Run a fix iteration based on code review findings.
|
|
431
|
-
# Pipes the review output into
|
|
930
|
+
# Pipes the review output into the implementation CLI for targeted fixes.
|
|
432
931
|
run_review_fix() {
|
|
433
932
|
local findings
|
|
933
|
+
local impl_cli="$CODING_CLI"
|
|
934
|
+
local impl_cmd="$IMPL_CMD"
|
|
434
935
|
findings=$(extract_review_findings "${CLAUDE_OUTPUT}.raw")
|
|
435
|
-
|
|
936
|
+
if [ "$impl_cli" = "codex" ]; then
|
|
937
|
+
LAST_RUN_CLI="codex"
|
|
938
|
+
impl_cmd="$IMPL_CMD --json --output-last-message \"$LAST_MESSAGE_FILE\""
|
|
939
|
+
else
|
|
940
|
+
LAST_RUN_CLI="claude"
|
|
941
|
+
fi
|
|
942
|
+
cat <<FIXEOF | eval "$impl_cmd" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
436
943
|
## Code Review Findings
|
|
437
944
|
|
|
438
945
|
The following issues were found during code review:
|
|
@@ -447,8 +954,8 @@ Fix each issue listed above. Run git diff $DEFAULT_BRANCH to see the current cha
|
|
|
447
954
|
3. Commit and push the fixes
|
|
448
955
|
Do NOT propose completion options or ask interactive questions. Just fix, test, commit, push.
|
|
449
956
|
FIXEOF
|
|
450
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
451
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
957
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$impl_cli"
|
|
958
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$impl_cli"
|
|
452
959
|
}
|
|
453
960
|
|
|
454
961
|
# Normalize test failure lines: extract test name, strip timing, deduplicate.
|
|
@@ -523,7 +1030,7 @@ write_phase_end() {
|
|
|
523
1030
|
# Initialize phase tracking
|
|
524
1031
|
> "$PHASES_FILE"
|
|
525
1032
|
|
|
526
|
-
FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL]}"
|
|
1033
|
+
FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]}"
|
|
527
1034
|
# Sanitize feature name to prevent path traversal and shell injection when used in temp file paths
|
|
528
1035
|
if [[ ! "$FEATURE" =~ ^[a-zA-Z0-9][a-zA-Z0-9_-]*$ ]]; then
|
|
529
1036
|
echo "ERROR: Feature name must start with alphanumeric and contain only letters, numbers, hyphens, and underscores." >&2
|
|
@@ -538,6 +1045,15 @@ SPEC_FILE="$SPEC_DIR/${FEATURE}.md"
|
|
|
538
1045
|
PLAN_FILE="$SPEC_DIR/${FEATURE}-implementation-plan.md"
|
|
539
1046
|
BRANCH="feat/${FEATURE}"
|
|
540
1047
|
APP_DIR="$(pwd)"
|
|
1048
|
+
PLANNING_CMD=$(get_phase_cmd "planning")
|
|
1049
|
+
IMPL_CMD=$(get_phase_cmd "implementation")
|
|
1050
|
+
REVIEW_CMD=$(get_phase_cmd "review")
|
|
1051
|
+
|
|
1052
|
+
# Fail fast if required CLIs are not installed.
|
|
1053
|
+
check_cli_binary "$CODING_CLI"
|
|
1054
|
+
if [ "$REVIEW_CLI" != "$CODING_CLI" ]; then
|
|
1055
|
+
check_cli_binary "$REVIEW_CLI"
|
|
1056
|
+
fi
|
|
541
1057
|
|
|
542
1058
|
echo "=========================================="
|
|
543
1059
|
echo "Ralph Loop: $FEATURE"
|
|
@@ -547,9 +1063,26 @@ echo "Branch: $BRANCH"
|
|
|
547
1063
|
echo "App dir: $APP_DIR"
|
|
548
1064
|
echo "Worktree mode: $USE_WORKTREE"
|
|
549
1065
|
echo "Resume mode: $RESUME"
|
|
1066
|
+
echo "Coding CLI (impl/e2e): $CODING_CLI"
|
|
1067
|
+
echo "Review CLI: $REVIEW_CLI"
|
|
550
1068
|
echo "Review mode: $REVIEW_MODE"
|
|
551
|
-
echo "
|
|
552
|
-
echo "
|
|
1069
|
+
echo "Claude permission mode: $CLAUDE_PERMISSION_MODE"
|
|
1070
|
+
echo "Codex sandbox: $CODEX_SANDBOX"
|
|
1071
|
+
echo "Codex approval policy: $CODEX_APPROVAL_POLICY"
|
|
1072
|
+
if [ "${RALPH_AUTOMATED:-}" = "1" ]; then
|
|
1073
|
+
echo "Disable MCP in automated runs: $DISABLE_MCP_IN_AUTOMATED_NORM"
|
|
1074
|
+
fi
|
|
1075
|
+
if [ "$CODING_CLI" = "codex" ] && [ "$REVIEW_CLI" = "codex" ]; then
|
|
1076
|
+
echo "Model (all phases): $(resolve_codex_model)"
|
|
1077
|
+
else
|
|
1078
|
+
if [ "$CODING_CLI" = "claude" ] || [ "$REVIEW_CLI" = "claude" ]; then
|
|
1079
|
+
echo "Model (Claude planning/review): $PLANNING_MODEL"
|
|
1080
|
+
echo "Model (Claude impl/e2e): ${MODEL:-$DEFAULT_MODEL}"
|
|
1081
|
+
fi
|
|
1082
|
+
if [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; then
|
|
1083
|
+
echo "Model (Codex phases): $(resolve_codex_model)"
|
|
1084
|
+
fi
|
|
1085
|
+
fi
|
|
553
1086
|
echo "Max iterations: $MAX_ITERATIONS"
|
|
554
1087
|
echo "Max E2E attempts: $MAX_E2E_ATTEMPTS"
|
|
555
1088
|
echo "=========================================="
|
|
@@ -588,7 +1121,7 @@ fi
|
|
|
588
1121
|
# tasks to be checked — the checkboxes may be stale if the work shipped under a
|
|
589
1122
|
# different branch name that never updated this plan file.
|
|
590
1123
|
if [ -f "$PLAN_FILE" ]; then
|
|
591
|
-
_DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
|
|
1124
|
+
_DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
|
|
592
1125
|
if [ -z "$_DIFF_STAT" ]; then
|
|
593
1126
|
echo "Plan exists but branch has no diff to $DEFAULT_BRANCH — work already merged."
|
|
594
1127
|
> "$PHASES_FILE"
|
|
@@ -612,6 +1145,7 @@ if git rev-parse --git-dir > /dev/null 2>&1; then
|
|
|
612
1145
|
if [ -n "$BASELINE_COMMIT" ]; then
|
|
613
1146
|
echo "$BASELINE_COMMIT" > "$BASELINE_FILE"
|
|
614
1147
|
echo "Baseline commit: $BASELINE_COMMIT"
|
|
1148
|
+
capture_pre_run_dirty_snapshot "$BASELINE_COMMIT"
|
|
615
1149
|
fi
|
|
616
1150
|
fi
|
|
617
1151
|
|
|
@@ -634,14 +1168,14 @@ echo "0|$MAX_ITERATIONS|$(date +%s)" > "$STATUS_FILE"
|
|
|
634
1168
|
if [ ! -f "$PLAN_FILE" ]; then
|
|
635
1169
|
echo "======================== PLANNING PHASE ========================"
|
|
636
1170
|
write_phase_start "planning"
|
|
637
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
638
|
-
run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$
|
|
1171
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1172
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$PLANNING_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || {
|
|
639
1173
|
echo "ERROR: Planning phase failed"
|
|
640
1174
|
write_phase_end "planning" "failed"
|
|
641
1175
|
exit 1
|
|
642
1176
|
}
|
|
643
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
644
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
1177
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1178
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
645
1179
|
write_phase_end "planning" "success"
|
|
646
1180
|
else
|
|
647
1181
|
echo "Plan file exists, skipping planning phase"
|
|
@@ -693,10 +1227,30 @@ while true; do
|
|
|
693
1227
|
TASKS_BEFORE=$(count_pending_tasks "$PLAN_FILE")
|
|
694
1228
|
echo "Legacy plan format — relying on source-file gate for completion."
|
|
695
1229
|
fi
|
|
696
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
1230
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1231
|
+
# Continuation prompt for implementation loop iterations 2+
|
|
1232
|
+
CONTINUATION_PROMPT="Continue implementing the remaining tasks in the implementation plan at $SPEC_DIR/${FEATURE}-implementation-plan.md.
|
|
1233
|
+
Check off completed tasks as you go. Skip any E2E testing tasks.
|
|
1234
|
+
Run validation (lint, typecheck, test) after completing tasks."
|
|
1235
|
+
if [ $ITERATION -eq 1 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1236
|
+
echo "Mode: fresh"
|
|
1237
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1238
|
+
else
|
|
1239
|
+
echo "Mode: resume (session: $LAST_SESSION_ID)"
|
|
1240
|
+
RESUME_EXIT=0
|
|
1241
|
+
run_claude_resume "$LAST_SESSION_ID" "$CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || RESUME_EXIT=$?
|
|
1242
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1243
|
+
if [ $RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1244
|
+
if [ $RESUME_EXIT -ne 0 ]; then
|
|
1245
|
+
echo "Resume failed (resume_exit_nonzero: exit=$RESUME_EXIT). Fallback: using fresh prompt"
|
|
1246
|
+
else
|
|
1247
|
+
echo "Resume failed (resume_no_session_id). Fallback: using fresh prompt"
|
|
1248
|
+
fi
|
|
1249
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1250
|
+
fi
|
|
1251
|
+
fi
|
|
1252
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1253
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
700
1254
|
|
|
701
1255
|
# Check if any progress was made
|
|
702
1256
|
TASKS_AFTER=$(count_pending_tasks "$PLAN_FILE")
|
|
@@ -752,6 +1306,20 @@ if [ "$IMPL_SUCCESS" = true ]; then
|
|
|
752
1306
|
fi
|
|
753
1307
|
fi
|
|
754
1308
|
|
|
1309
|
+
# Stop early when implementation failed to avoid wasting E2E/review cycles.
|
|
1310
|
+
if [ "$IMPL_SUCCESS" != true ]; then
|
|
1311
|
+
echo "Implementation phase failed. Skipping remaining phases."
|
|
1312
|
+
write_phase_start "e2e_testing"
|
|
1313
|
+
write_phase_end "e2e_testing" "skipped"
|
|
1314
|
+
write_phase_start "verification"
|
|
1315
|
+
write_phase_end "verification" "skipped"
|
|
1316
|
+
write_phase_start "pr_review"
|
|
1317
|
+
write_phase_end "pr_review" "skipped"
|
|
1318
|
+
echo "$ITERATION|$MAX_ITERATIONS|$(date +%s)|failed" > "$FINAL_STATUS_FILE"
|
|
1319
|
+
rm -f "$STATUS_FILE" 2>/dev/null || true
|
|
1320
|
+
exit 1
|
|
1321
|
+
fi
|
|
1322
|
+
|
|
755
1323
|
# Phase 5: E2E Testing
|
|
756
1324
|
echo "======================== E2E TESTING PHASE ========================"
|
|
757
1325
|
E2E_TOTAL=$({ grep "^- \[.\].*E2E:" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
|
|
@@ -763,14 +1331,32 @@ else
|
|
|
763
1331
|
write_phase_start "e2e_testing"
|
|
764
1332
|
E2E_SUCCESS=false
|
|
765
1333
|
E2E_ATTEMPT=0
|
|
1334
|
+
E2E_SESSION_ID=""
|
|
1335
|
+
E2E_CONTINUATION_PROMPT="Continue remaining E2E scenarios. Check the implementation plan for unchecked \`- [ ] E2E:\` entries and implement/run those tests. Run validation after completing each scenario."
|
|
766
1336
|
while [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; do
|
|
767
1337
|
E2E_ATTEMPT=$((E2E_ATTEMPT + 1))
|
|
1338
|
+
echo "$E2E_ATTEMPT|$MAX_E2E_ATTEMPTS|$(date +%s)" > "$STATUS_FILE"
|
|
768
1339
|
echo "------------------------ E2E Attempt $E2E_ATTEMPT of $MAX_E2E_ATTEMPTS ------------------------"
|
|
769
1340
|
|
|
770
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1341
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1342
|
+
if [ $E2E_ATTEMPT -eq 1 ] || [ -z "$E2E_SESSION_ID" ]; then
|
|
1343
|
+
echo "E2E attempt $E2E_ATTEMPT: using full prompt"
|
|
1344
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1345
|
+
else
|
|
1346
|
+
echo "E2E attempt $E2E_ATTEMPT: using resume session $E2E_SESSION_ID"
|
|
1347
|
+
E2E_RESUME_EXIT=0
|
|
1348
|
+
run_claude_resume "$E2E_SESSION_ID" "$E2E_CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_RESUME_EXIT=$?
|
|
1349
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1350
|
+
if [ $E2E_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1351
|
+
echo "E2E attempt $E2E_ATTEMPT: resume unavailable, using full prompt"
|
|
1352
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1353
|
+
fi
|
|
1354
|
+
fi
|
|
1355
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1356
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
1357
|
+
E2E_SESSION_ID="$LAST_SESSION_ID"
|
|
1358
|
+
fi
|
|
1359
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
774
1360
|
|
|
775
1361
|
# Check if all E2E tests passed
|
|
776
1362
|
E2E_FAILED=$({ grep "^- \[ \].*E2E:.*FAILED" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
|
|
@@ -784,9 +1370,24 @@ else
|
|
|
784
1370
|
|
|
785
1371
|
if [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; then
|
|
786
1372
|
echo "E2E tests have failures. Running fix iteration..."
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
1373
|
+
if [ -n "$E2E_SESSION_ID" ]; then
|
|
1374
|
+
echo "E2E fix: using resume session $E2E_SESSION_ID"
|
|
1375
|
+
E2E_FIX_EXIT=0
|
|
1376
|
+
run_claude_resume "$E2E_SESSION_ID" "Fix the failing E2E tests identified above. Run validation after fixing." "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_FIX_EXIT=$?
|
|
1377
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1378
|
+
if [ $E2E_FIX_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1379
|
+
echo "E2E fix: resume unavailable, using full prompt"
|
|
1380
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1381
|
+
fi
|
|
1382
|
+
else
|
|
1383
|
+
echo "E2E fix: resume unavailable, using full prompt"
|
|
1384
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1385
|
+
fi
|
|
1386
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1387
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
1388
|
+
E2E_SESSION_ID="$LAST_SESSION_ID"
|
|
1389
|
+
fi
|
|
1390
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
790
1391
|
fi
|
|
791
1392
|
done
|
|
792
1393
|
|
|
@@ -797,21 +1398,16 @@ else
|
|
|
797
1398
|
fi
|
|
798
1399
|
fi
|
|
799
1400
|
|
|
800
|
-
# Phase 6: Spec Verification
|
|
801
|
-
|
|
1401
|
+
# Phase 6: Spec Verification (merged into review phase)
|
|
1402
|
+
# Verification responsibilities (spec status, acceptance criteria, README updates)
|
|
1403
|
+
# are now handled in Step 0 of the review prompt templates.
|
|
1404
|
+
# This no-op marker preserves backward compatibility for TUI phase tracking.
|
|
802
1405
|
write_phase_start "verification"
|
|
803
|
-
|
|
804
|
-
VERIFY_STATUS="success"
|
|
805
|
-
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_verify.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
|
|
806
|
-
VERIFY_STATUS="failed"
|
|
807
|
-
fi
|
|
808
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
809
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
810
|
-
write_phase_end "verification" "$VERIFY_STATUS"
|
|
1406
|
+
write_phase_end "verification" "skipped"
|
|
811
1407
|
|
|
812
1408
|
# Guard B: Skip PR phase if branch has no diff to default branch
|
|
813
1409
|
# Safety net for cases where implementation ran but produced no net diff.
|
|
814
|
-
_PR_DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
|
|
1410
|
+
_PR_DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
|
|
815
1411
|
if [ -z "$_PR_DIFF_STAT" ]; then
|
|
816
1412
|
echo "No diff between $BRANCH and $DEFAULT_BRANCH — skipping PR phase."
|
|
817
1413
|
write_phase_start "pr_review"
|
|
@@ -824,12 +1420,14 @@ if [ -z "$_PR_DIFF_STAT" ]; then
|
|
|
824
1420
|
exit 0
|
|
825
1421
|
fi
|
|
826
1422
|
|
|
827
|
-
# Phase 7: PR and Review
|
|
1423
|
+
# Phase 7: PR and Review (includes spec verification via Step 0 in review prompts)
|
|
828
1424
|
echo "======================== PR & REVIEW PHASE ========================"
|
|
829
1425
|
write_phase_start "pr_review"
|
|
830
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
1426
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
831
1427
|
PR_STATUS="success"
|
|
832
1428
|
MAX_REVIEW_ATTEMPTS=3
|
|
1429
|
+
REVIEW_SESSION_ID=""
|
|
1430
|
+
REVIEW_CONTINUATION_PROMPT="The issues from the previous review have been fixed. Re-run the code review, checking only for remaining issues. Report your verdict."
|
|
833
1431
|
|
|
834
1432
|
# Short-circuit: skip review if no files exist in diff
|
|
835
1433
|
_REVIEW_FILE_CHANGES=$(count_file_changes "$BASELINE_COMMIT")
|
|
@@ -860,14 +1458,7 @@ check_review_approved() {
|
|
|
860
1458
|
fi
|
|
861
1459
|
fi
|
|
862
1460
|
|
|
863
|
-
# Secondary: check
|
|
864
|
-
local pr_state
|
|
865
|
-
pr_state=$(gh pr view "$BRANCH" --json state --jq '.state' 2>/dev/null || echo "")
|
|
866
|
-
if [ "$pr_state" = "MERGED" ]; then
|
|
867
|
-
return 0
|
|
868
|
-
fi
|
|
869
|
-
|
|
870
|
-
# Tertiary: check the latest PR comment for approval signal
|
|
1461
|
+
# Secondary: check the latest PR comment for approval signal
|
|
871
1462
|
local latest_comment
|
|
872
1463
|
latest_comment=$(gh pr view "$BRANCH" --json comments --jq '.comments[-1].body' 2>/dev/null || echo "")
|
|
873
1464
|
if echo "$latest_comment" | grep -qi "VERDICT:.*APPROVED" 2>/dev/null; then
|
|
@@ -879,12 +1470,60 @@ check_review_approved() {
|
|
|
879
1470
|
return 1
|
|
880
1471
|
}
|
|
881
1472
|
|
|
1473
|
+
# Wait for CI checks to finish and pass.
|
|
1474
|
+
# Returns 0 when checks pass (or no checks exist), 1 on failure.
|
|
1475
|
+
wait_for_ci_checks() {
|
|
1476
|
+
local pr_ref="$1"
|
|
1477
|
+
echo "Waiting for CI checks on $pr_ref..."
|
|
1478
|
+
|
|
1479
|
+
local checks_output=""
|
|
1480
|
+
checks_output=$(gh pr checks "$pr_ref" --watch --interval 10 2>&1)
|
|
1481
|
+
local checks_exit=$?
|
|
1482
|
+
|
|
1483
|
+
echo "$checks_output"
|
|
1484
|
+
|
|
1485
|
+
if [ $checks_exit -eq 0 ]; then
|
|
1486
|
+
echo "CI checks passed."
|
|
1487
|
+
return 0
|
|
1488
|
+
fi
|
|
1489
|
+
|
|
1490
|
+
# Some repos have no checks configured for certain PRs.
|
|
1491
|
+
if echo "$checks_output" | grep -qiE "no checks|no status checks"; then
|
|
1492
|
+
echo "No CI checks found for $pr_ref. Continuing."
|
|
1493
|
+
return 0
|
|
1494
|
+
fi
|
|
1495
|
+
|
|
1496
|
+
echo "ERROR: CI checks failed or did not complete successfully." >&2
|
|
1497
|
+
return 1
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
# Merge PR after all gates are green.
|
|
1501
|
+
# Returns 0 on success, 1 on failure.
|
|
1502
|
+
merge_pr_after_ci_gate() {
|
|
1503
|
+
local pr_ref="$1"
|
|
1504
|
+
local pr_state
|
|
1505
|
+
pr_state=$(gh pr view "$pr_ref" --json state --jq '.state' 2>/dev/null || echo "")
|
|
1506
|
+
|
|
1507
|
+
if [ "$pr_state" = "MERGED" ]; then
|
|
1508
|
+
echo "PR already merged."
|
|
1509
|
+
return 0
|
|
1510
|
+
fi
|
|
1511
|
+
|
|
1512
|
+
echo "Merging PR after CI gate..."
|
|
1513
|
+
if gh pr merge "$pr_ref" --squash --delete-branch; then
|
|
1514
|
+
return 0
|
|
1515
|
+
fi
|
|
1516
|
+
|
|
1517
|
+
echo "ERROR: Failed to merge PR $pr_ref after CI gate." >&2
|
|
1518
|
+
return 1
|
|
1519
|
+
}
|
|
1520
|
+
|
|
882
1521
|
if [ "$REVIEW_MODE" = "manual" ]; then
|
|
883
|
-
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$
|
|
1522
|
+
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
|
|
884
1523
|
PR_STATUS="failed"
|
|
885
1524
|
fi
|
|
886
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
887
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
1525
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1526
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
888
1527
|
|
|
889
1528
|
elif [ "$REVIEW_MODE" = "merge" ]; then
|
|
890
1529
|
# Merge mode: create PR, iterate review+fixes until approved, then merge
|
|
@@ -893,22 +1532,53 @@ elif [ "$REVIEW_MODE" = "merge" ]; then
|
|
|
893
1532
|
while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
|
|
894
1533
|
REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
|
|
895
1534
|
echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
1535
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
|
|
1536
|
+
echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
|
|
1537
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1538
|
+
else
|
|
1539
|
+
echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
|
|
1540
|
+
REVIEW_RESUME_EXIT=0
|
|
1541
|
+
run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
|
|
1542
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1543
|
+
if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1544
|
+
echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
|
|
1545
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1546
|
+
fi
|
|
1547
|
+
fi
|
|
1548
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1549
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
|
|
1550
|
+
REVIEW_SESSION_ID="$LAST_SESSION_ID"
|
|
1551
|
+
fi
|
|
1552
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
899
1553
|
|
|
900
1554
|
# Check stdout and PR comment for approval
|
|
901
1555
|
if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
|
|
902
1556
|
echo "Review approved! Running post-approval test gate..."
|
|
903
1557
|
if check_tests_pass_or_baseline; then
|
|
904
1558
|
echo "Post-approval test gate passed."
|
|
1559
|
+
if ! wait_for_ci_checks "$BRANCH"; then
|
|
1560
|
+
PR_STATUS="failed"
|
|
1561
|
+
break
|
|
1562
|
+
fi
|
|
1563
|
+
if ! merge_pr_after_ci_gate "$BRANCH"; then
|
|
1564
|
+
PR_STATUS="failed"
|
|
1565
|
+
break
|
|
1566
|
+
fi
|
|
905
1567
|
REVIEW_APPROVED=true
|
|
906
1568
|
break
|
|
907
1569
|
else
|
|
908
1570
|
echo "WARNING: Tests failing after review approval. Running fix iteration..."
|
|
909
1571
|
run_review_fix
|
|
910
1572
|
if check_tests_pass_or_baseline; then
|
|
911
|
-
echo "Tests pass after fix.
|
|
1573
|
+
echo "Tests pass after fix. Running CI and merge gates."
|
|
1574
|
+
if ! wait_for_ci_checks "$BRANCH"; then
|
|
1575
|
+
PR_STATUS="failed"
|
|
1576
|
+
break
|
|
1577
|
+
fi
|
|
1578
|
+
if ! merge_pr_after_ci_gate "$BRANCH"; then
|
|
1579
|
+
PR_STATUS="failed"
|
|
1580
|
+
break
|
|
1581
|
+
fi
|
|
912
1582
|
REVIEW_APPROVED=true
|
|
913
1583
|
break
|
|
914
1584
|
else
|
|
@@ -936,9 +1606,24 @@ else
|
|
|
936
1606
|
while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
|
|
937
1607
|
REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
|
|
938
1608
|
echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1609
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
|
|
1610
|
+
echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
|
|
1611
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1612
|
+
else
|
|
1613
|
+
echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
|
|
1614
|
+
REVIEW_RESUME_EXIT=0
|
|
1615
|
+
run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
|
|
1616
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1617
|
+
if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1618
|
+
echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
|
|
1619
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1620
|
+
fi
|
|
1621
|
+
fi
|
|
1622
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1623
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
|
|
1624
|
+
REVIEW_SESSION_ID="$LAST_SESSION_ID"
|
|
1625
|
+
fi
|
|
1626
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
942
1627
|
|
|
943
1628
|
# Check stdout and PR comment for approval
|
|
944
1629
|
if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
|
|
@@ -1016,6 +1701,8 @@ fi
|
|
|
1016
1701
|
rm -f "$STATUS_FILE" 2>/dev/null || true
|
|
1017
1702
|
rm -f "/tmp/ralph-loop-${FEATURE}.output" 2>/dev/null || true
|
|
1018
1703
|
rm -f "/tmp/ralph-loop-${FEATURE}.output.raw" 2>/dev/null || true
|
|
1704
|
+
rm -f "/tmp/ralph-loop-${FEATURE}.last-message" 2>/dev/null || true
|
|
1705
|
+
rm -f "$PRE_RUN_DIRTY_FILE" 2>/dev/null || true
|
|
1019
1706
|
|
|
1020
1707
|
# Print final token usage
|
|
1021
1708
|
if [ -f "/tmp/ralph-loop-${FEATURE}.tokens" ]; then
|