wiggum-cli 0.17.2 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -14
- package/dist/agent/orchestrator.d.ts +21 -3
- package/dist/agent/orchestrator.js +394 -187
- package/dist/agent/resolve-config.js +1 -1
- package/dist/agent/scheduler.d.ts +29 -0
- package/dist/agent/scheduler.js +1149 -0
- package/dist/agent/tools/backlog.d.ts +6 -0
- package/dist/agent/tools/backlog.js +23 -4
- package/dist/agent/tools/execution.js +1 -1
- package/dist/agent/tools/introspection.js +26 -4
- package/dist/agent/types.d.ts +113 -0
- package/dist/ai/conversation/url-fetcher.js +46 -13
- package/dist/ai/enhancer.js +1 -2
- package/dist/ai/providers.js +4 -4
- package/dist/commands/agent.d.ts +1 -0
- package/dist/commands/agent.js +53 -1
- package/dist/commands/config.js +100 -6
- package/dist/commands/run.d.ts +2 -0
- package/dist/commands/run.js +47 -2
- package/dist/commands/sync.js +2 -2
- package/dist/generator/config.js +13 -2
- package/dist/index.js +11 -3
- package/dist/repl/command-parser.d.ts +1 -1
- package/dist/repl/command-parser.js +1 -1
- package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
- package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
- package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
- package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
- package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
- package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
- package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
- package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
- package/dist/templates/root/README.md.tmpl +2 -3
- package/dist/templates/scripts/feature-loop.sh.tmpl +835 -93
- package/dist/templates/scripts/loop.sh.tmpl +5 -1
- package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
- package/dist/tui/app.d.ts +5 -1
- package/dist/tui/app.js +22 -3
- package/dist/tui/components/HeaderContent.d.ts +4 -1
- package/dist/tui/components/HeaderContent.js +4 -2
- package/dist/tui/hooks/useAgentOrchestrator.d.ts +2 -1
- package/dist/tui/hooks/useAgentOrchestrator.js +86 -33
- package/dist/tui/hooks/useInit.d.ts +5 -1
- package/dist/tui/hooks/useInit.js +20 -2
- package/dist/tui/screens/AgentScreen.js +3 -1
- package/dist/tui/screens/InitScreen.js +12 -1
- package/dist/tui/screens/MainShell.js +70 -6
- package/dist/tui/screens/RunScreen.d.ts +6 -2
- package/dist/tui/screens/RunScreen.js +48 -6
- package/dist/tui/utils/loop-status.d.ts +15 -0
- package/dist/tui/utils/loop-status.js +89 -27
- package/dist/tui/utils/polishGoal.js +14 -1
- package/dist/utils/config.d.ts +7 -0
- package/dist/utils/config.js +14 -0
- package/dist/utils/env.js +7 -1
- package/dist/utils/github.d.ts +13 -0
- package/dist/utils/github.js +63 -4
- package/dist/utils/logger.js +1 -1
- package/package.json +9 -7
- package/src/templates/config/ralph.config.cjs.tmpl +9 -2
- package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
- package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
- package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
- package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
- package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
- package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
- package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
- package/src/templates/root/README.md.tmpl +2 -3
- package/src/templates/scripts/feature-loop.sh.tmpl +835 -93
- package/src/templates/scripts/loop.sh.tmpl +5 -1
- package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# feature-loop.sh - Full feature workflow: branch -> implement -> E2E test -> PR -> review -> merge
|
|
3
3
|
# Generated by ralph-cli for {{projectName}}
|
|
4
|
-
# Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--review-mode MODE]
|
|
4
|
+
# Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]
|
|
5
5
|
#
|
|
6
6
|
# Options:
|
|
7
7
|
# --worktree Use git worktree for isolation (enables parallel execution)
|
|
8
8
|
# --resume Resume an interrupted loop (reuses existing branch/worktree)
|
|
9
|
-
# --model MODEL
|
|
9
|
+
# --model MODEL Model to use for coding/review CLI
|
|
10
|
+
# --cli CLI Implementation CLI: 'claude' | 'codex'
|
|
11
|
+
# --review-cli CLI Review CLI: 'claude' | 'codex'
|
|
10
12
|
# --review-mode MODE Review mode: 'manual' (stop at PR), 'auto' (review, no merge), or 'merge' (review + merge). Default: 'manual'
|
|
11
13
|
|
|
12
14
|
set -e
|
|
@@ -23,6 +25,13 @@ if [ -f "$SCRIPT_DIR/../ralph.config.cjs" ]; then
|
|
|
23
25
|
PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
|
|
24
26
|
DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
|
|
25
27
|
PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
|
|
28
|
+
DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
|
|
29
|
+
DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
30
|
+
DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
31
|
+
CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
|
|
32
|
+
CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
|
|
33
|
+
CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
|
|
34
|
+
DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
|
|
26
35
|
DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
|
|
27
36
|
DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
|
|
28
37
|
TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
|
|
@@ -34,6 +43,13 @@ elif [ -f "$SCRIPT_DIR/../../ralph.config.cjs" ]; then
|
|
|
34
43
|
PROMPTS_DIR=$(node -e "console.log(require('$CONFIG_PATH').paths?.prompts || '.ralph/prompts')" 2>/dev/null || echo ".ralph/prompts")
|
|
35
44
|
DEFAULT_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.defaultModel || 'sonnet')" 2>/dev/null || echo "sonnet")
|
|
36
45
|
PLANNING_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.planningModel || 'opus')" 2>/dev/null || echo "opus")
|
|
46
|
+
DEFAULT_CODEX_MODEL=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexModel || 'gpt-5.3-codex')" 2>/dev/null || echo "gpt-5.3-codex")
|
|
47
|
+
DEFAULT_CODING_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
48
|
+
DEFAULT_REVIEW_CLI=$(node -e "console.log(require('$CONFIG_PATH').loop?.reviewCli || require('$CONFIG_PATH').loop?.codingCli || 'claude')" 2>/dev/null || echo "claude")
|
|
49
|
+
CLAUDE_PERMISSION_MODE=$(node -e "console.log(require('$CONFIG_PATH').loop?.claudePermissionMode || 'default')" 2>/dev/null || echo "default")
|
|
50
|
+
CODEX_SANDBOX=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexSandbox || 'workspace-write')" 2>/dev/null || echo "workspace-write")
|
|
51
|
+
CODEX_APPROVAL_POLICY=$(node -e "console.log(require('$CONFIG_PATH').loop?.codexApprovalPolicy || 'never')" 2>/dev/null || echo "never")
|
|
52
|
+
DISABLE_MCP_IN_AUTOMATED=$(node -e "const v=require('$CONFIG_PATH').loop?.disableMcpInAutomatedRuns; console.log(v === undefined ? 'true' : String(v))" 2>/dev/null || echo "true")
|
|
37
53
|
DEFAULT_MAX_ITERATIONS=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxIterations || 10)" 2>/dev/null || echo "10")
|
|
38
54
|
DEFAULT_MAX_E2E=$(node -e "console.log(require('$CONFIG_PATH').loop?.maxE2eAttempts || 5)" 2>/dev/null || echo "5")
|
|
39
55
|
TEST_COMMAND=$(node -e "console.log(require('$CONFIG_PATH').commands?.test || 'npm test')" 2>/dev/null || echo "npm test")
|
|
@@ -45,6 +61,13 @@ else
|
|
|
45
61
|
PROMPTS_DIR=".ralph/prompts"
|
|
46
62
|
DEFAULT_MODEL="sonnet"
|
|
47
63
|
PLANNING_MODEL="opus"
|
|
64
|
+
DEFAULT_CODEX_MODEL="gpt-5.3-codex"
|
|
65
|
+
DEFAULT_CODING_CLI="claude"
|
|
66
|
+
DEFAULT_REVIEW_CLI="claude"
|
|
67
|
+
CLAUDE_PERMISSION_MODE="default"
|
|
68
|
+
CODEX_SANDBOX="workspace-write"
|
|
69
|
+
CODEX_APPROVAL_POLICY="never"
|
|
70
|
+
DISABLE_MCP_IN_AUTOMATED="true"
|
|
48
71
|
DEFAULT_MAX_ITERATIONS="10"
|
|
49
72
|
DEFAULT_MAX_E2E="5"
|
|
50
73
|
TEST_COMMAND="npm test"
|
|
@@ -59,6 +82,8 @@ USE_WORKTREE=false
|
|
|
59
82
|
RESUME=false
|
|
60
83
|
MODEL=""
|
|
61
84
|
REVIEW_MODE=""
|
|
85
|
+
CLI_OVERRIDE=""
|
|
86
|
+
REVIEW_CLI_OVERRIDE=""
|
|
62
87
|
POSITIONAL=()
|
|
63
88
|
while [[ $# -gt 0 ]]; do
|
|
64
89
|
case $1 in
|
|
@@ -74,6 +99,14 @@ while [[ $# -gt 0 ]]; do
|
|
|
74
99
|
MODEL="$2"
|
|
75
100
|
shift 2
|
|
76
101
|
;;
|
|
102
|
+
--cli)
|
|
103
|
+
CLI_OVERRIDE="$2"
|
|
104
|
+
shift 2
|
|
105
|
+
;;
|
|
106
|
+
--review-cli)
|
|
107
|
+
REVIEW_CLI_OVERRIDE="$2"
|
|
108
|
+
shift 2
|
|
109
|
+
;;
|
|
77
110
|
--review-mode)
|
|
78
111
|
REVIEW_MODE="$2"
|
|
79
112
|
shift 2
|
|
@@ -117,9 +150,168 @@ if [ "$REVIEW_MODE" != "manual" ] && [ "$REVIEW_MODE" != "auto" ] && [ "$REVIEW_
|
|
|
117
150
|
exit 1
|
|
118
151
|
fi
|
|
119
152
|
|
|
120
|
-
#
|
|
121
|
-
|
|
122
|
-
|
|
153
|
+
# Resolve the coding and review CLIs.
# Precedence: command-line flag (--cli / --review-cli) > value loaded from
# ralph.config.cjs > built-in default.
CODING_CLI="${CLI_OVERRIDE:-$DEFAULT_CODING_CLI}"
REVIEW_CLI="${REVIEW_CLI_OVERRIDE:-${DEFAULT_REVIEW_CLI:-$CODING_CLI}}"
# Lower-case the flag so values such as "True" or "FALSE" still validate.
DISABLE_MCP_IN_AUTOMATED_NORM=$(printf '%s\n' "$DISABLE_MCP_IN_AUTOMATED" | tr '[:upper:]' '[:lower:]')

# Validate CLI values. The value may come from the flag or from the config
# file, so the error message names both possible sources.
if [ "$CODING_CLI" != "claude" ] && [ "$CODING_CLI" != "codex" ]; then
  echo "ERROR: Invalid coding CLI '$CODING_CLI' (from --cli or loop.codingCli). Allowed values are 'claude' or 'codex'." >&2
  exit 1
fi

if [ "$REVIEW_CLI" != "claude" ] && [ "$REVIEW_CLI" != "codex" ]; then
  echo "ERROR: Invalid review CLI '$REVIEW_CLI' (from --review-cli, loop.reviewCli or loop.codingCli). Allowed values are 'claude' or 'codex'." >&2
  exit 1
fi
|
|
168
|
+
|
|
169
|
+
# Check whether a value is an accepted Claude permission mode.
# Arguments: $1 - candidate permission mode
# Returns:   0 when the value is in the accepted list, 1 otherwise
is_valid_claude_permission_mode() {
  case "$1" in
    acceptEdits|bypassPermissions|default|dontAsk|plan|auto) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
175
|
+
|
|
176
|
+
# Check whether a value is an accepted Codex sandbox setting.
# Arguments: $1 - candidate sandbox name
# Returns:   0 when the value is in the accepted list, 1 otherwise
is_valid_codex_sandbox() {
  case "$1" in
    read-only|workspace-write|danger-full-access) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
182
|
+
|
|
183
|
+
# Check whether a value is an accepted Codex approval policy.
# Arguments: $1 - candidate approval policy
# Returns:   0 when the value is in the accepted list, 1 otherwise
is_valid_codex_approval_policy() {
  case "$1" in
    untrusted|on-failure|on-request|never) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
189
|
+
|
|
190
|
+
# Fail fast on loop settings that are not in the accepted lists, before any
# CLI command line is assembled from them.
if ! is_valid_claude_permission_mode "$CLAUDE_PERMISSION_MODE"; then
  echo "ERROR: Invalid loop.claudePermissionMode '$CLAUDE_PERMISSION_MODE' in ralph.config.cjs." >&2
  exit 1
fi

if ! is_valid_codex_sandbox "$CODEX_SANDBOX"; then
  echo "ERROR: Invalid loop.codexSandbox '$CODEX_SANDBOX' in ralph.config.cjs." >&2
  exit 1
fi

if ! is_valid_codex_approval_policy "$CODEX_APPROVAL_POLICY"; then
  echo "ERROR: Invalid loop.codexApprovalPolicy '$CODEX_APPROVAL_POLICY' in ralph.config.cjs." >&2
  exit 1
fi

# The normalized (lower-cased) value is checked; the message echoes the raw
# value so the user sees exactly what was configured.
case "$DISABLE_MCP_IN_AUTOMATED_NORM" in
  true|false) ;;
  *)
    echo "ERROR: Invalid loop.disableMcpInAutomatedRuns '$DISABLE_MCP_IN_AUTOMATED' in ralph.config.cjs. Use true or false." >&2
    exit 1
    ;;
esac
|
|
212
|
+
|
|
213
|
+
# Tell whether a model name is Claude-specific: one of the short aliases
# sonnet/opus/haiku, or any name starting with "claude-".
# Arguments: $1 - model name
# Returns:   0 when the name is Claude-specific, 1 otherwise
is_claude_only_model() {
  local candidate="$1"
  case "$candidate" in
    sonnet|opus|haiku|claude-*) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
220
|
+
|
|
221
|
+
# Pick the model name to hand to Codex.
# Globals:  MODEL (read) - user override, may be empty
#           DEFAULT_CODEX_MODEL (read) - fallback Codex model
# Outputs:  the chosen model name on stdout; a Claude-specific override is
#           replaced by DEFAULT_CODEX_MODEL
resolve_codex_model() {
  local candidate="${MODEL:-$DEFAULT_CODEX_MODEL}"
  if is_claude_only_model "$candidate"; then
    candidate="$DEFAULT_CODEX_MODEL"
  fi
  # printf rather than echo: echo would swallow a value such as "-n".
  printf '%s\n' "$candidate"
}
|
|
229
|
+
|
|
230
|
+
# Warn when an explicit --model is Claude-specific but at least one phase runs
# on Codex: those phases will not honor it and use DEFAULT_CODEX_MODEL instead.
if [ -n "$MODEL" ] && { [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; }; then
  if is_claude_only_model "$MODEL"; then
    echo "WARNING: --model '$MODEL' is Claude-specific. Codex phases will use '$DEFAULT_CODEX_MODEL'." >&2
  fi
fi
|
|
235
|
+
|
|
236
|
+
# Build the command line for one CLI as a single whitespace-separated string.
# Globals:   CLAUDE_PERMISSION_MODE, CODEX_APPROVAL_POLICY, CODEX_SANDBOX,
#            DISABLE_MCP_IN_AUTOMATED_NORM, RALPH_AUTOMATED (all read)
# Arguments: $1 - CLI name: 'claude' or 'codex'
#            $2 - model name
# Outputs:   the command string on stdout; errors on stderr
# Returns:   0 on success, 1 for an invalid model or unsupported CLI
build_cli_cmd() {
  local cli="$1"
  local model="$2"
  local codex_extra=""

  # The result is later split on whitespace, so the model must be one safe token.
  if [[ ! "$model" =~ ^[A-Za-z0-9._:/=-]+$ ]]; then
    echo "ERROR: Invalid model value '$model'. Only alphanumeric, dot, underscore, colon, slash, equals and hyphen are allowed." >&2
    return 1
  fi

  case "$cli" in
    claude)
      printf '%s\n' "claude -p --output-format json --permission-mode ${CLAUDE_PERMISSION_MODE} --model ${model}"
      ;;
    codex)
      # Avoid MCP startup deadlocks in unattended loop runs.
      if [ "${RALPH_AUTOMATED:-}" = "1" ] && [ "$DISABLE_MCP_IN_AUTOMATED_NORM" = "true" ]; then
        codex_extra=" -c mcp_servers={}"
      fi
      # Keep command string token-safe. run_claude_* parses it as an argument array.
      printf '%s\n' "codex --ask-for-approval ${CODEX_APPROVAL_POLICY} --sandbox ${CODEX_SANDBOX} exec --cd . --model ${model}${codex_extra}"
      ;;
    *)
      echo "ERROR: Unsupported CLI '$cli'" >&2
      return 1
      ;;
  esac
}
|
|
262
|
+
|
|
263
|
+
# Select the CLI for a loop phase.
# Globals:   REVIEW_CLI, CODING_CLI (read)
# Arguments: $1 - phase name
# Outputs:   REVIEW_CLI for the 'review' phase, CODING_CLI for any other phase
get_phase_cli() {
  local phase="$1"
  case "$phase" in
    review)
      printf '%s\n' "$REVIEW_CLI"
      ;;
    *)
      printf '%s\n' "$CODING_CLI"
      ;;
  esac
}
|
|
274
|
+
|
|
275
|
+
# Select the model for a loop phase.
# Globals:   PLANNING_MODEL, MODEL, DEFAULT_MODEL (read)
# Arguments: $1 - phase name
# Outputs:   model name on stdout
#   - phase runs on Codex: whatever resolve_codex_model prints
#   - 'planning' or 'review' on Claude: PLANNING_MODEL
#   - any other phase on Claude: MODEL, or DEFAULT_MODEL when MODEL is empty
get_phase_model() {
  local phase="$1"
  local cli
  cli=$(get_phase_cli "$phase")
  if [ "$cli" = "codex" ]; then
    resolve_codex_model
    return
  fi

  case "$phase" in
    planning|review)
      printf '%s\n' "$PLANNING_MODEL"
      ;;
    *)
      printf '%s\n' "${MODEL:-$DEFAULT_MODEL}"
      ;;
  esac
}
|
|
293
|
+
|
|
294
|
+
# Build the full command string for a loop phase by combining the phase's CLI
# and model.
# Arguments: $1 - phase name
# Outputs:   whatever build_cli_cmd prints for that CLI/model pair
# Returns:   non-zero when any lookup or build_cli_cmd fails
get_phase_cmd() {
  local phase="$1"
  local cli
  local model
  # Propagate failures explicitly instead of relying on the caller's set -e.
  cli=$(get_phase_cli "$phase") || return
  model=$(get_phase_model "$phase") || return
  build_cli_cmd "$cli" "$model"
}
|
|
302
|
+
|
|
303
|
+
# Abort the script when the requested CLI executable cannot be found.
# Arguments: $1 - CLI name ('claude' and 'codex' have install hints)
# Outputs:   an error with an install hint on stderr when the CLI is missing
# Returns:   0 when the CLI is found; otherwise exits the shell with status 1
check_cli_binary() {
  local cli="$1"
  local install_hint=""
  case "$cli" in
    claude) install_hint="npm install -g @anthropic-ai/claude-code" ;;
    codex) install_hint="npm install -g @openai/codex" ;;
  esac
  if ! command -v "$cli" >/dev/null 2>&1; then
    echo "ERROR: ${cli} CLI not found. Install with: ${install_hint}" >&2
    exit 1
  fi
}
|
|
123
315
|
|
|
124
316
|
# Automation footer appended to every prompt in automated mode.
|
|
125
317
|
# Prevents interactive skill prompts from blocking headless sessions.
|
|
@@ -141,22 +333,69 @@ This is a fully automated session with no human operator. You MUST:
|
|
|
141
333
|
'
|
|
142
334
|
fi
|
|
143
335
|
|
|
144
|
-
# Helper: pipe prompt with automation footer to
|
|
336
|
+
# Helper: pipe prompt with automation footer to selected CLI command.
# Globals:   AUTOMATION_FOOTER (read), APP_DIR (read, codex only),
#            LAST_MESSAGE_FILE (read, codex only), LAST_RUN_CLI (written)
# Arguments: $1 - prompt file; its content is passed through envsubst
#            $2 - command string, whitespace-separated (see build_cli_cmd)
# Returns:   exit status of the CLI pipeline
run_claude_prompt() {
  local prompt_file="$1"
  local claude_cmd="$2"
  local -a cmd_parts=()
  # Split the command string into an argument array; the first word names the CLI.
  read -r -a cmd_parts <<< "$claude_cmd"
  if [[ "${cmd_parts[0]:-}" == "codex" ]]; then
    LAST_RUN_CLI="codex"
    # Codex runs from APP_DIR and reads the prompt from stdin ('-').
    { envsubst < "$prompt_file"; printf '%s\n' "$AUTOMATION_FOOTER"; } \
      | (cd "$APP_DIR" && "${cmd_parts[@]}" --json --output-last-message "$LAST_MESSAGE_FILE" -)
  else
    LAST_RUN_CLI="claude"
    { envsubst < "$prompt_file"; printf '%s\n' "$AUTOMATION_FOOTER"; } | "${cmd_parts[@]}"
  fi
}
|
|
350
|
+
|
|
351
|
+
# Helper: resume an existing session with a short continuation prompt.
# Globals:   AUTOMATION_FOOTER (read), APP_DIR (read, codex only),
#            LAST_MESSAGE_FILE (read, codex only), LAST_RUN_CLI (written)
# Arguments: $1 - session id; must match ^[A-Za-z0-9._:-]+$
#            $2 - continuation prompt text
#            $3 - command string, whitespace-separated (see build_cli_cmd)
# Returns:   1 when the session id is unsafe or the resume arguments cannot be
#            injected into the command; otherwise the CLI pipeline's status
run_claude_resume() {
  local session_id="$1"
  local continuation_prompt="$2"
  local claude_cmd="$3"
  local resume_cmd
  local -a cmd_parts=()
  local -a resume_parts=()

  if [[ ! "$session_id" =~ ^[A-Za-z0-9._:-]+$ ]]; then
    echo "WARNING: Refusing to resume with unsafe session id '$session_id'" >&2
    return 1
  fi

  # Detect the CLI from the first word, the same way run_claude_prompt does.
  read -r -a cmd_parts <<< "$claude_cmd"
  if [[ "${cmd_parts[0]:-}" == "codex" ]]; then
    LAST_RUN_CLI="codex"
    resume_cmd="${claude_cmd/ exec / exec resume }"
    if [ "$resume_cmd" = "$claude_cmd" ]; then
      echo "WARNING: codex resume injection failed, exec segment not found in command" >&2
      return 1
    fi
    # codex exec resume does not accept -C/--cd; resume from APP_DIR instead.
    resume_cmd="${resume_cmd/ --cd ./}"
    resume_cmd="${resume_cmd/ -C ./}"
    read -r -a resume_parts <<< "$resume_cmd"
    # printf rather than echo: a prompt starting with -n/-e must pass through intact.
    { printf '%s\n' "$continuation_prompt"; printf '%s\n' "$AUTOMATION_FOOTER"; } \
      | (cd "$APP_DIR" && "${resume_parts[@]}" "$session_id" - --json --output-last-message "$LAST_MESSAGE_FILE")
  else
    LAST_RUN_CLI="claude"
    # Insert --resume before the -p flag.
    resume_cmd="${claude_cmd/ -p / --resume ${session_id} -p }"
    if [ "$resume_cmd" = "$claude_cmd" ]; then
      echo "WARNING: --resume injection failed, -p flag not found in command" >&2
      return 1
    fi
    read -r -a resume_parts <<< "$resume_cmd"
    { printf '%s\n' "$continuation_prompt"; printf '%s\n' "$AUTOMATION_FOOTER"; } | "${resume_parts[@]}"
  fi
}
|
|
150
386
|
|
|
151
387
|
# Token tracking and per-run scratch files.
# Every path is keyed by the first positional argument ($1), presumably the
# feature name from the usage line - TODO confirm against the argument parser.
TOKENS_FILE="/tmp/ralph-loop-${1}.tokens"
CLAUDE_OUTPUT="/tmp/ralph-loop-${1}.output"
# Passed to codex as --output-last-message.
LAST_MESSAGE_FILE="/tmp/ralph-loop-${1}.last-message"
STATUS_FILE="/tmp/ralph-loop-${1}.status"
FINAL_STATUS_FILE="/tmp/ralph-loop-${1}.final"
PHASES_FILE="/tmp/ralph-loop-${1}.phases"
BASELINE_FILE="/tmp/ralph-loop-${1}.baseline"
# Written by capture_pre_run_dirty_snapshot: <path>\t<marker> per dirty file.
PRE_RUN_DIRTY_FILE="/tmp/ralph-loop-${1}.dirty"
SESSIONS_FILE="/tmp/ralph-loop-${1}.sessions"
LOG_FILE="/tmp/ralph-loop-${1}.log"
# Name of the CLI used by the most recent run_claude_* call ('claude' or 'codex').
LAST_RUN_CLI=""
|
|
160
399
|
|
|
161
400
|
# Initialize token tracking (4-field format: input|output|cache_create|cache_read)
|
|
162
401
|
init_tokens() {
|
|
@@ -165,15 +404,85 @@ init_tokens() {
|
|
|
165
404
|
> "$LOG_FILE"
|
|
166
405
|
}
|
|
167
406
|
|
|
168
|
-
# Extract session result from
|
|
407
|
+
# Extract session result from command output.
|
|
169
408
|
# Writes human-readable result text to the .log file and captures session_id.
|
|
170
|
-
# Usage: extract_session_result <
|
|
409
|
+
# Usage: extract_session_result <raw_file> [cli]
|
|
171
410
|
# Sets: LAST_SESSION_ID variable
|
|
172
411
|
extract_session_result() {
|
|
173
412
|
local raw_file="$1"
|
|
413
|
+
local cli="${2:-$LAST_RUN_CLI}"
|
|
174
414
|
LAST_SESSION_ID=""
|
|
175
415
|
if [ ! -f "$raw_file" ]; then return; fi
|
|
176
416
|
|
|
417
|
+
if [ "$cli" = "codex" ]; then
|
|
418
|
+
local result
|
|
419
|
+
result=$(python3 -c "
|
|
420
|
+
import json, sys
|
|
421
|
+
session = ''
|
|
422
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
423
|
+
line = line.strip()
|
|
424
|
+
if not line:
|
|
425
|
+
continue
|
|
426
|
+
try:
|
|
427
|
+
obj = json.loads(line)
|
|
428
|
+
except Exception:
|
|
429
|
+
continue
|
|
430
|
+
stack = [obj]
|
|
431
|
+
while stack:
|
|
432
|
+
cur = stack.pop()
|
|
433
|
+
if isinstance(cur, dict):
|
|
434
|
+
for key in ('session_id', 'sessionId', 'conversation_id', 'conversationId', 'thread_id', 'threadId', 'response_id', 'responseId', 'run_id', 'runId'):
|
|
435
|
+
val = cur.get(key)
|
|
436
|
+
if isinstance(val, str) and val:
|
|
437
|
+
session = val
|
|
438
|
+
# Newer Codex JSON can nest thread/session identifiers under typed objects.
|
|
439
|
+
node_type = cur.get('type')
|
|
440
|
+
if node_type in ('thread.started', 'session.started'):
|
|
441
|
+
val = cur.get('id')
|
|
442
|
+
if isinstance(val, str) and val:
|
|
443
|
+
session = val
|
|
444
|
+
thread_obj = cur.get('thread')
|
|
445
|
+
if isinstance(thread_obj, dict):
|
|
446
|
+
val = thread_obj.get('id')
|
|
447
|
+
if isinstance(val, str) and val:
|
|
448
|
+
session = val
|
|
449
|
+
for val in cur.values():
|
|
450
|
+
if isinstance(val, (dict, list)):
|
|
451
|
+
stack.append(val)
|
|
452
|
+
elif isinstance(cur, list):
|
|
453
|
+
for val in cur:
|
|
454
|
+
if isinstance(val, (dict, list)):
|
|
455
|
+
stack.append(val)
|
|
456
|
+
print(session)
|
|
457
|
+
" "$raw_file" 2>/dev/null) || true
|
|
458
|
+
|
|
459
|
+
LAST_SESSION_ID="$result"
|
|
460
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
461
|
+
echo "$LAST_SESSION_ID" >> "$SESSIONS_FILE"
|
|
462
|
+
fi
|
|
463
|
+
|
|
464
|
+
if [ -f "$LAST_MESSAGE_FILE" ]; then
|
|
465
|
+
cat "$LAST_MESSAGE_FILE" >> "$LOG_FILE" 2>/dev/null || true
|
|
466
|
+
echo "" >> "$LOG_FILE"
|
|
467
|
+
else
|
|
468
|
+
python3 -c "
|
|
469
|
+
import json, sys
|
|
470
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
471
|
+
try:
|
|
472
|
+
obj = json.loads(line)
|
|
473
|
+
except Exception:
|
|
474
|
+
continue
|
|
475
|
+
if not isinstance(obj, dict):
|
|
476
|
+
continue
|
|
477
|
+
for key in ('output_text', 'text', 'content'):
|
|
478
|
+
val = obj.get(key)
|
|
479
|
+
if isinstance(val, str) and val.strip():
|
|
480
|
+
print(val.strip())
|
|
481
|
+
" "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
|
|
482
|
+
fi
|
|
483
|
+
return
|
|
484
|
+
fi
|
|
485
|
+
|
|
177
486
|
local result
|
|
178
487
|
result=$(python3 -c "
|
|
179
488
|
import json, sys
|
|
@@ -210,29 +519,109 @@ except Exception:
|
|
|
210
519
|
" "$raw_file" >> "$LOG_FILE" 2>/dev/null || true
|
|
211
520
|
}
|
|
212
521
|
|
|
213
|
-
# Accumulate tokens
|
|
214
|
-
# Usage: accumulate_tokens_from_session <session_id>
|
|
522
|
+
# Accumulate tokens into the .tokens file.
|
|
523
|
+
# Usage: accumulate_tokens_from_session <session_id> [raw_file] [cli]
|
|
215
524
|
accumulate_tokens_from_session() {
|
|
216
525
|
local session_id="$1"
|
|
217
|
-
|
|
526
|
+
local raw_file="${2:-}"
|
|
527
|
+
local cli="${3:-$LAST_RUN_CLI}"
|
|
218
528
|
|
|
219
|
-
|
|
220
|
-
local
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
529
|
+
local s_input=0
|
|
530
|
+
local s_output=0
|
|
531
|
+
local s_cache_create=0
|
|
532
|
+
local s_cache_read=0
|
|
533
|
+
|
|
534
|
+
if [ "$cli" = "codex" ]; then
|
|
535
|
+
if [ -z "$raw_file" ] || [ ! -f "$raw_file" ]; then
|
|
536
|
+
return
|
|
225
537
|
fi
|
|
226
|
-
done
|
|
227
538
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
539
|
+
local session_tokens
|
|
540
|
+
session_tokens=$(python3 -c "
|
|
541
|
+
import json, sys
|
|
542
|
+
|
|
543
|
+
def to_int(v):
|
|
544
|
+
try:
|
|
545
|
+
return int(v)
|
|
546
|
+
except Exception:
|
|
547
|
+
return 0
|
|
548
|
+
|
|
549
|
+
def usage_pair(usage):
|
|
550
|
+
input_tokens = (
|
|
551
|
+
to_int(usage.get('input_tokens'))
|
|
552
|
+
or to_int(usage.get('inputTokens'))
|
|
553
|
+
or to_int(usage.get('prompt_tokens'))
|
|
554
|
+
or to_int(usage.get('promptTokens'))
|
|
555
|
+
)
|
|
556
|
+
output_tokens = (
|
|
557
|
+
to_int(usage.get('output_tokens'))
|
|
558
|
+
or to_int(usage.get('outputTokens'))
|
|
559
|
+
or to_int(usage.get('completion_tokens'))
|
|
560
|
+
or to_int(usage.get('completionTokens'))
|
|
561
|
+
)
|
|
562
|
+
return input_tokens, output_tokens
|
|
563
|
+
|
|
564
|
+
# Codex JSONL often contains repeated/cumulative usage in multiple events.
|
|
565
|
+
# Use the highest observed values from a single run to avoid overcounting.
|
|
566
|
+
max_input = 0
|
|
567
|
+
max_output = 0
|
|
568
|
+
|
|
569
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
570
|
+
line = line.strip()
|
|
571
|
+
if not line:
|
|
572
|
+
continue
|
|
573
|
+
try:
|
|
574
|
+
obj = json.loads(line)
|
|
575
|
+
except Exception:
|
|
576
|
+
continue
|
|
577
|
+
stack = [obj]
|
|
578
|
+
while stack:
|
|
579
|
+
cur = stack.pop()
|
|
580
|
+
if isinstance(cur, dict):
|
|
581
|
+
if 'usage' in cur and isinstance(cur['usage'], dict):
|
|
582
|
+
usage = cur['usage']
|
|
583
|
+
u_in, u_out = usage_pair(usage)
|
|
584
|
+
if u_in > max_input:
|
|
585
|
+
max_input = u_in
|
|
586
|
+
if u_out > max_output:
|
|
587
|
+
max_output = u_out
|
|
588
|
+
for val in cur.values():
|
|
589
|
+
if isinstance(val, (dict, list)):
|
|
590
|
+
stack.append(val)
|
|
591
|
+
elif isinstance(cur, list):
|
|
592
|
+
for val in cur:
|
|
593
|
+
if isinstance(val, (dict, list)):
|
|
594
|
+
stack.append(val)
|
|
595
|
+
|
|
596
|
+
print(f\"{max_input}|{max_output}|0|0\")
|
|
597
|
+
" "$raw_file" 2>/dev/null) || true
|
|
598
|
+
|
|
599
|
+
if [ -n "$session_tokens" ]; then
|
|
600
|
+
s_input=$(echo "$session_tokens" | cut -d'|' -f1)
|
|
601
|
+
s_output=$(echo "$session_tokens" | cut -d'|' -f2)
|
|
602
|
+
s_cache_create=0
|
|
603
|
+
s_cache_read=0
|
|
604
|
+
fi
|
|
605
|
+
else
|
|
606
|
+
if [ -z "$session_id" ]; then return; fi
|
|
607
|
+
|
|
608
|
+
# Find the JSONL file for this session
|
|
609
|
+
local jsonl_file=""
|
|
610
|
+
for f in ~/.claude/projects/*/"${session_id}.jsonl"; do
|
|
611
|
+
if [ -f "$f" ]; then
|
|
612
|
+
jsonl_file="$f"
|
|
613
|
+
break
|
|
614
|
+
fi
|
|
615
|
+
done
|
|
616
|
+
|
|
617
|
+
if [ -z "$jsonl_file" ]; then
|
|
618
|
+
echo "WARNING: Could not find JSONL for session $session_id" >&2
|
|
619
|
+
return
|
|
620
|
+
fi
|
|
232
621
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
622
|
+
# Extract and sum token usage from all assistant messages
|
|
623
|
+
local session_tokens
|
|
624
|
+
session_tokens=$(python3 -c "
|
|
236
625
|
import json, sys
|
|
237
626
|
totals = {'input': 0, 'output': 0, 'cache_create': 0, 'cache_read': 0}
|
|
238
627
|
for line in open(sys.argv[1]):
|
|
@@ -252,14 +641,19 @@ for line in open(sys.argv[1]):
|
|
|
252
641
|
print(f\"{totals['input']}|{totals['output']}|{totals['cache_create']}|{totals['cache_read']}\")
|
|
253
642
|
" "$jsonl_file" 2>/dev/null) || true
|
|
254
643
|
|
|
255
|
-
|
|
644
|
+
if [ -z "$session_tokens" ]; then return; fi
|
|
256
645
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
646
|
+
# Parse session tokens
|
|
647
|
+
s_input=$(echo "$session_tokens" | cut -d'|' -f1)
|
|
648
|
+
s_output=$(echo "$session_tokens" | cut -d'|' -f2)
|
|
649
|
+
s_cache_create=$(echo "$session_tokens" | cut -d'|' -f3)
|
|
650
|
+
s_cache_read=$(echo "$session_tokens" | cut -d'|' -f4)
|
|
651
|
+
fi
|
|
652
|
+
|
|
653
|
+
[[ "$s_input" =~ ^[0-9]+$ ]] || s_input=0
|
|
654
|
+
[[ "$s_output" =~ ^[0-9]+$ ]] || s_output=0
|
|
655
|
+
[[ "$s_cache_create" =~ ^[0-9]+$ ]] || s_cache_create=0
|
|
656
|
+
[[ "$s_cache_read" =~ ^[0-9]+$ ]] || s_cache_read=0
|
|
263
657
|
|
|
264
658
|
# Read current totals
|
|
265
659
|
local current c_input c_output c_cache_create c_cache_read
|
|
@@ -396,7 +790,51 @@ detect_plan_format() {
|
|
|
396
790
|
echo "unknown"
|
|
397
791
|
}
|
|
398
792
|
|
|
399
|
-
#
|
|
793
|
+
# Snapshot tracked dirty files before loop execution starts.
# Each entry stores <path>\t<working-tree blob hash or __MISSING__ marker>.
# Globals:   PRE_RUN_DIRTY_FILE (read) - snapshot file; always truncated first
# Arguments: $1 - baseline commit; when empty the snapshot is left empty
# Returns:   always 0; a failure of the embedded python3 script is ignored
capture_pre_run_dirty_snapshot() {
  local baseline="$1"
  : > "$PRE_RUN_DIRTY_FILE"
  if [ -z "$baseline" ]; then
    return
  fi
  # Best-effort: a missing python3 or git must not abort the loop.
  python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || true
import os
import subprocess
import sys

baseline = sys.argv[1]
output_path = sys.argv[2]

# Files whose working-tree content differs from the baseline commit.
try:
    changed = subprocess.check_output(
        ["git", "diff", "--name-only", baseline],
        stderr=subprocess.DEVNULL,
    ).splitlines()
except Exception:
    changed = []

with open(output_path, "w", encoding="utf-8", errors="surrogateescape") as fh:
    for raw_path in changed:
        if not raw_path:
            continue
        path = raw_path.decode("utf-8", errors="surrogateescape")
        # NOTE(review): the path is checked relative to the current
        # directory - presumably the repository root; confirm with callers.
        marker = "__MISSING__"
        if os.path.exists(path):
            try:
                marker = subprocess.check_output(
                    ["git", "hash-object", "--", path],
                    stderr=subprocess.DEVNULL,
                    text=True,
                ).strip()
            except Exception:
                marker = "__HASH_ERROR__"
        fh.write(f"{path}\t{marker}\n")
PY
}
|
|
835
|
+
|
|
836
|
+
# Check if any tracked files were changed since baseline (excluding unchanged
|
|
837
|
+
# tracked edits that already existed before the loop started).
|
|
400
838
|
# Counts ALL file types (code, docs, config) — not just source code.
|
|
401
839
|
count_file_changes() {
|
|
402
840
|
local baseline="$1"
|
|
@@ -405,14 +843,90 @@ count_file_changes() {
|
|
|
405
843
|
return
|
|
406
844
|
fi
|
|
407
845
|
local count
|
|
408
|
-
|
|
846
|
+
# Compare baseline commit against working tree; subtract pre-run dirty files
|
|
847
|
+
# unless that specific file changed again after loop start.
|
|
848
|
+
count=$(python3 - "$baseline" "$PRE_RUN_DIRTY_FILE" <<'PY' 2>/dev/null || echo "0"
|
|
849
|
+
import os
|
|
850
|
+
import subprocess
|
|
851
|
+
import sys
|
|
852
|
+
|
|
853
|
+
baseline = sys.argv[1]
|
|
854
|
+
snapshot_path = sys.argv[2]
|
|
855
|
+
snapshot = {}
|
|
856
|
+
|
|
857
|
+
if os.path.exists(snapshot_path):
|
|
858
|
+
with open(snapshot_path, "r", encoding="utf-8", errors="surrogateescape") as fh:
|
|
859
|
+
for line in fh:
|
|
860
|
+
line = line.rstrip("\n")
|
|
861
|
+
if not line or "\t" not in line:
|
|
862
|
+
continue
|
|
863
|
+
path, marker = line.split("\t", 1)
|
|
864
|
+
snapshot[path] = marker
|
|
865
|
+
|
|
866
|
+
try:
|
|
867
|
+
changed = subprocess.check_output(
|
|
868
|
+
["git", "diff", "--name-only", baseline],
|
|
869
|
+
stderr=subprocess.DEVNULL,
|
|
870
|
+
).splitlines()
|
|
871
|
+
except Exception:
|
|
872
|
+
print("0")
|
|
873
|
+
raise SystemExit(0)
|
|
874
|
+
|
|
875
|
+
count = 0
|
|
876
|
+
for raw_path in changed:
|
|
877
|
+
if not raw_path:
|
|
878
|
+
continue
|
|
879
|
+
path = raw_path.decode("utf-8", errors="surrogateescape")
|
|
880
|
+
start_marker = snapshot.get(path)
|
|
881
|
+
if start_marker is None:
|
|
882
|
+
count += 1
|
|
883
|
+
continue
|
|
884
|
+
|
|
885
|
+
current_marker = "__MISSING__"
|
|
886
|
+
if os.path.exists(path):
|
|
887
|
+
try:
|
|
888
|
+
current_marker = subprocess.check_output(
|
|
889
|
+
["git", "hash-object", "--", path],
|
|
890
|
+
stderr=subprocess.DEVNULL,
|
|
891
|
+
text=True,
|
|
892
|
+
).strip()
|
|
893
|
+
except Exception:
|
|
894
|
+
current_marker = "__HASH_ERROR__"
|
|
895
|
+
if current_marker != start_marker:
|
|
896
|
+
count += 1
|
|
897
|
+
|
|
898
|
+
print(count)
|
|
899
|
+
PY
|
|
900
|
+
)
|
|
409
901
|
echo "$count"
|
|
410
902
|
}
|
|
411
903
|
|
|
412
|
-
# Extract review findings text from
|
|
904
|
+
# Extract review findings text from command output.
|
|
413
905
|
# Returns the result text from the last result entry.
|
|
414
906
|
extract_review_findings() {
|
|
415
907
|
local raw_file="$1"
|
|
908
|
+
local cli="${2:-$LAST_RUN_CLI}"
|
|
909
|
+
if [ "$cli" = "codex" ]; then
|
|
910
|
+
if [ -f "$LAST_MESSAGE_FILE" ]; then
|
|
911
|
+
cat "$LAST_MESSAGE_FILE" 2>/dev/null || echo "No review output available"
|
|
912
|
+
return
|
|
913
|
+
fi
|
|
914
|
+
python3 -c "
|
|
915
|
+
import json, sys
|
|
916
|
+
for line in open(sys.argv[1], encoding='utf-8', errors='ignore'):
|
|
917
|
+
try:
|
|
918
|
+
obj = json.loads(line)
|
|
919
|
+
except Exception:
|
|
920
|
+
continue
|
|
921
|
+
if isinstance(obj, dict):
|
|
922
|
+
for key in ('output_text', 'text', 'content'):
|
|
923
|
+
val = obj.get(key)
|
|
924
|
+
if isinstance(val, str) and val.strip():
|
|
925
|
+
print(val.strip())
|
|
926
|
+
" "$raw_file" 2>/dev/null || echo "No review output available"
|
|
927
|
+
return
|
|
928
|
+
fi
|
|
929
|
+
|
|
416
930
|
python3 -c "
|
|
417
931
|
import json, sys
|
|
418
932
|
try:
|
|
@@ -428,11 +942,16 @@ except Exception:
|
|
|
428
942
|
}
|
|
429
943
|
|
|
430
944
|
# Run a fix iteration based on code review findings.
|
|
431
|
-
# Pipes the review output into
|
|
945
|
+
# Pipes the review output into the implementation CLI for targeted fixes.
|
|
432
946
|
run_review_fix() {
|
|
433
947
|
local findings
|
|
948
|
+
local impl_cli="$CODING_CLI"
|
|
434
949
|
findings=$(extract_review_findings "${CLAUDE_OUTPUT}.raw")
|
|
435
|
-
|
|
950
|
+
local -a impl_cmd_parts=()
|
|
951
|
+
read -r -a impl_cmd_parts <<< "$IMPL_CMD"
|
|
952
|
+
if [ "$impl_cli" = "codex" ]; then
|
|
953
|
+
LAST_RUN_CLI="codex"
|
|
954
|
+
cat <<FIXEOF | "${impl_cmd_parts[@]}" --json --output-last-message "$LAST_MESSAGE_FILE" - 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
436
955
|
## Code Review Findings
|
|
437
956
|
|
|
438
957
|
The following issues were found during code review:
|
|
@@ -447,8 +966,51 @@ Fix each issue listed above. Run git diff $DEFAULT_BRANCH to see the current cha
|
|
|
447
966
|
3. Commit and push the fixes
|
|
448
967
|
Do NOT propose completion options or ask interactive questions. Just fix, test, commit, push.
|
|
449
968
|
FIXEOF
|
|
450
|
-
|
|
451
|
-
|
|
969
|
+
else
|
|
970
|
+
LAST_RUN_CLI="claude"
|
|
971
|
+
cat <<FIXEOF | "${impl_cmd_parts[@]}" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
972
|
+
## Code Review Findings
|
|
973
|
+
|
|
974
|
+
The following issues were found during code review:
|
|
975
|
+
|
|
976
|
+
${findings}
|
|
977
|
+
|
|
978
|
+
## Task
|
|
979
|
+
|
|
980
|
+
Fix each issue listed above. Run git diff $DEFAULT_BRANCH to see the current changes, then:
|
|
981
|
+
1. Fix each issue referenced in the review
|
|
982
|
+
2. Run tests to verify fixes
|
|
983
|
+
3. Commit and push the fixes
|
|
984
|
+
Do NOT propose completion options or ask interactive questions. Just fix, test, commit, push.
|
|
985
|
+
FIXEOF
|
|
986
|
+
fi
|
|
987
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$impl_cli"
|
|
988
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$impl_cli"
|
|
989
|
+
}
|
|
990
|
+
|
|
991
|
+
# Decide whether a configured command string is a plain "program + arguments"
# line, i.e. contains no shell syntax.
# Arguments: $1 - the command string to inspect
# Returns:   0 when the string is acceptable; 1 when it contains any of
#            ; & | < > ` $ ( ) or any control character.
validate_simple_command() {
  local candidate="$1"
  # A single bracket expression covers the shell operators and [:cntrl:];
  # keeping the regex in a variable avoids per-character escaping.
  local rejected='[;&|<>`$()[:cntrl:]]'

  if [[ "$candidate" =~ $rejected ]]; then
    return 1
  fi
  return 0
}
|
|
1001
|
+
|
|
1002
|
+
# Run the configured test command from the application directory.
# Globals:   TEST_COMMAND (read) - command line to execute
#            APP_DIR (read)      - directory the command is run from
# Outputs:   whatever the test command prints; diagnostics go to stderr
# Returns:   2 when TEST_COMMAND is rejected by validate_simple_command or
#            splits into zero words; otherwise the status of the subshell
#            that changes directory and runs the command.
run_test_command() {
  validate_simple_command "$TEST_COMMAND" || {
    echo "ERROR: commands.test contains unsupported shell operators. Use a plain command and arguments." >&2
    return 2
  }

  # Split on whitespace into an argv array; the command is never passed to
  # eval or a nested shell.
  local -a argv=()
  read -r -a argv <<< "$TEST_COMMAND"

  if [ "${#argv[@]}" -eq 0 ]; then
    echo "ERROR: commands.test resolved to an empty command." >&2
    return 2
  fi

  # The subshell keeps the directory change from leaking into the caller.
  (
    cd "$APP_DIR" &&
      "${argv[@]}"
  )
}
|
|
453
1015
|
|
|
454
1016
|
# Normalize test failure lines: extract test name, strip timing, deduplicate.
|
|
@@ -461,7 +1023,7 @@ normalize_test_failures() {
|
|
|
461
1023
|
# Returns 0 if tests pass OR all failures are pre-existing (captured at baseline).
|
|
462
1024
|
check_tests_pass_or_baseline() {
|
|
463
1025
|
local test_output
|
|
464
|
-
test_output=$(
|
|
1026
|
+
test_output=$(run_test_command 2>&1)
|
|
465
1027
|
local exit_code=$?
|
|
466
1028
|
|
|
467
1029
|
if [ $exit_code -eq 0 ]; then
|
|
@@ -523,7 +1085,7 @@ write_phase_end() {
|
|
|
523
1085
|
# Initialize phase tracking
|
|
524
1086
|
> "$PHASES_FILE"
|
|
525
1087
|
|
|
526
|
-
FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL]}"
|
|
1088
|
+
FEATURE="${1:?Usage: ./feature-loop.sh <feature-name> [max-iterations] [max-e2e-attempts] [--worktree] [--resume] [--model MODEL] [--cli CLI] [--review-cli CLI] [--review-mode MODE]}"
|
|
527
1089
|
# Sanitize feature name to prevent path traversal and shell injection when used in temp file paths
|
|
528
1090
|
if [[ ! "$FEATURE" =~ ^[a-zA-Z0-9][a-zA-Z0-9_-]*$ ]]; then
|
|
529
1091
|
echo "ERROR: Feature name must start with alphanumeric and contain only letters, numbers, hyphens, and underscores." >&2
|
|
@@ -538,6 +1100,15 @@ SPEC_FILE="$SPEC_DIR/${FEATURE}.md"
|
|
|
538
1100
|
PLAN_FILE="$SPEC_DIR/${FEATURE}-implementation-plan.md"
|
|
539
1101
|
BRANCH="feat/${FEATURE}"
|
|
540
1102
|
APP_DIR="$(pwd)"
|
|
1103
|
+
PLANNING_CMD=$(get_phase_cmd "planning")
|
|
1104
|
+
IMPL_CMD=$(get_phase_cmd "implementation")
|
|
1105
|
+
REVIEW_CMD=$(get_phase_cmd "review")
|
|
1106
|
+
|
|
1107
|
+
# Fail fast if required CLIs are not installed.
|
|
1108
|
+
check_cli_binary "$CODING_CLI"
|
|
1109
|
+
if [ "$REVIEW_CLI" != "$CODING_CLI" ]; then
|
|
1110
|
+
check_cli_binary "$REVIEW_CLI"
|
|
1111
|
+
fi
|
|
541
1112
|
|
|
542
1113
|
echo "=========================================="
|
|
543
1114
|
echo "Ralph Loop: $FEATURE"
|
|
@@ -547,9 +1118,26 @@ echo "Branch: $BRANCH"
|
|
|
547
1118
|
echo "App dir: $APP_DIR"
|
|
548
1119
|
echo "Worktree mode: $USE_WORKTREE"
|
|
549
1120
|
echo "Resume mode: $RESUME"
|
|
1121
|
+
echo "Coding CLI (impl/e2e): $CODING_CLI"
|
|
1122
|
+
echo "Review CLI: $REVIEW_CLI"
|
|
550
1123
|
echo "Review mode: $REVIEW_MODE"
|
|
551
|
-
echo "
|
|
552
|
-
echo "
|
|
1124
|
+
echo "Claude permission mode: $CLAUDE_PERMISSION_MODE"
|
|
1125
|
+
echo "Codex sandbox: $CODEX_SANDBOX"
|
|
1126
|
+
echo "Codex approval policy: $CODEX_APPROVAL_POLICY"
|
|
1127
|
+
if [ "${RALPH_AUTOMATED:-}" = "1" ]; then
|
|
1128
|
+
echo "Disable MCP in automated runs: $DISABLE_MCP_IN_AUTOMATED_NORM"
|
|
1129
|
+
fi
|
|
1130
|
+
if [ "$CODING_CLI" = "codex" ] && [ "$REVIEW_CLI" = "codex" ]; then
|
|
1131
|
+
echo "Model (all phases): $(resolve_codex_model)"
|
|
1132
|
+
else
|
|
1133
|
+
if [ "$CODING_CLI" = "claude" ] || [ "$REVIEW_CLI" = "claude" ]; then
|
|
1134
|
+
echo "Model (Claude planning/review): $PLANNING_MODEL"
|
|
1135
|
+
echo "Model (Claude impl/e2e): ${MODEL:-$DEFAULT_MODEL}"
|
|
1136
|
+
fi
|
|
1137
|
+
if [ "$CODING_CLI" = "codex" ] || [ "$REVIEW_CLI" = "codex" ]; then
|
|
1138
|
+
echo "Model (Codex phases): $(resolve_codex_model)"
|
|
1139
|
+
fi
|
|
1140
|
+
fi
|
|
553
1141
|
echo "Max iterations: $MAX_ITERATIONS"
|
|
554
1142
|
echo "Max E2E attempts: $MAX_E2E_ATTEMPTS"
|
|
555
1143
|
echo "=========================================="
|
|
@@ -588,7 +1176,7 @@ fi
|
|
|
588
1176
|
# tasks to be checked — the checkboxes may be stale if the work shipped under a
|
|
589
1177
|
# different branch name that never updated this plan file.
|
|
590
1178
|
if [ -f "$PLAN_FILE" ]; then
|
|
591
|
-
_DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
|
|
1179
|
+
_DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
|
|
592
1180
|
if [ -z "$_DIFF_STAT" ]; then
|
|
593
1181
|
echo "Plan exists but branch has no diff to $DEFAULT_BRANCH — work already merged."
|
|
594
1182
|
> "$PHASES_FILE"
|
|
@@ -612,17 +1200,18 @@ if git rev-parse --git-dir > /dev/null 2>&1; then
|
|
|
612
1200
|
if [ -n "$BASELINE_COMMIT" ]; then
|
|
613
1201
|
echo "$BASELINE_COMMIT" > "$BASELINE_FILE"
|
|
614
1202
|
echo "Baseline commit: $BASELINE_COMMIT"
|
|
1203
|
+
capture_pre_run_dirty_snapshot "$BASELINE_COMMIT"
|
|
615
1204
|
fi
|
|
616
1205
|
fi
|
|
617
1206
|
|
|
618
1207
|
# Capture baseline test failures for pre-existing failure detection
|
|
619
1208
|
BASELINE_FAILURES_FILE="/tmp/ralph-loop-${FEATURE}.baseline-failures"
|
|
620
1209
|
echo "Capturing baseline test failures..."
|
|
621
|
-
if
|
|
1210
|
+
if run_test_command > /dev/null 2>&1; then
|
|
622
1211
|
echo "Baseline: all tests passing"
|
|
623
1212
|
: > "$BASELINE_FAILURES_FILE"
|
|
624
1213
|
else
|
|
625
|
-
|
|
1214
|
+
run_test_command 2>&1 | normalize_test_failures > "$BASELINE_FAILURES_FILE" 2>/dev/null || true
|
|
626
1215
|
BASELINE_COUNT=$(wc -l < "$BASELINE_FAILURES_FILE" | tr -d ' ')
|
|
627
1216
|
echo "Baseline: $BASELINE_COUNT pre-existing test failure(s) recorded"
|
|
628
1217
|
fi
|
|
@@ -634,14 +1223,14 @@ echo "0|$MAX_ITERATIONS|$(date +%s)" > "$STATUS_FILE"
|
|
|
634
1223
|
if [ ! -f "$PLAN_FILE" ]; then
|
|
635
1224
|
echo "======================== PLANNING PHASE ========================"
|
|
636
1225
|
write_phase_start "planning"
|
|
637
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
638
|
-
run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$
|
|
1226
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1227
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_feature.md" "$PLANNING_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || {
|
|
639
1228
|
echo "ERROR: Planning phase failed"
|
|
640
1229
|
write_phase_end "planning" "failed"
|
|
641
1230
|
exit 1
|
|
642
1231
|
}
|
|
643
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
644
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
1232
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1233
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
645
1234
|
write_phase_end "planning" "success"
|
|
646
1235
|
else
|
|
647
1236
|
echo "Plan file exists, skipping planning phase"
|
|
@@ -693,10 +1282,30 @@ while true; do
|
|
|
693
1282
|
TASKS_BEFORE=$(count_pending_tasks "$PLAN_FILE")
|
|
694
1283
|
echo "Legacy plan format — relying on source-file gate for completion."
|
|
695
1284
|
fi
|
|
696
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
1285
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1286
|
+
# Continuation prompt for implementation loop iterations 2+
|
|
1287
|
+
CONTINUATION_PROMPT="Continue implementing the remaining tasks in the implementation plan at $SPEC_DIR/${FEATURE}-implementation-plan.md.
|
|
1288
|
+
Check off completed tasks as you go. Skip any E2E testing tasks.
|
|
1289
|
+
Run validation (lint, typecheck, test) after completing tasks."
|
|
1290
|
+
if [ $ITERATION -eq 1 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1291
|
+
echo "Mode: fresh"
|
|
1292
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1293
|
+
else
|
|
1294
|
+
echo "Mode: resume (session: $LAST_SESSION_ID)"
|
|
1295
|
+
RESUME_EXIT=0
|
|
1296
|
+
run_claude_resume "$LAST_SESSION_ID" "$CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || RESUME_EXIT=$?
|
|
1297
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1298
|
+
if [ $RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1299
|
+
if [ $RESUME_EXIT -ne 0 ]; then
|
|
1300
|
+
echo "Resume failed (resume_exit_nonzero: exit=$RESUME_EXIT). Fallback: using fresh prompt"
|
|
1301
|
+
else
|
|
1302
|
+
echo "Resume failed (resume_no_session_id). Fallback: using fresh prompt"
|
|
1303
|
+
fi
|
|
1304
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1305
|
+
fi
|
|
1306
|
+
fi
|
|
1307
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1308
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
700
1309
|
|
|
701
1310
|
# Check if any progress was made
|
|
702
1311
|
TASKS_AFTER=$(count_pending_tasks "$PLAN_FILE")
|
|
@@ -752,6 +1361,20 @@ if [ "$IMPL_SUCCESS" = true ]; then
|
|
|
752
1361
|
fi
|
|
753
1362
|
fi
|
|
754
1363
|
|
|
1364
|
+
# Stop early when implementation failed to avoid wasting E2E/review cycles.
|
|
1365
|
+
if [ "$IMPL_SUCCESS" != true ]; then
|
|
1366
|
+
echo "Implementation phase failed. Skipping remaining phases."
|
|
1367
|
+
write_phase_start "e2e_testing"
|
|
1368
|
+
write_phase_end "e2e_testing" "skipped"
|
|
1369
|
+
write_phase_start "verification"
|
|
1370
|
+
write_phase_end "verification" "skipped"
|
|
1371
|
+
write_phase_start "pr_review"
|
|
1372
|
+
write_phase_end "pr_review" "skipped"
|
|
1373
|
+
echo "$ITERATION|$MAX_ITERATIONS|$(date +%s)|failed" > "$FINAL_STATUS_FILE"
|
|
1374
|
+
rm -f "$STATUS_FILE" 2>/dev/null || true
|
|
1375
|
+
exit 1
|
|
1376
|
+
fi
|
|
1377
|
+
|
|
755
1378
|
# Phase 5: E2E Testing
|
|
756
1379
|
echo "======================== E2E TESTING PHASE ========================"
|
|
757
1380
|
E2E_TOTAL=$({ grep "^- \[.\].*E2E:" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
|
|
@@ -763,14 +1386,32 @@ else
|
|
|
763
1386
|
write_phase_start "e2e_testing"
|
|
764
1387
|
E2E_SUCCESS=false
|
|
765
1388
|
E2E_ATTEMPT=0
|
|
1389
|
+
E2E_SESSION_ID=""
|
|
1390
|
+
E2E_CONTINUATION_PROMPT="Continue remaining E2E scenarios. Check the implementation plan for unchecked \`- [ ] E2E:\` entries and implement/run those tests. Run validation after completing each scenario."
|
|
766
1391
|
while [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; do
|
|
767
1392
|
E2E_ATTEMPT=$((E2E_ATTEMPT + 1))
|
|
1393
|
+
echo "$E2E_ATTEMPT|$MAX_E2E_ATTEMPTS|$(date +%s)" > "$STATUS_FILE"
|
|
768
1394
|
echo "------------------------ E2E Attempt $E2E_ATTEMPT of $MAX_E2E_ATTEMPTS ------------------------"
|
|
769
1395
|
|
|
770
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1396
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
1397
|
+
if [ $E2E_ATTEMPT -eq 1 ] || [ -z "$E2E_SESSION_ID" ]; then
|
|
1398
|
+
echo "E2E attempt $E2E_ATTEMPT: using full prompt"
|
|
1399
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1400
|
+
else
|
|
1401
|
+
echo "E2E attempt $E2E_ATTEMPT: using resume session $E2E_SESSION_ID"
|
|
1402
|
+
E2E_RESUME_EXIT=0
|
|
1403
|
+
run_claude_resume "$E2E_SESSION_ID" "$E2E_CONTINUATION_PROMPT" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_RESUME_EXIT=$?
|
|
1404
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1405
|
+
if [ $E2E_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1406
|
+
echo "E2E attempt $E2E_ATTEMPT: resume unavailable, using full prompt"
|
|
1407
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1408
|
+
fi
|
|
1409
|
+
fi
|
|
1410
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1411
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
1412
|
+
E2E_SESSION_ID="$LAST_SESSION_ID"
|
|
1413
|
+
fi
|
|
1414
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
774
1415
|
|
|
775
1416
|
# Check if all E2E tests passed
|
|
776
1417
|
E2E_FAILED=$({ grep "^- \[ \].*E2E:.*FAILED" "$PLAN_FILE" 2>/dev/null || true; } | wc -l | tr -d ' ')
|
|
@@ -784,9 +1425,24 @@ else
|
|
|
784
1425
|
|
|
785
1426
|
if [ $E2E_ATTEMPT -lt $MAX_E2E_ATTEMPTS ]; then
|
|
786
1427
|
echo "E2E tests have failures. Running fix iteration..."
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
1428
|
+
if [ -n "$E2E_SESSION_ID" ]; then
|
|
1429
|
+
echo "E2E fix: using resume session $E2E_SESSION_ID"
|
|
1430
|
+
E2E_FIX_EXIT=0
|
|
1431
|
+
run_claude_resume "$E2E_SESSION_ID" "Fix the failing E2E tests identified above. Run validation after fixing." "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || E2E_FIX_EXIT=$?
|
|
1432
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1433
|
+
if [ $E2E_FIX_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1434
|
+
echo "E2E fix: resume unavailable, using full prompt"
|
|
1435
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1436
|
+
fi
|
|
1437
|
+
else
|
|
1438
|
+
echo "E2E fix: resume unavailable, using full prompt"
|
|
1439
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_e2e_fix.md" "$IMPL_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1440
|
+
fi
|
|
1441
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1442
|
+
if [ -n "$LAST_SESSION_ID" ]; then
|
|
1443
|
+
E2E_SESSION_ID="$LAST_SESSION_ID"
|
|
1444
|
+
fi
|
|
1445
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
790
1446
|
fi
|
|
791
1447
|
done
|
|
792
1448
|
|
|
@@ -797,21 +1453,16 @@ else
|
|
|
797
1453
|
fi
|
|
798
1454
|
fi
|
|
799
1455
|
|
|
800
|
-
# Phase 6: Spec Verification
|
|
801
|
-
|
|
1456
|
+
# Phase 6: Spec Verification (merged into review phase)
|
|
1457
|
+
# Verification responsibilities (spec status, acceptance criteria, README updates)
|
|
1458
|
+
# are now handled in Step 0 of the review prompt templates.
|
|
1459
|
+
# This no-op marker preserves backward compatibility for TUI phase tracking.
|
|
802
1460
|
write_phase_start "verification"
|
|
803
|
-
|
|
804
|
-
VERIFY_STATUS="success"
|
|
805
|
-
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_verify.md" "$CLAUDE_CMD_OPUS" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
|
|
806
|
-
VERIFY_STATUS="failed"
|
|
807
|
-
fi
|
|
808
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
809
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
810
|
-
write_phase_end "verification" "$VERIFY_STATUS"
|
|
1461
|
+
write_phase_end "verification" "skipped"
|
|
811
1462
|
|
|
812
1463
|
# Guard B: Skip PR phase if branch has no diff to default branch
|
|
813
1464
|
# Safety net for cases where implementation ran but produced no net diff.
|
|
814
|
-
_PR_DIFF_STAT=$(git diff "$DEFAULT_BRANCH..HEAD" --stat 2>/dev/null || echo "")
|
|
1465
|
+
_PR_DIFF_STAT=$(git diff "${DEFAULT_BRANCH}..HEAD" --stat 2>/dev/null || echo "")
|
|
815
1466
|
if [ -z "$_PR_DIFF_STAT" ]; then
|
|
816
1467
|
echo "No diff between $BRANCH and $DEFAULT_BRANCH — skipping PR phase."
|
|
817
1468
|
write_phase_start "pr_review"
|
|
@@ -824,12 +1475,14 @@ if [ -z "$_PR_DIFF_STAT" ]; then
|
|
|
824
1475
|
exit 0
|
|
825
1476
|
fi
|
|
826
1477
|
|
|
827
|
-
# Phase 7: PR and Review
|
|
1478
|
+
# Phase 7: PR and Review (includes spec verification via Step 0 in review prompts)
|
|
828
1479
|
echo "======================== PR & REVIEW PHASE ========================"
|
|
829
1480
|
write_phase_start "pr_review"
|
|
830
|
-
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR
|
|
1481
|
+
export FEATURE APP_DIR SPEC_DIR PROMPTS_DIR CODING_CLI REVIEW_CLI REVIEW_MODE
|
|
831
1482
|
PR_STATUS="success"
|
|
832
1483
|
MAX_REVIEW_ATTEMPTS=3
|
|
1484
|
+
REVIEW_SESSION_ID=""
|
|
1485
|
+
REVIEW_CONTINUATION_PROMPT="The issues from the previous review have been fixed. Re-run the code review, checking only for remaining issues. Report your verdict."
|
|
833
1486
|
|
|
834
1487
|
# Short-circuit: skip review if no files exist in diff
|
|
835
1488
|
_REVIEW_FILE_CHANGES=$(count_file_changes "$BASELINE_COMMIT")
|
|
@@ -860,14 +1513,7 @@ check_review_approved() {
|
|
|
860
1513
|
fi
|
|
861
1514
|
fi
|
|
862
1515
|
|
|
863
|
-
# Secondary: check
|
|
864
|
-
local pr_state
|
|
865
|
-
pr_state=$(gh pr view "$BRANCH" --json state --jq '.state' 2>/dev/null || echo "")
|
|
866
|
-
if [ "$pr_state" = "MERGED" ]; then
|
|
867
|
-
return 0
|
|
868
|
-
fi
|
|
869
|
-
|
|
870
|
-
# Tertiary: check the latest PR comment for approval signal
|
|
1516
|
+
# Secondary: check the latest PR comment for approval signal
|
|
871
1517
|
local latest_comment
|
|
872
1518
|
latest_comment=$(gh pr view "$BRANCH" --json comments --jq '.comments[-1].body' 2>/dev/null || echo "")
|
|
873
1519
|
if echo "$latest_comment" | grep -qi "VERDICT:.*APPROVED" 2>/dev/null; then
|
|
@@ -879,12 +1525,60 @@ check_review_approved() {
|
|
|
879
1525
|
return 1
|
|
880
1526
|
}
|
|
881
1527
|
|
|
1528
|
+
# Block until the CI checks for a pull request finish, then report the result.
# Arguments: $1 - PR reference handed to `gh pr checks`
# Outputs:   progress lines and the captured `gh` output on stdout; the
#            failure message on stderr
# Returns:   0 when `gh pr checks --watch` exits 0, or when it exits non-zero
#            but its output says no checks exist; 1 otherwise.
wait_for_ci_checks() {
  local target="$1"
  echo "Waiting for CI checks on $target..."

  # Capture stdout and stderr together so the "no checks" message can be
  # detected below regardless of which stream gh uses for it.
  local report=""
  report=$(gh pr checks "$target" --watch --interval 10 2>&1)
  local status=$?

  echo "$report"

  if [ "$status" -ne 0 ]; then
    # Some repos have no checks configured for certain PRs.
    if ! grep -qiE "no checks|no status checks" <<< "$report"; then
      echo "ERROR: CI checks failed or did not complete successfully." >&2
      return 1
    fi
    echo "No CI checks found for $target. Continuing."
    return 0
  fi

  echo "CI checks passed."
  return 0
}
|
|
1554
|
+
|
|
1555
|
+
# Squash-merge a pull request once the earlier gates have passed.
# Arguments: $1 - PR reference handed to `gh pr view` / `gh pr merge`
# Outputs:   status lines on stdout; the failure message on stderr
# Returns:   0 when the PR is already in state MERGED or the merge command
#            succeeds; 1 when the merge command fails.
merge_pr_after_ci_gate() {
  local target="$1"
  local state
  # A failed lookup yields an empty state, which falls through to the merge.
  state=$(gh pr view "$target" --json state --jq '.state' 2>/dev/null || echo "")

  case "$state" in
    MERGED)
      echo "PR already merged."
      return 0
      ;;
  esac

  echo "Merging PR after CI gate..."
  if ! gh pr merge "$target" --squash --delete-branch; then
    echo "ERROR: Failed to merge PR $target after CI gate." >&2
    return 1
  fi

  return 0
}
|
|
1575
|
+
|
|
882
1576
|
if [ "$REVIEW_MODE" = "manual" ]; then
|
|
883
|
-
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$
|
|
1577
|
+
if ! run_claude_prompt "$PROMPTS_DIR/PROMPT_review_manual.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw"; then
|
|
884
1578
|
PR_STATUS="failed"
|
|
885
1579
|
fi
|
|
886
|
-
extract_session_result "${CLAUDE_OUTPUT}.raw"
|
|
887
|
-
accumulate_tokens_from_session "$LAST_SESSION_ID"
|
|
1580
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1581
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
888
1582
|
|
|
889
1583
|
elif [ "$REVIEW_MODE" = "merge" ]; then
|
|
890
1584
|
# Merge mode: create PR, iterate review+fixes until approved, then merge
|
|
@@ -893,22 +1587,53 @@ elif [ "$REVIEW_MODE" = "merge" ]; then
|
|
|
893
1587
|
while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
|
|
894
1588
|
REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
|
|
895
1589
|
echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
1590
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
|
|
1591
|
+
echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
|
|
1592
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1593
|
+
else
|
|
1594
|
+
echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
|
|
1595
|
+
REVIEW_RESUME_EXIT=0
|
|
1596
|
+
run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
|
|
1597
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1598
|
+
if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1599
|
+
echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
|
|
1600
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_merge.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1601
|
+
fi
|
|
1602
|
+
fi
|
|
1603
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1604
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
|
|
1605
|
+
REVIEW_SESSION_ID="$LAST_SESSION_ID"
|
|
1606
|
+
fi
|
|
1607
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
899
1608
|
|
|
900
1609
|
# Check stdout and PR comment for approval
|
|
901
1610
|
if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
|
|
902
1611
|
echo "Review approved! Running post-approval test gate..."
|
|
903
1612
|
if check_tests_pass_or_baseline; then
|
|
904
1613
|
echo "Post-approval test gate passed."
|
|
1614
|
+
if ! wait_for_ci_checks "$BRANCH"; then
|
|
1615
|
+
PR_STATUS="failed"
|
|
1616
|
+
break
|
|
1617
|
+
fi
|
|
1618
|
+
if ! merge_pr_after_ci_gate "$BRANCH"; then
|
|
1619
|
+
PR_STATUS="failed"
|
|
1620
|
+
break
|
|
1621
|
+
fi
|
|
905
1622
|
REVIEW_APPROVED=true
|
|
906
1623
|
break
|
|
907
1624
|
else
|
|
908
1625
|
echo "WARNING: Tests failing after review approval. Running fix iteration..."
|
|
909
1626
|
run_review_fix
|
|
910
1627
|
if check_tests_pass_or_baseline; then
|
|
911
|
-
echo "Tests pass after fix.
|
|
1628
|
+
echo "Tests pass after fix. Running CI and merge gates."
|
|
1629
|
+
if ! wait_for_ci_checks "$BRANCH"; then
|
|
1630
|
+
PR_STATUS="failed"
|
|
1631
|
+
break
|
|
1632
|
+
fi
|
|
1633
|
+
if ! merge_pr_after_ci_gate "$BRANCH"; then
|
|
1634
|
+
PR_STATUS="failed"
|
|
1635
|
+
break
|
|
1636
|
+
fi
|
|
912
1637
|
REVIEW_APPROVED=true
|
|
913
1638
|
break
|
|
914
1639
|
else
|
|
@@ -936,9 +1661,24 @@ else
|
|
|
936
1661
|
while [ $REVIEW_ATTEMPT -lt $MAX_REVIEW_ATTEMPTS ]; do
|
|
937
1662
|
REVIEW_ATTEMPT=$((REVIEW_ATTEMPT + 1))
|
|
938
1663
|
echo "--- Review attempt $REVIEW_ATTEMPT of $MAX_REVIEW_ATTEMPTS ---"
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1664
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] || [ -z "$REVIEW_SESSION_ID" ]; then
|
|
1665
|
+
echo "Review attempt $REVIEW_ATTEMPT: using full prompt"
|
|
1666
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1667
|
+
else
|
|
1668
|
+
echo "Review attempt $REVIEW_ATTEMPT: using resume session $REVIEW_SESSION_ID"
|
|
1669
|
+
REVIEW_RESUME_EXIT=0
|
|
1670
|
+
run_claude_resume "$REVIEW_SESSION_ID" "$REVIEW_CONTINUATION_PROMPT" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || REVIEW_RESUME_EXIT=$?
|
|
1671
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1672
|
+
if [ $REVIEW_RESUME_EXIT -ne 0 ] || [ -z "$LAST_SESSION_ID" ]; then
|
|
1673
|
+
echo "Review attempt $REVIEW_ATTEMPT: resume unavailable, using full prompt"
|
|
1674
|
+
run_claude_prompt "$PROMPTS_DIR/PROMPT_review_auto.md" "$REVIEW_CMD" 2>&1 | tee "${CLAUDE_OUTPUT}.raw" || true
|
|
1675
|
+
fi
|
|
1676
|
+
fi
|
|
1677
|
+
extract_session_result "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
1678
|
+
if [ $REVIEW_ATTEMPT -eq 1 ] && [ -n "$LAST_SESSION_ID" ]; then
|
|
1679
|
+
REVIEW_SESSION_ID="$LAST_SESSION_ID"
|
|
1680
|
+
fi
|
|
1681
|
+
accumulate_tokens_from_session "$LAST_SESSION_ID" "${CLAUDE_OUTPUT}.raw" "$LAST_RUN_CLI"
|
|
942
1682
|
|
|
943
1683
|
# Check stdout and PR comment for approval
|
|
944
1684
|
if check_review_approved "${CLAUDE_OUTPUT}.raw"; then
|
|
@@ -1016,6 +1756,8 @@ fi
|
|
|
1016
1756
|
rm -f "$STATUS_FILE" 2>/dev/null || true
|
|
1017
1757
|
rm -f "/tmp/ralph-loop-${FEATURE}.output" 2>/dev/null || true
|
|
1018
1758
|
rm -f "/tmp/ralph-loop-${FEATURE}.output.raw" 2>/dev/null || true
|
|
1759
|
+
rm -f "/tmp/ralph-loop-${FEATURE}.last-message" 2>/dev/null || true
|
|
1760
|
+
rm -f "$PRE_RUN_DIRTY_FILE" 2>/dev/null || true
|
|
1019
1761
|
|
|
1020
1762
|
# Print final token usage
|
|
1021
1763
|
if [ -f "/tmp/ralph-loop-${FEATURE}.tokens" ]; then
|