agent-control-plane 0.1.8 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pr-risk.sh +54 -10
- package/hooks/heartbeat-hooks.sh +166 -13
- package/package.json +8 -2
- package/references/commands.md +1 -0
- package/tools/bin/agent-project-cleanup-session +143 -2
- package/tools/bin/agent-project-heartbeat-loop +29 -2
- package/tools/bin/agent-project-publish-issue-pr +178 -62
- package/tools/bin/agent-project-reconcile-issue-session +230 -5
- package/tools/bin/agent-project-reconcile-pr-session +104 -13
- package/tools/bin/agent-project-run-claude-session +19 -1
- package/tools/bin/agent-project-run-codex-resilient +121 -16
- package/tools/bin/agent-project-run-codex-session +61 -11
- package/tools/bin/agent-project-run-openclaw-session +274 -7
- package/tools/bin/agent-project-sync-anchor-repo +13 -2
- package/tools/bin/agent-project-worker-status +19 -14
- package/tools/bin/cleanup-worktree.sh +4 -1
- package/tools/bin/dashboard-launchd-bootstrap.sh +16 -4
- package/tools/bin/ensure-runtime-sync.sh +182 -0
- package/tools/bin/flow-config-lib.sh +76 -30
- package/tools/bin/flow-resident-worker-lib.sh +28 -2
- package/tools/bin/flow-shell-lib.sh +28 -8
- package/tools/bin/heartbeat-safe-auto.sh +32 -0
- package/tools/bin/issue-publish-localization-guard.sh +142 -0
- package/tools/bin/prepare-worktree.sh +3 -1
- package/tools/bin/project-launchd-bootstrap.sh +17 -4
- package/tools/bin/project-runtime-supervisor.sh +7 -1
- package/tools/bin/project-runtimectl.sh +78 -15
- package/tools/bin/provider-cooldown-state.sh +1 -1
- package/tools/bin/render-flow-config.sh +16 -1
- package/tools/bin/reuse-issue-worktree.sh +46 -0
- package/tools/bin/run-codex-task.sh +2 -2
- package/tools/bin/scaffold-profile.sh +2 -2
- package/tools/bin/start-issue-worker.sh +118 -16
- package/tools/bin/start-resident-issue-loop.sh +1 -0
- package/tools/bin/sync-shared-agent-home.sh +26 -0
- package/tools/bin/test-smoke.sh +6 -1
- package/tools/dashboard/app.js +91 -3
- package/tools/dashboard/dashboard_snapshot.py +119 -0
- package/tools/dashboard/styles.css +43 -0
- package/tools/templates/issue-prompt-template.md +18 -66
- package/tools/templates/legacy/issue-prompt-template-pre-slim.md +109 -0
- package/bin/audit-issue-routing.sh +0 -74
- package/tools/bin/audit-agent-worktrees.sh +0 -310
- package/tools/bin/audit-issue-routing.sh +0 -11
- package/tools/bin/audit-retained-layout.sh +0 -58
- package/tools/bin/audit-retained-overlap.sh +0 -135
- package/tools/bin/audit-retained-worktrees.sh +0 -228
- package/tools/bin/check-skill-contracts.sh +0 -324
|
@@ -168,6 +168,9 @@ fi
|
|
|
168
168
|
result_outcome=""
|
|
169
169
|
result_action=""
|
|
170
170
|
result_issue_id="${ISSUE_ID:-}"
|
|
171
|
+
result_detail=""
|
|
172
|
+
run_started_at="${STARTED_AT:-}"
|
|
173
|
+
expected_run_started_at="${ACP_EXPECTED_RUN_STARTED_AT:-${F_LOSNING_EXPECTED_RUN_STARTED_AT:-}}"
|
|
171
174
|
host_blocker_file="${run_dir}/host-blocker.md"
|
|
172
175
|
prompt_file="${run_dir}/prompt.md"
|
|
173
176
|
pr_comment_file="${run_dir}/pr-comment.md"
|
|
@@ -184,9 +187,18 @@ if [[ -f "$result_file_candidate" ]]; then
|
|
|
184
187
|
set +a
|
|
185
188
|
result_outcome="${OUTCOME:-}"
|
|
186
189
|
result_action="${ACTION:-}"
|
|
190
|
+
result_detail="${DETAIL:-}"
|
|
187
191
|
result_issue_id="${ISSUE_ID:-${result_issue_id}}"
|
|
188
192
|
fi
|
|
189
193
|
|
|
194
|
+
if [[ -n "${expected_run_started_at}" && "${expected_run_started_at}" != "${run_started_at}" ]]; then
|
|
195
|
+
printf 'STATUS=STALE-RUN-SKIPPED\n'
|
|
196
|
+
printf 'SESSION=%s\n' "$session"
|
|
197
|
+
printf 'EXPECTED_STARTED_AT=%s\n' "${expected_run_started_at}"
|
|
198
|
+
printf 'ACTUAL_STARTED_AT=%s\n' "${run_started_at}"
|
|
199
|
+
exit 0
|
|
200
|
+
fi
|
|
201
|
+
|
|
190
202
|
pr_schedule_retry() { :; }
|
|
191
203
|
pr_clear_retry() { :; }
|
|
192
204
|
pr_cleanup_linked_issue_session() { :; }
|
|
@@ -225,6 +237,8 @@ clear_provider_quota_cooldown() {
|
|
|
225
237
|
"${provider_cooldown_script}" clear >/dev/null || true
|
|
226
238
|
}
|
|
227
239
|
|
|
240
|
+
blocked_runtime_reason=""
|
|
241
|
+
|
|
228
242
|
owner="${repo_slug%%/*}"
|
|
229
243
|
repo="${repo_slug#*/}"
|
|
230
244
|
pr_view_json="$(flow_github_pr_view_json "$repo_slug" "$pr_number")"
|
|
@@ -339,6 +353,11 @@ normalize_pr_result_contract() {
|
|
|
339
353
|
host-comment-pr-blocker)
|
|
340
354
|
return 0
|
|
341
355
|
;;
|
|
356
|
+
host-comment-blocker)
|
|
357
|
+
result_action="host-comment-pr-blocker"
|
|
358
|
+
pr_result_contract_note="normalized-legacy-blocked-action"
|
|
359
|
+
return 0
|
|
360
|
+
;;
|
|
342
361
|
requested-changes-or-blocked)
|
|
343
362
|
result_action="host-comment-pr-blocker"
|
|
344
363
|
pr_result_contract_note="normalized-legacy-blocked-action"
|
|
@@ -358,8 +377,15 @@ normalize_pr_result_contract() {
|
|
|
358
377
|
}
|
|
359
378
|
|
|
360
379
|
#######################################
# Record that the current run has been reconciled by writing
# "${run_dir}/reconciled.ok" with the run's start stamp and the
# reconciliation time (UTC, ISO-8601).
# Globals:   run_dir (read), run_started_at (read)
# Arguments: none
# Returns:   0 when the marker was written or run_dir does not exist;
#            1 when the marker could not be written or moved into place.
#######################################
mark_reconciled() {
  local reconciled_at tmp_file

  # Nothing to mark when the run directory is absent.
  [[ -d "$run_dir" ]] || return 0

  reconciled_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
  tmp_file="${run_dir}/reconciled.ok.tmp.$$"

  # Write to a sibling temp file first so the final marker only ever appears
  # via a rename; remove the temp file again if any step fails.
  if ! {
    printf 'STARTED_AT=%s\n' "${run_started_at:-}"
    printf 'RECONCILED_AT=%s\n' "${reconciled_at}"
  } >"${tmp_file}"; then
    rm -f -- "${tmp_file}"
    return 1
  fi

  if ! mv -f -- "${tmp_file}" "${run_dir}/reconciled.ok"; then
    rm -f -- "${tmp_file}"
    return 1
  fi
}
|
|
365
391
|
|
|
@@ -393,6 +419,20 @@ blocked_result_indicates_local_bind_failure() {
|
|
|
393
419
|
return 1
|
|
394
420
|
}
|
|
395
421
|
|
|
422
|
+
#######################################
# Decide whether a "blocked" PR result was really caused by the worker
# runtime issuing an empty tool command.
# Globals:   result_detail (read), session_log_file (read)
# Arguments: none
# Outputs:   prints "worker-tool-exec-empty-command" when detected
# Returns:   0 when a runtime reason was printed, 1 otherwise
#######################################
classify_pr_blocked_runtime_reason() {
  local reason="worker-tool-exec-empty-command"
  local log_file="${session_log_file:-}"

  # The worker reported the reason explicitly in its result detail.
  if [[ "${result_detail:-}" == "$reason" ]]; then
    printf '%s\n' "$reason"
    return 0
  fi

  # Otherwise look for the literal tool failure line in the session log.
  if [[ -n "$log_file" && -f "$log_file" ]] \
    && grep -Fq -- '[tools] exec failed: Provide a command to start.' "$log_file"; then
    printf '%s\n' "$reason"
    return 0
  fi

  return 1
}
|
|
435
|
+
|
|
396
436
|
extract_preapproved_host_recovery_commands() {
|
|
397
437
|
[[ -f "$prompt_file" ]] || return 0
|
|
398
438
|
sed -n 's/^.*loopback retry command: `\(.*\)`$/\1/p' "$prompt_file"
|
|
@@ -700,18 +740,53 @@ merge_state_prepared() {
|
|
|
700
740
|
git -C "$pr_worktree" rev-parse -q --verify MERGE_HEAD >/dev/null 2>&1
|
|
701
741
|
}
|
|
702
742
|
|
|
743
|
+
#######################################
# Print the login of the GitHub user the `gh` CLI is authenticated as.
# Globals:   repo_slug (read)
# Arguments: none
# Outputs:   the login on stdout; nothing when the lookup fails
# Returns:   0 even when `gh` fails (best-effort lookup)
#######################################
current_github_login() {
  # Defined elsewhere in the project; presumably prepares gh auth
  # environment for this repository — confirm against its definition.
  flow_export_github_cli_auth_env "${repo_slug}"
  # gh errors are intentionally hidden: callers treat empty output as "unknown".
  gh api user --jq '.login // ""' 2>/dev/null || true
}
|
|
747
|
+
|
|
748
|
+
#######################################
# Print the login of the author of the pull request being reconciled.
# Globals:   repo_slug (read), pr_number (read)
# Arguments: none
# Outputs:   the author login on stdout; nothing when the lookup fails
# Returns:   0 even when `gh` fails (best-effort lookup)
#######################################
pr_author_login() {
  # Defined elsewhere in the project; presumably prepares gh auth
  # environment for this repository — confirm against its definition.
  flow_export_github_cli_auth_env "${repo_slug}"
  # gh errors are intentionally hidden: callers treat empty output as "unknown".
  gh pr view "${pr_number}" -R "${repo_slug}" --json author --jq '.author.login // ""' 2>/dev/null || true
}
|
|
752
|
+
|
|
753
|
+
#######################################
# Test whether the authenticated GitHub user is also the author of the
# pull request being reconciled.
# Globals:   none directly (uses current_github_login / pr_author_login)
# Arguments: none
# Returns:   0 when both logins are known and equal, 1 otherwise
#######################################
pr_is_self_authored_for_current_actor() {
  local actor_login=""
  local author_login=""

  actor_login="$(current_github_login)"
  # An unknown actor can never match; skip the second remote lookup.
  [[ -n "${actor_login}" ]] || return 1

  author_login="$(pr_author_login)"
  [[ -n "${author_login}" && "${actor_login}" == "${author_login}" ]]
}
|
|
761
|
+
|
|
762
|
+
#######################################
# Print the commit id GitHub currently reports as the head of the pull
# request being reconciled.
# Globals:   repo_slug (read), pr_number (read)
# Arguments: none
# Outputs:   the head commit id on stdout; nothing when the lookup fails
# Returns:   0 even when `gh` fails (best-effort lookup)
#######################################
pr_remote_head_oid() {
  # Defined elsewhere in the project; presumably prepares gh auth
  # environment for this repository — confirm against its definition.
  flow_export_github_cli_auth_env "${repo_slug}"
  # gh errors are intentionally hidden: callers treat empty output as "unknown".
  gh pr view "${pr_number}" -R "${repo_slug}" --json headRefOid --jq '.headRefOid // ""' 2>/dev/null || true
}
|
|
766
|
+
|
|
767
|
+
#######################################
# Test whether the pull request's remote head already equals the commit
# recorded in FINAL_HEAD.
# Globals:   FINAL_HEAD (read)
# Arguments: none
# Returns:   0 when FINAL_HEAD is set and matches the remote head;
#            1 when FINAL_HEAD is empty, the remote head is unknown, or
#            the two differ.
#######################################
pr_remote_already_has_final_head() {
  local final_head="${FINAL_HEAD:-}"
  local remote_head=""

  # Without a recorded final head there is nothing to compare against.
  [[ -n "${final_head}" ]] || return 1

  remote_head="$(pr_remote_head_oid)"
  [[ -n "${remote_head}" && "${remote_head}" == "${final_head}" ]]
}
|
|
775
|
+
|
|
703
776
|
approve_and_merge() {
|
|
704
777
|
local approve_output
|
|
705
|
-
if !
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
778
|
+
if ! pr_is_self_authored_for_current_actor; then
|
|
779
|
+
if ! approve_output="$(
|
|
780
|
+
flow_github_api_repo "${repo_slug}" "pulls/${pr_number}/reviews" \
|
|
781
|
+
--method POST \
|
|
782
|
+
-f event=APPROVE \
|
|
783
|
+
-f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
|
|
784
|
+
2>&1
|
|
785
|
+
)"; then
|
|
786
|
+
if ! grep -q "Can not approve your own pull request" <<<"$approve_output"; then
|
|
787
|
+
printf '%s\n' "$approve_output" >&2
|
|
788
|
+
return 1
|
|
789
|
+
fi
|
|
715
790
|
fi
|
|
716
791
|
fi
|
|
717
792
|
|
|
@@ -753,7 +828,14 @@ handle_linked_issue_merge_cleanup() {
|
|
|
753
828
|
|
|
754
829
|
handle_updated_branch_result() {
|
|
755
830
|
if [[ -z "$pr_worktree" || ! -d "$pr_worktree" ]]; then
|
|
756
|
-
if
|
|
831
|
+
if pr_remote_already_has_final_head; then
|
|
832
|
+
post_pr_comment_if_present
|
|
833
|
+
require_transition "pr_clear_retry" pr_clear_retry
|
|
834
|
+
require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
|
|
835
|
+
cleanup_pr_session
|
|
836
|
+
result_action="${result_action:-host-push-pr-branch}"
|
|
837
|
+
notify_pr_reconciled
|
|
838
|
+
elif pr_comment_already_posted; then
|
|
757
839
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
758
840
|
require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
|
|
759
841
|
cleanup_pr_session
|
|
@@ -968,7 +1050,16 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
|
|
|
968
1050
|
fi
|
|
969
1051
|
fi
|
|
970
1052
|
elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "blocked" ]]; then
|
|
971
|
-
|
|
1053
|
+
blocked_runtime_reason="$(classify_pr_blocked_runtime_reason || true)"
|
|
1054
|
+
if [[ -n "${blocked_runtime_reason:-}" ]]; then
|
|
1055
|
+
status="FAILED"
|
|
1056
|
+
failure_reason="${blocked_runtime_reason}"
|
|
1057
|
+
require_transition "pr_schedule_retry" pr_schedule_retry "$failure_reason"
|
|
1058
|
+
require_transition "pr_after_failed" pr_after_failed "$pr_number"
|
|
1059
|
+
cleanup_pr_session
|
|
1060
|
+
result_action="queued-pr-retry"
|
|
1061
|
+
notify_pr_reconciled
|
|
1062
|
+
elif attempt_blocked_pr_host_verification_recovery; then
|
|
972
1063
|
handle_updated_branch_result
|
|
973
1064
|
else
|
|
974
1065
|
post_pr_comment_if_present
|
|
@@ -61,6 +61,24 @@ resolve_claude_bin() {
|
|
|
61
61
|
return 0
|
|
62
62
|
fi
|
|
63
63
|
|
|
64
|
+
# Well-known install locations for Claude Code CLI.
|
|
65
|
+
# Detached supervisors and LaunchAgents run with a minimal PATH that
|
|
66
|
+
# does not include user-local directories, so command -v alone is not
|
|
67
|
+
# enough. Try the common locations explicitly.
|
|
68
|
+
local -a fallback_paths=(
|
|
69
|
+
"${HOME}/.local/bin/claude"
|
|
70
|
+
"${HOME}/.claude/local/bin/claude"
|
|
71
|
+
"/usr/local/bin/claude"
|
|
72
|
+
"/opt/homebrew/bin/claude"
|
|
73
|
+
)
|
|
74
|
+
local p
|
|
75
|
+
for p in "${fallback_paths[@]}"; do
|
|
76
|
+
if [[ -x "${p}" ]]; then
|
|
77
|
+
printf '%s\n' "${p}"
|
|
78
|
+
return 0
|
|
79
|
+
fi
|
|
80
|
+
done
|
|
81
|
+
|
|
64
82
|
return 1
|
|
65
83
|
}
|
|
66
84
|
|
|
@@ -348,7 +366,7 @@ fi
|
|
|
348
366
|
|
|
349
367
|
reconcile_snippet=""
|
|
350
368
|
if [[ -n "$reconcile_command" ]]; then
|
|
351
|
-
printf -v delayed_reconcile_q '%q' "sleep 2; $reconcile_command"
|
|
369
|
+
printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
|
|
352
370
|
reconcile_snippet="nohup bash -lc ${delayed_reconcile_q} >> ${output_q} 2>&1 </dev/null &"
|
|
353
371
|
fi
|
|
354
372
|
|
|
@@ -18,6 +18,7 @@ Options:
|
|
|
18
18
|
--max-resume-attempts <count> Maximum resume attempts after interruption
|
|
19
19
|
--auth-refresh-timeout-seconds <secs> How long to wait for refreshed auth before failing
|
|
20
20
|
--auth-refresh-poll-seconds <secs> Poll interval while waiting for refreshed auth
|
|
21
|
+
--stall-seconds <secs> Fail if Codex stops producing output for too long
|
|
21
22
|
--help Show this help
|
|
22
23
|
EOF
|
|
23
24
|
}
|
|
@@ -35,6 +36,8 @@ max_resume_attempts="${ACP_CODEX_MAX_RESUME_ATTEMPTS:-${F_LOSNING_CODEX_MAX_RESU
|
|
|
35
36
|
auth_refresh_timeout_seconds="${ACP_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-900}}"
|
|
36
37
|
auth_refresh_poll_seconds="${ACP_CODEX_AUTH_REFRESH_POLL_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_POLL_SECONDS:-10}}"
|
|
37
38
|
max_quota_autoswitch_attempts="${ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-${F_LOSNING_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-1}}"
|
|
39
|
+
codex_progress_heartbeat_seconds="${ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS:-${F_LOSNING_CODEX_PROGRESS_HEARTBEAT_SECONDS:-30}}"
|
|
40
|
+
codex_stall_seconds="${ACP_CODEX_STALL_SECONDS:-${F_LOSNING_CODEX_STALL_SECONDS:-300}}"
|
|
38
41
|
|
|
39
42
|
while [[ $# -gt 0 ]]; do
|
|
40
43
|
case "$1" in
|
|
@@ -50,6 +53,7 @@ while [[ $# -gt 0 ]]; do
|
|
|
50
53
|
--max-resume-attempts) max_resume_attempts="${2:-}"; shift 2 ;;
|
|
51
54
|
--auth-refresh-timeout-seconds) auth_refresh_timeout_seconds="${2:-}"; shift 2 ;;
|
|
52
55
|
--auth-refresh-poll-seconds) auth_refresh_poll_seconds="${2:-}"; shift 2 ;;
|
|
56
|
+
--stall-seconds) codex_stall_seconds="${2:-}"; shift 2 ;;
|
|
53
57
|
--help|-h) usage; exit 0 ;;
|
|
54
58
|
*) echo "Unknown argument: $1" >&2; usage >&2; exit 1 ;;
|
|
55
59
|
esac
|
|
@@ -80,6 +84,13 @@ esac
|
|
|
80
84
|
case "$max_quota_autoswitch_attempts" in
|
|
81
85
|
''|*[!0-9]*) echo "ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS must be numeric" >&2; exit 1 ;;
|
|
82
86
|
esac
|
|
87
|
+
# Validate the heartbeat interval: digits only, and strictly positive because
# the heartbeat loop passes it straight to `sleep`.
case "$codex_progress_heartbeat_seconds" in
  ''|*[!0-9]*) echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be numeric" >&2; exit 1 ;;
esac
# Force base 10 so zero-padded values such as "08" are not read as invalid
# octal later, and so "00" is caught by the zero check below.
codex_progress_heartbeat_seconds=$((10#${codex_progress_heartbeat_seconds}))
if (( codex_progress_heartbeat_seconds == 0 )); then
  echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be greater than zero" >&2
  exit 1
fi

# Validate the stall threshold: digits only; zero is allowed here (the stall
# check is guarded by `codex_stall_seconds > 0`).
case "$codex_stall_seconds" in
  ''|*[!0-9]*) echo "ACP_CODEX_STALL_SECONDS must be numeric" >&2; exit 1 ;;
esac
# Same base-10 normalisation: the value is used in (( )) arithmetic.
codex_stall_seconds=$((10#${codex_stall_seconds}))
|
|
83
94
|
|
|
84
95
|
FLOW_SKILL_DIR="$(resolve_flow_skill_dir "${BASH_SOURCE[0]}")"
|
|
85
96
|
state_file="${host_run_dir}/runner.env"
|
|
@@ -116,6 +127,7 @@ last_quota_switch_status=""
|
|
|
116
127
|
last_quota_next_retry_at=""
|
|
117
128
|
last_quota_selected_label=""
|
|
118
129
|
quota_autoswitch_attempt_count=0
|
|
130
|
+
last_attempt_started_epoch=0
|
|
119
131
|
|
|
120
132
|
mkdir -p "$host_run_dir"
|
|
121
133
|
touch "$output_file"
|
|
@@ -148,6 +160,19 @@ write_state() {
|
|
|
148
160
|
mv "$tmp_file" "$state_file"
|
|
149
161
|
}
|
|
150
162
|
|
|
163
|
+
#######################################
# Run the Codex binary with CODEX_THREAD_ID removed from its environment.
# Globals:   codex_bin (read)
# Arguments: passed through unchanged to the Codex binary
# Outputs:   whatever the Codex binary writes
# Returns:   the exit status of the Codex binary (via env)
#######################################
run_codex_command() {
  # Nested workers must not inherit a parent thread id; the wrapper persists the child thread explicitly.
  env -u CODEX_THREAD_ID "$codex_bin" "$@"
}
|
|
167
|
+
|
|
168
|
+
#######################################
# Describe, for log messages, what will be resumed after a recovery.
# Globals:   thread_id (read)
# Arguments: none
# Outputs:   "thread <id>" when a thread id is known, otherwise
#            "initial Codex exec" (no trailing newline in either case)
# Returns:   0
#######################################
codex_recovery_target() {
  if [[ -n "${thread_id:-}" ]]; then
    printf 'thread %s' "$thread_id"
    return 0
  fi
  printf 'initial Codex exec'
}
|
|
175
|
+
|
|
151
176
|
run_with_timeout() {
|
|
152
177
|
local timeout_seconds="${1:?timeout seconds required}"
|
|
153
178
|
shift
|
|
@@ -398,13 +423,43 @@ persist_thread_id_from_line() {
|
|
|
398
423
|
fi
|
|
399
424
|
}
|
|
400
425
|
|
|
426
|
+
terminate_codex_producer_tree() {
|
|
427
|
+
local pid="${1:?pid required}"
|
|
428
|
+
local deadline=""
|
|
429
|
+
|
|
430
|
+
if ! kill -0 "$pid" 2>/dev/null; then
|
|
431
|
+
return 0
|
|
432
|
+
fi
|
|
433
|
+
|
|
434
|
+
pkill -TERM -P "$pid" 2>/dev/null || true
|
|
435
|
+
kill "$pid" 2>/dev/null || true
|
|
436
|
+
|
|
437
|
+
deadline=$(( $(date +%s) + 2 ))
|
|
438
|
+
while kill -0 "$pid" 2>/dev/null; do
|
|
439
|
+
if (( $(date +%s) >= deadline )); then
|
|
440
|
+
break
|
|
441
|
+
fi
|
|
442
|
+
sleep 0.1
|
|
443
|
+
done
|
|
444
|
+
|
|
445
|
+
if kill -0 "$pid" 2>/dev/null; then
|
|
446
|
+
pkill -KILL -P "$pid" 2>/dev/null || true
|
|
447
|
+
kill -9 "$pid" 2>/dev/null || true
|
|
448
|
+
fi
|
|
449
|
+
}
|
|
450
|
+
|
|
401
451
|
stream_codex_exec() {
|
|
402
452
|
local phase="${1:?phase required}"
|
|
403
453
|
local stream_fifo=""
|
|
404
454
|
local producer_pid=""
|
|
455
|
+
local heartbeat_pid=""
|
|
456
|
+
local progress_file=""
|
|
405
457
|
local line=""
|
|
406
458
|
|
|
407
459
|
last_attempt_start_size="$(stat -f %z "$output_file" 2>/dev/null || printf '0')"
|
|
460
|
+
last_attempt_started_epoch="$(date +%s)"
|
|
461
|
+
progress_file="${host_run_dir}/.codex-progress.$$"
|
|
462
|
+
rm -f "$progress_file"
|
|
408
463
|
stream_fifo="$(mktemp -u "${TMPDIR:-/tmp}/codex-stream.XXXXXX")"
|
|
409
464
|
mkfifo "$stream_fifo"
|
|
410
465
|
|
|
@@ -413,10 +468,10 @@ stream_codex_exec() {
|
|
|
413
468
|
(
|
|
414
469
|
case "$mode" in
|
|
415
470
|
safe)
|
|
416
|
-
|
|
471
|
+
run_codex_command exec --json --profile "$safe_profile" --full-auto <"$prompt_file"
|
|
417
472
|
;;
|
|
418
473
|
bypass)
|
|
419
|
-
|
|
474
|
+
run_codex_command exec --json --profile "$bypass_profile" --dangerously-bypass-approvals-and-sandbox <"$prompt_file"
|
|
420
475
|
;;
|
|
421
476
|
esac
|
|
422
477
|
) >"$stream_fifo" 2>&1 &
|
|
@@ -425,10 +480,10 @@ stream_codex_exec() {
|
|
|
425
480
|
(
|
|
426
481
|
case "$mode" in
|
|
427
482
|
safe)
|
|
428
|
-
resume_prompt |
|
|
483
|
+
resume_prompt | run_codex_command exec resume --json --full-auto "$thread_id" -
|
|
429
484
|
;;
|
|
430
485
|
bypass)
|
|
431
|
-
resume_prompt |
|
|
486
|
+
resume_prompt | run_codex_command exec resume --json --dangerously-bypass-approvals-and-sandbox "$thread_id" -
|
|
432
487
|
;;
|
|
433
488
|
esac
|
|
434
489
|
) >"$stream_fifo" 2>&1 &
|
|
@@ -441,12 +496,55 @@ stream_codex_exec() {
|
|
|
441
496
|
esac
|
|
442
497
|
|
|
443
498
|
producer_pid="$!"
|
|
499
|
+
(
|
|
500
|
+
local now elapsed last_progress_epoch idle_for
|
|
501
|
+
while kill -0 "$producer_pid" 2>/dev/null; do
|
|
502
|
+
sleep "$codex_progress_heartbeat_seconds"
|
|
503
|
+
if ! kill -0 "$producer_pid" 2>/dev/null; then
|
|
504
|
+
break
|
|
505
|
+
fi
|
|
506
|
+
now="$(date +%s)"
|
|
507
|
+
elapsed=$((now - last_attempt_started_epoch))
|
|
508
|
+
if (( codex_stall_seconds > 0 )); then
|
|
509
|
+
if [[ ! -f "$progress_file" ]]; then
|
|
510
|
+
if (( elapsed >= codex_stall_seconds )); then
|
|
511
|
+
write_state "running" ""
|
|
512
|
+
log_runner "stale-run no-codex-output-before-stall-threshold elapsed=${elapsed}s"
|
|
513
|
+
terminate_codex_producer_tree "$producer_pid"
|
|
514
|
+
break
|
|
515
|
+
fi
|
|
516
|
+
else
|
|
517
|
+
last_progress_epoch="$(stat -f %m "$progress_file" 2>/dev/null || printf '0')"
|
|
518
|
+
if [[ -n "$last_progress_epoch" && "$last_progress_epoch" != "0" ]]; then
|
|
519
|
+
idle_for=$((now - last_progress_epoch))
|
|
520
|
+
if (( idle_for >= codex_stall_seconds )); then
|
|
521
|
+
write_state "running" ""
|
|
522
|
+
log_runner "stale-run no-codex-progress-before-stall-threshold elapsed=${elapsed}s idle=${idle_for}s"
|
|
523
|
+
terminate_codex_producer_tree "$producer_pid"
|
|
524
|
+
break
|
|
525
|
+
fi
|
|
526
|
+
fi
|
|
527
|
+
fi
|
|
528
|
+
fi
|
|
529
|
+
write_state "running" ""
|
|
530
|
+
log_runner "heartbeat waiting-for-codex-output elapsed=${elapsed}s"
|
|
531
|
+
done
|
|
532
|
+
) &
|
|
533
|
+
heartbeat_pid="$!"
|
|
534
|
+
|
|
444
535
|
while IFS= read -r line || [[ -n "$line" ]]; do
|
|
445
536
|
printf '%s\n' "$line" | tee -a "$output_file"
|
|
537
|
+
touch "$progress_file" 2>/dev/null || true
|
|
446
538
|
persist_thread_id_from_line "$line"
|
|
447
539
|
done <"$stream_fifo"
|
|
448
540
|
|
|
541
|
+
if [[ -n "$heartbeat_pid" ]] && kill -0 "$heartbeat_pid" 2>/dev/null; then
|
|
542
|
+
kill "$heartbeat_pid" 2>/dev/null || true
|
|
543
|
+
wait "$heartbeat_pid" 2>/dev/null || true
|
|
544
|
+
fi
|
|
545
|
+
|
|
449
546
|
rm -f "$stream_fifo"
|
|
547
|
+
rm -f "$progress_file"
|
|
450
548
|
|
|
451
549
|
if wait "$producer_pid"; then
|
|
452
550
|
last_exit_code="0"
|
|
@@ -485,6 +583,16 @@ classify_failure_reason() {
|
|
|
485
583
|
|
|
486
584
|
recent_chunk="$(tail -n 120 <<<"$chunk")"
|
|
487
585
|
|
|
586
|
+
if grep -Eiq 'stale-run no-codex-output-before-stall-threshold|no-codex-output-before-stall-threshold' <<<"$recent_chunk"; then
|
|
587
|
+
printf 'no-codex-output-before-stall-threshold\n'
|
|
588
|
+
return 0
|
|
589
|
+
fi
|
|
590
|
+
|
|
591
|
+
if grep -Eiq 'stale-run no-codex-progress-before-stall-threshold|no-codex-progress-before-stall-threshold' <<<"$recent_chunk"; then
|
|
592
|
+
printf 'no-codex-progress-before-stall-threshold\n'
|
|
593
|
+
return 0
|
|
594
|
+
fi
|
|
595
|
+
|
|
488
596
|
if grep -Eiq "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" <<<"$recent_chunk"; then
|
|
489
597
|
printf 'usage-limit\n'
|
|
490
598
|
return 0
|
|
@@ -523,7 +631,7 @@ EOF
|
|
|
523
631
|
}
|
|
524
632
|
|
|
525
633
|
#######################################
# Check whether the Codex CLI reports a healthy login.
# Arguments: none
# Outputs:   none (the CLI's stdout and stderr are discarded)
# Returns:   the exit status of `login status` run through run_codex_command
#######################################
codex_login_healthy() {
  run_codex_command login status >/dev/null 2>&1
}
|
|
528
636
|
|
|
529
637
|
wait_for_auth_refresh() {
|
|
@@ -533,7 +641,9 @@ wait_for_auth_refresh() {
|
|
|
533
641
|
local baseline_switch_signature="${4:-}"
|
|
534
642
|
local deadline now current_fingerprint current_quota_label current_switch_signature
|
|
535
643
|
local sleep_seconds
|
|
644
|
+
local recovery_target
|
|
536
645
|
|
|
646
|
+
recovery_target="$(codex_recovery_target)"
|
|
537
647
|
auth_wait_started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
538
648
|
last_trigger_reason="$trigger_reason"
|
|
539
649
|
write_state "waiting-auth-refresh" "$trigger_reason"
|
|
@@ -548,28 +658,28 @@ wait_for_auth_refresh() {
|
|
|
548
658
|
current_switch_signature="$(quota_switch_signature)"
|
|
549
659
|
if codex_login_healthy; then
|
|
550
660
|
if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
|
|
551
|
-
log_runner "detected refreshed Codex auth after quota interruption; resuming
|
|
661
|
+
log_runner "detected refreshed Codex auth after quota interruption; resuming ${recovery_target}"
|
|
552
662
|
auth_wait_started_at=""
|
|
553
663
|
write_state "running" ""
|
|
554
664
|
return 0
|
|
555
665
|
fi
|
|
556
666
|
|
|
557
667
|
if [[ -n "$baseline_quota_label" && -n "$current_quota_label" && "$current_quota_label" != "$baseline_quota_label" ]]; then
|
|
558
|
-
log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming
|
|
668
|
+
log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming ${recovery_target}"
|
|
559
669
|
auth_wait_started_at=""
|
|
560
670
|
write_state "running" ""
|
|
561
671
|
return 0
|
|
562
672
|
fi
|
|
563
673
|
|
|
564
674
|
if [[ -n "$baseline_switch_signature" && -n "$current_switch_signature" && "$current_switch_signature" != "$baseline_switch_signature" ]]; then
|
|
565
|
-
log_runner "detected quota switch state refresh; resuming
|
|
675
|
+
log_runner "detected quota switch state refresh; resuming ${recovery_target}"
|
|
566
676
|
auth_wait_started_at=""
|
|
567
677
|
write_state "running" ""
|
|
568
678
|
return 0
|
|
569
679
|
fi
|
|
570
680
|
|
|
571
681
|
if [[ "$last_quota_switch_status" == "switched" && -n "$current_quota_label" ]]; then
|
|
572
|
-
log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming
|
|
682
|
+
log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming ${recovery_target}"
|
|
573
683
|
auth_wait_started_at=""
|
|
574
684
|
write_state "running" ""
|
|
575
685
|
return 0
|
|
@@ -580,9 +690,9 @@ wait_for_auth_refresh() {
|
|
|
580
690
|
*)
|
|
581
691
|
if codex_login_healthy; then
|
|
582
692
|
if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
|
|
583
|
-
log_runner "detected refreshed Codex auth; resuming
|
|
693
|
+
log_runner "detected refreshed Codex auth; resuming ${recovery_target}"
|
|
584
694
|
else
|
|
585
|
-
log_runner "Codex auth is healthy again; resuming
|
|
695
|
+
log_runner "Codex auth is healthy again; resuming ${recovery_target}"
|
|
586
696
|
fi
|
|
587
697
|
auth_wait_started_at=""
|
|
588
698
|
write_state "running" ""
|
|
@@ -643,11 +753,6 @@ attempt_run() {
|
|
|
643
753
|
reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
|
|
644
754
|
last_failure_reason="${reason:-worker-exit-failed}"
|
|
645
755
|
|
|
646
|
-
if [[ -z "$thread_id" ]]; then
|
|
647
|
-
write_state "failed" "$last_failure_reason"
|
|
648
|
-
return 1
|
|
649
|
-
fi
|
|
650
|
-
|
|
651
756
|
case "$last_failure_reason" in
|
|
652
757
|
usage-limit|auth-failure|auth-401|account-banned)
|
|
653
758
|
if (( resume_count >= max_resume_attempts )); then
|
|
@@ -24,6 +24,7 @@ EOF
|
|
|
24
24
|
mode=""
|
|
25
25
|
session=""
|
|
26
26
|
worktree=""
|
|
27
|
+
worktree_realpath=""
|
|
27
28
|
prompt_file=""
|
|
28
29
|
runs_root=""
|
|
29
30
|
adapter_id=""
|
|
@@ -105,6 +106,12 @@ if [[ -z "$mode" || -z "$session" || -z "$worktree" || -z "$prompt_file" || -z "
|
|
|
105
106
|
exit 1
|
|
106
107
|
fi
|
|
107
108
|
|
|
109
|
+
worktree_realpath="$(cd "$worktree" 2>/dev/null && pwd -P || true)"
|
|
110
|
+
if [[ -z "$worktree_realpath" || ! -d "$worktree_realpath" ]]; then
|
|
111
|
+
echo "unable to resolve worktree realpath: $worktree" >&2
|
|
112
|
+
exit 1
|
|
113
|
+
fi
|
|
114
|
+
|
|
108
115
|
case "$mode" in
|
|
109
116
|
safe|bypass) ;;
|
|
110
117
|
*)
|
|
@@ -119,7 +126,7 @@ inner_script="${artifact_dir}/${session}.sh"
|
|
|
119
126
|
meta_file="${artifact_dir}/run.env"
|
|
120
127
|
result_file="${artifact_dir}/result.env"
|
|
121
128
|
runner_state_file="${artifact_dir}/runner.env"
|
|
122
|
-
sandbox_run_dir="${
|
|
129
|
+
sandbox_run_dir="${worktree_realpath%/}/${sandbox_subdir}/${session}"
|
|
123
130
|
started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
124
131
|
codex_bin="$(resolve_codex_bin || true)"
|
|
125
132
|
runner_bin="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/agent-project-run-codex-resilient"
|
|
@@ -137,13 +144,14 @@ if [[ -z "$codex_bin" || ! -x "$codex_bin" ]]; then
|
|
|
137
144
|
exit 1
|
|
138
145
|
fi
|
|
139
146
|
|
|
140
|
-
branch_name="$(git -C "$
|
|
147
|
+
branch_name="$(git -C "$worktree_realpath" branch --show-current 2>/dev/null || true)"
|
|
141
148
|
|
|
142
149
|
printf -v session_q '%q' "$session"
|
|
143
150
|
printf -v task_kind_q '%q' "$task_kind"
|
|
144
151
|
printf -v task_id_q '%q' "$task_id"
|
|
145
152
|
printf -v mode_q '%q' "$mode"
|
|
146
153
|
printf -v worktree_q '%q' "$worktree"
|
|
154
|
+
printf -v worktree_realpath_q '%q' "$worktree_realpath"
|
|
147
155
|
printf -v prompt_q '%q' "$prompt_file"
|
|
148
156
|
printf -v output_q '%q' "$output_file"
|
|
149
157
|
printf -v artifact_dir_q '%q' "$artifact_dir"
|
|
@@ -166,6 +174,7 @@ printf -v bypass_profile_q '%q' "$bypass_profile"
|
|
|
166
174
|
printf 'SESSION=%s\n' "$session_q"
|
|
167
175
|
printf 'MODE=%s\n' "$mode_q"
|
|
168
176
|
printf 'WORKTREE=%s\n' "$worktree_q"
|
|
177
|
+
printf 'WORKTREE_REALPATH=%s\n' "$worktree_realpath_q"
|
|
169
178
|
printf 'PROMPT_FILE=%s\n' "$prompt_q"
|
|
170
179
|
printf 'OUTPUT_FILE=%s\n' "$output_q"
|
|
171
180
|
printf 'SCRIPT=%s\n' "$script_q"
|
|
@@ -256,7 +265,7 @@ fi
|
|
|
256
265
|
|
|
257
266
|
reconcile_snippet=""
|
|
258
267
|
if [[ -n "$reconcile_command" ]]; then
|
|
259
|
-
printf -v delayed_reconcile_q '%q' "sleep 2; $reconcile_command"
|
|
268
|
+
printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
|
|
260
269
|
reconcile_snippet="nohup bash -lc ${delayed_reconcile_q} >> ${output_q} 2>&1 </dev/null &"
|
|
261
270
|
fi
|
|
262
271
|
|
|
@@ -264,19 +273,59 @@ cat >"$inner_script" <<EOF
|
|
|
264
273
|
#!/usr/bin/env bash
|
|
265
274
|
set -euo pipefail
|
|
266
275
|
${runtime_exports}
|
|
267
|
-
${context_exports}cd ${
|
|
276
|
+
${context_exports}cd ${worktree_realpath_q}
|
|
268
277
|
reset_sandbox_run_dir() {
|
|
269
278
|
mkdir -p ${sandbox_run_dir_q}
|
|
270
279
|
find ${sandbox_run_dir_q} -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
|
|
271
280
|
}
|
|
281
|
+
find_logged_artifact_path() {
|
|
282
|
+
local artifact_name="\${1:?artifact name required}"
|
|
283
|
+
local candidate=""
|
|
284
|
+
|
|
285
|
+
while IFS= read -r candidate; do
|
|
286
|
+
[[ -n "\${candidate}" ]] || continue
|
|
287
|
+
while [[ "\${candidate}" == *')' || "\${candidate}" == *']' || "\${candidate}" == *',' || "\${candidate}" == *'"' || "\${candidate}" == *"'" ]]; do
|
|
288
|
+
candidate="\${candidate%?}"
|
|
289
|
+
done
|
|
290
|
+
if [[ "\$(basename "\${candidate}")" == "\${artifact_name}" && -f "\${candidate}" ]]; then
|
|
291
|
+
printf '%s\n' "\${candidate}"
|
|
292
|
+
fi
|
|
293
|
+
done < <(grep -oE '/(Users|Volumes|tmp)/[^[:space:])"]+' ${output_q} 2>/dev/null || true)
|
|
294
|
+
}
|
|
295
|
+
recover_logged_artifact() {
|
|
296
|
+
local artifact_name="\${1:?artifact name required}"
|
|
297
|
+
local destination="\${2:?destination required}"
|
|
298
|
+
local source_path=""
|
|
299
|
+
|
|
300
|
+
source_path="\$(find_logged_artifact_path "\${artifact_name}" | tail -n 1)"
|
|
301
|
+
[[ -n "\${source_path}" ]] || return 0
|
|
302
|
+
mkdir -p "\$(dirname "\${destination}")"
|
|
303
|
+
if [[ "\${source_path}" != "\${destination}" ]]; then
|
|
304
|
+
cp "\${source_path}" "\${destination}"
|
|
305
|
+
fi
|
|
306
|
+
}
|
|
307
|
+
recover_collected_artifact() {
|
|
308
|
+
local artifact_name="\${1:?artifact name required}"
|
|
309
|
+
local destination="\${2:?destination required}"
|
|
310
|
+
|
|
311
|
+
if [[ -f ${sandbox_run_dir_q}/"\${artifact_name}" ]]; then
|
|
312
|
+
if [[ ${sandbox_run_dir_q}/"\${artifact_name}" != "\${destination}" ]]; then
|
|
313
|
+
cp ${sandbox_run_dir_q}/"\${artifact_name}" "\${destination}"
|
|
314
|
+
fi
|
|
315
|
+
return 0
|
|
316
|
+
fi
|
|
317
|
+
|
|
318
|
+
recover_logged_artifact "\${artifact_name}" "\${destination}"
|
|
319
|
+
}
|
|
272
320
|
record_final_git_state() {
|
|
273
321
|
local final_head final_branch tmp_file
|
|
274
322
|
|
|
275
|
-
final_head="\$(git -C ${
|
|
276
|
-
final_branch="\$(git -C ${
|
|
323
|
+
final_head="\$(git -C ${worktree_realpath_q} rev-parse HEAD 2>/dev/null || true)"
|
|
324
|
+
final_branch="\$(git -C ${worktree_realpath_q} branch --show-current 2>/dev/null || true)"
|
|
277
325
|
tmp_file=${meta_file_q}.tmp.final.$$
|
|
278
|
-
grep -vE '^(FINAL_HEAD|FINAL_BRANCH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
|
|
326
|
+
grep -vE '^(FINAL_HEAD|FINAL_BRANCH|WORKTREE_REALPATH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
|
|
279
327
|
{
|
|
328
|
+
printf 'WORKTREE_REALPATH=%s\n' ${worktree_realpath_q}
|
|
280
329
|
printf 'FINAL_HEAD=%q\n' "\${final_head}"
|
|
281
330
|
printf 'FINAL_BRANCH=%q\n' "\${final_branch}"
|
|
282
331
|
} >>"\${tmp_file}"
|
|
@@ -286,7 +335,7 @@ reset_sandbox_run_dir
|
|
|
286
335
|
set +e
|
|
287
336
|
bash ${runner_bin_q} \\
|
|
288
337
|
--mode ${mode_q} \\
|
|
289
|
-
--worktree ${
|
|
338
|
+
--worktree ${worktree_realpath_q} \\
|
|
290
339
|
--prompt-file ${prompt_q} \\
|
|
291
340
|
--output-file ${output_q} \\
|
|
292
341
|
--host-run-dir ${artifact_dir_q} \\
|
|
@@ -296,9 +345,10 @@ bash ${runner_bin_q} \\
|
|
|
296
345
|
--codex-bin ${codex_bin_q}
|
|
297
346
|
status=\$?
|
|
298
347
|
record_final_git_state
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
348
|
+
recover_collected_artifact result.env ${result_q}
|
|
349
|
+
recover_collected_artifact issue-comment.md ${artifact_dir_q}/issue-comment.md
|
|
350
|
+
recover_collected_artifact pr-comment.md ${artifact_dir_q}/pr-comment.md
|
|
351
|
+
recover_collected_artifact verification.jsonl ${artifact_dir_q}/verification.jsonl
|
|
302
352
|
${collect_copy_snippet}${reconcile_snippet}
|
|
303
353
|
printf '\n__CODEX_EXIT__:%s\n' "\$status" | tee -a ${output_q}
|
|
304
354
|
exit "\$status"
|