agent-control-plane 0.1.13 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -355
- package/SKILL.md +1 -1
- package/hooks/heartbeat-hooks.sh +16 -9
- package/npm/bin/agent-control-plane.js +117 -8
- package/package.json +3 -1
- package/references/commands.md +2 -2
- package/references/control-plane-map.md +1 -1
- package/tools/bin/agent-project-reconcile-issue-session +23 -0
- package/tools/bin/agent-project-reconcile-pr-session +191 -22
- package/tools/bin/agent-project-run-codex-resilient +57 -2
- package/tools/bin/agent-project-run-openclaw-session +46 -0
- package/tools/bin/agent-project-worker-status +37 -0
- package/tools/bin/flow-config-lib.sh +7 -0
- package/tools/bin/flow-shell-lib.sh +2 -0
- package/tools/bin/heartbeat-safe-auto.sh +20 -10
- package/tools/bin/project-runtimectl.sh +1 -1
- package/tools/bin/provider-cooldown-state.sh +39 -1
- package/tools/bin/start-issue-worker.sh +35 -0
- package/tools/bin/start-pr-fix-worker.sh +3 -0
- package/tools/bin/start-pr-review-worker.sh +3 -0
- package/tools/bin/start-resident-issue-loop.sh +1 -0
- package/tools/dashboard/app.js +136 -0
- package/tools/dashboard/dashboard_snapshot.py +253 -3
- package/tools/dashboard/index.html +5 -1
- package/tools/dashboard/styles.css +97 -20
- package/tools/templates/pr-fix-template.md +6 -6
- package/tools/templates/pr-merge-repair-template.md +6 -6
- package/tools/vendor/codex-quota-manager/scripts/auto-switch.sh +8 -6
- package/tools/bin/render-dashboard-snapshot.py +0 -16
- package/tools/templates/legacy/issue-prompt-template-pre-slim.md +0 -109
|
@@ -227,23 +227,30 @@ schedule_provider_quota_cooldown() {
|
|
|
227
227
|
local reason="${1:-provider-quota-limit}"
|
|
228
228
|
[[ "${failure_reason:-}" == "provider-quota-limit" ]] || return 0
|
|
229
229
|
[[ -x "${provider_cooldown_script}" ]] || return 0
|
|
230
|
+
[[ "${CODING_WORKER:-}" == "codex" ]] && return 0
|
|
230
231
|
|
|
231
232
|
"${provider_cooldown_script}" schedule "${reason}" >/dev/null || true
|
|
232
233
|
}
|
|
233
234
|
|
|
234
235
|
# Clear any scheduled provider-quota cooldown.
# Globals:  provider_cooldown_script (read), CODING_WORKER (read)
# Returns:  always 0. Does nothing when the cooldown script is not executable
#           or when the active coding worker is "codex"; a failing "clear"
#           call is tolerated.
clear_provider_quota_cooldown() {
  if [[ ! -x "${provider_cooldown_script}" ]]; then
    return 0
  fi
  if [[ "${CODING_WORKER:-}" == "codex" ]]; then
    return 0
  fi

  "${provider_cooldown_script}" clear >/dev/null || true
}
|
|
239
241
|
|
|
240
242
|
blocked_runtime_reason=""
|
|
243
|
+
host_github_rate_limited="no"
|
|
244
|
+
host_github_rate_limit_detail=""
|
|
241
245
|
|
|
242
246
|
owner="${repo_slug%%/*}"
|
|
243
247
|
repo="${repo_slug#*/}"
|
|
244
248
|
pr_view_json="$(flow_github_pr_view_json "$repo_slug" "$pr_number")"
|
|
245
249
|
pr_state="$(jq -r '.state' <<<"$pr_view_json")"
|
|
246
|
-
pr_base_ref="$(jq -r '.baseRefName //
|
|
250
|
+
pr_base_ref="$(jq -r '.baseRefName // empty' <<<"$pr_view_json")"
|
|
251
|
+
if [[ -z "${pr_base_ref}" ]]; then
|
|
252
|
+
pr_base_ref="main"
|
|
253
|
+
fi
|
|
247
254
|
|
|
248
255
|
if [[ "$status" == "RUNNING" && "$pr_state" != "MERGED" && "$pr_state" != "CLOSED" ]]; then
|
|
249
256
|
printf 'STATUS=%s\n' "$status"
|
|
@@ -395,7 +402,9 @@ post_pr_comment_if_present() {
|
|
|
395
402
|
if pr_comment_already_posted; then
|
|
396
403
|
return 0
|
|
397
404
|
fi
|
|
398
|
-
|
|
405
|
+
if ! host_github_post_issue_comment "${pr_number}" "$(cat "$comment_file")"; then
|
|
406
|
+
return 1
|
|
407
|
+
fi
|
|
399
408
|
}
|
|
400
409
|
|
|
401
410
|
pr_comment_already_posted() {
|
|
@@ -408,6 +417,120 @@ pr_comment_already_posted() {
|
|
|
408
417
|
jq -e --arg body "$comment_body" 'any(.comments[]?; .body == $body)' >/dev/null <<<"$comments_json"
|
|
409
418
|
}
|
|
410
419
|
|
|
420
|
+
# Decide whether captured GitHub CLI output describes a rate-limit rejection.
# Arguments: $1 - combined stdout/stderr text of a failed gh call (may be empty)
# Returns:   0 when the text carries rate-limit wording or an HTTP 429 status,
#            1 otherwise.
#
# Only rate-limit specific wording counts. A bare "HTTP 403" is intentionally
# NOT treated as a rate limit: 403 is also the status for ordinary permission
# failures, and classifying those as rate limits would schedule retries for an
# error that retrying cannot fix. Rate-limit responses that use 403 still match
# through their message text. "already exceeded" covers the wording variant
# "API rate limit already exceeded for ...".
host_github_output_indicates_rate_limit() {
  grep -Eiq 'rate limit (already )?exceeded|secondary rate limit|HTTP 429' <<<"${1:-}"
}
|
|
423
|
+
|
|
424
|
+
# Remember that a host-side GitHub call was rejected by rate limiting.
# Arguments: $1 - raw output of the failed call (may be empty)
# Globals:   host_github_rate_limited (set to "yes"),
#            host_github_rate_limit_detail (set to $1), run_dir (read)
# Outputs:   overwrites ${run_dir}/host-github-rate-limit.log with the output
# Returns:   status of the log write
record_host_github_rate_limit() {
  local output="${1:-}"

  host_github_rate_limited="yes"
  host_github_rate_limit_detail="${output}"
  printf '%s\n' "${output}" >"${run_dir}/host-github-rate-limit.log"
}
|
|
431
|
+
|
|
432
|
+
# Post a comment on an issue or pull request via `gh api`.
# Arguments: $1 - issue/PR number (required; the shell aborts if missing)
#            $2 - comment body (defaults to empty)
# Globals:   repo_slug (read)
# Returns:   0 when gh succeeds, 1 otherwise. On failure the captured gh
#            output is either recorded as a rate limit (and not printed) or
#            echoed to stderr.
host_github_post_issue_comment() {
  local issue_number="${1:?issue number required}"
  local body="${2:-}"
  local output=""

  flow_export_github_cli_auth_env "${repo_slug}"

  if ! output="$(
    gh api "repos/${repo_slug}/issues/${issue_number}/comments" \
      --method POST \
      -f body="${body}" 2>&1
  )"; then
    if host_github_output_indicates_rate_limit "${output}"; then
      record_host_github_rate_limit "${output}"
    else
      printf '%s\n' "${output}" >&2
    fi
    return 1
  fi

  return 0
}
|
|
454
|
+
|
|
455
|
+
# Submit an APPROVE review on the current pull request via `gh api`.
# Globals:   repo_slug, pr_number (read)
# Returns:   0 when the review is accepted, or when GitHub answers that the
#            actor cannot approve its own pull request; 1 otherwise. Other
#            failures are recorded as a rate limit when the output looks like
#            one, and echoed to stderr when it does not.
host_github_submit_pr_approval() {
  local output=""

  flow_export_github_cli_auth_env "${repo_slug}"

  output="$(
    gh api "repos/${repo_slug}/pulls/${pr_number}/reviews" \
      --method POST \
      -f event=APPROVE \
      -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
      2>&1
  )" && return 0

  # A self-authored PR cannot be approved by its author; this is not an error here.
  if grep -q "Can not approve your own pull request" <<<"${output}"; then
    return 0
  fi

  if host_github_output_indicates_rate_limit "${output}"; then
    record_host_github_rate_limit "${output}"
  else
    printf '%s\n' "${output}" >&2
  fi
  return 1
}
|
|
481
|
+
|
|
482
|
+
# Append a "Host action blocked" section to the PR comment file.
# Arguments: $1 - rate-limit detail text; only a "resets at ..." fragment
#                 (case-insensitive, up to the next period) is surfaced
# Globals:   pr_comment_file (appended to)
# Outputs:   a blank-line separator when the file already has content, the
#            heading, the explanation, an optional reset bullet, and the
#            retry bullet.
append_host_rate_limit_comment() {
  local detail="${1:-GitHub API rate limit blocked host actions.}"
  local reset_line=""
  local needs_separator="no"

  # First "resets at ..." fragment, or empty when the detail has none.
  reset_line="$(grep -Eio 'resets at [^.]+' <<<"${detail}" | head -n 1 || true)"

  if [[ -s "${pr_comment_file}" ]]; then
    needs_separator="yes"
  fi

  {
    [[ "${needs_separator}" == "yes" ]] && printf '\n\n'
    printf '## Host action blocked\n\n'
    printf 'GitHub API rate limit blocked ACP from posting the PR review outcome or merge action.\n'
    [[ -n "${reset_line}" ]] && printf '\n- %s\n' "${reset_line}"
    printf -- '- ACP kept the local review artifacts and scheduled an automatic retry for the host action.\n'
  } >>"${pr_comment_file}"
}
|
|
502
|
+
|
|
503
|
+
# Finish reconciliation after a host-side GitHub call hit a rate limit.
# Arguments: $1 - retry reason (default "github-api-rate-limit")
#            $2 - value stored in result_action (default "host-rate-limit-retry")
# Globals:   host_github_rate_limit_detail, pr_number, pr_state (read);
#            result_outcome, result_action, failure_reason (written)
# Outputs:   the KEY=VALUE status summary on stdout
# Exits:     terminates the script with status 0 after printing the summary,
#            so it never returns to its caller.
handle_host_github_rate_limit_retry() {
  local reason="${1:-github-api-rate-limit}"
  local result_action_override="${2:-host-rate-limit-retry}"

  append_host_rate_limit_comment "${host_github_rate_limit_detail:-}"
  require_transition "pr_schedule_retry" pr_schedule_retry "${reason}"
  require_transition "pr_after_blocked" pr_after_blocked "${pr_number}"
  cleanup_pr_session

  # Result fields are assigned before the notification and reconcile steps run.
  result_outcome="blocked"
  result_action="${result_action_override}"
  failure_reason="${reason}"
  notify_pr_reconciled
  mark_reconciled

  printf 'STATUS=FAILED\nPR_NUMBER=%s\nPR_STATE=%s\nOUTCOME=%s\nACTION=%s\nFAILURE_REASON=%s\n' \
    "${pr_number}" "${pr_state}" "${result_outcome}" "${result_action}" "${failure_reason}"
  exit 0
}
|
|
524
|
+
|
|
525
|
+
# Failure hook for `some_host_call || maybe_handle_host_github_rate_limit ...`.
# Arguments: $1 - retry reason (default "github-api-rate-limit")
#            $2 - result action (default "host-rate-limit-retry")
# Globals:   host_github_rate_limited (read)
# Returns:   1 in every case where it returns, so the original failure still
#            propagates. When a rate limit was recorded it first delegates to
#            handle_host_github_rate_limit_retry.
maybe_handle_host_github_rate_limit() {
  local reason="${1:-github-api-rate-limit}"
  local result_action_override="${2:-host-rate-limit-retry}"

  if [[ "${host_github_rate_limited}" != "yes" ]]; then
    return 1
  fi

  handle_host_github_rate_limit_retry "${reason}" "${result_action_override}"
  return 1
}
|
|
533
|
+
|
|
411
534
|
blocked_result_indicates_local_bind_failure() {
|
|
412
535
|
local candidate_file
|
|
413
536
|
for candidate_file in "$pr_comment_file" "$session_log_file"; do
|
|
@@ -430,6 +553,31 @@ classify_pr_blocked_runtime_reason() {
|
|
|
430
553
|
return 0
|
|
431
554
|
fi
|
|
432
555
|
|
|
556
|
+
if [[ -f "$session_log_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
|
|
557
|
+
printf 'codex-stalled\n'
|
|
558
|
+
return 0
|
|
559
|
+
fi
|
|
560
|
+
|
|
561
|
+
if [[ -f "$session_log_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
|
|
562
|
+
printf 'agent-stalled\n'
|
|
563
|
+
return 0
|
|
564
|
+
fi
|
|
565
|
+
|
|
566
|
+
if [[ -f "$session_log_file" ]] && grep -Eiq 'provider-quota-limit|quota.*exhausted|rate.limit.*exceeded' "$session_log_file" 2>/dev/null; then
|
|
567
|
+
printf 'provider-quota-limit\n'
|
|
568
|
+
return 0
|
|
569
|
+
fi
|
|
570
|
+
|
|
571
|
+
if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
|
|
572
|
+
printf 'codex-stalled\n'
|
|
573
|
+
return 0
|
|
574
|
+
fi
|
|
575
|
+
|
|
576
|
+
if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
|
|
577
|
+
printf 'agent-stalled\n'
|
|
578
|
+
return 0
|
|
579
|
+
fi
|
|
580
|
+
|
|
433
581
|
return 1
|
|
434
582
|
}
|
|
435
583
|
|
|
@@ -774,23 +922,38 @@ pr_remote_already_has_final_head() {
|
|
|
774
922
|
}
|
|
775
923
|
|
|
776
924
|
# Approve (unless the PR is self-authored) and squash-merge the pull request.
# Globals:   repo_slug, pr_number, run_dir, host_github_rate_limited (read)
# Outputs:   gh merge stdout/stderr are captured under ${run_dir}; the final
#            error text is echoed to stderr on a non-rate-limit failure
# Returns:   0 merged, 1 failed, 2 blocked by a GitHub rate limit
approve_and_merge() {
  local merge_output=""

  if ! pr_is_self_authored_for_current_actor && ! host_github_submit_pr_approval; then
    [[ "${host_github_rate_limited}" == "yes" ]] && return 2
    return 1
  fi

  flow_export_github_cli_auth_env "${repo_slug}"

  # First attempt: admin squash merge through the gh CLI.
  if gh pr merge "${pr_number}" -R "${repo_slug}" --squash --delete-branch --admin >"${run_dir}/host-github-merge.out" 2>"${run_dir}/host-github-merge.err"; then
    return 0
  fi

  merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
  if host_github_output_indicates_rate_limit "${merge_output}"; then
    record_host_github_rate_limit "${merge_output}"
    return 2
  fi

  # Second attempt: the shared merge helper; its stderr replaces the first capture.
  if flow_github_pr_merge "$repo_slug" "$pr_number" "squash" "yes" 2>"${run_dir}/host-github-merge.err"; then
    return 0
  fi

  merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
  if host_github_output_indicates_rate_limit "${merge_output}"; then
    record_host_github_rate_limit "${merge_output}"
    return 2
  fi

  [[ -n "${merge_output}" ]] && printf '%s\n' "${merge_output}" >&2
  return 1
}
|
|
795
958
|
|
|
796
959
|
cleanup_pr_session() {
|
|
@@ -829,7 +992,7 @@ handle_linked_issue_merge_cleanup() {
|
|
|
829
992
|
handle_updated_branch_result() {
|
|
830
993
|
if [[ -z "$pr_worktree" || ! -d "$pr_worktree" ]]; then
|
|
831
994
|
if pr_remote_already_has_final_head; then
|
|
832
|
-
post_pr_comment_if_present
|
|
995
|
+
post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
|
|
833
996
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
834
997
|
require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
|
|
835
998
|
cleanup_pr_session
|
|
@@ -895,7 +1058,7 @@ handle_updated_branch_result() {
|
|
|
895
1058
|
fi
|
|
896
1059
|
|
|
897
1060
|
push_pr_branch
|
|
898
|
-
post_pr_comment_if_present
|
|
1061
|
+
post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
|
|
899
1062
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
900
1063
|
require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
|
|
901
1064
|
cleanup_pr_session
|
|
@@ -936,7 +1099,7 @@ elif [[ "$pr_state" == "CLOSED" ]]; then
|
|
|
936
1099
|
result_action="${result_action:-cleaned-closed-pr}"
|
|
937
1100
|
notify_pr_reconciled
|
|
938
1101
|
elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-passed" ]]; then
|
|
939
|
-
|
|
1102
|
+
if ! review_pass_action="$(review_pass_action_from_result_action "${result_action:-}" 2>/dev/null)"; then
|
|
940
1103
|
review_pass_action="$(pr_review_pass_action "$pr_number")"
|
|
941
1104
|
fi
|
|
942
1105
|
case "$review_pass_action" in
|
|
@@ -963,8 +1126,13 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-
|
|
|
963
1126
|
fi
|
|
964
1127
|
|
|
965
1128
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
966
|
-
approve_and_merge
|
|
967
|
-
|
|
1129
|
+
if ! approve_and_merge; then
|
|
1130
|
+
if [[ "${host_github_rate_limited}" == "yes" ]]; then
|
|
1131
|
+
handle_host_github_rate_limit_retry "github-api-rate-limit" "host-merge-rate-limit-retry"
|
|
1132
|
+
fi
|
|
1133
|
+
exit 1
|
|
1134
|
+
fi
|
|
1135
|
+
pr_state="MERGED"
|
|
968
1136
|
if [[ "$pr_state" != "MERGED" ]]; then
|
|
969
1137
|
echo "PR ${pr_number} did not merge successfully" >&2
|
|
970
1138
|
exit 1
|
|
@@ -1018,7 +1186,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
|
|
|
1018
1186
|
result_action="host-rejected-noop-promotion"
|
|
1019
1187
|
else
|
|
1020
1188
|
push_pr_branch
|
|
1021
|
-
post_pr_comment_if_present
|
|
1189
|
+
post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
|
|
1022
1190
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
1023
1191
|
require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
|
|
1024
1192
|
cleanup_pr_session
|
|
@@ -1041,7 +1209,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
|
|
|
1041
1209
|
result_action="host-rejected-no-change-needed"
|
|
1042
1210
|
notify_pr_reconciled
|
|
1043
1211
|
else
|
|
1044
|
-
post_pr_comment_if_present
|
|
1212
|
+
post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
|
|
1045
1213
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
1046
1214
|
require_transition "pr_after_succeeded" pr_after_succeeded "$pr_number"
|
|
1047
1215
|
cleanup_pr_session
|
|
@@ -1062,7 +1230,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "blocked" ]]; then
|
|
|
1062
1230
|
elif attempt_blocked_pr_host_verification_recovery; then
|
|
1063
1231
|
handle_updated_branch_result
|
|
1064
1232
|
else
|
|
1065
|
-
post_pr_comment_if_present
|
|
1233
|
+
post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
|
|
1066
1234
|
require_transition "pr_clear_retry" pr_clear_retry
|
|
1067
1235
|
require_transition "pr_after_blocked" pr_after_blocked "$pr_number"
|
|
1068
1236
|
cleanup_pr_session
|
|
@@ -1079,6 +1247,7 @@ elif [[ "$status" == "FAILED" ]]; then
|
|
|
1079
1247
|
schedule_provider_quota_cooldown "${failure_reason:-worker-exit-failed}"
|
|
1080
1248
|
require_transition "pr_schedule_retry" pr_schedule_retry "${failure_reason:-worker-exit-failed}"
|
|
1081
1249
|
require_transition "pr_after_failed" pr_after_failed "$pr_number"
|
|
1250
|
+
cleanup_pr_session
|
|
1082
1251
|
notify_pr_reconciled
|
|
1083
1252
|
fi
|
|
1084
1253
|
|
|
@@ -618,6 +618,22 @@ classify_failure_reason() {
|
|
|
618
618
|
fi
|
|
619
619
|
}
|
|
620
620
|
|
|
621
|
+
# Decide whether a failed attempt stalled before the first tool activity.
# Arguments: $1 - output produced by the failed attempt
# Returns:   0 when the last 120 lines contain both a thread.started and a
#            turn.started event but no item.started, item.completed or
#            turn.completed event; 1 otherwise.
#
# Event names are matched literally (fixed strings, or with the dot escaped)
# so that "." cannot act as a regex wildcard.
failure_chunk_indicates_startup_stall() {
  local chunk="${1:-}"
  local recent_chunk

  recent_chunk="$(tail -n 120 <<<"$chunk")"

  grep -Fq '"type":"thread.started"' <<<"$recent_chunk" || return 1
  grep -Fq '"type":"turn.started"' <<<"$recent_chunk" || return 1

  # Any item activity or a completed turn means the run got past startup.
  if grep -Eq '"type":"(item\.(started|completed)|turn\.completed)"' <<<"$recent_chunk"; then
    return 1
  fi
  return 0
}
|
|
636
|
+
|
|
621
637
|
resume_prompt() {
|
|
622
638
|
cat <<EOF
|
|
623
639
|
The previous Codex exec turn in this same thread was interrupted because the host refreshed Codex authentication after a quota or auth failure.
|
|
@@ -729,7 +745,7 @@ run_resume_exec() {
|
|
|
729
745
|
}
|
|
730
746
|
|
|
731
747
|
attempt_run() {
|
|
732
|
-
local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch
|
|
748
|
+
local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch failure_chunk startup_stall
|
|
733
749
|
|
|
734
750
|
attempt=$((attempt + 1))
|
|
735
751
|
last_quota_switch_status=""
|
|
@@ -750,8 +766,15 @@ attempt_run() {
|
|
|
750
766
|
return 0
|
|
751
767
|
fi
|
|
752
768
|
|
|
753
|
-
|
|
769
|
+
failure_chunk="$(new_output_since "$last_attempt_start_size")"
|
|
770
|
+
reason="$(classify_failure_reason "$failure_chunk")"
|
|
754
771
|
last_failure_reason="${reason:-worker-exit-failed}"
|
|
772
|
+
startup_stall="no"
|
|
773
|
+
if [[ "$last_failure_reason" == "no-codex-output-before-stall-threshold" || "$last_failure_reason" == "no-codex-progress-before-stall-threshold" ]]; then
|
|
774
|
+
if failure_chunk_indicates_startup_stall "$failure_chunk"; then
|
|
775
|
+
startup_stall="yes"
|
|
776
|
+
fi
|
|
777
|
+
fi
|
|
755
778
|
|
|
756
779
|
case "$last_failure_reason" in
|
|
757
780
|
usage-limit|auth-failure|auth-401|account-banned)
|
|
@@ -796,6 +819,38 @@ attempt_run() {
|
|
|
796
819
|
resume_count=$((resume_count + 1))
|
|
797
820
|
return 2
|
|
798
821
|
;;
|
|
822
|
+
no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold)
|
|
823
|
+
if [[ "$startup_stall" == "yes" && $quota_autoswitch_attempt_count -lt $max_quota_autoswitch_attempts ]]; then
|
|
824
|
+
auth_before_switch="$(auth_fingerprint)"
|
|
825
|
+
quota_label_before_switch="$last_attempt_start_quota_label"
|
|
826
|
+
quota_switch_signature_before_switch="$(quota_switch_signature)"
|
|
827
|
+
last_auth_fingerprint="$auth_before_switch"
|
|
828
|
+
write_state "switching-account" "$last_failure_reason"
|
|
829
|
+
log_runner "startup-stall detected before first Codex tool activity; attempting Codex account rotation"
|
|
830
|
+
shell_flags_before_quota_switch="$-"
|
|
831
|
+
set +e
|
|
832
|
+
run_quota_autoswitch
|
|
833
|
+
quota_switch_result=$?
|
|
834
|
+
case "$shell_flags_before_quota_switch" in
|
|
835
|
+
*e*) set -e ;;
|
|
836
|
+
*) set +e ;;
|
|
837
|
+
esac
|
|
838
|
+
if [[ "$quota_switch_result" == "0" ]]; then
|
|
839
|
+
thread_id=""
|
|
840
|
+
auth_wait_started_at=""
|
|
841
|
+
write_state "running" ""
|
|
842
|
+
return 2
|
|
843
|
+
fi
|
|
844
|
+
if [[ "$quota_switch_result" == "10" ]]; then
|
|
845
|
+
log_runner "startup-stall rotation deferred until ${last_quota_next_retry_at:-unknown}"
|
|
846
|
+
last_failure_reason="quota-switch-deferred"
|
|
847
|
+
write_state "failed" "$last_failure_reason"
|
|
848
|
+
return 1
|
|
849
|
+
fi
|
|
850
|
+
fi
|
|
851
|
+
write_state "failed" "$last_failure_reason"
|
|
852
|
+
return 1
|
|
853
|
+
;;
|
|
799
854
|
*)
|
|
800
855
|
write_state "failed" "$last_failure_reason"
|
|
801
856
|
return 1
|
|
@@ -115,6 +115,7 @@ result_file="${artifact_dir}/result.env"
|
|
|
115
115
|
runner_state_file="${artifact_dir}/runner.env"
|
|
116
116
|
sandbox_artifact_dir="${worktree%/}/${sandbox_subdir}"
|
|
117
117
|
sandbox_run_dir="${worktree%/}/${sandbox_subdir}/${session}"
|
|
118
|
+
retained_repo_root="${ACP_RETAINED_REPO_ROOT:-${F_LOSNING_RETAINED_REPO_ROOT:-}}"
|
|
118
119
|
started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
119
120
|
openclaw_bin="$(command -v openclaw)"
|
|
120
121
|
default_openclaw_agent_id="$(
|
|
@@ -154,6 +155,7 @@ printf -v runner_state_q '%q' "$runner_state_file"
|
|
|
154
155
|
printf -v branch_q '%q' "$branch_name"
|
|
155
156
|
printf -v sandbox_artifact_dir_q '%q' "$sandbox_artifact_dir"
|
|
156
157
|
printf -v sandbox_run_dir_q '%q' "$sandbox_run_dir"
|
|
158
|
+
printf -v retained_repo_root_q '%q' "$retained_repo_root"
|
|
157
159
|
printf -v adapter_id_q '%q' "$adapter_id"
|
|
158
160
|
printf -v started_at_q '%q' "$started_at"
|
|
159
161
|
printf -v openclaw_bin_q '%q' "$openclaw_bin"
|
|
@@ -230,18 +232,21 @@ export AGENT_PROJECT_RUN_DIR=${sandbox_run_dir_q}
|
|
|
230
232
|
export AGENT_PROJECT_HOST_RUN_DIR=${artifact_dir_q}
|
|
231
233
|
export AGENT_PROJECT_RESULT_FILE=${sandbox_run_dir_q}/result.env
|
|
232
234
|
export AGENT_PROJECT_OPENCLAW_BIN=${openclaw_bin_q}
|
|
235
|
+
export AGENT_PROJECT_RETAINED_REPO_ROOT=${retained_repo_root_q}
|
|
233
236
|
export ACP_SESSION=${session_q}
|
|
234
237
|
export ACP_RUN_DIR=${sandbox_run_dir_q}
|
|
235
238
|
export ACP_HOST_RUN_DIR=${artifact_dir_q}
|
|
236
239
|
export ACP_RESULT_FILE=${sandbox_run_dir_q}/result.env
|
|
237
240
|
export ACP_OPENCLAW_BIN=${openclaw_bin_q}
|
|
238
241
|
export ACP_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
|
|
242
|
+
export ACP_RETAINED_REPO_ROOT=${retained_repo_root_q}
|
|
239
243
|
export F_LOSNING_SESSION=${session_q}
|
|
240
244
|
export F_LOSNING_RUN_DIR=${sandbox_run_dir_q}
|
|
241
245
|
export F_LOSNING_HOST_RUN_DIR=${artifact_dir_q}
|
|
242
246
|
export F_LOSNING_RESULT_FILE=${sandbox_run_dir_q}/result.env
|
|
243
247
|
export F_LOSNING_OPENCLAW_BIN=${openclaw_bin_q}
|
|
244
248
|
export F_LOSNING_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
|
|
249
|
+
export F_LOSNING_RETAINED_REPO_ROOT=${retained_repo_root_q}
|
|
245
250
|
export OPENCLAW_STATE_DIR=${openclaw_state_dir_q}
|
|
246
251
|
export OPENCLAW_CONFIG_PATH=${openclaw_config_path_q}
|
|
247
252
|
EOF
|
|
@@ -294,6 +299,7 @@ runner_state_file=${runner_state_q}
|
|
|
294
299
|
output_file=${output_q}
|
|
295
300
|
sandbox_artifact_dir=${sandbox_artifact_dir_q}
|
|
296
301
|
sandbox_run_dir=${sandbox_run_dir_q}
|
|
302
|
+
retained_repo_root=${retained_repo_root_q}
|
|
297
303
|
artifact_dir=${artifact_dir_q}
|
|
298
304
|
run_dir=${artifact_dir_q}
|
|
299
305
|
task_kind=${task_kind_q}
|
|
@@ -554,6 +560,45 @@ recover_literal_runtime_artifacts() {
|
|
|
554
560
|
return 0
|
|
555
561
|
}
|
|
556
562
|
|
|
563
|
+
# Pull run artifacts back from a same-named worktree directory under the
# retained repo root, then remove that leaked directory.
# NOTE(review): this text appears to sit inside a generated-script template
# (dollar signs are backslash-escaped); keep the escapes when editing.
# Reads:   retained_repo_root, worktree, AGENT_PROJECT_SESSION,
#          sandbox_run_dir, artifact_dir, output_file
# Returns: always 0; copy and cleanup failures are ignored.
recover_retained_repo_artifact_leaks() {
  local session_name="\${AGENT_PROJECT_SESSION:-}"
  local worktree_name=""
  local retained_worktree_root=""
  local leaked_run_dir=""
  local artifact_name=""
  local recovered="no"

  if [[ -z "\${retained_repo_root}" || -z "\${session_name}" ]]; then
    return 0
  fi

  worktree_name="\$(basename "\${worktree}")"
  retained_worktree_root="\${retained_repo_root%/}/worktrees"
  leaked_run_dir="\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts/\${session_name}"

  # Nothing to do when the directory is absent or is the sandbox run dir itself.
  [[ -d "\${leaked_run_dir}" ]] || return 0
  [[ "\${leaked_run_dir}" != "\${sandbox_run_dir}" ]] || return 0

  for artifact_name in result.env verification.jsonl issue-comment.md pr-comment.md; do
    [[ -f "\${leaked_run_dir}/\${artifact_name}" ]] || continue
    cp "\${leaked_run_dir}/\${artifact_name}" "\${sandbox_run_dir}/\${artifact_name}" 2>/dev/null || true
    cp "\${leaked_run_dir}/\${artifact_name}" "\${artifact_dir}/\${artifact_name}" 2>/dev/null || true
    recovered="yes"
  done

  # Remove the leaked run dir, then prune parent directories only if they are empty.
  rm -rf "\${leaked_run_dir}" 2>/dev/null || true
  rmdir "\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts" 2>/dev/null || true
  rmdir "\${retained_worktree_root}/\${worktree_name}" 2>/dev/null || true
  rmdir "\${retained_worktree_root}" 2>/dev/null || true

  if [[ "\${recovered}" == "yes" ]]; then
    printf '[openclaw] recovered retained-repo artifact leak: %s\n' "\${leaked_run_dir}" >>"\${output_file}" 2>/dev/null || true
  fi

  return 0
}
|
|
601
|
+
|
|
557
602
|
reset_sandbox_run_dir() {
|
|
558
603
|
mkdir -p "\${sandbox_run_dir}"
|
|
559
604
|
find "\${sandbox_run_dir}" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
|
|
@@ -1217,6 +1262,7 @@ while true; do
|
|
|
1217
1262
|
break
|
|
1218
1263
|
done
|
|
1219
1264
|
recover_literal_runtime_artifacts
|
|
1265
|
+
recover_retained_repo_artifact_leaks
|
|
1220
1266
|
infer_result_from_output
|
|
1221
1267
|
synthesize_comment_artifact_from_output
|
|
1222
1268
|
if [[ "\${status}" -eq 0 ]]; then
|
|
@@ -44,6 +44,36 @@ runner_state=""
|
|
|
44
44
|
thread_id=""
|
|
45
45
|
last_exit_code=""
|
|
46
46
|
|
|
47
|
+
# Infer a failure reason from the worker output log.
# Globals:  output_file (read)
# Outputs:  one reason token on stdout: usage-limit,
#           no-codex-output-before-stall-threshold or
#           no-codex-progress-before-stall-threshold
# Returns:  0 when a reason was printed; 1 when the file is missing or nothing
#           matched.
#
# Uses grep rather than ripgrep so the probes work on hosts without the `rg`
# binary; with `rg` missing every probe fails and no reason is ever reported.
failure_reason_from_output() {
  [[ -f "$output_file" ]] || return 1

  if grep -Eqi "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" -- "$output_file"; then
    printf 'usage-limit\n'
    return 0
  fi

  # The "stale-run ..." prefixed form contains the same token, so one literal
  # match covers both spellings.
  if grep -Fqi 'no-codex-output-before-stall-threshold' -- "$output_file"; then
    printf 'no-codex-output-before-stall-threshold\n'
    return 0
  fi

  if grep -Fqi 'no-codex-progress-before-stall-threshold' -- "$output_file"; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  # Recover Codex startup stalls when the wrapper was archived before it could
  # flush a terminal runner.env state. This is intentionally narrow: the log
  # must show a turn started, but no tool activity or turn completion.
  if grep -Fq '"type":"turn.started"' -- "$output_file" \
    && ! grep -Eq '"type":"(item\.(started|completed)|turn\.completed)"' -- "$output_file"; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  return 1
}
|
|
76
|
+
|
|
47
77
|
if tmux has-session -t "$session" 2>/dev/null; then
|
|
48
78
|
status="RUNNING"
|
|
49
79
|
fi
|
|
@@ -87,6 +117,13 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
|
|
|
87
117
|
fi
|
|
88
118
|
fi
|
|
89
119
|
|
|
120
|
+
if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
|
|
121
|
+
failure_reason="$(failure_reason_from_output || true)"
|
|
122
|
+
if [[ -n "$failure_reason" ]]; then
|
|
123
|
+
status="FAILED"
|
|
124
|
+
fi
|
|
125
|
+
fi
|
|
126
|
+
|
|
90
127
|
if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
|
|
91
128
|
case "$runner_state" in
|
|
92
129
|
running|waiting-auth-refresh|switching-account)
|
|
@@ -2039,6 +2039,7 @@ flow_export_execution_env() {
|
|
|
2039
2039
|
local openclaw_model=""
|
|
2040
2040
|
local openclaw_thinking=""
|
|
2041
2041
|
local openclaw_timeout=""
|
|
2042
|
+
local openclaw_stall=""
|
|
2042
2043
|
|
|
2043
2044
|
repo_id="$(flow_resolve_repo_id "${config_file}")"
|
|
2044
2045
|
provider_quota_cooldowns="$(flow_resolve_provider_quota_cooldowns "${config_file}")"
|
|
@@ -2071,6 +2072,7 @@ flow_export_execution_env() {
|
|
|
2071
2072
|
openclaw_model="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_MODEL")"
|
|
2072
2073
|
openclaw_thinking="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_THINKING")"
|
|
2073
2074
|
openclaw_timeout="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_TIMEOUT_SECONDS")"
|
|
2075
|
+
openclaw_stall="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_STALL_SECONDS")"
|
|
2074
2076
|
else
|
|
2075
2077
|
if [[ -n "${explicit_coding_worker}" ]]; then
|
|
2076
2078
|
active_provider_selection_reason="env-override"
|
|
@@ -2087,6 +2089,7 @@ flow_export_execution_env() {
|
|
|
2087
2089
|
openclaw_model="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL" "execution.openclaw.model" "")"
|
|
2088
2090
|
openclaw_thinking="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING" "execution.openclaw.thinking" "")"
|
|
2089
2091
|
openclaw_timeout="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS" "execution.openclaw.timeout_seconds" "")"
|
|
2092
|
+
openclaw_stall="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS" "execution.openclaw.stall_seconds" "")"
|
|
2090
2093
|
fi
|
|
2091
2094
|
|
|
2092
2095
|
if [[ -n "${coding_worker}" ]]; then
|
|
@@ -2167,6 +2170,10 @@ flow_export_execution_env() {
|
|
|
2167
2170
|
export F_LOSNING_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
|
|
2168
2171
|
export ACP_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
|
|
2169
2172
|
fi
|
|
2173
|
+
if [[ -n "${openclaw_stall}" ]]; then
|
|
2174
|
+
export F_LOSNING_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
|
|
2175
|
+
export ACP_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
|
|
2176
|
+
fi
|
|
2170
2177
|
|
|
2171
2178
|
flow_export_github_cli_auth_env "$(flow_resolve_repo_slug "${config_file}")"
|
|
2172
2179
|
flow_export_project_env_aliases
|
|
@@ -52,6 +52,7 @@ flow_export_compat_env_aliases() {
|
|
|
52
52
|
flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_MODEL ACP_OPENCLAW_MODEL
|
|
53
53
|
flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_THINKING ACP_OPENCLAW_THINKING
|
|
54
54
|
flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_TIMEOUT_SECONDS ACP_OPENCLAW_TIMEOUT_SECONDS
|
|
55
|
+
flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_STALL_SECONDS ACP_OPENCLAW_STALL_SECONDS
|
|
55
56
|
flow_export_env_alias_if_unset F_LOSNING_ALLOW_INFRA_CI_BYPASS ACP_ALLOW_INFRA_CI_BYPASS
|
|
56
57
|
flow_export_env_alias_if_unset F_LOSNING_LOCAL_FIRST_PR_POLICY ACP_LOCAL_FIRST_PR_POLICY
|
|
57
58
|
flow_export_env_alias_if_unset F_LOSNING_PR_RISK_CACHE_TTL_SECONDS ACP_PR_RISK_CACHE_TTL_SECONDS
|
|
@@ -100,6 +101,7 @@ flow_export_canonical_env_aliases() {
|
|
|
100
101
|
flow_export_env_alias_if_unset ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL
|
|
101
102
|
flow_export_env_alias_if_unset ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING
|
|
102
103
|
flow_export_env_alias_if_unset ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS
|
|
104
|
+
flow_export_env_alias_if_unset ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS
|
|
103
105
|
flow_export_env_alias_if_unset ACP_ALLOW_INFRA_CI_BYPASS F_LOSNING_ALLOW_INFRA_CI_BYPASS
|
|
104
106
|
flow_export_env_alias_if_unset ACP_LOCAL_FIRST_PR_POLICY F_LOSNING_LOCAL_FIRST_PR_POLICY
|
|
105
107
|
flow_export_env_alias_if_unset ACP_PR_RISK_CACHE_TTL_SECONDS F_LOSNING_PR_RISK_CACHE_TTL_SECONDS
|
|
@@ -345,24 +345,28 @@ PY
|
|
|
345
345
|
local effective_pools=""
|
|
346
346
|
healthy_pools="$(
|
|
347
347
|
jq -r --argjson primaryThresh "${CODEX_QUOTA_THRESHOLD}" --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
|
|
348
|
-
|
|
348
|
+
map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
|
|
349
349
|
| map(select(
|
|
350
|
-
(
|
|
351
|
-
and ((.
|
|
352
|
-
and ((.
|
|
353
|
-
|
|
350
|
+
(.poolKey != "")
|
|
351
|
+
and ((.usage.rate_limit.limit_reached // false) | not)
|
|
352
|
+
and ((.usage.rate_limit.primary_window.used_percent // 100) < $primaryThresh)
|
|
353
|
+
and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
|
|
354
|
+
) | .poolKey)
|
|
355
|
+
| unique
|
|
354
356
|
| length
|
|
355
357
|
' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
|
|
356
358
|
)"
|
|
357
359
|
|
|
358
360
|
rotation_pools="$(
|
|
359
361
|
jq -r --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
|
|
360
|
-
|
|
362
|
+
map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
|
|
361
363
|
| map(select(
|
|
362
|
-
(
|
|
363
|
-
and ((.
|
|
364
|
-
and ((.
|
|
365
|
-
|
|
364
|
+
(.poolKey != "")
|
|
365
|
+
and ((.usage.rate_limit.limit_reached // false) | not)
|
|
366
|
+
and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
|
|
367
|
+
and ((.planType // "") != "free")
|
|
368
|
+
) | .poolKey)
|
|
369
|
+
| unique
|
|
366
370
|
| length
|
|
367
371
|
' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
|
|
368
372
|
)"
|
|
@@ -471,6 +475,12 @@ fi
|
|
|
471
475
|
|
|
472
476
|
run_codex_quota_preflight
|
|
473
477
|
|
|
478
|
+
# Sync skill files to runtime-home if source has changed since last sync.
|
|
479
|
+
# This ensures start-issue-worker.sh and other scripts are always up to date.
|
|
480
|
+
if [[ -x "${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" ]]; then
|
|
481
|
+
"${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" --quiet 2>/dev/null || true
|
|
482
|
+
fi
|
|
483
|
+
|
|
474
484
|
acquire_lock
|
|
475
485
|
|
|
476
486
|
reap_orphan_shared_loop_groups
|