agent-control-plane 0.1.13 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -227,23 +227,30 @@ schedule_provider_quota_cooldown() {
227
227
  local reason="${1:-provider-quota-limit}"
228
228
  [[ "${failure_reason:-}" == "provider-quota-limit" ]] || return 0
229
229
  [[ -x "${provider_cooldown_script}" ]] || return 0
230
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
230
231
 
231
232
  "${provider_cooldown_script}" schedule "${reason}" >/dev/null || true
232
233
  }
233
234
 
234
235
  clear_provider_quota_cooldown() {
235
236
  [[ -x "${provider_cooldown_script}" ]] || return 0
237
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
236
238
 
237
239
  "${provider_cooldown_script}" clear >/dev/null || true
238
240
  }
239
241
 
240
242
  blocked_runtime_reason=""
243
+ host_github_rate_limited="no"
244
+ host_github_rate_limit_detail=""
241
245
 
242
246
  owner="${repo_slug%%/*}"
243
247
  repo="${repo_slug#*/}"
244
248
  pr_view_json="$(flow_github_pr_view_json "$repo_slug" "$pr_number")"
245
249
  pr_state="$(jq -r '.state' <<<"$pr_view_json")"
246
- pr_base_ref="$(jq -r '.baseRefName // "main"' <<<"$pr_view_json")"
250
+ pr_base_ref="$(jq -r '.baseRefName // empty' <<<"$pr_view_json")"
251
+ if [[ -z "${pr_base_ref}" ]]; then
252
+ pr_base_ref="main"
253
+ fi
247
254
 
248
255
  if [[ "$status" == "RUNNING" && "$pr_state" != "MERGED" && "$pr_state" != "CLOSED" ]]; then
249
256
  printf 'STATUS=%s\n' "$status"
@@ -395,7 +402,9 @@ post_pr_comment_if_present() {
395
402
  if pr_comment_already_posted; then
396
403
  return 0
397
404
  fi
398
- flow_github_api_repo "${repo_slug}" "issues/${pr_number}/comments" --method POST -f body="$(cat "$comment_file")" >/dev/null
405
+ if ! host_github_post_issue_comment "${pr_number}" "$(cat "$comment_file")"; then
406
+ return 1
407
+ fi
399
408
  }
400
409
 
401
410
  pr_comment_already_posted() {
@@ -408,6 +417,120 @@ pr_comment_already_posted() {
408
417
  jq -e --arg body "$comment_body" 'any(.comments[]?; .body == $body)' >/dev/null <<<"$comments_json"
409
418
  }
410
419
 
420
# Decide whether captured GitHub CLI/API output describes a rate-limit rejection.
# Arguments: $1 - combined stdout/stderr text of a failed call (may be empty)
# Returns:   0 when the text mentions a rate limit, non-zero otherwise
host_github_output_indicates_rate_limit() {
  # A bare "HTTP 403" is intentionally NOT matched: GitHub also answers 403 for
  # plain permission errors, and classifying those as rate limits would send
  # callers down the rate-limit retry path instead of reporting the real error.
  # Rate-limit responses carry one of the phrases below (or HTTP 429).
  grep -Eiq 'rate limit exceeded|secondary rate limit|abuse detection|HTTP 429' <<<"${1:-}"
}
423
+
424
# Remember that a host-side GitHub call was rejected by a rate limit.
# Globals:   run_dir (read)
#            host_github_rate_limited, host_github_rate_limit_detail (written)
# Arguments: $1 - raw output of the rejected call
# Outputs:   overwrites ${run_dir}/host-github-rate-limit.log with that output
record_host_github_rate_limit() {
  local captured="${1:-}"

  host_github_rate_limit_detail="${captured}"
  host_github_rate_limited="yes"
  printf '%s\n' "${captured}" >"${run_dir}/host-github-rate-limit.log"
}
431
+
432
# Post a comment on an issue or PR through the gh CLI.
# Globals:   repo_slug (read)
# Arguments: $1 - issue/PR number (required)
#            $2 - comment body (defaults to empty)
# Returns:   0 when the comment was posted, 1 otherwise. A rate-limit rejection
#            is handed to record_host_github_rate_limit; any other failure
#            output is echoed to stderr.
host_github_post_issue_comment() {
  local issue_number="${1:?issue number required}"
  local body="${2:-}"
  local gh_output=""

  flow_export_github_cli_auth_env "${repo_slug}"

  # stderr is folded into the capture so the failure text can be classified.
  if gh_output="$(gh api "repos/${repo_slug}/issues/${issue_number}/comments" --method POST -f body="${body}" 2>&1)"; then
    return 0
  fi

  if host_github_output_indicates_rate_limit "${gh_output}"; then
    record_host_github_rate_limit "${gh_output}"
  else
    printf '%s\n' "${gh_output}" >&2
  fi
  return 1
}
454
+
455
# Submit an APPROVE review on the current PR through the gh CLI.
# Globals:   repo_slug, pr_number (read)
# Returns:   0 when the review was accepted or when GitHub refuses because the
#            actor authored the PR; 1 otherwise. A rate-limit rejection is
#            handed to record_host_github_rate_limit; any other failure output
#            is echoed to stderr.
host_github_submit_pr_approval() {
  local gh_output=""

  flow_export_github_cli_auth_env "${repo_slug}"

  if gh_output="$(gh api "repos/${repo_slug}/pulls/${pr_number}/reviews" --method POST -f event=APPROVE -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." 2>&1)"; then
    return 0
  fi

  # Self-authored PRs cannot be approved; that is not treated as an error.
  case "${gh_output}" in
    *"Can not approve your own pull request"*) return 0 ;;
  esac

  if host_github_output_indicates_rate_limit "${gh_output}"; then
    record_host_github_rate_limit "${gh_output}"
  else
    printf '%s\n' "${gh_output}" >&2
  fi
  return 1
}
481
+
482
# Append a "Host action blocked" section to the PR comment file.
# Globals:   pr_comment_file (appended to)
# Arguments: $1 - rate-limit detail text; when it contains "resets at ...",
#                 the first such phrase (up to the next period) is quoted
#                 as a bullet.
append_host_rate_limit_comment() {
  local detail="${1:-GitHub API rate limit blocked host actions.}"
  local reset_line=""
  local separator=""

  # Empty when the detail carries no reset hint.
  reset_line="$(grep -Eio 'resets at [^.]+' <<<"${detail}" | head -n 1 || true)"

  # Separate the new section from any comment text that is already there.
  if [[ -s "${pr_comment_file}" ]]; then
    separator=$'\n\n'
  fi

  {
    printf '%s' "${separator}"
    printf '## Host action blocked\n\n'
    printf 'GitHub API rate limit blocked ACP from posting the PR review outcome or merge action.\n'
    [[ -z "${reset_line}" ]] || printf '\n- %s\n' "${reset_line}"
    printf -- '- ACP kept the local review artifacts and scheduled an automatic retry for the host action.\n'
  } >>"${pr_comment_file}"
}
502
+
503
# Turn a host-side GitHub rate limit into a blocked result with a scheduled
# retry, print the result summary, and end the script.
# Globals:   host_github_rate_limit_detail, pr_number, pr_state (read)
#            result_outcome, result_action, failure_reason (written)
# Arguments: $1 - retry/failure reason (default: github-api-rate-limit)
#            $2 - result action to report (default: host-rate-limit-retry)
# Returns:   does not return; exits the shell with status 0.
handle_host_github_rate_limit_retry() {
  local retry_reason="${1:-github-api-rate-limit}"
  local action_label="${2:-host-rate-limit-retry}"

  append_host_rate_limit_comment "${host_github_rate_limit_detail:-}"
  require_transition "pr_schedule_retry" pr_schedule_retry "${retry_reason}"
  require_transition "pr_after_blocked" pr_after_blocked "${pr_number}"
  cleanup_pr_session

  # The result globals are assigned before notify_pr_reconciled runs.
  result_outcome="blocked"
  result_action="${action_label}"
  failure_reason="${retry_reason}"
  notify_pr_reconciled
  mark_reconciled

  printf 'STATUS=FAILED\nPR_NUMBER=%s\nPR_STATE=%s\nOUTCOME=%s\nACTION=%s\nFAILURE_REASON=%s\n' \
    "${pr_number}" "${pr_state}" "${result_outcome}" "${result_action}" "${failure_reason}"
  exit 0
}
524
+
525
# Fallback for a failed host action: when the failure was recorded as a GitHub
# rate limit, delegate to handle_host_github_rate_limit_retry.
# Globals:   host_github_rate_limited (read)
# Arguments: $1 - retry reason (default: github-api-rate-limit)
#            $2 - result action (default: host-rate-limit-retry)
# Returns:   always 1, so any failure that reaches this point still propagates
#            to the caller.
maybe_handle_host_github_rate_limit() {
  if [[ "${host_github_rate_limited}" != "yes" ]]; then
    return 1
  fi

  handle_host_github_rate_limit_retry "${1:-github-api-rate-limit}" "${2:-host-rate-limit-retry}"
  return 1
}
533
+
411
534
  blocked_result_indicates_local_bind_failure() {
412
535
  local candidate_file
413
536
  for candidate_file in "$pr_comment_file" "$session_log_file"; do
@@ -430,6 +553,31 @@ classify_pr_blocked_runtime_reason() {
430
553
  return 0
431
554
  fi
432
555
 
556
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
557
+ printf 'codex-stalled\n'
558
+ return 0
559
+ fi
560
+
561
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
562
+ printf 'agent-stalled\n'
563
+ return 0
564
+ fi
565
+
566
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'provider-quota-limit|quota.*exhausted|rate.limit.*exceeded' "$session_log_file" 2>/dev/null; then
567
+ printf 'provider-quota-limit\n'
568
+ return 0
569
+ fi
570
+
571
+ if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
572
+ printf 'codex-stalled\n'
573
+ return 0
574
+ fi
575
+
576
+ if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
577
+ printf 'agent-stalled\n'
578
+ return 0
579
+ fi
580
+
433
581
  return 1
434
582
  }
435
583
 
@@ -774,23 +922,38 @@ pr_remote_already_has_final_head() {
774
922
  }
775
923
 
776
924
  approve_and_merge() {
777
- local approve_output
778
925
  if ! pr_is_self_authored_for_current_actor; then
779
- if ! approve_output="$(
780
- flow_github_api_repo "${repo_slug}" "pulls/${pr_number}/reviews" \
781
- --method POST \
782
- -f event=APPROVE \
783
- -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
784
- 2>&1
785
- )"; then
786
- if ! grep -q "Can not approve your own pull request" <<<"$approve_output"; then
787
- printf '%s\n' "$approve_output" >&2
788
- return 1
926
+ if ! host_github_submit_pr_approval; then
927
+ if [[ "${host_github_rate_limited}" == "yes" ]]; then
928
+ return 2
789
929
  fi
930
+ return 1
790
931
  fi
791
932
  fi
792
933
 
793
- flow_github_pr_merge "$repo_slug" "$pr_number" "squash" "yes"
934
+ flow_export_github_cli_auth_env "${repo_slug}"
935
+ if ! gh pr merge "${pr_number}" -R "${repo_slug}" --squash --delete-branch --admin >"${run_dir}/host-github-merge.out" 2>"${run_dir}/host-github-merge.err"; then
936
+ local merge_output=""
937
+ merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
938
+ if host_github_output_indicates_rate_limit "${merge_output}"; then
939
+ record_host_github_rate_limit "${merge_output}"
940
+ return 2
941
+ fi
942
+ if flow_github_pr_merge "$repo_slug" "$pr_number" "squash" "yes" 2>"${run_dir}/host-github-merge.err"; then
943
+ return 0
944
+ fi
945
+ merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
946
+ if host_github_output_indicates_rate_limit "${merge_output}"; then
947
+ record_host_github_rate_limit "${merge_output}"
948
+ return 2
949
+ fi
950
+ if [[ -n "${merge_output}" ]]; then
951
+ printf '%s\n' "${merge_output}" >&2
952
+ fi
953
+ return 1
954
+ fi
955
+
956
+ return 0
794
957
  }
795
958
 
796
959
  cleanup_pr_session() {
@@ -829,7 +992,7 @@ handle_linked_issue_merge_cleanup() {
829
992
  handle_updated_branch_result() {
830
993
  if [[ -z "$pr_worktree" || ! -d "$pr_worktree" ]]; then
831
994
  if pr_remote_already_has_final_head; then
832
- post_pr_comment_if_present
995
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
833
996
  require_transition "pr_clear_retry" pr_clear_retry
834
997
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
835
998
  cleanup_pr_session
@@ -895,7 +1058,7 @@ handle_updated_branch_result() {
895
1058
  fi
896
1059
 
897
1060
  push_pr_branch
898
- post_pr_comment_if_present
1061
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
899
1062
  require_transition "pr_clear_retry" pr_clear_retry
900
1063
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
901
1064
  cleanup_pr_session
@@ -936,7 +1099,7 @@ elif [[ "$pr_state" == "CLOSED" ]]; then
936
1099
  result_action="${result_action:-cleaned-closed-pr}"
937
1100
  notify_pr_reconciled
938
1101
  elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-passed" ]]; then
939
- if ! review_pass_action="$(review_pass_action_from_result_action "${result_action:-}" 2>/dev/null)"; then
1102
+ if ! review_pass_action="$(review_pass_action_from_result_action "${result_action:-}" 2>/dev/null)"; then
940
1103
  review_pass_action="$(pr_review_pass_action "$pr_number")"
941
1104
  fi
942
1105
  case "$review_pass_action" in
@@ -963,8 +1126,13 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-
963
1126
  fi
964
1127
 
965
1128
  require_transition "pr_clear_retry" pr_clear_retry
966
- approve_and_merge
967
- pr_state="$(flow_github_pr_view_json "$repo_slug" "$pr_number" | jq -r '.state')"
1129
+ if ! approve_and_merge; then
1130
+ if [[ "${host_github_rate_limited}" == "yes" ]]; then
1131
+ handle_host_github_rate_limit_retry "github-api-rate-limit" "host-merge-rate-limit-retry"
1132
+ fi
1133
+ exit 1
1134
+ fi
1135
+ pr_state="MERGED"
968
1136
  if [[ "$pr_state" != "MERGED" ]]; then
969
1137
  echo "PR ${pr_number} did not merge successfully" >&2
970
1138
  exit 1
@@ -1018,7 +1186,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
1018
1186
  result_action="host-rejected-noop-promotion"
1019
1187
  else
1020
1188
  push_pr_branch
1021
- post_pr_comment_if_present
1189
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1022
1190
  require_transition "pr_clear_retry" pr_clear_retry
1023
1191
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
1024
1192
  cleanup_pr_session
@@ -1041,7 +1209,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
1041
1209
  result_action="host-rejected-no-change-needed"
1042
1210
  notify_pr_reconciled
1043
1211
  else
1044
- post_pr_comment_if_present
1212
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1045
1213
  require_transition "pr_clear_retry" pr_clear_retry
1046
1214
  require_transition "pr_after_succeeded" pr_after_succeeded "$pr_number"
1047
1215
  cleanup_pr_session
@@ -1062,7 +1230,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "blocked" ]]; then
1062
1230
  elif attempt_blocked_pr_host_verification_recovery; then
1063
1231
  handle_updated_branch_result
1064
1232
  else
1065
- post_pr_comment_if_present
1233
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1066
1234
  require_transition "pr_clear_retry" pr_clear_retry
1067
1235
  require_transition "pr_after_blocked" pr_after_blocked "$pr_number"
1068
1236
  cleanup_pr_session
@@ -1079,6 +1247,7 @@ elif [[ "$status" == "FAILED" ]]; then
1079
1247
  schedule_provider_quota_cooldown "${failure_reason:-worker-exit-failed}"
1080
1248
  require_transition "pr_schedule_retry" pr_schedule_retry "${failure_reason:-worker-exit-failed}"
1081
1249
  require_transition "pr_after_failed" pr_after_failed "$pr_number"
1250
+ cleanup_pr_session
1082
1251
  notify_pr_reconciled
1083
1252
  fi
1084
1253
 
@@ -618,6 +618,22 @@ classify_failure_reason() {
618
618
  fi
619
619
  }
620
620
 
621
# Report whether a failed attempt's output shows a stall right after startup:
# within the last 120 lines a thread and a turn were started, but no item
# started/completed event and no turn.completed event appears.
# Arguments: $1 - output text produced by the failed attempt
# Returns:   0 when the output matches that startup-stall shape, 1 otherwise
failure_chunk_indicates_startup_stall() {
  local chunk="${1:-}"
  local tail_window

  tail_window="$(tail -n 120 <<<"$chunk")"

  if grep -q '"type":"thread.started"' <<<"$tail_window" \
    && grep -q '"type":"turn.started"' <<<"$tail_window" \
    && ! grep -Eq '"type":"item\.(started|completed)"' <<<"$tail_window" \
    && ! grep -q '"type":"turn.completed"' <<<"$tail_window"; then
    return 0
  fi
  return 1
}
636
+
621
637
  resume_prompt() {
622
638
  cat <<EOF
623
639
  The previous Codex exec turn in this same thread was interrupted because the host refreshed Codex authentication after a quota or auth failure.
@@ -729,7 +745,7 @@ run_resume_exec() {
729
745
  }
730
746
 
731
747
  attempt_run() {
732
- local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch
748
+ local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch failure_chunk startup_stall
733
749
 
734
750
  attempt=$((attempt + 1))
735
751
  last_quota_switch_status=""
@@ -750,8 +766,15 @@ attempt_run() {
750
766
  return 0
751
767
  fi
752
768
 
753
- reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
769
+ failure_chunk="$(new_output_since "$last_attempt_start_size")"
770
+ reason="$(classify_failure_reason "$failure_chunk")"
754
771
  last_failure_reason="${reason:-worker-exit-failed}"
772
+ startup_stall="no"
773
+ if [[ "$last_failure_reason" == "no-codex-output-before-stall-threshold" || "$last_failure_reason" == "no-codex-progress-before-stall-threshold" ]]; then
774
+ if failure_chunk_indicates_startup_stall "$failure_chunk"; then
775
+ startup_stall="yes"
776
+ fi
777
+ fi
755
778
 
756
779
  case "$last_failure_reason" in
757
780
  usage-limit|auth-failure|auth-401|account-banned)
@@ -796,6 +819,38 @@ attempt_run() {
796
819
  resume_count=$((resume_count + 1))
797
820
  return 2
798
821
  ;;
822
+ no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold)
823
+ if [[ "$startup_stall" == "yes" && $quota_autoswitch_attempt_count -lt $max_quota_autoswitch_attempts ]]; then
824
+ auth_before_switch="$(auth_fingerprint)"
825
+ quota_label_before_switch="$last_attempt_start_quota_label"
826
+ quota_switch_signature_before_switch="$(quota_switch_signature)"
827
+ last_auth_fingerprint="$auth_before_switch"
828
+ write_state "switching-account" "$last_failure_reason"
829
+ log_runner "startup-stall detected before first Codex tool activity; attempting Codex account rotation"
830
+ shell_flags_before_quota_switch="$-"
831
+ set +e
832
+ run_quota_autoswitch
833
+ quota_switch_result=$?
834
+ case "$shell_flags_before_quota_switch" in
835
+ *e*) set -e ;;
836
+ *) set +e ;;
837
+ esac
838
+ if [[ "$quota_switch_result" == "0" ]]; then
839
+ thread_id=""
840
+ auth_wait_started_at=""
841
+ write_state "running" ""
842
+ return 2
843
+ fi
844
+ if [[ "$quota_switch_result" == "10" ]]; then
845
+ log_runner "startup-stall rotation deferred until ${last_quota_next_retry_at:-unknown}"
846
+ last_failure_reason="quota-switch-deferred"
847
+ write_state "failed" "$last_failure_reason"
848
+ return 1
849
+ fi
850
+ fi
851
+ write_state "failed" "$last_failure_reason"
852
+ return 1
853
+ ;;
799
854
  *)
800
855
  write_state "failed" "$last_failure_reason"
801
856
  return 1
@@ -115,6 +115,7 @@ result_file="${artifact_dir}/result.env"
115
115
  runner_state_file="${artifact_dir}/runner.env"
116
116
  sandbox_artifact_dir="${worktree%/}/${sandbox_subdir}"
117
117
  sandbox_run_dir="${worktree%/}/${sandbox_subdir}/${session}"
118
+ retained_repo_root="${ACP_RETAINED_REPO_ROOT:-${F_LOSNING_RETAINED_REPO_ROOT:-}}"
118
119
  started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
119
120
  openclaw_bin="$(command -v openclaw)"
120
121
  default_openclaw_agent_id="$(
@@ -154,6 +155,7 @@ printf -v runner_state_q '%q' "$runner_state_file"
154
155
  printf -v branch_q '%q' "$branch_name"
155
156
  printf -v sandbox_artifact_dir_q '%q' "$sandbox_artifact_dir"
156
157
  printf -v sandbox_run_dir_q '%q' "$sandbox_run_dir"
158
+ printf -v retained_repo_root_q '%q' "$retained_repo_root"
157
159
  printf -v adapter_id_q '%q' "$adapter_id"
158
160
  printf -v started_at_q '%q' "$started_at"
159
161
  printf -v openclaw_bin_q '%q' "$openclaw_bin"
@@ -230,18 +232,21 @@ export AGENT_PROJECT_RUN_DIR=${sandbox_run_dir_q}
230
232
  export AGENT_PROJECT_HOST_RUN_DIR=${artifact_dir_q}
231
233
  export AGENT_PROJECT_RESULT_FILE=${sandbox_run_dir_q}/result.env
232
234
  export AGENT_PROJECT_OPENCLAW_BIN=${openclaw_bin_q}
235
+ export AGENT_PROJECT_RETAINED_REPO_ROOT=${retained_repo_root_q}
233
236
  export ACP_SESSION=${session_q}
234
237
  export ACP_RUN_DIR=${sandbox_run_dir_q}
235
238
  export ACP_HOST_RUN_DIR=${artifact_dir_q}
236
239
  export ACP_RESULT_FILE=${sandbox_run_dir_q}/result.env
237
240
  export ACP_OPENCLAW_BIN=${openclaw_bin_q}
238
241
  export ACP_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
242
+ export ACP_RETAINED_REPO_ROOT=${retained_repo_root_q}
239
243
  export F_LOSNING_SESSION=${session_q}
240
244
  export F_LOSNING_RUN_DIR=${sandbox_run_dir_q}
241
245
  export F_LOSNING_HOST_RUN_DIR=${artifact_dir_q}
242
246
  export F_LOSNING_RESULT_FILE=${sandbox_run_dir_q}/result.env
243
247
  export F_LOSNING_OPENCLAW_BIN=${openclaw_bin_q}
244
248
  export F_LOSNING_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
249
+ export F_LOSNING_RETAINED_REPO_ROOT=${retained_repo_root_q}
245
250
  export OPENCLAW_STATE_DIR=${openclaw_state_dir_q}
246
251
  export OPENCLAW_CONFIG_PATH=${openclaw_config_path_q}
247
252
  EOF
@@ -294,6 +299,7 @@ runner_state_file=${runner_state_q}
294
299
  output_file=${output_q}
295
300
  sandbox_artifact_dir=${sandbox_artifact_dir_q}
296
301
  sandbox_run_dir=${sandbox_run_dir_q}
302
+ retained_repo_root=${retained_repo_root_q}
297
303
  artifact_dir=${artifact_dir_q}
298
304
  run_dir=${artifact_dir_q}
299
305
  task_kind=${task_kind_q}
@@ -554,6 +560,45 @@ recover_literal_runtime_artifacts() {
554
560
  return 0
555
561
  }
556
562
 
563
# Recover run artifacts that ended up under the retained repo checkout
# (retained_repo_root/worktrees/NAME/.openclaw-artifacts/SESSION) instead of
# the sandbox run dir, copying them to both the sandbox run dir and the host
# artifact dir, then remove the stray directory.
# NOTE(review): this block appears to live inside an unquoted heredoc template,
# so every runtime expansion stays backslash-escaped; confirm before editing.
# The stray directory is only deleted when every artifact found there was
# copied to at least one destination, so a failed copy never loses the file.
# Returns: always 0 (best-effort recovery).
recover_retained_repo_artifact_leaks() {
  local retained_worktree_root=""
  local leaked_run_dir=""
  local worktree_name=""
  local session_name=""
  local artifact_name=""
  local recovered="no"
  local copy_failed="no"
  local copied="no"

  [[ -n "\${retained_repo_root}" ]] || return 0
  worktree_name="\$(basename "\${worktree}")"
  session_name="\${AGENT_PROJECT_SESSION:-}"
  [[ -n "\${session_name}" ]] || return 0
  retained_worktree_root="\${retained_repo_root%/}/worktrees"
  leaked_run_dir="\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts/\${session_name}"

  # Nothing leaked, or the computed path is the real sandbox run dir itself.
  if [[ ! -d "\${leaked_run_dir}" || "\${leaked_run_dir}" == "\${sandbox_run_dir}" ]]; then
    return 0
  fi

  for artifact_name in result.env verification.jsonl issue-comment.md pr-comment.md; do
    [[ -f "\${leaked_run_dir}/\${artifact_name}" ]] || continue
    copied="no"
    if cp "\${leaked_run_dir}/\${artifact_name}" "\${sandbox_run_dir}/\${artifact_name}" 2>/dev/null; then
      copied="yes"
    fi
    if cp "\${leaked_run_dir}/\${artifact_name}" "\${artifact_dir}/\${artifact_name}" 2>/dev/null; then
      copied="yes"
    fi
    if [[ "\${copied}" == "yes" ]]; then
      recovered="yes"
    else
      copy_failed="yes"
    fi
  done

  if [[ "\${copy_failed}" == "no" ]]; then
    rm -rf "\${leaked_run_dir}" 2>/dev/null || true
    # rmdir only removes the parent directories when they are empty.
    rmdir "\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts" 2>/dev/null || true
    rmdir "\${retained_worktree_root}/\${worktree_name}" 2>/dev/null || true
    rmdir "\${retained_worktree_root}" 2>/dev/null || true
  else
    printf '[openclaw] kept retained-repo artifact leak after copy failure: %s\n' "\${leaked_run_dir}" >>"\${output_file}" 2>/dev/null || true
  fi

  if [[ "\${recovered}" == "yes" ]]; then
    printf '[openclaw] recovered retained-repo artifact leak: %s\n' "\${leaked_run_dir}" >>"\${output_file}" 2>/dev/null || true
  fi

  return 0
}
601
+
557
602
  reset_sandbox_run_dir() {
558
603
  mkdir -p "\${sandbox_run_dir}"
559
604
  find "\${sandbox_run_dir}" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
@@ -1217,6 +1262,7 @@ while true; do
1217
1262
  break
1218
1263
  done
1219
1264
  recover_literal_runtime_artifacts
1265
+ recover_retained_repo_artifact_leaks
1220
1266
  infer_result_from_output
1221
1267
  synthesize_comment_artifact_from_output
1222
1268
  if [[ "\${status}" -eq 0 ]]; then
@@ -44,6 +44,36 @@ runner_state=""
44
44
  thread_id=""
45
45
  last_exit_code=""
46
46
 
47
# Derive a failure reason from the worker output log.
# Uses grep -E rather than ripgrep so the checks still work on hosts where rg
# is not installed.
# Globals:   output_file (read)
# Outputs:   the detected reason on stdout (usage-limit,
#            no-codex-output-before-stall-threshold or
#            no-codex-progress-before-stall-threshold)
# Returns:   0 when a reason was printed; 1 when the log is missing or nothing
#            matched
failure_reason_from_output() {
  [[ -f "$output_file" ]] || return 1

  if grep -Eqi -- "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file"; then
    printf 'usage-limit\n'
    return 0
  fi

  # The "stale-run <marker>" form contains the bare marker, so one pattern
  # covers both spellings.
  if grep -Eqi -- 'no-codex-output-before-stall-threshold' "$output_file"; then
    printf 'no-codex-output-before-stall-threshold\n'
    return 0
  fi

  if grep -Eqi -- 'no-codex-progress-before-stall-threshold' "$output_file"; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  # Recover Codex startup stalls when the wrapper was archived before it could
  # flush a terminal runner.env state. This is intentionally narrow: the log
  # must show a turn started, but no tool activity or turn completion.
  if grep -Eq -- '"type":"turn.started"' "$output_file" \
    && ! grep -Eq -- '"type":"item.started"|"type":"item.completed"|"type":"turn.completed"' "$output_file"; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  return 1
}
76
+
47
77
  if tmux has-session -t "$session" 2>/dev/null; then
48
78
  status="RUNNING"
49
79
  fi
@@ -87,6 +117,13 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
87
117
  fi
88
118
  fi
89
119
 
120
+ if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
121
+ failure_reason="$(failure_reason_from_output || true)"
122
+ if [[ -n "$failure_reason" ]]; then
123
+ status="FAILED"
124
+ fi
125
+ fi
126
+
90
127
  if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
91
128
  case "$runner_state" in
92
129
  running|waiting-auth-refresh|switching-account)
@@ -2039,6 +2039,7 @@ flow_export_execution_env() {
2039
2039
  local openclaw_model=""
2040
2040
  local openclaw_thinking=""
2041
2041
  local openclaw_timeout=""
2042
+ local openclaw_stall=""
2042
2043
 
2043
2044
  repo_id="$(flow_resolve_repo_id "${config_file}")"
2044
2045
  provider_quota_cooldowns="$(flow_resolve_provider_quota_cooldowns "${config_file}")"
@@ -2071,6 +2072,7 @@ flow_export_execution_env() {
2071
2072
  openclaw_model="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_MODEL")"
2072
2073
  openclaw_thinking="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_THINKING")"
2073
2074
  openclaw_timeout="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_TIMEOUT_SECONDS")"
2075
+ openclaw_stall="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_STALL_SECONDS")"
2074
2076
  else
2075
2077
  if [[ -n "${explicit_coding_worker}" ]]; then
2076
2078
  active_provider_selection_reason="env-override"
@@ -2087,6 +2089,7 @@ flow_export_execution_env() {
2087
2089
  openclaw_model="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL" "execution.openclaw.model" "")"
2088
2090
  openclaw_thinking="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING" "execution.openclaw.thinking" "")"
2089
2091
  openclaw_timeout="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS" "execution.openclaw.timeout_seconds" "")"
2092
+ openclaw_stall="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS" "execution.openclaw.stall_seconds" "")"
2090
2093
  fi
2091
2094
 
2092
2095
  if [[ -n "${coding_worker}" ]]; then
@@ -2167,6 +2170,10 @@ flow_export_execution_env() {
2167
2170
  export F_LOSNING_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
2168
2171
  export ACP_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
2169
2172
  fi
2173
+ if [[ -n "${openclaw_stall}" ]]; then
2174
+ export F_LOSNING_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
2175
+ export ACP_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
2176
+ fi
2170
2177
 
2171
2178
  flow_export_github_cli_auth_env "$(flow_resolve_repo_slug "${config_file}")"
2172
2179
  flow_export_project_env_aliases
@@ -52,6 +52,7 @@ flow_export_compat_env_aliases() {
52
52
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_MODEL ACP_OPENCLAW_MODEL
53
53
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_THINKING ACP_OPENCLAW_THINKING
54
54
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_TIMEOUT_SECONDS ACP_OPENCLAW_TIMEOUT_SECONDS
55
+ flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_STALL_SECONDS ACP_OPENCLAW_STALL_SECONDS
55
56
  flow_export_env_alias_if_unset F_LOSNING_ALLOW_INFRA_CI_BYPASS ACP_ALLOW_INFRA_CI_BYPASS
56
57
  flow_export_env_alias_if_unset F_LOSNING_LOCAL_FIRST_PR_POLICY ACP_LOCAL_FIRST_PR_POLICY
57
58
  flow_export_env_alias_if_unset F_LOSNING_PR_RISK_CACHE_TTL_SECONDS ACP_PR_RISK_CACHE_TTL_SECONDS
@@ -100,6 +101,7 @@ flow_export_canonical_env_aliases() {
100
101
  flow_export_env_alias_if_unset ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL
101
102
  flow_export_env_alias_if_unset ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING
102
103
  flow_export_env_alias_if_unset ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS
104
+ flow_export_env_alias_if_unset ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS
103
105
  flow_export_env_alias_if_unset ACP_ALLOW_INFRA_CI_BYPASS F_LOSNING_ALLOW_INFRA_CI_BYPASS
104
106
  flow_export_env_alias_if_unset ACP_LOCAL_FIRST_PR_POLICY F_LOSNING_LOCAL_FIRST_PR_POLICY
105
107
  flow_export_env_alias_if_unset ACP_PR_RISK_CACHE_TTL_SECONDS F_LOSNING_PR_RISK_CACHE_TTL_SECONDS
@@ -345,24 +345,28 @@ PY
345
345
  local effective_pools=""
346
346
  healthy_pools="$(
347
347
  jq -r --argjson primaryThresh "${CODEX_QUOTA_THRESHOLD}" --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
348
- group_by(.accountId)
348
+ map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
349
349
  | map(select(
350
- ((.[0].usage.rate_limit.limit_reached // false) | not)
351
- and ((.[0].usage.rate_limit.primary_window.used_percent // 100) < $primaryThresh)
352
- and ((.[0].usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
353
- ))
350
+ (.poolKey != "")
351
+ and ((.usage.rate_limit.limit_reached // false) | not)
352
+ and ((.usage.rate_limit.primary_window.used_percent // 100) < $primaryThresh)
353
+ and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
354
+ ) | .poolKey)
355
+ | unique
354
356
  | length
355
357
  ' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
356
358
  )"
357
359
 
358
360
  rotation_pools="$(
359
361
  jq -r --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
360
- group_by(.accountId)
362
+ map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
361
363
  | map(select(
362
- ((.[0].usage.rate_limit.limit_reached // false) | not)
363
- and ((.[0].usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
364
- and ((.[0].planType // "") != "free")
365
- ))
364
+ (.poolKey != "")
365
+ and ((.usage.rate_limit.limit_reached // false) | not)
366
+ and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
367
+ and ((.planType // "") != "free")
368
+ ) | .poolKey)
369
+ | unique
366
370
  | length
367
371
  ' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
368
372
  )"
@@ -471,6 +475,12 @@ fi
471
475
 
472
476
  run_codex_quota_preflight
473
477
 
478
+ # Sync skill files to runtime-home if source has changed since last sync.
479
+ # This ensures start-issue-worker.sh and other scripts are always up to date.
480
+ if [[ -x "${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" ]]; then
481
+ "${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" --quiet 2>/dev/null || true
482
+ fi
483
+
474
484
  acquire_lock
475
485
 
476
486
  reap_orphan_shared_loop_groups