agent-control-plane 0.1.8 → 0.1.12

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (48)
  1. package/bin/pr-risk.sh +54 -10
  2. package/hooks/heartbeat-hooks.sh +166 -13
  3. package/package.json +8 -2
  4. package/references/commands.md +1 -0
  5. package/tools/bin/agent-project-cleanup-session +143 -2
  6. package/tools/bin/agent-project-heartbeat-loop +29 -2
  7. package/tools/bin/agent-project-publish-issue-pr +178 -62
  8. package/tools/bin/agent-project-reconcile-issue-session +230 -5
  9. package/tools/bin/agent-project-reconcile-pr-session +104 -13
  10. package/tools/bin/agent-project-run-claude-session +19 -1
  11. package/tools/bin/agent-project-run-codex-resilient +121 -16
  12. package/tools/bin/agent-project-run-codex-session +61 -11
  13. package/tools/bin/agent-project-run-openclaw-session +274 -7
  14. package/tools/bin/agent-project-sync-anchor-repo +13 -2
  15. package/tools/bin/agent-project-worker-status +19 -14
  16. package/tools/bin/cleanup-worktree.sh +4 -1
  17. package/tools/bin/dashboard-launchd-bootstrap.sh +16 -4
  18. package/tools/bin/ensure-runtime-sync.sh +182 -0
  19. package/tools/bin/flow-config-lib.sh +76 -30
  20. package/tools/bin/flow-resident-worker-lib.sh +28 -2
  21. package/tools/bin/flow-shell-lib.sh +28 -8
  22. package/tools/bin/heartbeat-safe-auto.sh +32 -0
  23. package/tools/bin/issue-publish-localization-guard.sh +142 -0
  24. package/tools/bin/prepare-worktree.sh +3 -1
  25. package/tools/bin/project-launchd-bootstrap.sh +17 -4
  26. package/tools/bin/project-runtime-supervisor.sh +7 -1
  27. package/tools/bin/project-runtimectl.sh +78 -15
  28. package/tools/bin/provider-cooldown-state.sh +1 -1
  29. package/tools/bin/render-flow-config.sh +16 -1
  30. package/tools/bin/reuse-issue-worktree.sh +46 -0
  31. package/tools/bin/run-codex-task.sh +2 -2
  32. package/tools/bin/scaffold-profile.sh +2 -2
  33. package/tools/bin/start-issue-worker.sh +118 -16
  34. package/tools/bin/start-resident-issue-loop.sh +1 -0
  35. package/tools/bin/sync-shared-agent-home.sh +26 -0
  36. package/tools/bin/test-smoke.sh +6 -1
  37. package/tools/dashboard/app.js +91 -3
  38. package/tools/dashboard/dashboard_snapshot.py +119 -0
  39. package/tools/dashboard/styles.css +43 -0
  40. package/tools/templates/issue-prompt-template.md +18 -66
  41. package/tools/templates/legacy/issue-prompt-template-pre-slim.md +109 -0
  42. package/bin/audit-issue-routing.sh +0 -74
  43. package/tools/bin/audit-agent-worktrees.sh +0 -310
  44. package/tools/bin/audit-issue-routing.sh +0 -11
  45. package/tools/bin/audit-retained-layout.sh +0 -58
  46. package/tools/bin/audit-retained-overlap.sh +0 -135
  47. package/tools/bin/audit-retained-worktrees.sh +0 -228
  48. package/tools/bin/check-skill-contracts.sh +0 -324
@@ -168,6 +168,9 @@ fi
168
168
  result_outcome=""
169
169
  result_action=""
170
170
  result_issue_id="${ISSUE_ID:-}"
171
+ result_detail=""
172
+ run_started_at="${STARTED_AT:-}"
173
+ expected_run_started_at="${ACP_EXPECTED_RUN_STARTED_AT:-${F_LOSNING_EXPECTED_RUN_STARTED_AT:-}}"
171
174
  host_blocker_file="${run_dir}/host-blocker.md"
172
175
  prompt_file="${run_dir}/prompt.md"
173
176
  pr_comment_file="${run_dir}/pr-comment.md"
@@ -184,9 +187,18 @@ if [[ -f "$result_file_candidate" ]]; then
184
187
  set +a
185
188
  result_outcome="${OUTCOME:-}"
186
189
  result_action="${ACTION:-}"
190
+ result_detail="${DETAIL:-}"
187
191
  result_issue_id="${ISSUE_ID:-${result_issue_id}}"
188
192
  fi
189
193
 
194
+ if [[ -n "${expected_run_started_at}" && "${expected_run_started_at}" != "${run_started_at}" ]]; then
195
+ printf 'STATUS=STALE-RUN-SKIPPED\n'
196
+ printf 'SESSION=%s\n' "$session"
197
+ printf 'EXPECTED_STARTED_AT=%s\n' "${expected_run_started_at}"
198
+ printf 'ACTUAL_STARTED_AT=%s\n' "${run_started_at}"
199
+ exit 0
200
+ fi
201
+
190
202
  pr_schedule_retry() { :; }
191
203
  pr_clear_retry() { :; }
192
204
  pr_cleanup_linked_issue_session() { :; }
@@ -225,6 +237,8 @@ clear_provider_quota_cooldown() {
225
237
  "${provider_cooldown_script}" clear >/dev/null || true
226
238
  }
227
239
 
240
+ blocked_runtime_reason=""
241
+
228
242
  owner="${repo_slug%%/*}"
229
243
  repo="${repo_slug#*/}"
230
244
  pr_view_json="$(flow_github_pr_view_json "$repo_slug" "$pr_number")"
@@ -339,6 +353,11 @@ normalize_pr_result_contract() {
339
353
  host-comment-pr-blocker)
340
354
  return 0
341
355
  ;;
356
+ host-comment-blocker)
357
+ result_action="host-comment-pr-blocker"
358
+ pr_result_contract_note="normalized-legacy-blocked-action"
359
+ return 0
360
+ ;;
342
361
  requested-changes-or-blocked)
343
362
  result_action="host-comment-pr-blocker"
344
363
  pr_result_contract_note="normalized-legacy-blocked-action"
@@ -358,8 +377,15 @@ normalize_pr_result_contract() {
358
377
  }
359
378
 
360
379
  mark_reconciled() {
380
+ local reconciled_at tmp_file
361
381
  if [[ -d "$run_dir" ]]; then
362
- touch "${run_dir}/reconciled.ok"
382
+ reconciled_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
383
+ tmp_file="${run_dir}/reconciled.ok.tmp.$$"
384
+ {
385
+ printf 'STARTED_AT=%s\n' "${run_started_at}"
386
+ printf 'RECONCILED_AT=%s\n' "${reconciled_at}"
387
+ } >"${tmp_file}"
388
+ mv "${tmp_file}" "${run_dir}/reconciled.ok"
363
389
  fi
364
390
  }
365
391
 
@@ -393,6 +419,20 @@ blocked_result_indicates_local_bind_failure() {
393
419
  return 1
394
420
  }
395
421
 
422
+ classify_pr_blocked_runtime_reason() {
423
+ if [[ "${result_detail:-}" == "worker-tool-exec-empty-command" ]]; then
424
+ printf 'worker-tool-exec-empty-command\n'
425
+ return 0
426
+ fi
427
+
428
+ if [[ -f "$session_log_file" ]] && grep -Fq '[tools] exec failed: Provide a command to start.' "$session_log_file"; then
429
+ printf 'worker-tool-exec-empty-command\n'
430
+ return 0
431
+ fi
432
+
433
+ return 1
434
+ }
435
+
396
436
  extract_preapproved_host_recovery_commands() {
397
437
  [[ -f "$prompt_file" ]] || return 0
398
438
  sed -n 's/^.*loopback retry command: `\(.*\)`$/\1/p' "$prompt_file"
@@ -700,18 +740,53 @@ merge_state_prepared() {
700
740
  git -C "$pr_worktree" rev-parse -q --verify MERGE_HEAD >/dev/null 2>&1
701
741
  }
702
742
 
743
+ current_github_login() {
744
+ flow_export_github_cli_auth_env "${repo_slug}"
745
+ gh api user --jq '.login // ""' 2>/dev/null || true
746
+ }
747
+
748
+ pr_author_login() {
749
+ flow_export_github_cli_auth_env "${repo_slug}"
750
+ gh pr view "${pr_number}" -R "${repo_slug}" --json author --jq '.author.login // ""' 2>/dev/null || true
751
+ }
752
+
753
+ pr_is_self_authored_for_current_actor() {
754
+ local actor_login=""
755
+ local author_login=""
756
+
757
+ actor_login="$(current_github_login)"
758
+ author_login="$(pr_author_login)"
759
+ [[ -n "${actor_login}" && -n "${author_login}" && "${actor_login}" == "${author_login}" ]]
760
+ }
761
+
762
+ pr_remote_head_oid() {
763
+ flow_export_github_cli_auth_env "${repo_slug}"
764
+ gh pr view "${pr_number}" -R "${repo_slug}" --json headRefOid --jq '.headRefOid // ""' 2>/dev/null || true
765
+ }
766
+
767
+ pr_remote_already_has_final_head() {
768
+ local final_head="${FINAL_HEAD:-}"
769
+ local remote_head=""
770
+
771
+ [[ -n "${final_head}" ]] || return 1
772
+ remote_head="$(pr_remote_head_oid)"
773
+ [[ -n "${remote_head}" && "${remote_head}" == "${final_head}" ]]
774
+ }
775
+
703
776
  approve_and_merge() {
704
777
  local approve_output
705
- if ! approve_output="$(
706
- flow_github_api_repo "${repo_slug}" "pulls/${pr_number}/reviews" \
707
- --method POST \
708
- -f event=APPROVE \
709
- -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
710
- 2>&1
711
- )"; then
712
- if ! grep -q "Can not approve your own pull request" <<<"$approve_output"; then
713
- printf '%s\n' "$approve_output" >&2
714
- return 1
778
+ if ! pr_is_self_authored_for_current_actor; then
779
+ if ! approve_output="$(
780
+ flow_github_api_repo "${repo_slug}" "pulls/${pr_number}/reviews" \
781
+ --method POST \
782
+ -f event=APPROVE \
783
+ -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
784
+ 2>&1
785
+ )"; then
786
+ if ! grep -q "Can not approve your own pull request" <<<"$approve_output"; then
787
+ printf '%s\n' "$approve_output" >&2
788
+ return 1
789
+ fi
715
790
  fi
716
791
  fi
717
792
 
@@ -753,7 +828,14 @@ handle_linked_issue_merge_cleanup() {
753
828
 
754
829
  handle_updated_branch_result() {
755
830
  if [[ -z "$pr_worktree" || ! -d "$pr_worktree" ]]; then
756
- if pr_comment_already_posted; then
831
+ if pr_remote_already_has_final_head; then
832
+ post_pr_comment_if_present
833
+ require_transition "pr_clear_retry" pr_clear_retry
834
+ require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
835
+ cleanup_pr_session
836
+ result_action="${result_action:-host-push-pr-branch}"
837
+ notify_pr_reconciled
838
+ elif pr_comment_already_posted; then
757
839
  require_transition "pr_clear_retry" pr_clear_retry
758
840
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
759
841
  cleanup_pr_session
@@ -968,7 +1050,16 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
968
1050
  fi
969
1051
  fi
970
1052
  elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "blocked" ]]; then
971
- if attempt_blocked_pr_host_verification_recovery; then
1053
+ blocked_runtime_reason="$(classify_pr_blocked_runtime_reason || true)"
1054
+ if [[ -n "${blocked_runtime_reason:-}" ]]; then
1055
+ status="FAILED"
1056
+ failure_reason="${blocked_runtime_reason}"
1057
+ require_transition "pr_schedule_retry" pr_schedule_retry "$failure_reason"
1058
+ require_transition "pr_after_failed" pr_after_failed "$pr_number"
1059
+ cleanup_pr_session
1060
+ result_action="queued-pr-retry"
1061
+ notify_pr_reconciled
1062
+ elif attempt_blocked_pr_host_verification_recovery; then
972
1063
  handle_updated_branch_result
973
1064
  else
974
1065
  post_pr_comment_if_present
@@ -61,6 +61,24 @@ resolve_claude_bin() {
61
61
  return 0
62
62
  fi
63
63
 
64
+ # Well-known install locations for Claude Code CLI.
65
+ # Detached supervisors and LaunchAgents run with a minimal PATH that
66
+ # does not include user-local directories, so command -v alone is not
67
+ # enough. Try the common locations explicitly.
68
+ local -a fallback_paths=(
69
+ "${HOME}/.local/bin/claude"
70
+ "${HOME}/.claude/local/bin/claude"
71
+ "/usr/local/bin/claude"
72
+ "/opt/homebrew/bin/claude"
73
+ )
74
+ local p
75
+ for p in "${fallback_paths[@]}"; do
76
+ if [[ -x "${p}" ]]; then
77
+ printf '%s\n' "${p}"
78
+ return 0
79
+ fi
80
+ done
81
+
64
82
  return 1
65
83
  }
66
84
 
@@ -348,7 +366,7 @@ fi
348
366
 
349
367
  reconcile_snippet=""
350
368
  if [[ -n "$reconcile_command" ]]; then
351
- printf -v delayed_reconcile_q '%q' "sleep 2; $reconcile_command"
369
+ printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
352
370
  reconcile_snippet="nohup bash -lc ${delayed_reconcile_q} >> ${output_q} 2>&1 </dev/null &"
353
371
  fi
354
372
 
@@ -18,6 +18,7 @@ Options:
18
18
  --max-resume-attempts <count> Maximum resume attempts after interruption
19
19
  --auth-refresh-timeout-seconds <secs> How long to wait for refreshed auth before failing
20
20
  --auth-refresh-poll-seconds <secs> Poll interval while waiting for refreshed auth
21
+ --stall-seconds <secs> Fail if Codex stops producing output for too long
21
22
  --help Show this help
22
23
  EOF
23
24
  }
@@ -35,6 +36,8 @@ max_resume_attempts="${ACP_CODEX_MAX_RESUME_ATTEMPTS:-${F_LOSNING_CODEX_MAX_RESU
35
36
  auth_refresh_timeout_seconds="${ACP_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-900}}"
36
37
  auth_refresh_poll_seconds="${ACP_CODEX_AUTH_REFRESH_POLL_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_POLL_SECONDS:-10}}"
37
38
  max_quota_autoswitch_attempts="${ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-${F_LOSNING_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-1}}"
39
+ codex_progress_heartbeat_seconds="${ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS:-${F_LOSNING_CODEX_PROGRESS_HEARTBEAT_SECONDS:-30}}"
40
+ codex_stall_seconds="${ACP_CODEX_STALL_SECONDS:-${F_LOSNING_CODEX_STALL_SECONDS:-300}}"
38
41
 
39
42
  while [[ $# -gt 0 ]]; do
40
43
  case "$1" in
@@ -50,6 +53,7 @@ while [[ $# -gt 0 ]]; do
50
53
  --max-resume-attempts) max_resume_attempts="${2:-}"; shift 2 ;;
51
54
  --auth-refresh-timeout-seconds) auth_refresh_timeout_seconds="${2:-}"; shift 2 ;;
52
55
  --auth-refresh-poll-seconds) auth_refresh_poll_seconds="${2:-}"; shift 2 ;;
56
+ --stall-seconds) codex_stall_seconds="${2:-}"; shift 2 ;;
53
57
  --help|-h) usage; exit 0 ;;
54
58
  *) echo "Unknown argument: $1" >&2; usage >&2; exit 1 ;;
55
59
  esac
@@ -80,6 +84,13 @@ esac
80
84
  case "$max_quota_autoswitch_attempts" in
81
85
  ''|*[!0-9]*) echo "ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS must be numeric" >&2; exit 1 ;;
82
86
  esac
87
+ case "$codex_progress_heartbeat_seconds" in
88
+ ''|*[!0-9]*) echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be numeric" >&2; exit 1 ;;
89
+ 0) echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be greater than zero" >&2; exit 1 ;;
90
+ esac
91
+ case "$codex_stall_seconds" in
92
+ ''|*[!0-9]*) echo "ACP_CODEX_STALL_SECONDS must be numeric" >&2; exit 1 ;;
93
+ esac
83
94
 
84
95
  FLOW_SKILL_DIR="$(resolve_flow_skill_dir "${BASH_SOURCE[0]}")"
85
96
  state_file="${host_run_dir}/runner.env"
@@ -116,6 +127,7 @@ last_quota_switch_status=""
116
127
  last_quota_next_retry_at=""
117
128
  last_quota_selected_label=""
118
129
  quota_autoswitch_attempt_count=0
130
+ last_attempt_started_epoch=0
119
131
 
120
132
  mkdir -p "$host_run_dir"
121
133
  touch "$output_file"
@@ -148,6 +160,19 @@ write_state() {
148
160
  mv "$tmp_file" "$state_file"
149
161
  }
150
162
 
163
+ run_codex_command() {
164
+ # Nested workers must not inherit a parent thread id; the wrapper persists the child thread explicitly.
165
+ env -u CODEX_THREAD_ID "$codex_bin" "$@"
166
+ }
167
+
168
+ codex_recovery_target() {
169
+ if [[ -n "$thread_id" ]]; then
170
+ printf 'thread %s' "$thread_id"
171
+ return 0
172
+ fi
173
+ printf 'initial Codex exec'
174
+ }
175
+
151
176
  run_with_timeout() {
152
177
  local timeout_seconds="${1:?timeout seconds required}"
153
178
  shift
@@ -398,13 +423,43 @@ persist_thread_id_from_line() {
398
423
  fi
399
424
  }
400
425
 
426
+ terminate_codex_producer_tree() {
427
+ local pid="${1:?pid required}"
428
+ local deadline=""
429
+
430
+ if ! kill -0 "$pid" 2>/dev/null; then
431
+ return 0
432
+ fi
433
+
434
+ pkill -TERM -P "$pid" 2>/dev/null || true
435
+ kill "$pid" 2>/dev/null || true
436
+
437
+ deadline=$(( $(date +%s) + 2 ))
438
+ while kill -0 "$pid" 2>/dev/null; do
439
+ if (( $(date +%s) >= deadline )); then
440
+ break
441
+ fi
442
+ sleep 0.1
443
+ done
444
+
445
+ if kill -0 "$pid" 2>/dev/null; then
446
+ pkill -KILL -P "$pid" 2>/dev/null || true
447
+ kill -9 "$pid" 2>/dev/null || true
448
+ fi
449
+ }
450
+
401
451
  stream_codex_exec() {
402
452
  local phase="${1:?phase required}"
403
453
  local stream_fifo=""
404
454
  local producer_pid=""
455
+ local heartbeat_pid=""
456
+ local progress_file=""
405
457
  local line=""
406
458
 
407
459
  last_attempt_start_size="$(stat -f %z "$output_file" 2>/dev/null || printf '0')"
460
+ last_attempt_started_epoch="$(date +%s)"
461
+ progress_file="${host_run_dir}/.codex-progress.$$"
462
+ rm -f "$progress_file"
408
463
  stream_fifo="$(mktemp -u "${TMPDIR:-/tmp}/codex-stream.XXXXXX")"
409
464
  mkfifo "$stream_fifo"
410
465
 
@@ -413,10 +468,10 @@ stream_codex_exec() {
413
468
  (
414
469
  case "$mode" in
415
470
  safe)
416
- "$codex_bin" exec --json --profile "$safe_profile" --full-auto <"$prompt_file"
471
+ run_codex_command exec --json --profile "$safe_profile" --full-auto <"$prompt_file"
417
472
  ;;
418
473
  bypass)
419
- "$codex_bin" exec --json --profile "$bypass_profile" --dangerously-bypass-approvals-and-sandbox <"$prompt_file"
474
+ run_codex_command exec --json --profile "$bypass_profile" --dangerously-bypass-approvals-and-sandbox <"$prompt_file"
420
475
  ;;
421
476
  esac
422
477
  ) >"$stream_fifo" 2>&1 &
@@ -425,10 +480,10 @@ stream_codex_exec() {
425
480
  (
426
481
  case "$mode" in
427
482
  safe)
428
- resume_prompt | "$codex_bin" exec resume --json --full-auto "$thread_id" -
483
+ resume_prompt | run_codex_command exec resume --json --full-auto "$thread_id" -
429
484
  ;;
430
485
  bypass)
431
- resume_prompt | "$codex_bin" exec resume --json --dangerously-bypass-approvals-and-sandbox "$thread_id" -
486
+ resume_prompt | run_codex_command exec resume --json --dangerously-bypass-approvals-and-sandbox "$thread_id" -
432
487
  ;;
433
488
  esac
434
489
  ) >"$stream_fifo" 2>&1 &
@@ -441,12 +496,55 @@ stream_codex_exec() {
441
496
  esac
442
497
 
443
498
  producer_pid="$!"
499
+ (
500
+ local now elapsed last_progress_epoch idle_for
501
+ while kill -0 "$producer_pid" 2>/dev/null; do
502
+ sleep "$codex_progress_heartbeat_seconds"
503
+ if ! kill -0 "$producer_pid" 2>/dev/null; then
504
+ break
505
+ fi
506
+ now="$(date +%s)"
507
+ elapsed=$((now - last_attempt_started_epoch))
508
+ if (( codex_stall_seconds > 0 )); then
509
+ if [[ ! -f "$progress_file" ]]; then
510
+ if (( elapsed >= codex_stall_seconds )); then
511
+ write_state "running" ""
512
+ log_runner "stale-run no-codex-output-before-stall-threshold elapsed=${elapsed}s"
513
+ terminate_codex_producer_tree "$producer_pid"
514
+ break
515
+ fi
516
+ else
517
+ last_progress_epoch="$(stat -f %m "$progress_file" 2>/dev/null || printf '0')"
518
+ if [[ -n "$last_progress_epoch" && "$last_progress_epoch" != "0" ]]; then
519
+ idle_for=$((now - last_progress_epoch))
520
+ if (( idle_for >= codex_stall_seconds )); then
521
+ write_state "running" ""
522
+ log_runner "stale-run no-codex-progress-before-stall-threshold elapsed=${elapsed}s idle=${idle_for}s"
523
+ terminate_codex_producer_tree "$producer_pid"
524
+ break
525
+ fi
526
+ fi
527
+ fi
528
+ fi
529
+ write_state "running" ""
530
+ log_runner "heartbeat waiting-for-codex-output elapsed=${elapsed}s"
531
+ done
532
+ ) &
533
+ heartbeat_pid="$!"
534
+
444
535
  while IFS= read -r line || [[ -n "$line" ]]; do
445
536
  printf '%s\n' "$line" | tee -a "$output_file"
537
+ touch "$progress_file" 2>/dev/null || true
446
538
  persist_thread_id_from_line "$line"
447
539
  done <"$stream_fifo"
448
540
 
541
+ if [[ -n "$heartbeat_pid" ]] && kill -0 "$heartbeat_pid" 2>/dev/null; then
542
+ kill "$heartbeat_pid" 2>/dev/null || true
543
+ wait "$heartbeat_pid" 2>/dev/null || true
544
+ fi
545
+
449
546
  rm -f "$stream_fifo"
547
+ rm -f "$progress_file"
450
548
 
451
549
  if wait "$producer_pid"; then
452
550
  last_exit_code="0"
@@ -485,6 +583,16 @@ classify_failure_reason() {
485
583
 
486
584
  recent_chunk="$(tail -n 120 <<<"$chunk")"
487
585
 
586
+ if grep -Eiq 'stale-run no-codex-output-before-stall-threshold|no-codex-output-before-stall-threshold' <<<"$recent_chunk"; then
587
+ printf 'no-codex-output-before-stall-threshold\n'
588
+ return 0
589
+ fi
590
+
591
+ if grep -Eiq 'stale-run no-codex-progress-before-stall-threshold|no-codex-progress-before-stall-threshold' <<<"$recent_chunk"; then
592
+ printf 'no-codex-progress-before-stall-threshold\n'
593
+ return 0
594
+ fi
595
+
488
596
  if grep -Eiq "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" <<<"$recent_chunk"; then
489
597
  printf 'usage-limit\n'
490
598
  return 0
@@ -523,7 +631,7 @@ EOF
523
631
  }
524
632
 
525
633
  codex_login_healthy() {
526
- "$codex_bin" login status >/dev/null 2>&1
634
+ run_codex_command login status >/dev/null 2>&1
527
635
  }
528
636
 
529
637
  wait_for_auth_refresh() {
@@ -533,7 +641,9 @@ wait_for_auth_refresh() {
533
641
  local baseline_switch_signature="${4:-}"
534
642
  local deadline now current_fingerprint current_quota_label current_switch_signature
535
643
  local sleep_seconds
644
+ local recovery_target
536
645
 
646
+ recovery_target="$(codex_recovery_target)"
537
647
  auth_wait_started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
538
648
  last_trigger_reason="$trigger_reason"
539
649
  write_state "waiting-auth-refresh" "$trigger_reason"
@@ -548,28 +658,28 @@ wait_for_auth_refresh() {
548
658
  current_switch_signature="$(quota_switch_signature)"
549
659
  if codex_login_healthy; then
550
660
  if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
551
- log_runner "detected refreshed Codex auth after quota interruption; resuming thread ${thread_id}"
661
+ log_runner "detected refreshed Codex auth after quota interruption; resuming ${recovery_target}"
552
662
  auth_wait_started_at=""
553
663
  write_state "running" ""
554
664
  return 0
555
665
  fi
556
666
 
557
667
  if [[ -n "$baseline_quota_label" && -n "$current_quota_label" && "$current_quota_label" != "$baseline_quota_label" ]]; then
558
- log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming thread ${thread_id}"
668
+ log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming ${recovery_target}"
559
669
  auth_wait_started_at=""
560
670
  write_state "running" ""
561
671
  return 0
562
672
  fi
563
673
 
564
674
  if [[ -n "$baseline_switch_signature" && -n "$current_switch_signature" && "$current_switch_signature" != "$baseline_switch_signature" ]]; then
565
- log_runner "detected quota switch state refresh; resuming thread ${thread_id}"
675
+ log_runner "detected quota switch state refresh; resuming ${recovery_target}"
566
676
  auth_wait_started_at=""
567
677
  write_state "running" ""
568
678
  return 0
569
679
  fi
570
680
 
571
681
  if [[ "$last_quota_switch_status" == "switched" && -n "$current_quota_label" ]]; then
572
- log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming thread ${thread_id}"
682
+ log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming ${recovery_target}"
573
683
  auth_wait_started_at=""
574
684
  write_state "running" ""
575
685
  return 0
@@ -580,9 +690,9 @@ wait_for_auth_refresh() {
580
690
  *)
581
691
  if codex_login_healthy; then
582
692
  if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
583
- log_runner "detected refreshed Codex auth; resuming thread ${thread_id}"
693
+ log_runner "detected refreshed Codex auth; resuming ${recovery_target}"
584
694
  else
585
- log_runner "Codex auth is healthy again; resuming thread ${thread_id}"
695
+ log_runner "Codex auth is healthy again; resuming ${recovery_target}"
586
696
  fi
587
697
  auth_wait_started_at=""
588
698
  write_state "running" ""
@@ -643,11 +753,6 @@ attempt_run() {
643
753
  reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
644
754
  last_failure_reason="${reason:-worker-exit-failed}"
645
755
 
646
- if [[ -z "$thread_id" ]]; then
647
- write_state "failed" "$last_failure_reason"
648
- return 1
649
- fi
650
-
651
756
  case "$last_failure_reason" in
652
757
  usage-limit|auth-failure|auth-401|account-banned)
653
758
  if (( resume_count >= max_resume_attempts )); then
@@ -24,6 +24,7 @@ EOF
24
24
  mode=""
25
25
  session=""
26
26
  worktree=""
27
+ worktree_realpath=""
27
28
  prompt_file=""
28
29
  runs_root=""
29
30
  adapter_id=""
@@ -105,6 +106,12 @@ if [[ -z "$mode" || -z "$session" || -z "$worktree" || -z "$prompt_file" || -z "
105
106
  exit 1
106
107
  fi
107
108
 
109
+ worktree_realpath="$(cd "$worktree" 2>/dev/null && pwd -P || true)"
110
+ if [[ -z "$worktree_realpath" || ! -d "$worktree_realpath" ]]; then
111
+ echo "unable to resolve worktree realpath: $worktree" >&2
112
+ exit 1
113
+ fi
114
+
108
115
  case "$mode" in
109
116
  safe|bypass) ;;
110
117
  *)
@@ -119,7 +126,7 @@ inner_script="${artifact_dir}/${session}.sh"
119
126
  meta_file="${artifact_dir}/run.env"
120
127
  result_file="${artifact_dir}/result.env"
121
128
  runner_state_file="${artifact_dir}/runner.env"
122
- sandbox_run_dir="${worktree%/}/${sandbox_subdir}/${session}"
129
+ sandbox_run_dir="${worktree_realpath%/}/${sandbox_subdir}/${session}"
123
130
  started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
124
131
  codex_bin="$(resolve_codex_bin || true)"
125
132
  runner_bin="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/agent-project-run-codex-resilient"
@@ -137,13 +144,14 @@ if [[ -z "$codex_bin" || ! -x "$codex_bin" ]]; then
137
144
  exit 1
138
145
  fi
139
146
 
140
- branch_name="$(git -C "$worktree" branch --show-current 2>/dev/null || true)"
147
+ branch_name="$(git -C "$worktree_realpath" branch --show-current 2>/dev/null || true)"
141
148
 
142
149
  printf -v session_q '%q' "$session"
143
150
  printf -v task_kind_q '%q' "$task_kind"
144
151
  printf -v task_id_q '%q' "$task_id"
145
152
  printf -v mode_q '%q' "$mode"
146
153
  printf -v worktree_q '%q' "$worktree"
154
+ printf -v worktree_realpath_q '%q' "$worktree_realpath"
147
155
  printf -v prompt_q '%q' "$prompt_file"
148
156
  printf -v output_q '%q' "$output_file"
149
157
  printf -v artifact_dir_q '%q' "$artifact_dir"
@@ -166,6 +174,7 @@ printf -v bypass_profile_q '%q' "$bypass_profile"
166
174
  printf 'SESSION=%s\n' "$session_q"
167
175
  printf 'MODE=%s\n' "$mode_q"
168
176
  printf 'WORKTREE=%s\n' "$worktree_q"
177
+ printf 'WORKTREE_REALPATH=%s\n' "$worktree_realpath_q"
169
178
  printf 'PROMPT_FILE=%s\n' "$prompt_q"
170
179
  printf 'OUTPUT_FILE=%s\n' "$output_q"
171
180
  printf 'SCRIPT=%s\n' "$script_q"
@@ -256,7 +265,7 @@ fi
256
265
 
257
266
  reconcile_snippet=""
258
267
  if [[ -n "$reconcile_command" ]]; then
259
- printf -v delayed_reconcile_q '%q' "sleep 2; $reconcile_command"
268
+ printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
260
269
  reconcile_snippet="nohup bash -lc ${delayed_reconcile_q} >> ${output_q} 2>&1 </dev/null &"
261
270
  fi
262
271
 
@@ -264,19 +273,59 @@ cat >"$inner_script" <<EOF
264
273
  #!/usr/bin/env bash
265
274
  set -euo pipefail
266
275
  ${runtime_exports}
267
- ${context_exports}cd ${worktree_q}
276
+ ${context_exports}cd ${worktree_realpath_q}
268
277
  reset_sandbox_run_dir() {
269
278
  mkdir -p ${sandbox_run_dir_q}
270
279
  find ${sandbox_run_dir_q} -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
271
280
  }
281
+ find_logged_artifact_path() {
282
+ local artifact_name="\${1:?artifact name required}"
283
+ local candidate=""
284
+
285
+ while IFS= read -r candidate; do
286
+ [[ -n "\${candidate}" ]] || continue
287
+ while [[ "\${candidate}" == *')' || "\${candidate}" == *']' || "\${candidate}" == *',' || "\${candidate}" == *'"' || "\${candidate}" == *"'" ]]; do
288
+ candidate="\${candidate%?}"
289
+ done
290
+ if [[ "\$(basename "\${candidate}")" == "\${artifact_name}" && -f "\${candidate}" ]]; then
291
+ printf '%s\n' "\${candidate}"
292
+ fi
293
+ done < <(grep -oE '/(Users|Volumes|tmp)/[^[:space:])"]+' ${output_q} 2>/dev/null || true)
294
+ }
295
+ recover_logged_artifact() {
296
+ local artifact_name="\${1:?artifact name required}"
297
+ local destination="\${2:?destination required}"
298
+ local source_path=""
299
+
300
+ source_path="\$(find_logged_artifact_path "\${artifact_name}" | tail -n 1)"
301
+ [[ -n "\${source_path}" ]] || return 0
302
+ mkdir -p "\$(dirname "\${destination}")"
303
+ if [[ "\${source_path}" != "\${destination}" ]]; then
304
+ cp "\${source_path}" "\${destination}"
305
+ fi
306
+ }
307
+ recover_collected_artifact() {
308
+ local artifact_name="\${1:?artifact name required}"
309
+ local destination="\${2:?destination required}"
310
+
311
+ if [[ -f ${sandbox_run_dir_q}/"\${artifact_name}" ]]; then
312
+ if [[ ${sandbox_run_dir_q}/"\${artifact_name}" != "\${destination}" ]]; then
313
+ cp ${sandbox_run_dir_q}/"\${artifact_name}" "\${destination}"
314
+ fi
315
+ return 0
316
+ fi
317
+
318
+ recover_logged_artifact "\${artifact_name}" "\${destination}"
319
+ }
272
320
  record_final_git_state() {
273
321
  local final_head final_branch tmp_file
274
322
 
275
- final_head="\$(git -C ${worktree_q} rev-parse HEAD 2>/dev/null || true)"
276
- final_branch="\$(git -C ${worktree_q} branch --show-current 2>/dev/null || true)"
323
+ final_head="\$(git -C ${worktree_realpath_q} rev-parse HEAD 2>/dev/null || true)"
324
+ final_branch="\$(git -C ${worktree_realpath_q} branch --show-current 2>/dev/null || true)"
277
325
  tmp_file=${meta_file_q}.tmp.final.$$
278
- grep -vE '^(FINAL_HEAD|FINAL_BRANCH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
326
+ grep -vE '^(FINAL_HEAD|FINAL_BRANCH|WORKTREE_REALPATH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
279
327
  {
328
+ printf 'WORKTREE_REALPATH=%s\n' ${worktree_realpath_q}
280
329
  printf 'FINAL_HEAD=%q\n' "\${final_head}"
281
330
  printf 'FINAL_BRANCH=%q\n' "\${final_branch}"
282
331
  } >>"\${tmp_file}"
@@ -286,7 +335,7 @@ reset_sandbox_run_dir
286
335
  set +e
287
336
  bash ${runner_bin_q} \\
288
337
  --mode ${mode_q} \\
289
- --worktree ${worktree_q} \\
338
+ --worktree ${worktree_realpath_q} \\
290
339
  --prompt-file ${prompt_q} \\
291
340
  --output-file ${output_q} \\
292
341
  --host-run-dir ${artifact_dir_q} \\
@@ -296,9 +345,10 @@ bash ${runner_bin_q} \\
296
345
  --codex-bin ${codex_bin_q}
297
346
  status=\$?
298
347
  record_final_git_state
299
- if [[ -f ${sandbox_run_dir_q}/result.env ]]; then
300
- cp ${sandbox_run_dir_q}/result.env ${result_q}
301
- fi
348
+ recover_collected_artifact result.env ${result_q}
349
+ recover_collected_artifact issue-comment.md ${artifact_dir_q}/issue-comment.md
350
+ recover_collected_artifact pr-comment.md ${artifact_dir_q}/pr-comment.md
351
+ recover_collected_artifact verification.jsonl ${artifact_dir_q}/verification.jsonl
302
352
  ${collect_copy_snippet}${reconcile_snippet}
303
353
  printf '\n__CODEX_EXIT__:%s\n' "\$status" | tee -a ${output_q}
304
354
  exit "\$status"