agent-control-plane 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -22,7 +22,7 @@ external profile registry, not inside this repository.
22
22
  - installed project profiles in `~/.agent-runtime/control-plane/profiles/*/control-plane.yaml`
23
23
  - installed profile notes in `~/.agent-runtime/control-plane/profiles/*/README.md`
24
24
  - workflow catalog in `assets/workflow-catalog.json`
25
- - worker dashboard in `tools/dashboard/` with launchers in `tools/bin/render-dashboard-snapshot.py`
25
+ - worker dashboard in `tools/dashboard/` with launcher at `tools/dashboard/dashboard_snapshot.py`
26
26
  and `tools/bin/serve-dashboard.sh`
27
27
  - dashboard autostart helpers in `tools/bin/dashboard-launchd-bootstrap.sh` and
28
28
  `tools/bin/install-dashboard-launchd.sh`
@@ -95,6 +95,7 @@ heartbeat_open_agent_pr_issue_ids() {
95
95
  local pr_issue_ids_json=""
96
96
  pr_issue_ids_json="$(
97
97
  flow_github_pr_list_json "$REPO_SLUG" open 100 \
98
+ 2>/dev/null \
98
99
  | jq --argjson agentPrPrefixes "${AGENT_PR_PREFIXES_JSON}" --arg handoffLabel "${AGENT_PR_HANDOFF_LABEL}" --arg branchIssueRegex "${AGENT_PR_ISSUE_CAPTURE_REGEX}" '
99
100
  map(
100
101
  . as $pr
@@ -119,7 +120,7 @@ heartbeat_open_agent_pr_issue_ids() {
119
120
  | select(. != null and . != "")
120
121
  )
121
122
  | unique
122
- '
123
+ ' 2>/dev/null || true
123
124
  )"
124
125
 
125
126
  if [[ -z "${pr_issue_ids_json:-}" ]]; then
@@ -136,6 +137,7 @@ heartbeat_list_ready_issue_ids() {
136
137
 
137
138
  ready_issue_rows="$(
138
139
  flow_github_issue_list_json "$REPO_SLUG" open 100 \
140
+ 2>/dev/null \
139
141
  | jq -r --argjson openAgentPrIssueIds "${open_agent_pr_issue_ids}" '
140
142
  map(select(
141
143
  (any(.labels[]?; .name == "agent-running") | not)
@@ -145,7 +147,7 @@ heartbeat_list_ready_issue_ids() {
145
147
  | .[]
146
148
  | [.number, (any(.labels[]?; .name == "agent-blocked"))]
147
149
  | @tsv
148
- '
150
+ ' 2>/dev/null || true
149
151
  )"
150
152
 
151
153
  while IFS=$'\t' read -r issue_id is_blocked; do
@@ -170,6 +172,7 @@ heartbeat_list_blocked_recovery_issue_ids() {
170
172
 
171
173
  blocked_issue_rows="$(
172
174
  flow_github_issue_list_json "$REPO_SLUG" open 100 \
175
+ 2>/dev/null \
173
176
  | jq -r --argjson openAgentPrIssueIds "${open_agent_pr_issue_ids}" '
174
177
  map(select(
175
178
  any(.labels[]?; .name == "agent-blocked")
@@ -178,7 +181,7 @@ heartbeat_list_blocked_recovery_issue_ids() {
178
181
  ))
179
182
  | sort_by(.createdAt, .number)
180
183
  | .[].number
181
- '
184
+ ' 2>/dev/null || true
182
185
  )"
183
186
 
184
187
  while IFS= read -r issue_id; do
@@ -268,6 +271,7 @@ heartbeat_list_exclusive_issue_ids() {
268
271
  open_agent_pr_issue_ids="$(heartbeat_open_agent_pr_issue_ids)"
269
272
 
270
273
  flow_github_issue_list_json "$REPO_SLUG" open 100 \
274
+ 2>/dev/null \
271
275
  | jq -r --arg exclusiveLabel "${AGENT_EXCLUSIVE_LABEL}" --argjson openAgentPrIssueIds "${open_agent_pr_issue_ids}" '
272
276
  map(select(
273
277
  any(.labels[]?; .name == $exclusiveLabel)
@@ -277,20 +281,22 @@ heartbeat_list_exclusive_issue_ids() {
277
281
  ))
278
282
  | sort_by(.createdAt, .number)
279
283
  | .[].number
280
- '
284
+ ' 2>/dev/null || true
281
285
  }
282
286
 
283
287
  heartbeat_list_running_issue_ids() {
284
288
  flow_github_issue_list_json "$REPO_SLUG" open 100 \
289
+ 2>/dev/null \
285
290
  | jq -r '
286
291
  map(select(any(.labels[]?; .name == "agent-running")))
287
292
  | sort_by(.createdAt, .number)
288
293
  | .[].number
289
- '
294
+ ' 2>/dev/null || true
290
295
  }
291
296
 
292
297
  heartbeat_list_open_agent_pr_ids() {
293
298
  flow_github_pr_list_json "$REPO_SLUG" open 100 \
299
+ 2>/dev/null \
294
300
  | jq -r --argjson agentPrPrefixes "${AGENT_PR_PREFIXES_JSON}" --arg handoffLabel "${AGENT_PR_HANDOFF_LABEL}" '
295
301
  map(select(
296
302
  . as $pr
@@ -302,11 +308,12 @@ heartbeat_list_open_agent_pr_ids() {
302
308
  ))
303
309
  | sort_by(.createdAt)
304
310
  | .[].number
305
- '
311
+ ' 2>/dev/null || true
306
312
  }
307
313
 
308
314
  heartbeat_list_exclusive_pr_ids() {
309
315
  flow_github_pr_list_json "$REPO_SLUG" open 100 \
316
+ 2>/dev/null \
310
317
  | jq -r --argjson agentPrPrefixes "${AGENT_PR_PREFIXES_JSON}" --arg handoffLabel "${AGENT_PR_HANDOFF_LABEL}" --arg exclusiveLabel "${AGENT_EXCLUSIVE_LABEL}" '
311
318
  map(select(
312
319
  . as $pr
@@ -319,7 +326,7 @@ heartbeat_list_exclusive_pr_ids() {
319
326
  ))
320
327
  | sort_by(.createdAt)
321
328
  | .[].number
322
- '
329
+ ' 2>/dev/null || true
323
330
  }
324
331
 
325
332
  heartbeat_issue_is_heavy() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-control-plane",
3
- "version": "0.1.13",
3
+ "version": "0.1.14",
4
4
  "description": "Help a repo keep GitHub-driven coding agents running reliably without constant human babysitting",
5
5
  "homepage": "https://github.com/ducminhnguyen0319/agent-control-plane",
6
6
  "bugs": {
@@ -32,12 +32,14 @@
32
32
  "tools/bin",
33
33
  "!tools/bin/audit-*.sh",
34
34
  "!tools/bin/check-skill-contracts.sh",
35
+ "!tools/bin/render-dashboard-snapshot.py",
35
36
  "tools/dashboard/app.js",
36
37
  "tools/dashboard/dashboard_snapshot.py",
37
38
  "tools/dashboard/index.html",
38
39
  "tools/dashboard/server.py",
39
40
  "tools/dashboard/styles.css",
40
41
  "tools/templates",
42
+ "!tools/templates/legacy/",
41
43
  "tools/vendor/codex-quota/LICENSE",
42
44
  "tools/vendor/codex-quota/codex-quota.js",
43
45
  "tools/vendor/codex-quota/lib",
@@ -85,7 +85,7 @@ tools/bin/uninstall-project-launchd.sh --profile-id <id>
85
85
  tools/bin/project-remove.sh --profile-id <id>
86
86
  tools/bin/project-remove.sh --profile-id <id> --purge-paths
87
87
  tools/bin/sync-shared-agent-home.sh
88
- python3 tools/bin/render-dashboard-snapshot.py --pretty
88
+ python3 tools/dashboard/dashboard_snapshot.py --pretty
89
89
  bash tools/bin/serve-dashboard.sh --host 127.0.0.1 --port 8765
90
90
  bash tools/bin/install-dashboard-launchd.sh --host 127.0.0.1 --port 8765
91
91
  ```
@@ -97,7 +97,7 @@ prompts live under `tools/templates/`.
97
97
  ## Dashboard
98
98
 
99
99
  ```bash
100
- python3 tools/bin/render-dashboard-snapshot.py --pretty
100
+ python3 tools/dashboard/dashboard_snapshot.py --pretty
101
101
  bash tools/bin/serve-dashboard.sh --host 127.0.0.1 --port 8765
102
102
  bash tools/bin/install-dashboard-launchd.sh --host 127.0.0.1 --port 8765
103
103
  ```
@@ -64,7 +64,7 @@ roots, labels, worker preferences, prompts, and project-specific guardrails.
64
64
  before scheduler use.
65
65
  - `tools/bin/test-smoke.sh`
66
66
  Runs the main shared-package smoke gates in one operator-facing command.
67
- - `tools/bin/render-dashboard-snapshot.py`
67
+ - `tools/dashboard/dashboard_snapshot.py`
68
68
  Emits a JSON snapshot of active runs, resident controllers, cooldown state,
69
69
  queue depth, and scheduled issues across installed profiles.
70
70
  - `tools/bin/serve-dashboard.sh`
@@ -307,12 +307,14 @@ schedule_provider_quota_cooldown() {
307
307
  local reason="${1:-provider-quota-limit}"
308
308
  [[ "${reason}" == "provider-quota-limit" ]] || return 0
309
309
  [[ -x "${provider_cooldown_script}" ]] || return 0
310
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
310
311
 
311
312
  "${provider_cooldown_script}" schedule "${reason}" >/dev/null || true
312
313
  }
313
314
 
314
315
  clear_provider_quota_cooldown() {
315
316
  [[ -x "${provider_cooldown_script}" ]] || return 0
317
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
316
318
 
317
319
  "${provider_cooldown_script}" clear >/dev/null || true
318
320
  }
@@ -227,23 +227,30 @@ schedule_provider_quota_cooldown() {
227
227
  local reason="${1:-provider-quota-limit}"
228
228
  [[ "${failure_reason:-}" == "provider-quota-limit" ]] || return 0
229
229
  [[ -x "${provider_cooldown_script}" ]] || return 0
230
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
230
231
 
231
232
  "${provider_cooldown_script}" schedule "${reason}" >/dev/null || true
232
233
  }
233
234
 
234
235
  clear_provider_quota_cooldown() {
235
236
  [[ -x "${provider_cooldown_script}" ]] || return 0
237
+ [[ "${CODING_WORKER:-}" == "codex" ]] && return 0
236
238
 
237
239
  "${provider_cooldown_script}" clear >/dev/null || true
238
240
  }
239
241
 
240
242
  blocked_runtime_reason=""
243
+ host_github_rate_limited="no"
244
+ host_github_rate_limit_detail=""
241
245
 
242
246
  owner="${repo_slug%%/*}"
243
247
  repo="${repo_slug#*/}"
244
248
  pr_view_json="$(flow_github_pr_view_json "$repo_slug" "$pr_number")"
245
249
  pr_state="$(jq -r '.state' <<<"$pr_view_json")"
246
- pr_base_ref="$(jq -r '.baseRefName // "main"' <<<"$pr_view_json")"
250
+ pr_base_ref="$(jq -r '.baseRefName // empty' <<<"$pr_view_json")"
251
+ if [[ -z "${pr_base_ref}" ]]; then
252
+ pr_base_ref="main"
253
+ fi
247
254
 
248
255
  if [[ "$status" == "RUNNING" && "$pr_state" != "MERGED" && "$pr_state" != "CLOSED" ]]; then
249
256
  printf 'STATUS=%s\n' "$status"
@@ -395,7 +402,9 @@ post_pr_comment_if_present() {
395
402
  if pr_comment_already_posted; then
396
403
  return 0
397
404
  fi
398
- flow_github_api_repo "${repo_slug}" "issues/${pr_number}/comments" --method POST -f body="$(cat "$comment_file")" >/dev/null
405
+ if ! host_github_post_issue_comment "${pr_number}" "$(cat "$comment_file")"; then
406
+ return 1
407
+ fi
399
408
  }
400
409
 
401
410
  pr_comment_already_posted() {
@@ -408,6 +417,120 @@ pr_comment_already_posted() {
408
417
  jq -e --arg body "$comment_body" 'any(.comments[]?; .body == $body)' >/dev/null <<<"$comments_json"
409
418
  }
410
419
 
420
+ host_github_output_indicates_rate_limit() {
421
+ grep -Eiq 'API rate limit exceeded|secondary rate limit|rate limit exceeded|HTTP 403' <<<"${1:-}"
422
+ }
423
+
424
+ record_host_github_rate_limit() {
425
+ local output="${1:-}"
426
+ local detail_file="${run_dir}/host-github-rate-limit.log"
427
+ host_github_rate_limited="yes"
428
+ host_github_rate_limit_detail="${output}"
429
+ printf '%s\n' "${output}" >"${detail_file}"
430
+ }
431
+
432
+ host_github_post_issue_comment() {
433
+ local issue_number="${1:?issue number required}"
434
+ local body="${2:-}"
435
+ local output=""
436
+
437
+ flow_export_github_cli_auth_env "${repo_slug}"
438
+ if output="$(
439
+ gh api "repos/${repo_slug}/issues/${issue_number}/comments" \
440
+ --method POST \
441
+ -f body="${body}" 2>&1
442
+ )"; then
443
+ return 0
444
+ fi
445
+
446
+ if host_github_output_indicates_rate_limit "${output}"; then
447
+ record_host_github_rate_limit "${output}"
448
+ return 1
449
+ fi
450
+
451
+ printf '%s\n' "${output}" >&2
452
+ return 1
453
+ }
454
+
455
+ host_github_submit_pr_approval() {
456
+ local output=""
457
+
458
+ flow_export_github_cli_auth_env "${repo_slug}"
459
+ if output="$(
460
+ gh api "repos/${repo_slug}/pulls/${pr_number}/reviews" \
461
+ --method POST \
462
+ -f event=APPROVE \
463
+ -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
464
+ 2>&1
465
+ )"; then
466
+ return 0
467
+ fi
468
+
469
+ if grep -q "Can not approve your own pull request" <<<"${output}"; then
470
+ return 0
471
+ fi
472
+
473
+ if host_github_output_indicates_rate_limit "${output}"; then
474
+ record_host_github_rate_limit "${output}"
475
+ return 1
476
+ fi
477
+
478
+ printf '%s\n' "${output}" >&2
479
+ return 1
480
+ }
481
+
482
+ append_host_rate_limit_comment() {
483
+ local detail="${1:-GitHub API rate limit blocked host actions.}"
484
+ local reset_line=""
485
+
486
+ if grep -Eiq 'resets at ' <<<"${detail}"; then
487
+ reset_line="$(grep -Eio 'resets at [^.]+' <<<"${detail}" | head -n 1 || true)"
488
+ fi
489
+
490
+ {
491
+ if [[ -s "${pr_comment_file}" ]]; then
492
+ printf '\n\n'
493
+ fi
494
+ printf '## Host action blocked\n\n'
495
+ printf 'GitHub API rate limit blocked ACP from posting the PR review outcome or merge action.\n'
496
+ if [[ -n "${reset_line}" ]]; then
497
+ printf '\n- %s\n' "${reset_line}"
498
+ fi
499
+ printf -- '- ACP kept the local review artifacts and scheduled an automatic retry for the host action.\n'
500
+ } >>"${pr_comment_file}"
501
+ }
502
+
503
+ handle_host_github_rate_limit_retry() {
504
+ local reason="${1:-github-api-rate-limit}"
505
+ local result_action_override="${2:-host-rate-limit-retry}"
506
+
507
+ append_host_rate_limit_comment "${host_github_rate_limit_detail:-}"
508
+ require_transition "pr_schedule_retry" pr_schedule_retry "${reason}"
509
+ require_transition "pr_after_blocked" pr_after_blocked "${pr_number}"
510
+ cleanup_pr_session
511
+ result_outcome="blocked"
512
+ result_action="${result_action_override}"
513
+ failure_reason="${reason}"
514
+ notify_pr_reconciled
515
+ mark_reconciled
516
+ printf 'STATUS=FAILED\n'
517
+ printf 'PR_NUMBER=%s\n' "${pr_number}"
518
+ printf 'PR_STATE=%s\n' "${pr_state}"
519
+ printf 'OUTCOME=%s\n' "${result_outcome}"
520
+ printf 'ACTION=%s\n' "${result_action}"
521
+ printf 'FAILURE_REASON=%s\n' "${failure_reason}"
522
+ exit 0
523
+ }
524
+
525
+ maybe_handle_host_github_rate_limit() {
526
+ local reason="${1:-github-api-rate-limit}"
527
+ local result_action_override="${2:-host-rate-limit-retry}"
528
+ if [[ "${host_github_rate_limited}" == "yes" ]]; then
529
+ handle_host_github_rate_limit_retry "${reason}" "${result_action_override}"
530
+ fi
531
+ return 1
532
+ }
533
+
411
534
  blocked_result_indicates_local_bind_failure() {
412
535
  local candidate_file
413
536
  for candidate_file in "$pr_comment_file" "$session_log_file"; do
@@ -774,23 +897,38 @@ pr_remote_already_has_final_head() {
774
897
  }
775
898
 
776
899
  approve_and_merge() {
777
- local approve_output
778
900
  if ! pr_is_self_authored_for_current_actor; then
779
- if ! approve_output="$(
780
- flow_github_api_repo "${repo_slug}" "pulls/${pr_number}/reviews" \
781
- --method POST \
782
- -f event=APPROVE \
783
- -f body="Automated final review passed. Safe low-risk scope, green checks, and host-side merge approved." \
784
- 2>&1
785
- )"; then
786
- if ! grep -q "Can not approve your own pull request" <<<"$approve_output"; then
787
- printf '%s\n' "$approve_output" >&2
788
- return 1
901
+ if ! host_github_submit_pr_approval; then
902
+ if [[ "${host_github_rate_limited}" == "yes" ]]; then
903
+ return 2
789
904
  fi
905
+ return 1
790
906
  fi
791
907
  fi
792
908
 
793
- flow_github_pr_merge "$repo_slug" "$pr_number" "squash" "yes"
909
+ flow_export_github_cli_auth_env "${repo_slug}"
910
+ if ! gh pr merge "${pr_number}" -R "${repo_slug}" --squash --delete-branch --admin >"${run_dir}/host-github-merge.out" 2>"${run_dir}/host-github-merge.err"; then
911
+ local merge_output=""
912
+ merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
913
+ if host_github_output_indicates_rate_limit "${merge_output}"; then
914
+ record_host_github_rate_limit "${merge_output}"
915
+ return 2
916
+ fi
917
+ if flow_github_pr_merge "$repo_slug" "$pr_number" "squash" "yes" 2>"${run_dir}/host-github-merge.err"; then
918
+ return 0
919
+ fi
920
+ merge_output="$(cat "${run_dir}/host-github-merge.err" 2>/dev/null || true)"
921
+ if host_github_output_indicates_rate_limit "${merge_output}"; then
922
+ record_host_github_rate_limit "${merge_output}"
923
+ return 2
924
+ fi
925
+ if [[ -n "${merge_output}" ]]; then
926
+ printf '%s\n' "${merge_output}" >&2
927
+ fi
928
+ return 1
929
+ fi
930
+
931
+ return 0
794
932
  }
795
933
 
796
934
  cleanup_pr_session() {
@@ -829,7 +967,7 @@ handle_linked_issue_merge_cleanup() {
829
967
  handle_updated_branch_result() {
830
968
  if [[ -z "$pr_worktree" || ! -d "$pr_worktree" ]]; then
831
969
  if pr_remote_already_has_final_head; then
832
- post_pr_comment_if_present
970
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
833
971
  require_transition "pr_clear_retry" pr_clear_retry
834
972
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
835
973
  cleanup_pr_session
@@ -895,7 +1033,7 @@ handle_updated_branch_result() {
895
1033
  fi
896
1034
 
897
1035
  push_pr_branch
898
- post_pr_comment_if_present
1036
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
899
1037
  require_transition "pr_clear_retry" pr_clear_retry
900
1038
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
901
1039
  cleanup_pr_session
@@ -936,7 +1074,7 @@ elif [[ "$pr_state" == "CLOSED" ]]; then
936
1074
  result_action="${result_action:-cleaned-closed-pr}"
937
1075
  notify_pr_reconciled
938
1076
  elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-passed" ]]; then
939
- if ! review_pass_action="$(review_pass_action_from_result_action "${result_action:-}" 2>/dev/null)"; then
1077
+ if ! review_pass_action="$(review_pass_action_from_result_action "${result_action:-}" 2>/dev/null)"; then
940
1078
  review_pass_action="$(pr_review_pass_action "$pr_number")"
941
1079
  fi
942
1080
  case "$review_pass_action" in
@@ -963,8 +1101,13 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "approved-local-review-
963
1101
  fi
964
1102
 
965
1103
  require_transition "pr_clear_retry" pr_clear_retry
966
- approve_and_merge
967
- pr_state="$(flow_github_pr_view_json "$repo_slug" "$pr_number" | jq -r '.state')"
1104
+ if ! approve_and_merge; then
1105
+ if [[ "${host_github_rate_limited}" == "yes" ]]; then
1106
+ handle_host_github_rate_limit_retry "github-api-rate-limit" "host-merge-rate-limit-retry"
1107
+ fi
1108
+ exit 1
1109
+ fi
1110
+ pr_state="MERGED"
968
1111
  if [[ "$pr_state" != "MERGED" ]]; then
969
1112
  echo "PR ${pr_number} did not merge successfully" >&2
970
1113
  exit 1
@@ -1018,7 +1161,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
1018
1161
  result_action="host-rejected-noop-promotion"
1019
1162
  else
1020
1163
  push_pr_branch
1021
- post_pr_comment_if_present
1164
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1022
1165
  require_transition "pr_clear_retry" pr_clear_retry
1023
1166
  require_transition "pr_after_updated_branch" pr_after_updated_branch "$pr_number"
1024
1167
  cleanup_pr_session
@@ -1041,7 +1184,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "no-change-needed" ]];
1041
1184
  result_action="host-rejected-no-change-needed"
1042
1185
  notify_pr_reconciled
1043
1186
  else
1044
- post_pr_comment_if_present
1187
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1045
1188
  require_transition "pr_clear_retry" pr_clear_retry
1046
1189
  require_transition "pr_after_succeeded" pr_after_succeeded "$pr_number"
1047
1190
  cleanup_pr_session
@@ -1062,7 +1205,7 @@ elif [[ "$status" == "SUCCEEDED" && "$result_outcome" == "blocked" ]]; then
1062
1205
  elif attempt_blocked_pr_host_verification_recovery; then
1063
1206
  handle_updated_branch_result
1064
1207
  else
1065
- post_pr_comment_if_present
1208
+ post_pr_comment_if_present || maybe_handle_host_github_rate_limit "github-api-rate-limit" "host-comment-rate-limit-retry"
1066
1209
  require_transition "pr_clear_retry" pr_clear_retry
1067
1210
  require_transition "pr_after_blocked" pr_after_blocked "$pr_number"
1068
1211
  cleanup_pr_session
@@ -1079,6 +1222,7 @@ elif [[ "$status" == "FAILED" ]]; then
1079
1222
  schedule_provider_quota_cooldown "${failure_reason:-worker-exit-failed}"
1080
1223
  require_transition "pr_schedule_retry" pr_schedule_retry "${failure_reason:-worker-exit-failed}"
1081
1224
  require_transition "pr_after_failed" pr_after_failed "$pr_number"
1225
+ cleanup_pr_session
1082
1226
  notify_pr_reconciled
1083
1227
  fi
1084
1228
 
@@ -345,24 +345,28 @@ PY
345
345
  local effective_pools=""
346
346
  healthy_pools="$(
347
347
  jq -r --argjson primaryThresh "${CODEX_QUOTA_THRESHOLD}" --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
348
- group_by(.accountId)
348
+ map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
349
349
  | map(select(
350
- ((.[0].usage.rate_limit.limit_reached // false) | not)
351
- and ((.[0].usage.rate_limit.primary_window.used_percent // 100) < $primaryThresh)
352
- and ((.[0].usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
353
- ))
350
+ (.poolKey != "")
351
+ and ((.usage.rate_limit.limit_reached // false) | not)
352
+ and ((.usage.rate_limit.primary_window.used_percent // 100) < $primaryThresh)
353
+ and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
354
+ ) | .poolKey)
355
+ | unique
354
356
  | length
355
357
  ' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
356
358
  )"
357
359
 
358
360
  rotation_pools="$(
359
361
  jq -r --argjson weeklyThresh "${CODEX_QUOTA_WEEKLY_THRESHOLD}" '
360
- group_by(.accountId)
362
+ map(. + {poolKey: (.label // .trackedLabel // .email // .accountId // "")})
361
363
  | map(select(
362
- ((.[0].usage.rate_limit.limit_reached // false) | not)
363
- and ((.[0].usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
364
- and ((.[0].planType // "") != "free")
365
- ))
364
+ (.poolKey != "")
365
+ and ((.usage.rate_limit.limit_reached // false) | not)
366
+ and ((.usage.rate_limit.secondary_window.used_percent // 100) < $weeklyThresh)
367
+ and ((.planType // "") != "free")
368
+ ) | .poolKey)
369
+ | unique
366
370
  | length
367
371
  ' "${CODEX_QUOTA_FULL_CACHE_FILE}" 2>/dev/null || true
368
372
  )"
@@ -471,6 +475,12 @@ fi
471
475
 
472
476
  run_codex_quota_preflight
473
477
 
478
+ # Sync skill files to runtime-home if source has changed since last sync.
479
+ # This ensures start-issue-worker.sh and other scripts are always up to date.
480
+ if [[ -x "${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" ]]; then
481
+ "${FLOW_TOOLS_DIR}/ensure-runtime-sync.sh" --quiet 2>/dev/null || true
482
+ fi
483
+
474
484
  acquire_lock
475
485
 
476
486
  reap_orphan_shared_loop_groups
@@ -20,6 +20,7 @@ backend=""
20
20
  model=""
21
21
  action=""
22
22
  reason=""
23
+ label=""
23
24
 
24
25
  case "$#" in
25
26
  1)
@@ -77,6 +78,33 @@ resolve_backend() {
77
78
  flow_config_get "${CONFIG_YAML}" "execution.coding_worker"
78
79
  }
79
80
 
81
+ resolve_codex_label() {
82
+ local configured_label="${ACP_ACTIVE_PROVIDER_LABEL:-${F_LOSNING_ACTIVE_PROVIDER_LABEL:-}}"
83
+ local codex_quota_bin=""
84
+ local active_label=""
85
+
86
+ if [[ -n "${configured_label}" ]]; then
87
+ printf '%s\n' "${configured_label}"
88
+ return 0
89
+ fi
90
+
91
+ if [[ -n "${ACP_CODEX_QUOTA_LABEL:-${F_LOSNING_CODEX_QUOTA_LABEL:-}}" ]]; then
92
+ printf '%s\n' "${ACP_CODEX_QUOTA_LABEL:-${F_LOSNING_CODEX_QUOTA_LABEL:-}}"
93
+ return 0
94
+ fi
95
+
96
+ codex_quota_bin="$(flow_resolve_codex_quota_bin "${SCRIPT_DIR}")"
97
+ if [[ -n "${codex_quota_bin}" && -x "${codex_quota_bin}" ]]; then
98
+ active_label="$("${codex_quota_bin}" codex list --json 2>/dev/null | jq -r '.activeInfo.trackedLabel // .activeInfo.activeLabel // empty' 2>/dev/null || true)"
99
+ if [[ -n "${active_label}" ]]; then
100
+ printf '%s\n' "${active_label}"
101
+ return 0
102
+ fi
103
+ fi
104
+
105
+ return 1
106
+ }
107
+
80
108
  resolve_model() {
81
109
  local resolved_backend="${1:?backend required}"
82
110
  local raw_model="${2:-}"
@@ -147,7 +175,16 @@ case "${action}" in
147
175
  ;;
148
176
  esac
149
177
 
150
- provider_key="$(flow_sanitize_provider_key "${backend}-${model}")"
178
+ if [[ "${backend}" == "codex" ]]; then
179
+ label="$(resolve_codex_label || true)"
180
+ fi
181
+
182
+ provider_key_source="${backend}-${model}"
183
+ if [[ "${backend}" == "codex" && -n "${label}" ]]; then
184
+ provider_key_source="${provider_key_source}-${label}"
185
+ fi
186
+
187
+ provider_key="$(flow_sanitize_provider_key "${provider_key_source}")"
151
188
  out="$(
152
189
  ACP_STATE_ROOT="${STATE_ROOT}" \
153
190
  ACP_PROVIDER_QUOTA_COOLDOWNS="${COOLDOWNS}" \
@@ -162,5 +199,6 @@ out="$(
162
199
 
163
200
  printf 'BACKEND=%s\n' "${backend}"
164
201
  printf 'MODEL=%s\n' "${model}"
202
+ printf 'LABEL=%s\n' "${label}"
165
203
  printf 'PROVIDER_KEY=%s\n' "${provider_key}"
166
204
  printf '%s\n' "${out}"
@@ -377,6 +377,30 @@ const recentPrs = recentNumbers.map((number) => {
377
377
  const activePrs = recentPrs.filter((pr) => pr.state === 'open' || pr.state === 'draft');
378
378
  const completedPrs = recentPrs.filter((pr) => pr.state !== 'open' && pr.state !== 'draft');
379
379
 
380
+ const recentCycleNotes = [];
381
+ for (const comment of [...(issue.comments || [])].reverse()) {
382
+ const body = String(comment?.body || '').trim();
383
+ if (!body) {
384
+ continue;
385
+ }
386
+ if (!/^(Completed|Blocked on|# Blocker:|Host-side publish blocked|Host-side publish failed)/im.test(body)) {
387
+ continue;
388
+ }
389
+ const summaryLines = body
390
+ .split(/\r?\n/)
391
+ .map((line) => line.trim())
392
+ .filter(Boolean)
393
+ .slice(0, 6);
394
+ if (summaryLines.length === 0) {
395
+ continue;
396
+ }
397
+ const summary = summaryLines.join(' | ');
398
+ recentCycleNotes.push(summary.length > 420 ? `${summary.slice(0, 417)}...` : summary);
399
+ if (recentCycleNotes.length >= 3) {
400
+ break;
401
+ }
402
+ }
403
+
380
404
  const formatPr = (pr) => {
381
405
  const suffix = pr.url ? ` ${pr.url}` : '';
382
406
  return `- #${pr.number} (${pr.state}): ${pr.title}${suffix}`;
@@ -389,6 +413,7 @@ const lines = [
389
413
  '- Before editing, choose exactly one concrete target module, screen, or flow and keep the cycle limited to that target.',
390
414
  '- Do not work on a target already covered by an open or draft PR for this issue, or by the most recent completed cycles listed below, unless you are explicitly fixing a regression introduced there.',
391
415
  '- If you cannot identify a small non-overlapping target after reviewing recent cycle history, stop blocked using the blocker contract instead of forcing another PR.',
416
+ '- Prefer the recent cycle notes below over repeating broad web research; only fetch outside context when the local baseline or linked advisories materially changed.',
392
417
  '- In your final worker output, start with `Target:` and `Why now:` lines before the changed-files list.',
393
418
  ];
394
419
 
@@ -406,6 +431,13 @@ if (completedPrs.length > 0) {
406
431
  }
407
432
  }
408
433
 
434
+ if (recentCycleNotes.length > 0) {
435
+ lines.push('', '### Recent cycle notes from issue comments');
436
+ for (const note of recentCycleNotes) {
437
+ lines.push(`- ${note}`);
438
+ }
439
+ }
440
+
409
441
  process.stdout.write(`${lines.join('\n')}\n`);
410
442
  EOF
411
443
  ISSUE_RECURRING_CONTEXT="$(cat "$ISSUE_RECURRING_CONTEXT_FILE")"
@@ -138,6 +138,33 @@ function renderAlerts(alerts) {
138
138
  `;
139
139
  }
140
140
 
141
+ function renderCodexRotation(rotation) {
142
+ if (!rotation || !rotation.active_label) {
143
+ return `<div class="empty-state">Codex rotation data is not available yet for this Codex profile.</div>`;
144
+ }
145
+ const candidates = (rotation.candidate_labels || []).length ? rotation.candidate_labels.join(", ") : "n/a";
146
+ const ready = (rotation.ready_candidates || []).length ? rotation.ready_candidates.join(", ") : "none";
147
+ const nextRetry = rotation.next_retry_at
148
+ ? `${rotation.next_retry_label || "n/a"} · ${relativeTime(rotation.next_retry_at)}<div class="muted">${rotation.next_retry_at}</div>`
149
+ : "n/a";
150
+ const lastSwitch = rotation.last_switch_label
151
+ ? `${rotation.last_switch_label}${rotation.last_switch_reason ? ` · ${rotation.last_switch_reason}` : ""}`
152
+ : "n/a";
153
+
154
+ return renderTable(
155
+ [
156
+ { label: "Current", render: () => `<div class="mono">${rotation.active_label}</div>` },
157
+ { label: "Decision", render: () => `<span class="status-pill ${statusClass(rotation.switch_decision || "unknown")}">${rotation.switch_decision || "unknown"}</span>` },
158
+ { label: "Candidates", render: () => `<div class="mono">${candidates}</div>` },
159
+ { label: "Ready now", render: () => `<div class="mono">${ready}</div>` },
160
+ { label: "Next retry", render: () => nextRetry },
161
+ { label: "Last switch", render: () => `<div class="mono">${lastSwitch}</div>` },
162
+ ],
163
+ [{}],
164
+ "No Codex rotation data for this profile.",
165
+ );
166
+ }
167
+
141
168
  function renderProfile(profile) {
142
169
  const providerBadges = [
143
170
  profile.coding_worker ? `<span class="badge good">${profile.coding_worker}</span>` : "",
@@ -153,6 +180,7 @@ function renderProfile(profile) {
153
180
  const summaryCards = [
154
181
  ["Run sessions", profile.counts.active_runs],
155
182
  ["Running", profile.counts.running_runs],
183
+ ["Recent completed", profile.counts.recent_history_runs || 0],
156
184
  ["Implemented", profile.counts.implemented_runs],
157
185
  ["Reported", profile.counts.reported_runs],
158
186
  ["Blocked", profile.counts.blocked_runs],
@@ -188,6 +216,19 @@ function renderProfile(profile) {
188
216
  "No active run directories for this profile.",
189
217
  );
190
218
 
219
+ const recentHistoryTable = renderTable(
220
+ [
221
+ { label: "Session", render: (row) => `<div class="mono">${row.session}</div>` },
222
+ { label: "Task", render: (row) => `${row.task_kind || "n/a"} ${row.task_id || ""}`.trim() },
223
+ { label: "Lifecycle", render: renderLifecycle },
224
+ { label: "Worker", key: "coding_worker" },
225
+ { label: "Result", render: renderResult },
226
+ { label: "Updated", render: (row) => row.updated_at ? `${relativeTime(row.updated_at)}<div class="muted">${row.updated_at}</div>` : "n/a" },
227
+ ],
228
+ profile.recent_history || [],
229
+ "No recently archived runs.",
230
+ );
231
+
191
232
  const controllerTable = renderTable(
192
233
  [
193
234
  { label: "Issue", key: "issue_id" },
@@ -214,6 +255,18 @@ function renderProfile(profile) {
214
255
  "No issue retries recorded.",
215
256
  );
216
257
 
258
+ const prRetryTable = renderTable(
259
+ [
260
+ { label: "PR", key: "pr_number" },
261
+ { label: "Status", render: (row) => `<span class="status-pill ${row.ready ? "" : "waiting-provider"}">${row.ready ? "ready" : "retrying"}</span>` },
262
+ { label: "Reason", render: (row) => row.last_reason || "n/a" },
263
+ { label: "Attempts", key: "attempts" },
264
+ { label: "Next attempt", render: (row) => row.next_attempt_at ? `${relativeTime(row.next_attempt_at)}<div class="muted">${row.next_attempt_at}</div>` : "n/a" },
265
+ ],
266
+ profile.pr_retries || [],
267
+ "No PR retries recorded.",
268
+ );
269
+
217
270
  const workerTable = renderTable(
218
271
  [
219
272
  { label: "Key", render: (row) => `<div class="mono">${row.key}</div>` },
@@ -261,6 +314,17 @@ function renderProfile(profile) {
261
314
  "No pending leased issues.",
262
315
  );
263
316
 
317
+ const codexRotationPanel =
318
+ profile.coding_worker === "codex"
319
+ ? `
320
+ <section class="panel">
321
+ <h3>Codex Rotation</h3>
322
+ <p class="panel-subtitle">Shows the active Codex label, candidate labels, and whether failover is ready or deferred.</p>
323
+ ${renderCodexRotation(profile.codex_rotation)}
324
+ </section>
325
+ `
326
+ : "";
327
+
264
328
  return `
265
329
  <article class="profile">
266
330
  <header class="profile-header">
@@ -285,15 +349,25 @@ function renderProfile(profile) {
285
349
  <p class="panel-subtitle">Lifecycle shows technical session completion. Result shows what the run achieved: implemented, reported, or blocked.</p>
286
350
  ${runsTable}
287
351
  </section>
352
+ <section class="panel">
353
+ <h3>Recent Completed Runs</h3>
354
+ <p class="panel-subtitle">Recently archived runs so they do not disappear from the dashboard immediately after completion.</p>
355
+ ${recentHistoryTable}
356
+ </section>
288
357
  <section class="panel">
289
358
  <h3>Resident Controllers</h3>
290
359
  <p class="panel-subtitle">Includes provider wait and failover telemetry. Stale controllers show a warning.</p>
291
360
  ${controllerTable}
292
361
  </section>
362
+ ${codexRotationPanel}
293
363
  <section class="panel half">
294
364
  <h3>Issue Retries</h3>
295
365
  ${retryTable}
296
366
  </section>
367
+ <section class="panel half">
368
+ <h3>PR Retries</h3>
369
+ ${prRetryTable}
370
+ </section>
297
371
  <section class="panel">
298
372
  <h3>Resident Worker Metadata</h3>
299
373
  ${workerTable}
@@ -143,6 +143,15 @@ def file_mtime_iso(path: Path) -> str:
143
143
  return datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
144
144
 
145
145
 
146
def read_json_file(path: Path) -> dict[str, Any]:
    """Load a JSON object from ``path``, falling back to ``{}``.

    Args:
        path: File expected to contain a JSON document.

    Returns:
        The decoded JSON object. An empty dict is returned when ``path`` is
        not a regular file, cannot be read or decoded, or decodes to anything
        other than a JSON object (list, string, number, ``null``), so callers
        can always rely on the annotated ``dict`` return type.
    """
    if not path.is_file():
        return {}
    try:
        payload = json.loads(path.read_text(encoding="utf-8", errors="replace"))
    except Exception:
        # Deliberately best-effort: an unreadable or malformed file is treated
        # the same as a missing one instead of propagating the error.
        return {}
    # json.loads happily returns lists/scalars; only objects honour the contract.
    return payload if isinstance(payload, dict) else {}
153
+
154
+
146
155
  def read_tail_text(path: Path, max_bytes: int = 65536) -> str:
147
156
  if not path.is_file():
148
157
  return ""
@@ -293,6 +302,64 @@ def collect_runs(runs_root: Path) -> list[dict[str, Any]]:
293
302
  return runs
294
303
 
295
304
 
305
def collect_recent_history(history_root: Path, limit: int = 8) -> list[dict[str, Any]]:
    """Summarise the most recently modified archived runs under ``history_root``.

    Each sub-directory is treated as one archived run and described from its
    ``run.env``, ``runner.env`` and ``result.env`` files. Directories are
    visited newest-first (by directory mtime) and only the first directory seen
    for a given session is reported.

    Args:
        history_root: Directory holding one sub-directory per archived run.
            ``Path(".")`` is what ``Path("")`` normalises to and is treated as
            "not configured" rather than as the current working directory.
        limit: Maximum number of rows to return. Values ``<= 0`` yield ``[]``.

    Returns:
        A list of at most ``limit`` dicts, newest first, each carrying the
        session name, task identity, lifecycle status, outcome/result
        classification, the run directory and any rate-limit alert.
    """
    if limit <= 0:
        return []
    # An unset root must not make the dashboard list whatever folders happen to
    # live in the process's working directory as "archived runs".
    if history_root == Path(".") or not history_root.is_dir():
        return []

    def _mtime(entry: Path) -> float:
        # Archive directories can be removed while we scan; sort those last
        # instead of aborting the whole snapshot.
        try:
            return entry.stat().st_mtime
        except OSError:
            return 0.0

    run_dirs = [entry for entry in history_root.iterdir() if entry.is_dir()]

    items: list[dict[str, Any]] = []
    seen_sessions: set[str] = set()
    for run_dir in sorted(run_dirs, key=_mtime, reverse=True):
        run_env = read_env_file(run_dir / "run.env")
        runner_env = read_env_file(run_dir / "runner.env")
        result_env = read_env_file(run_dir / "result.env")

        session = run_env.get("SESSION", "")
        if not session:
            # Fall back to the directory name minus its last two dash-separated
            # parts (presumably an archive timestamp suffix - confirm against
            # the code that names history directories).
            parts = run_dir.name.split("-")
            session = "-".join(parts[:-2]) if len(parts) > 2 else run_dir.name
        if session in seen_sessions:
            continue

        lifecycle_status = (runner_env.get("RUNNER_STATE", "") or "").strip().upper() or "UNKNOWN"
        outcome = result_env.get("OUTCOME", "")
        failure_reason = runner_env.get("LAST_FAILURE_REASON", "")
        result_kind, result_label = classify_run_result(lifecycle_status, outcome, failure_reason)

        item = {
            "session": session,
            "task_kind": run_env.get("TASK_KIND", ""),
            "task_id": run_env.get("TASK_ID", ""),
            "status": lifecycle_status,
            "lifecycle_status": lifecycle_status,
            "updated_at": result_env.get("UPDATED_AT", "") or runner_env.get("UPDATED_AT", "") or file_mtime_iso(run_dir),
            "coding_worker": run_env.get("CODING_WORKER", ""),
            "failure_reason": failure_reason,
            "outcome": outcome,
            "action": result_env.get("ACTION", ""),
            "result_kind": result_kind,
            "result_label": result_label,
            "run_dir": str(run_dir),
            "archived": True,
        }
        alert = extract_github_rate_limit_alert(run_dir, item)
        item["alerts"] = [alert] if alert else []

        items.append(item)
        seen_sessions.add(session)
        if len(items) >= limit:
            break
    return items
361
+
362
+
296
363
  def controller_is_stale(env: dict[str, str], controller_path: Path) -> bool:
297
364
  """A controller is stale if it claims to be running but its PID is dead or its
298
365
  UPDATED_AT file mtime is older than 10 minutes."""
@@ -408,6 +475,94 @@ def collect_provider_cooldowns(state_root: Path) -> list[dict[str, Any]]:
408
475
  return items
409
476
 
410
477
 
478
def collect_codex_rotation(profile: dict[str, str]) -> dict[str, Any]:
    """Describe the Codex account-rotation state for the dashboard.

    Combines three sources: the ``rotation-state.json`` and ``last-switch.env``
    files under ``<cache>/codex-quota-manager`` and the output of
    ``codex-quota codex list --json`` (binary taken from ``CODEX_QUOTA_BIN`` or
    ``TOOLS_BIN_DIR / "codex-quota"``). Every source is optional; malformed or
    missing data degrades to empty values instead of raising.

    Args:
        profile: Rendered profile environment; only ``EFFECTIVE_CODING_WORKER``
            is read.

    Returns:
        ``{}`` when the effective coding worker is not ``"codex"``. Otherwise a
        dict with the active label, the candidate labels (every other listed
        account), the candidates that are ready now, the soonest deferred
        candidate and its retry time, the last recorded switch, and a
        ``switch_decision`` of ``"ready-candidate"``, ``"deferred"``,
        ``"switched"``, ``"failed"`` or ``"unknown"``.
    """
    if profile.get("EFFECTIVE_CODING_WORKER", "") != "codex":
        return {}

    # Per the XDG spec an empty XDG_CACHE_HOME counts as unset; without this an
    # empty value would resolve the cache directory relative to the CWD.
    cache_home = os.environ.get("XDG_CACHE_HOME", "") or str(Path.home() / ".cache")
    cache_root = Path(cache_home) / "codex-quota-manager"
    state_file = cache_root / "rotation-state.json"
    switch_file = cache_root / "last-switch.env"

    state_json = read_json_file(state_file)
    state_accounts = state_json.get("accounts", {}) if isinstance(state_json, dict) else {}
    if not isinstance(state_accounts, dict):
        state_accounts = {}
    now_epoch = int(datetime.now(timezone.utc).timestamp())

    list_json: Any = {}
    quota_bin_override = os.environ.get("CODEX_QUOTA_BIN", "").strip()
    quota_bin = Path(quota_bin_override) if quota_bin_override else TOOLS_BIN_DIR / "codex-quota"
    if quota_bin.is_file():
        try:
            raw = subprocess.check_output(
                [str(quota_bin), "codex", "list", "--json"],
                cwd=str(ROOT_DIR),
                env=os.environ.copy(),
                text=True,
                stderr=subprocess.DEVNULL,
                timeout=20,
            )
            list_json = json.loads(raw)
        except Exception:
            # Best-effort: a failing, slow or non-JSON helper must not take the
            # dashboard snapshot down with it.
            list_json = {}

    active_label = ""
    candidate_labels: list[str] = []
    if isinstance(list_json, dict):
        active_info = list_json.get("activeInfo")
        if not isinstance(active_info, dict):
            active_info = {}
        active_label = str(active_info.get("trackedLabel") or active_info.get("activeLabel") or "")

        accounts = list_json.get("accounts")
        if not isinstance(accounts, list):
            accounts = []
        seen: set[str] = set()
        for account in accounts:
            if not isinstance(account, dict):
                continue  # tolerate junk entries instead of raising AttributeError
            label = str(account.get("label") or "").strip()
            if not label or label == active_label or label in seen:
                continue
            candidate_labels.append(label)
            seen.add(label)

    # Single pass: a non-removed candidate is either ready now or deferred; for
    # deferred ones remember the earliest retry time.
    next_retry_label = ""
    next_retry_epoch = 0
    ready_candidates: list[str] = []
    for label in candidate_labels:
        entry = state_accounts.get(label, {})
        if not isinstance(entry, dict):
            entry = {}
        if bool(entry.get("removed", False)):
            continue
        retry_epoch = safe_int(str(entry.get("next_retry_at", ""))) or 0
        if retry_epoch <= now_epoch:
            ready_candidates.append(label)
        elif next_retry_epoch == 0 or retry_epoch < next_retry_epoch:
            next_retry_epoch = retry_epoch
            next_retry_label = label

    last_switch = read_env_file(switch_file)
    if ready_candidates:
        switch_decision = "ready-candidate"
    elif next_retry_label:
        switch_decision = "deferred"
    elif last_switch.get("LAST_SWITCH_LABEL"):
        switch_decision = "switched"
    elif candidate_labels:
        switch_decision = "failed"
    else:
        switch_decision = "unknown"

    next_retry_at = ""
    if next_retry_epoch:
        try:
            next_retry_at = datetime.fromtimestamp(next_retry_epoch, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        except (OverflowError, OSError, ValueError):
            # Out-of-range epochs (e.g. milliseconds written by mistake) keep
            # the raw number in next_retry_epoch but get no formatted time.
            next_retry_at = ""

    return {
        "active_label": active_label,
        "candidate_labels": candidate_labels,
        "ready_candidates": ready_candidates,
        "next_retry_label": next_retry_label,
        "next_retry_epoch": next_retry_epoch,
        "next_retry_at": next_retry_at,
        "switch_decision": switch_decision,
        "last_switch_label": last_switch.get("LAST_SWITCH_LABEL", ""),
        "last_switch_reason": last_switch.get("LAST_SWITCH_REASON", ""),
        "last_switch_epoch": safe_int(last_switch.get("LAST_SWITCH_EPOCH")),
        "state_file": str(state_file),
    }
564
+
565
+
411
566
  def collect_scheduled_issues(state_root: Path) -> list[dict[str, Any]]:
412
567
  scheduled_root = state_root / "scheduled-issues"
413
568
  if not scheduled_root.is_dir():
@@ -455,6 +610,43 @@ def collect_issue_retries(state_root: Path) -> list[dict[str, Any]]:
455
610
  return items
456
611
 
457
612
 
613
def collect_pr_retries(state_root: Path) -> list[dict[str, Any]]:
    """List the PR retry records stored under ``state_root/retries/prs``.

    Every ``*.env`` file in that directory becomes one row, ordered by file
    mtime with the most recently modified file first. The file stem is
    reported as the PR number.

    Args:
        state_root: Root of the profile's state directory.

    Returns:
        One dict per retry file with the attempt count, the next attempt time
        (epoch and text), the last reason, an update timestamp and a ``ready``
        flag that is false only while the next attempt lies in the future.
        An empty list when the retries directory does not exist.
    """
    prs_dir = state_root / "retries" / "prs"
    if not prs_dir.is_dir():
        return []

    now_epoch = int(datetime.now(timezone.utc).timestamp())

    def _row(state_path: Path) -> dict[str, Any]:
        fields = read_env_file(state_path)
        due_epoch = safe_int(fields.get("NEXT_ATTEMPT_EPOCH"))
        still_waiting = bool(due_epoch and due_epoch > now_epoch)
        return {
            "pr_number": state_path.stem,
            "attempts": safe_int(fields.get("ATTEMPTS")) or 0,
            "next_attempt_epoch": due_epoch,
            "next_attempt_at": fields.get("NEXT_ATTEMPT_AT", ""),
            "last_reason": fields.get("LAST_REASON", ""),
            # Prefer the recorded timestamp; fall back to the file's mtime.
            "updated_at": fields.get("UPDATED_AT", "") or file_mtime_iso(state_path),
            "ready": not still_waiting,
            "state_file": str(state_path),
        }

    newest_first = sorted(
        prs_dir.glob("*.env"),
        key=lambda candidate: candidate.stat().st_mtime,
        reverse=True,
    )
    return [_row(state_path) for state_path in newest_first]
636
+
637
+
638
def resolve_history_root(render_env: dict[str, str], yaml_env: dict[str, str], runs_root: Path) -> Path:
    """Pick the directory that holds archived runs for a profile.

    Resolution order:
      1. ``EFFECTIVE_HISTORY_ROOT`` from ``render_env``, else
         ``runtime.history_root`` from ``yaml_env`` (first non-blank value
         after stripping whitespace), unless that value is ``"."``.
      2. A ``history`` directory next to ``runs_root`` when ``runs_root`` is
         itself named ``runs``.
      3. ``Path(".")`` as the "nothing configured" fallback.

    Args:
        render_env: Rendered (effective) profile environment.
        yaml_env: Flattened profile YAML settings.
        runs_root: Directory holding the active runs.

    Returns:
        The resolved history root; it is not checked for existence.
    """
    lookups = (
        (render_env, "EFFECTIVE_HISTORY_ROOT"),
        (yaml_env, "runtime.history_root"),
    )
    chosen = ""
    for source, key in lookups:
        chosen = source.get(key, "").strip()
        if chosen:
            # First non-blank setting wins, even if it turns out to be ".".
            break

    if chosen not in ("", "."):
        return Path(chosen)
    return runs_root.parent / "history" if runs_root.name == "runs" else Path(".")
648
+
649
+
458
650
  def collect_issue_queue(state_root: Path) -> dict[str, list[dict[str, Any]]]:
459
651
  queue_root = state_root / "resident-workers" / "issue-queue"
460
652
  pending_root = queue_root / "pending"
@@ -491,14 +683,18 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
491
683
 
492
684
  runs_root = Path(render_env.get("EFFECTIVE_RUNS_ROOT", ""))
493
685
  state_root = Path(render_env.get("EFFECTIVE_STATE_ROOT", ""))
686
+ history_root = resolve_history_root(render_env, yaml_env, runs_root)
494
687
  runs = collect_runs(runs_root)
688
+ recent_history = collect_recent_history(history_root)
495
689
  controllers = collect_resident_controllers(state_root)
496
690
  resident_workers = collect_resident_workers(state_root)
497
691
  cooldowns = collect_provider_cooldowns(state_root)
498
692
  scheduled = collect_scheduled_issues(state_root)
499
693
  retries = collect_issue_retries(state_root)
694
+ pr_retries = collect_pr_retries(state_root)
500
695
  queue = collect_issue_queue(state_root)
501
- alerts = [alert for run in runs for alert in run.get("alerts", [])]
696
+ alerts = [alert for run in (runs + recent_history) for alert in run.get("alerts", [])]
697
+ codex_rotation = collect_codex_rotation(render_env)
502
698
 
503
699
  return {
504
700
  "id": profile_id,
@@ -506,6 +702,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
506
702
  "repo_root": render_env.get("EFFECTIVE_REPO_ROOT", ""),
507
703
  "runs_root": str(runs_root),
508
704
  "state_root": str(state_root),
705
+ "history_root": str(history_root),
509
706
  "issue_prefix": yaml_env.get("session_naming.issue_prefix", ""),
510
707
  "pr_prefix": yaml_env.get("session_naming.pr_prefix", ""),
511
708
  "coding_worker": render_env.get("EFFECTIVE_CODING_WORKER", ""),
@@ -520,6 +717,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
520
717
  "last_reason": render_env.get("EFFECTIVE_PROVIDER_POOL_LAST_REASON", ""),
521
718
  "pools_exhausted": render_env.get("EFFECTIVE_PROVIDER_POOLS_EXHAUSTED", ""),
522
719
  },
720
+ "codex_rotation": codex_rotation,
523
721
  "counts": {
524
722
  "active_runs": len(runs),
525
723
  "running_runs": sum(1 for item in runs if item["status"] == "RUNNING"),
@@ -531,6 +729,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
531
729
  "completed_runs": sum(
532
730
  1 for item in runs if item["status"] == "SUCCEEDED" and item["result_kind"] not in {"implemented", "reported", "blocked"}
533
731
  ),
732
+ "recent_history_runs": len(recent_history),
534
733
  "resident_controllers": len(controllers),
535
734
  "live_resident_controllers": sum(1 for item in controllers if item["state"] != "stopped" and item["controller_live"]),
536
735
  "stale_resident_controllers": sum(1 for item in controllers if item.get("controller_stale", False)),
@@ -543,12 +742,14 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
543
742
  "alerts": len(alerts),
544
743
  },
545
744
  "runs": runs,
745
+ "recent_history": recent_history,
546
746
  "alerts": alerts,
547
747
  "resident_controllers": controllers,
548
748
  "resident_workers": resident_workers,
549
749
  "provider_cooldowns": cooldowns,
550
750
  "scheduled_issues": scheduled,
551
751
  "issue_retries": retries,
752
+ "pr_retries": pr_retries,
552
753
  "issue_queue": queue,
553
754
  }
554
755
 
@@ -410,12 +410,6 @@ for label in "${CANDIDATE_LABELS[@]}"; do
410
410
  continue
411
411
  fi
412
412
 
413
- retry_at="$(state_next_retry_at "$label")"
414
- if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
415
- note_candidate_retry "$label" "$retry_at"
416
- continue
417
- fi
418
-
419
413
  quota_output="$(load_account_quota_json "$label" 2>&1 || true)"
420
414
  if ! jq -e 'type == "array" and length > 0' >/dev/null 2>&1 <<<"$quota_output"; then
421
415
  if is_auth_401_output "$quota_output"; then
@@ -436,6 +430,14 @@ for label in "${CANDIDATE_LABELS[@]}"; do
436
430
  continue
437
431
  fi
438
432
 
433
+ retry_at="$(state_next_retry_at "$label")"
434
+ if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )) && account_is_eligible "$label" "$quota_output"; then
435
+ state_mark_ready "$label" "quota-revalidated" "$now_epoch"
436
+ elif [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
437
+ note_candidate_retry "$label" "$retry_at"
438
+ continue
439
+ fi
440
+
439
441
  if ! account_is_eligible "$label" "$quota_output"; then
440
442
  retry_at="$(account_retry_epoch "$label" "$quota_output")"
441
443
  if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env python3
2
- from __future__ import annotations
3
-
4
- import pathlib
5
- import sys
6
-
7
-
8
- ROOT = pathlib.Path(__file__).resolve().parents[1] / "dashboard"
9
- if str(ROOT) not in sys.path:
10
- sys.path.insert(0, str(ROOT))
11
-
12
- from dashboard_snapshot import main
13
-
14
-
15
- if __name__ == "__main__":
16
- raise SystemExit(main())
@@ -1,109 +0,0 @@
1
- # Task
2
-
3
- Implement issue #{ISSUE_ID} in `{REPO_SLUG}`.
4
-
5
- # Issue Context
6
-
7
- - Title: {ISSUE_TITLE}
8
- - URL: {ISSUE_URL}
9
- - Auto-merge requested: {ISSUE_AUTOMERGE}
10
-
11
- {ISSUE_BODY}
12
- {ISSUE_RECURRING_CONTEXT}
13
- {ISSUE_BLOCKER_CONTEXT}
14
-
15
- # MANDATORY WORKFLOW (follow in order, no skipping)
16
-
17
- You MUST complete ALL 5 phases in order. Do not skip any phase. Do not commit until Phase 4 passes.
18
-
19
- ## Phase 1: READ & SCOPE
20
-
21
- - Read the repo instructions: `AGENTS.md`, relevant spec or design docs, and any repo conventions tied to this issue.
22
- - Identify the single primary product surface you will touch.
23
- - If the issue spans multiple surfaces, pick ONE and create follow-up issues for the rest using:
24
- ```bash
25
- bash "$ACP_FLOW_TOOLS_DIR/create-follow-up-issue.sh" --parent {ISSUE_ID} --title "..." --body-file /tmp/follow-up.md
26
- ```
27
- - Treat a broad umbrella issue as a coordination brief rather than permission to ship every slice in one PR.
28
- - Write down your scope decision before coding.
29
-
30
- ## Phase 2: IMPLEMENT
31
-
32
- - Make the smallest root-cause fix that satisfies the issue.
33
- - Work only inside the dedicated worktree.
34
- - Add or update tests when feasible.
35
- - STOP after implementation. Do not commit yet.
36
-
37
- ## Phase 3: VERIFY (MANDATORY)
38
-
39
- After implementing, you MUST run verification commands and record each one.
40
- Every successful command must be recorded or the host publish will fail.
41
- After each successful verification command, record it with `record-verification.sh`.
42
-
43
- ```bash
44
- {ISSUE_VERIFICATION_COMMAND_SNIPPET}
45
- ```
46
-
47
- Required verification coverage:
48
- - Run the narrowest repo-supported `typecheck`, `build`, `test`, or `lint` command that proves the touched surface is safe.
49
- - If you changed tests only, run the most relevant targeted test command and record it.
50
- - If you changed localization resources or user-facing copy, run repo locale validation or hardcoded-copy scans if the repo provides them.
51
- - If a verification command fails, fix the issue and rerun until it passes.
52
-
53
- CRITICAL: `verification.jsonl` must exist in `$ACP_RUN_DIR` with at least one `pass` entry before you can write `OUTCOME=implemented`.
54
-
55
- ## Phase 4: SELF-REVIEW (MANDATORY)
56
-
57
- Before committing, perform this checklist:
58
-
59
- - [ ] Run `git diff --check`.
60
- - [ ] Count non-test product files: if the change is broad, stop and split scope instead of publishing one large PR.
61
- - [ ] If you touched auth, login, session, or reset flows, verify existing users and legacy data still behave correctly.
62
- - [ ] If you touched public endpoints, public routes, or operator workflows, search downstream consumers in `scripts/`, `docs/`, and specs.
63
- - [ ] If you changed localization resources or user-facing copy, confirm localization coverage and scanning are still valid.
64
- - [ ] If you touched mobile routes or screens, keep route scope narrow and verify loading, empty, and error states.
65
-
66
- Before committing, verify the journal exists:
67
- ```bash
68
- test -s "$ACP_RUN_DIR/verification.jsonl" && echo "OK: verification.jsonl exists" || echo "BLOCKED: missing verification.jsonl"
69
- ```
70
-
71
- ## Phase 5: COMMIT & REPORT
72
-
73
- - Commit with a conventional commit message.
74
- - Do NOT push or open a PR; the host handles that.
75
- - Write `$ACP_RESULT_FILE`:
76
- ```bash
77
- cat > "$ACP_RESULT_FILE" <<'OUTER_EOF'
78
- OUTCOME=implemented
79
- ACTION=host-publish-issue-pr
80
- ISSUE_ID={ISSUE_ID}
81
- OUTER_EOF
82
- ```
83
- - In your final output, include the changed files, verification commands actually run, and one short self-review note naming the main regression risk you checked.
84
-
85
- # STOP CONDITIONS
86
-
87
- Stop and report blocked if:
88
- - The issue is ambiguous, blocked by missing credentials, or expands into high-risk scope.
89
- - You cannot complete verification successfully.
90
- - The issue needs full decomposition into focused follow-up issues.
91
-
92
- If you stop and report blocked, write `$ACP_RUN_DIR/issue-comment.md` with a blocker summary, then write the result file:
93
- ```bash
94
- cat > "$ACP_RESULT_FILE" <<'OUTER_EOF'
95
- OUTCOME=blocked
96
- ACTION=host-comment-blocker
97
- ISSUE_ID={ISSUE_ID}
98
- OUTER_EOF
99
- ```
100
-
101
- If fully decomposed into follow-up issues, start the first line of `issue-comment.md` with exactly:
102
- `Superseded by focused follow-up issues: #...`
103
-
104
- # Git Rules
105
-
106
- - Do NOT push the branch from inside the worker.
107
- - Do NOT open a PR from inside the worker.
108
- - Do NOT comment on the source issue with a PR URL from inside the worker.
109
- - Exit successfully after writing the result file.