agent-control-plane 0.1.9 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/heartbeat-hooks.sh +97 -8
- package/package.json +8 -2
- package/references/commands.md +1 -0
- package/tools/bin/agent-project-cleanup-session +133 -0
- package/tools/bin/agent-project-publish-issue-pr +178 -62
- package/tools/bin/agent-project-reconcile-issue-session +171 -3
- package/tools/bin/agent-project-run-codex-resilient +121 -16
- package/tools/bin/agent-project-run-codex-session +60 -10
- package/tools/bin/agent-project-run-openclaw-session +82 -8
- package/tools/bin/cleanup-worktree.sh +4 -1
- package/tools/bin/dashboard-launchd-bootstrap.sh +16 -4
- package/tools/bin/ensure-runtime-sync.sh +182 -0
- package/tools/bin/flow-config-lib.sh +76 -30
- package/tools/bin/flow-resident-worker-lib.sh +28 -2
- package/tools/bin/flow-shell-lib.sh +15 -1
- package/tools/bin/heartbeat-safe-auto.sh +32 -0
- package/tools/bin/issue-publish-localization-guard.sh +142 -0
- package/tools/bin/project-launchd-bootstrap.sh +17 -4
- package/tools/bin/project-runtime-supervisor.sh +7 -1
- package/tools/bin/project-runtimectl.sh +78 -15
- package/tools/bin/reuse-issue-worktree.sh +46 -0
- package/tools/bin/start-issue-worker.sh +76 -6
- package/tools/bin/start-resident-issue-loop.sh +1 -0
- package/tools/bin/sync-shared-agent-home.sh +26 -0
- package/tools/bin/test-smoke.sh +6 -1
- package/tools/dashboard/app.js +71 -1
- package/tools/dashboard/dashboard_snapshot.py +74 -0
- package/tools/dashboard/styles.css +43 -0
- package/tools/templates/issue-prompt-template.md +18 -66
- package/tools/templates/legacy/issue-prompt-template-pre-slim.md +109 -0
- package/bin/audit-issue-routing.sh +0 -74
- package/tools/bin/audit-agent-worktrees.sh +0 -310
- package/tools/bin/audit-issue-routing.sh +0 -11
- package/tools/bin/audit-retained-layout.sh +0 -58
- package/tools/bin/audit-retained-overlap.sh +0 -135
- package/tools/bin/audit-retained-worktrees.sh +0 -228
- package/tools/bin/check-skill-contracts.sh +0 -324
|
@@ -18,6 +18,7 @@ Options:
|
|
|
18
18
|
--max-resume-attempts <count> Maximum resume attempts after interruption
|
|
19
19
|
--auth-refresh-timeout-seconds <secs> How long to wait for refreshed auth before failing
|
|
20
20
|
--auth-refresh-poll-seconds <secs> Poll interval while waiting for refreshed auth
|
|
21
|
+
--stall-seconds <secs> Fail if Codex stops producing output for too long
|
|
21
22
|
--help Show this help
|
|
22
23
|
EOF
|
|
23
24
|
}
|
|
@@ -35,6 +36,8 @@ max_resume_attempts="${ACP_CODEX_MAX_RESUME_ATTEMPTS:-${F_LOSNING_CODEX_MAX_RESU
|
|
|
35
36
|
auth_refresh_timeout_seconds="${ACP_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_TIMEOUT_SECONDS:-900}}"
|
|
36
37
|
auth_refresh_poll_seconds="${ACP_CODEX_AUTH_REFRESH_POLL_SECONDS:-${F_LOSNING_CODEX_AUTH_REFRESH_POLL_SECONDS:-10}}"
|
|
37
38
|
max_quota_autoswitch_attempts="${ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-${F_LOSNING_CODEX_MAX_AUTOSWITCH_ATTEMPTS:-1}}"
|
|
39
|
+
codex_progress_heartbeat_seconds="${ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS:-${F_LOSNING_CODEX_PROGRESS_HEARTBEAT_SECONDS:-30}}"
|
|
40
|
+
codex_stall_seconds="${ACP_CODEX_STALL_SECONDS:-${F_LOSNING_CODEX_STALL_SECONDS:-300}}"
|
|
38
41
|
|
|
39
42
|
while [[ $# -gt 0 ]]; do
|
|
40
43
|
case "$1" in
|
|
@@ -50,6 +53,7 @@ while [[ $# -gt 0 ]]; do
|
|
|
50
53
|
--max-resume-attempts) max_resume_attempts="${2:-}"; shift 2 ;;
|
|
51
54
|
--auth-refresh-timeout-seconds) auth_refresh_timeout_seconds="${2:-}"; shift 2 ;;
|
|
52
55
|
--auth-refresh-poll-seconds) auth_refresh_poll_seconds="${2:-}"; shift 2 ;;
|
|
56
|
+
--stall-seconds) codex_stall_seconds="${2:-}"; shift 2 ;;
|
|
53
57
|
--help|-h) usage; exit 0 ;;
|
|
54
58
|
*) echo "Unknown argument: $1" >&2; usage >&2; exit 1 ;;
|
|
55
59
|
esac
|
|
@@ -80,6 +84,13 @@ esac
|
|
|
80
84
|
case "$max_quota_autoswitch_attempts" in
|
|
81
85
|
''|*[!0-9]*) echo "ACP_CODEX_MAX_AUTOSWITCH_ATTEMPTS must be numeric" >&2; exit 1 ;;
|
|
82
86
|
esac
|
|
87
|
+
case "$codex_progress_heartbeat_seconds" in
|
|
88
|
+
''|*[!0-9]*) echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be numeric" >&2; exit 1 ;;
|
|
89
|
+
0) echo "ACP_CODEX_PROGRESS_HEARTBEAT_SECONDS must be greater than zero" >&2; exit 1 ;;
|
|
90
|
+
esac
|
|
91
|
+
case "$codex_stall_seconds" in
|
|
92
|
+
''|*[!0-9]*) echo "ACP_CODEX_STALL_SECONDS must be numeric" >&2; exit 1 ;;
|
|
93
|
+
esac
|
|
83
94
|
|
|
84
95
|
FLOW_SKILL_DIR="$(resolve_flow_skill_dir "${BASH_SOURCE[0]}")"
|
|
85
96
|
state_file="${host_run_dir}/runner.env"
|
|
@@ -116,6 +127,7 @@ last_quota_switch_status=""
|
|
|
116
127
|
last_quota_next_retry_at=""
|
|
117
128
|
last_quota_selected_label=""
|
|
118
129
|
quota_autoswitch_attempt_count=0
|
|
130
|
+
last_attempt_started_epoch=0
|
|
119
131
|
|
|
120
132
|
mkdir -p "$host_run_dir"
|
|
121
133
|
touch "$output_file"
|
|
@@ -148,6 +160,19 @@ write_state() {
|
|
|
148
160
|
mv "$tmp_file" "$state_file"
|
|
149
161
|
}
|
|
150
162
|
|
|
163
|
+
run_codex_command() {
|
|
164
|
+
# Nested workers must not inherit a parent thread id; the wrapper persists the child thread explicitly.
|
|
165
|
+
env -u CODEX_THREAD_ID "$codex_bin" "$@"
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
codex_recovery_target() {
|
|
169
|
+
if [[ -n "$thread_id" ]]; then
|
|
170
|
+
printf 'thread %s' "$thread_id"
|
|
171
|
+
return 0
|
|
172
|
+
fi
|
|
173
|
+
printf 'initial Codex exec'
|
|
174
|
+
}
|
|
175
|
+
|
|
151
176
|
run_with_timeout() {
|
|
152
177
|
local timeout_seconds="${1:?timeout seconds required}"
|
|
153
178
|
shift
|
|
@@ -398,13 +423,43 @@ persist_thread_id_from_line() {
|
|
|
398
423
|
fi
|
|
399
424
|
}
|
|
400
425
|
|
|
426
|
+
terminate_codex_producer_tree() {
|
|
427
|
+
local pid="${1:?pid required}"
|
|
428
|
+
local deadline=""
|
|
429
|
+
|
|
430
|
+
if ! kill -0 "$pid" 2>/dev/null; then
|
|
431
|
+
return 0
|
|
432
|
+
fi
|
|
433
|
+
|
|
434
|
+
pkill -TERM -P "$pid" 2>/dev/null || true
|
|
435
|
+
kill "$pid" 2>/dev/null || true
|
|
436
|
+
|
|
437
|
+
deadline=$(( $(date +%s) + 2 ))
|
|
438
|
+
while kill -0 "$pid" 2>/dev/null; do
|
|
439
|
+
if (( $(date +%s) >= deadline )); then
|
|
440
|
+
break
|
|
441
|
+
fi
|
|
442
|
+
sleep 0.1
|
|
443
|
+
done
|
|
444
|
+
|
|
445
|
+
if kill -0 "$pid" 2>/dev/null; then
|
|
446
|
+
pkill -KILL -P "$pid" 2>/dev/null || true
|
|
447
|
+
kill -9 "$pid" 2>/dev/null || true
|
|
448
|
+
fi
|
|
449
|
+
}
|
|
450
|
+
|
|
401
451
|
stream_codex_exec() {
|
|
402
452
|
local phase="${1:?phase required}"
|
|
403
453
|
local stream_fifo=""
|
|
404
454
|
local producer_pid=""
|
|
455
|
+
local heartbeat_pid=""
|
|
456
|
+
local progress_file=""
|
|
405
457
|
local line=""
|
|
406
458
|
|
|
407
459
|
last_attempt_start_size="$(stat -f %z "$output_file" 2>/dev/null || printf '0')"
|
|
460
|
+
last_attempt_started_epoch="$(date +%s)"
|
|
461
|
+
progress_file="${host_run_dir}/.codex-progress.$$"
|
|
462
|
+
rm -f "$progress_file"
|
|
408
463
|
stream_fifo="$(mktemp -u "${TMPDIR:-/tmp}/codex-stream.XXXXXX")"
|
|
409
464
|
mkfifo "$stream_fifo"
|
|
410
465
|
|
|
@@ -413,10 +468,10 @@ stream_codex_exec() {
|
|
|
413
468
|
(
|
|
414
469
|
case "$mode" in
|
|
415
470
|
safe)
|
|
416
|
-
|
|
471
|
+
run_codex_command exec --json --profile "$safe_profile" --full-auto <"$prompt_file"
|
|
417
472
|
;;
|
|
418
473
|
bypass)
|
|
419
|
-
|
|
474
|
+
run_codex_command exec --json --profile "$bypass_profile" --dangerously-bypass-approvals-and-sandbox <"$prompt_file"
|
|
420
475
|
;;
|
|
421
476
|
esac
|
|
422
477
|
) >"$stream_fifo" 2>&1 &
|
|
@@ -425,10 +480,10 @@ stream_codex_exec() {
|
|
|
425
480
|
(
|
|
426
481
|
case "$mode" in
|
|
427
482
|
safe)
|
|
428
|
-
resume_prompt |
|
|
483
|
+
resume_prompt | run_codex_command exec resume --json --full-auto "$thread_id" -
|
|
429
484
|
;;
|
|
430
485
|
bypass)
|
|
431
|
-
resume_prompt |
|
|
486
|
+
resume_prompt | run_codex_command exec resume --json --dangerously-bypass-approvals-and-sandbox "$thread_id" -
|
|
432
487
|
;;
|
|
433
488
|
esac
|
|
434
489
|
) >"$stream_fifo" 2>&1 &
|
|
@@ -441,12 +496,55 @@ stream_codex_exec() {
|
|
|
441
496
|
esac
|
|
442
497
|
|
|
443
498
|
producer_pid="$!"
|
|
499
|
+
(
|
|
500
|
+
local now elapsed last_progress_epoch idle_for
|
|
501
|
+
while kill -0 "$producer_pid" 2>/dev/null; do
|
|
502
|
+
sleep "$codex_progress_heartbeat_seconds"
|
|
503
|
+
if ! kill -0 "$producer_pid" 2>/dev/null; then
|
|
504
|
+
break
|
|
505
|
+
fi
|
|
506
|
+
now="$(date +%s)"
|
|
507
|
+
elapsed=$((now - last_attempt_started_epoch))
|
|
508
|
+
if (( codex_stall_seconds > 0 )); then
|
|
509
|
+
if [[ ! -f "$progress_file" ]]; then
|
|
510
|
+
if (( elapsed >= codex_stall_seconds )); then
|
|
511
|
+
write_state "running" ""
|
|
512
|
+
log_runner "stale-run no-codex-output-before-stall-threshold elapsed=${elapsed}s"
|
|
513
|
+
terminate_codex_producer_tree "$producer_pid"
|
|
514
|
+
break
|
|
515
|
+
fi
|
|
516
|
+
else
|
|
517
|
+
last_progress_epoch="$(stat -f %m "$progress_file" 2>/dev/null || printf '0')"
|
|
518
|
+
if [[ -n "$last_progress_epoch" && "$last_progress_epoch" != "0" ]]; then
|
|
519
|
+
idle_for=$((now - last_progress_epoch))
|
|
520
|
+
if (( idle_for >= codex_stall_seconds )); then
|
|
521
|
+
write_state "running" ""
|
|
522
|
+
log_runner "stale-run no-codex-progress-before-stall-threshold elapsed=${elapsed}s idle=${idle_for}s"
|
|
523
|
+
terminate_codex_producer_tree "$producer_pid"
|
|
524
|
+
break
|
|
525
|
+
fi
|
|
526
|
+
fi
|
|
527
|
+
fi
|
|
528
|
+
fi
|
|
529
|
+
write_state "running" ""
|
|
530
|
+
log_runner "heartbeat waiting-for-codex-output elapsed=${elapsed}s"
|
|
531
|
+
done
|
|
532
|
+
) &
|
|
533
|
+
heartbeat_pid="$!"
|
|
534
|
+
|
|
444
535
|
while IFS= read -r line || [[ -n "$line" ]]; do
|
|
445
536
|
printf '%s\n' "$line" | tee -a "$output_file"
|
|
537
|
+
touch "$progress_file" 2>/dev/null || true
|
|
446
538
|
persist_thread_id_from_line "$line"
|
|
447
539
|
done <"$stream_fifo"
|
|
448
540
|
|
|
541
|
+
if [[ -n "$heartbeat_pid" ]] && kill -0 "$heartbeat_pid" 2>/dev/null; then
|
|
542
|
+
kill "$heartbeat_pid" 2>/dev/null || true
|
|
543
|
+
wait "$heartbeat_pid" 2>/dev/null || true
|
|
544
|
+
fi
|
|
545
|
+
|
|
449
546
|
rm -f "$stream_fifo"
|
|
547
|
+
rm -f "$progress_file"
|
|
450
548
|
|
|
451
549
|
if wait "$producer_pid"; then
|
|
452
550
|
last_exit_code="0"
|
|
@@ -485,6 +583,16 @@ classify_failure_reason() {
|
|
|
485
583
|
|
|
486
584
|
recent_chunk="$(tail -n 120 <<<"$chunk")"
|
|
487
585
|
|
|
586
|
+
if grep -Eiq 'stale-run no-codex-output-before-stall-threshold|no-codex-output-before-stall-threshold' <<<"$recent_chunk"; then
|
|
587
|
+
printf 'no-codex-output-before-stall-threshold\n'
|
|
588
|
+
return 0
|
|
589
|
+
fi
|
|
590
|
+
|
|
591
|
+
if grep -Eiq 'stale-run no-codex-progress-before-stall-threshold|no-codex-progress-before-stall-threshold' <<<"$recent_chunk"; then
|
|
592
|
+
printf 'no-codex-progress-before-stall-threshold\n'
|
|
593
|
+
return 0
|
|
594
|
+
fi
|
|
595
|
+
|
|
488
596
|
if grep -Eiq "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" <<<"$recent_chunk"; then
|
|
489
597
|
printf 'usage-limit\n'
|
|
490
598
|
return 0
|
|
@@ -523,7 +631,7 @@ EOF
|
|
|
523
631
|
}
|
|
524
632
|
|
|
525
633
|
codex_login_healthy() {
|
|
526
|
-
|
|
634
|
+
run_codex_command login status >/dev/null 2>&1
|
|
527
635
|
}
|
|
528
636
|
|
|
529
637
|
wait_for_auth_refresh() {
|
|
@@ -533,7 +641,9 @@ wait_for_auth_refresh() {
|
|
|
533
641
|
local baseline_switch_signature="${4:-}"
|
|
534
642
|
local deadline now current_fingerprint current_quota_label current_switch_signature
|
|
535
643
|
local sleep_seconds
|
|
644
|
+
local recovery_target
|
|
536
645
|
|
|
646
|
+
recovery_target="$(codex_recovery_target)"
|
|
537
647
|
auth_wait_started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
538
648
|
last_trigger_reason="$trigger_reason"
|
|
539
649
|
write_state "waiting-auth-refresh" "$trigger_reason"
|
|
@@ -548,28 +658,28 @@ wait_for_auth_refresh() {
|
|
|
548
658
|
current_switch_signature="$(quota_switch_signature)"
|
|
549
659
|
if codex_login_healthy; then
|
|
550
660
|
if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
|
|
551
|
-
log_runner "detected refreshed Codex auth after quota interruption; resuming
|
|
661
|
+
log_runner "detected refreshed Codex auth after quota interruption; resuming ${recovery_target}"
|
|
552
662
|
auth_wait_started_at=""
|
|
553
663
|
write_state "running" ""
|
|
554
664
|
return 0
|
|
555
665
|
fi
|
|
556
666
|
|
|
557
667
|
if [[ -n "$baseline_quota_label" && -n "$current_quota_label" && "$current_quota_label" != "$baseline_quota_label" ]]; then
|
|
558
|
-
log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming
|
|
668
|
+
log_runner "detected rotated Codex quota account (${baseline_quota_label} -> ${current_quota_label}); resuming ${recovery_target}"
|
|
559
669
|
auth_wait_started_at=""
|
|
560
670
|
write_state "running" ""
|
|
561
671
|
return 0
|
|
562
672
|
fi
|
|
563
673
|
|
|
564
674
|
if [[ -n "$baseline_switch_signature" && -n "$current_switch_signature" && "$current_switch_signature" != "$baseline_switch_signature" ]]; then
|
|
565
|
-
log_runner "detected quota switch state refresh; resuming
|
|
675
|
+
log_runner "detected quota switch state refresh; resuming ${recovery_target}"
|
|
566
676
|
auth_wait_started_at=""
|
|
567
677
|
write_state "running" ""
|
|
568
678
|
return 0
|
|
569
679
|
fi
|
|
570
680
|
|
|
571
681
|
if [[ "$last_quota_switch_status" == "switched" && -n "$current_quota_label" ]]; then
|
|
572
|
-
log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming
|
|
682
|
+
log_runner "quota manager reports healthy Codex account ${current_quota_label}; resuming ${recovery_target}"
|
|
573
683
|
auth_wait_started_at=""
|
|
574
684
|
write_state "running" ""
|
|
575
685
|
return 0
|
|
@@ -580,9 +690,9 @@ wait_for_auth_refresh() {
|
|
|
580
690
|
*)
|
|
581
691
|
if codex_login_healthy; then
|
|
582
692
|
if [[ "$current_fingerprint" != "$baseline_fingerprint" ]]; then
|
|
583
|
-
log_runner "detected refreshed Codex auth; resuming
|
|
693
|
+
log_runner "detected refreshed Codex auth; resuming ${recovery_target}"
|
|
584
694
|
else
|
|
585
|
-
log_runner "Codex auth is healthy again; resuming
|
|
695
|
+
log_runner "Codex auth is healthy again; resuming ${recovery_target}"
|
|
586
696
|
fi
|
|
587
697
|
auth_wait_started_at=""
|
|
588
698
|
write_state "running" ""
|
|
@@ -643,11 +753,6 @@ attempt_run() {
|
|
|
643
753
|
reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
|
|
644
754
|
last_failure_reason="${reason:-worker-exit-failed}"
|
|
645
755
|
|
|
646
|
-
if [[ -z "$thread_id" ]]; then
|
|
647
|
-
write_state "failed" "$last_failure_reason"
|
|
648
|
-
return 1
|
|
649
|
-
fi
|
|
650
|
-
|
|
651
756
|
case "$last_failure_reason" in
|
|
652
757
|
usage-limit|auth-failure|auth-401|account-banned)
|
|
653
758
|
if (( resume_count >= max_resume_attempts )); then
|
|
@@ -24,6 +24,7 @@ EOF
|
|
|
24
24
|
mode=""
|
|
25
25
|
session=""
|
|
26
26
|
worktree=""
|
|
27
|
+
worktree_realpath=""
|
|
27
28
|
prompt_file=""
|
|
28
29
|
runs_root=""
|
|
29
30
|
adapter_id=""
|
|
@@ -105,6 +106,12 @@ if [[ -z "$mode" || -z "$session" || -z "$worktree" || -z "$prompt_file" || -z "
|
|
|
105
106
|
exit 1
|
|
106
107
|
fi
|
|
107
108
|
|
|
109
|
+
worktree_realpath="$(cd "$worktree" 2>/dev/null && pwd -P || true)"
|
|
110
|
+
if [[ -z "$worktree_realpath" || ! -d "$worktree_realpath" ]]; then
|
|
111
|
+
echo "unable to resolve worktree realpath: $worktree" >&2
|
|
112
|
+
exit 1
|
|
113
|
+
fi
|
|
114
|
+
|
|
108
115
|
case "$mode" in
|
|
109
116
|
safe|bypass) ;;
|
|
110
117
|
*)
|
|
@@ -119,7 +126,7 @@ inner_script="${artifact_dir}/${session}.sh"
|
|
|
119
126
|
meta_file="${artifact_dir}/run.env"
|
|
120
127
|
result_file="${artifact_dir}/result.env"
|
|
121
128
|
runner_state_file="${artifact_dir}/runner.env"
|
|
122
|
-
sandbox_run_dir="${
|
|
129
|
+
sandbox_run_dir="${worktree_realpath%/}/${sandbox_subdir}/${session}"
|
|
123
130
|
started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
124
131
|
codex_bin="$(resolve_codex_bin || true)"
|
|
125
132
|
runner_bin="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/agent-project-run-codex-resilient"
|
|
@@ -137,13 +144,14 @@ if [[ -z "$codex_bin" || ! -x "$codex_bin" ]]; then
|
|
|
137
144
|
exit 1
|
|
138
145
|
fi
|
|
139
146
|
|
|
140
|
-
branch_name="$(git -C "$
|
|
147
|
+
branch_name="$(git -C "$worktree_realpath" branch --show-current 2>/dev/null || true)"
|
|
141
148
|
|
|
142
149
|
printf -v session_q '%q' "$session"
|
|
143
150
|
printf -v task_kind_q '%q' "$task_kind"
|
|
144
151
|
printf -v task_id_q '%q' "$task_id"
|
|
145
152
|
printf -v mode_q '%q' "$mode"
|
|
146
153
|
printf -v worktree_q '%q' "$worktree"
|
|
154
|
+
printf -v worktree_realpath_q '%q' "$worktree_realpath"
|
|
147
155
|
printf -v prompt_q '%q' "$prompt_file"
|
|
148
156
|
printf -v output_q '%q' "$output_file"
|
|
149
157
|
printf -v artifact_dir_q '%q' "$artifact_dir"
|
|
@@ -166,6 +174,7 @@ printf -v bypass_profile_q '%q' "$bypass_profile"
|
|
|
166
174
|
printf 'SESSION=%s\n' "$session_q"
|
|
167
175
|
printf 'MODE=%s\n' "$mode_q"
|
|
168
176
|
printf 'WORKTREE=%s\n' "$worktree_q"
|
|
177
|
+
printf 'WORKTREE_REALPATH=%s\n' "$worktree_realpath_q"
|
|
169
178
|
printf 'PROMPT_FILE=%s\n' "$prompt_q"
|
|
170
179
|
printf 'OUTPUT_FILE=%s\n' "$output_q"
|
|
171
180
|
printf 'SCRIPT=%s\n' "$script_q"
|
|
@@ -264,19 +273,59 @@ cat >"$inner_script" <<EOF
|
|
|
264
273
|
#!/usr/bin/env bash
|
|
265
274
|
set -euo pipefail
|
|
266
275
|
${runtime_exports}
|
|
267
|
-
${context_exports}cd ${
|
|
276
|
+
${context_exports}cd ${worktree_realpath_q}
|
|
268
277
|
reset_sandbox_run_dir() {
|
|
269
278
|
mkdir -p ${sandbox_run_dir_q}
|
|
270
279
|
find ${sandbox_run_dir_q} -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
|
|
271
280
|
}
|
|
281
|
+
find_logged_artifact_path() {
|
|
282
|
+
local artifact_name="\${1:?artifact name required}"
|
|
283
|
+
local candidate=""
|
|
284
|
+
|
|
285
|
+
while IFS= read -r candidate; do
|
|
286
|
+
[[ -n "\${candidate}" ]] || continue
|
|
287
|
+
while [[ "\${candidate}" == *')' || "\${candidate}" == *']' || "\${candidate}" == *',' || "\${candidate}" == *'"' || "\${candidate}" == *"'" ]]; do
|
|
288
|
+
candidate="\${candidate%?}"
|
|
289
|
+
done
|
|
290
|
+
if [[ "\$(basename "\${candidate}")" == "\${artifact_name}" && -f "\${candidate}" ]]; then
|
|
291
|
+
printf '%s\n' "\${candidate}"
|
|
292
|
+
fi
|
|
293
|
+
done < <(grep -oE '/(Users|Volumes|tmp)/[^[:space:])"]+' ${output_q} 2>/dev/null || true)
|
|
294
|
+
}
|
|
295
|
+
recover_logged_artifact() {
|
|
296
|
+
local artifact_name="\${1:?artifact name required}"
|
|
297
|
+
local destination="\${2:?destination required}"
|
|
298
|
+
local source_path=""
|
|
299
|
+
|
|
300
|
+
source_path="\$(find_logged_artifact_path "\${artifact_name}" | tail -n 1)"
|
|
301
|
+
[[ -n "\${source_path}" ]] || return 0
|
|
302
|
+
mkdir -p "\$(dirname "\${destination}")"
|
|
303
|
+
if [[ "\${source_path}" != "\${destination}" ]]; then
|
|
304
|
+
cp "\${source_path}" "\${destination}"
|
|
305
|
+
fi
|
|
306
|
+
}
|
|
307
|
+
recover_collected_artifact() {
|
|
308
|
+
local artifact_name="\${1:?artifact name required}"
|
|
309
|
+
local destination="\${2:?destination required}"
|
|
310
|
+
|
|
311
|
+
if [[ -f ${sandbox_run_dir_q}/"\${artifact_name}" ]]; then
|
|
312
|
+
if [[ ${sandbox_run_dir_q}/"\${artifact_name}" != "\${destination}" ]]; then
|
|
313
|
+
cp ${sandbox_run_dir_q}/"\${artifact_name}" "\${destination}"
|
|
314
|
+
fi
|
|
315
|
+
return 0
|
|
316
|
+
fi
|
|
317
|
+
|
|
318
|
+
recover_logged_artifact "\${artifact_name}" "\${destination}"
|
|
319
|
+
}
|
|
272
320
|
record_final_git_state() {
|
|
273
321
|
local final_head final_branch tmp_file
|
|
274
322
|
|
|
275
|
-
final_head="\$(git -C ${
|
|
276
|
-
final_branch="\$(git -C ${
|
|
323
|
+
final_head="\$(git -C ${worktree_realpath_q} rev-parse HEAD 2>/dev/null || true)"
|
|
324
|
+
final_branch="\$(git -C ${worktree_realpath_q} branch --show-current 2>/dev/null || true)"
|
|
277
325
|
tmp_file=${meta_file_q}.tmp.final.$$
|
|
278
|
-
grep -vE '^(FINAL_HEAD|FINAL_BRANCH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
|
|
326
|
+
grep -vE '^(FINAL_HEAD|FINAL_BRANCH|WORKTREE_REALPATH)=' ${meta_file_q} >"\${tmp_file}" 2>/dev/null || true
|
|
279
327
|
{
|
|
328
|
+
printf 'WORKTREE_REALPATH=%s\n' ${worktree_realpath_q}
|
|
280
329
|
printf 'FINAL_HEAD=%q\n' "\${final_head}"
|
|
281
330
|
printf 'FINAL_BRANCH=%q\n' "\${final_branch}"
|
|
282
331
|
} >>"\${tmp_file}"
|
|
@@ -286,7 +335,7 @@ reset_sandbox_run_dir
|
|
|
286
335
|
set +e
|
|
287
336
|
bash ${runner_bin_q} \\
|
|
288
337
|
--mode ${mode_q} \\
|
|
289
|
-
--worktree ${
|
|
338
|
+
--worktree ${worktree_realpath_q} \\
|
|
290
339
|
--prompt-file ${prompt_q} \\
|
|
291
340
|
--output-file ${output_q} \\
|
|
292
341
|
--host-run-dir ${artifact_dir_q} \\
|
|
@@ -296,9 +345,10 @@ bash ${runner_bin_q} \\
|
|
|
296
345
|
--codex-bin ${codex_bin_q}
|
|
297
346
|
status=\$?
|
|
298
347
|
record_final_git_state
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
348
|
+
recover_collected_artifact result.env ${result_q}
|
|
349
|
+
recover_collected_artifact issue-comment.md ${artifact_dir_q}/issue-comment.md
|
|
350
|
+
recover_collected_artifact pr-comment.md ${artifact_dir_q}/pr-comment.md
|
|
351
|
+
recover_collected_artifact verification.jsonl ${artifact_dir_q}/verification.jsonl
|
|
302
352
|
${collect_copy_snippet}${reconcile_snippet}
|
|
303
353
|
printf '\n__CODEX_EXIT__:%s\n' "\$status" | tee -a ${output_q}
|
|
304
354
|
exit "\$status"
|
|
@@ -918,8 +918,9 @@ heartbeat_seconds = max(float(heartbeat_seconds_raw), 1.0)
|
|
|
918
918
|
hard_deadline = time.monotonic() + timeout_seconds + 15.0
|
|
919
919
|
started_at = time.monotonic()
|
|
920
920
|
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
921
|
-
|
|
922
|
-
|
|
921
|
+
seen_agent_progress = False
|
|
922
|
+
last_agent_progress_at = started_at
|
|
923
|
+
last_progress_source = "none"
|
|
923
924
|
terminal_patterns = [
|
|
924
925
|
re.compile(r"Config was last written by a newer OpenClaw", re.I),
|
|
925
926
|
re.compile(r"invalid api key|authentication failed|unauthorized|provider api key|login required|please authenticate|api_key_invalid", re.I),
|
|
@@ -932,6 +933,21 @@ proc = None
|
|
|
932
933
|
sel = selectors.DefaultSelector()
|
|
933
934
|
matched_terminal_error = False
|
|
934
935
|
tail = ""
|
|
936
|
+
openclaw_state_dir = os.environ.get("OPENCLAW_STATE_DIR", "")
|
|
937
|
+
sandbox_run_dir = (
|
|
938
|
+
os.environ.get("ACP_RUN_DIR")
|
|
939
|
+
or os.environ.get("AGENT_PROJECT_RUN_DIR")
|
|
940
|
+
or os.environ.get("F_LOSNING_RUN_DIR")
|
|
941
|
+
or ""
|
|
942
|
+
)
|
|
943
|
+
host_managed_prefixes = tuple(
|
|
944
|
+
prefix
|
|
945
|
+
for prefix in (
|
|
946
|
+
os.path.realpath(runner_state_path) if runner_state_path else "",
|
|
947
|
+
os.path.realpath(output_path) if output_path else "",
|
|
948
|
+
)
|
|
949
|
+
if prefix
|
|
950
|
+
)
|
|
935
951
|
|
|
936
952
|
def write_running_heartbeat() -> None:
|
|
937
953
|
updated_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
|
@@ -967,6 +983,47 @@ def terminate_process_group(process: subprocess.Popen) -> None:
|
|
|
967
983
|
except ProcessLookupError:
|
|
968
984
|
return
|
|
969
985
|
|
|
986
|
+
def progress_signature() -> tuple[tuple[str, int, int], ...]:
|
|
987
|
+
entries: list[tuple[str, int, int]] = []
|
|
988
|
+
|
|
989
|
+
def add_file(path: str) -> None:
|
|
990
|
+
real_path = ""
|
|
991
|
+
if not path:
|
|
992
|
+
return
|
|
993
|
+
try:
|
|
994
|
+
stat_result = os.stat(path)
|
|
995
|
+
except OSError:
|
|
996
|
+
return
|
|
997
|
+
if not os.path.isfile(path):
|
|
998
|
+
return
|
|
999
|
+
real_path = os.path.realpath(path)
|
|
1000
|
+
for prefix in host_managed_prefixes:
|
|
1001
|
+
if real_path == prefix or real_path.startswith(f"{prefix}.tmp."):
|
|
1002
|
+
return
|
|
1003
|
+
entries.append((real_path, stat_result.st_mtime_ns, stat_result.st_size))
|
|
1004
|
+
|
|
1005
|
+
if sandbox_run_dir:
|
|
1006
|
+
try:
|
|
1007
|
+
for name in os.listdir(sandbox_run_dir):
|
|
1008
|
+
add_file(os.path.join(sandbox_run_dir, name))
|
|
1009
|
+
except OSError:
|
|
1010
|
+
pass
|
|
1011
|
+
|
|
1012
|
+
if openclaw_state_dir:
|
|
1013
|
+
sessions_dir = os.path.join(openclaw_state_dir, "agents", agent_id, "sessions")
|
|
1014
|
+
add_file(os.path.join(sessions_dir, "sessions.json"))
|
|
1015
|
+
try:
|
|
1016
|
+
for name in os.listdir(sessions_dir):
|
|
1017
|
+
if name.endswith(".jsonl") and not name.endswith(".lock"):
|
|
1018
|
+
add_file(os.path.join(sessions_dir, name))
|
|
1019
|
+
except OSError:
|
|
1020
|
+
pass
|
|
1021
|
+
|
|
1022
|
+
entries.sort()
|
|
1023
|
+
return tuple(entries)
|
|
1024
|
+
|
|
1025
|
+
last_progress_signature = progress_signature()
|
|
1026
|
+
|
|
970
1027
|
with open(output_path, "ab", buffering=0) as log_handle:
|
|
971
1028
|
proc = subprocess.Popen(
|
|
972
1029
|
cmd,
|
|
@@ -989,26 +1046,42 @@ with open(output_path, "ab", buffering=0) as log_handle:
|
|
|
989
1046
|
terminate_process_group(proc)
|
|
990
1047
|
break
|
|
991
1048
|
|
|
1049
|
+
current_progress_signature = progress_signature()
|
|
1050
|
+
if current_progress_signature != last_progress_signature:
|
|
1051
|
+
last_progress_signature = current_progress_signature
|
|
1052
|
+
seen_agent_progress = True
|
|
1053
|
+
last_agent_progress_at = time.monotonic()
|
|
1054
|
+
last_progress_source = "session-state"
|
|
1055
|
+
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
1056
|
+
|
|
992
1057
|
events = sel.select(timeout=0.2)
|
|
993
1058
|
if not events:
|
|
994
|
-
if proc.poll() is None and not
|
|
1059
|
+
if proc.poll() is None and not seen_agent_progress and stall_seconds > 0 and (time.monotonic() - started_at) >= stall_seconds:
|
|
995
1060
|
elapsed = int(time.monotonic() - started_at)
|
|
996
1061
|
write_running_heartbeat()
|
|
997
1062
|
log_handle.write(f"[openclaw] stale-run no-agent-output-before-stall-threshold elapsed={elapsed}s\n".encode("utf-8"))
|
|
998
1063
|
terminate_process_group(proc)
|
|
999
1064
|
break
|
|
1000
|
-
if proc.poll() is None and
|
|
1065
|
+
if proc.poll() is None and seen_agent_progress and stall_seconds > 0 and (time.monotonic() - last_agent_progress_at) >= stall_seconds:
|
|
1001
1066
|
elapsed = int(time.monotonic() - started_at)
|
|
1002
|
-
idle_for = int(time.monotonic() -
|
|
1067
|
+
idle_for = int(time.monotonic() - last_agent_progress_at)
|
|
1003
1068
|
write_running_heartbeat()
|
|
1004
1069
|
log_handle.write(f"[openclaw] stale-run no-agent-progress-before-stall-threshold elapsed={elapsed}s idle={idle_for}s\n".encode("utf-8"))
|
|
1005
1070
|
terminate_process_group(proc)
|
|
1006
1071
|
break
|
|
1007
|
-
if proc.poll() is None and time.monotonic() >= next_heartbeat:
|
|
1072
|
+
if proc.poll() is None and not seen_agent_progress and time.monotonic() >= next_heartbeat:
|
|
1008
1073
|
elapsed = int(time.monotonic() - started_at)
|
|
1009
1074
|
write_running_heartbeat()
|
|
1010
1075
|
log_handle.write(f"[openclaw] heartbeat waiting-for-agent-output elapsed={elapsed}s\n".encode("utf-8"))
|
|
1011
1076
|
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
1077
|
+
if proc.poll() is None and seen_agent_progress and time.monotonic() >= next_heartbeat:
|
|
1078
|
+
elapsed = int(time.monotonic() - started_at)
|
|
1079
|
+
idle_for = int(time.monotonic() - last_agent_progress_at)
|
|
1080
|
+
write_running_heartbeat()
|
|
1081
|
+
log_handle.write(
|
|
1082
|
+
f"[openclaw] heartbeat progress source={last_progress_source} elapsed={elapsed}s idle={idle_for}s\n".encode("utf-8")
|
|
1083
|
+
)
|
|
1084
|
+
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
1012
1085
|
if proc.poll() is not None:
|
|
1013
1086
|
break
|
|
1014
1087
|
continue
|
|
@@ -1022,8 +1095,9 @@ with open(output_path, "ab", buffering=0) as log_handle:
|
|
|
1022
1095
|
text = chunk.decode("utf-8", errors="replace")
|
|
1023
1096
|
tail = (tail + text)[-8192:]
|
|
1024
1097
|
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
1025
|
-
|
|
1026
|
-
|
|
1098
|
+
seen_agent_progress = True
|
|
1099
|
+
last_agent_progress_at = time.monotonic()
|
|
1100
|
+
last_progress_source = "stdout"
|
|
1027
1101
|
|
|
1028
1102
|
if not matched_terminal_error and any(pattern.search(tail) for pattern in terminal_patterns):
|
|
1029
1103
|
matched_terminal_error = True
|
|
@@ -12,6 +12,7 @@ AGENT_REPO_ROOT="$(flow_resolve_agent_repo_root "${CONFIG_YAML}")"
|
|
|
12
12
|
AGENT_ROOT="$(flow_resolve_agent_root "${CONFIG_YAML}")"
|
|
13
13
|
RUNS_ROOT="$(flow_resolve_runs_root "${CONFIG_YAML}")"
|
|
14
14
|
HISTORY_ROOT="$(flow_resolve_history_root "${CONFIG_YAML}")"
|
|
15
|
+
WORKTREE_ROOT="$(flow_resolve_worktree_root "${CONFIG_YAML}")"
|
|
15
16
|
RETAINED_REPO_ROOT="$(flow_resolve_retained_repo_root "${CONFIG_YAML}")"
|
|
16
17
|
VSCODE_WORKSPACE_FILE="$(flow_resolve_vscode_workspace_file "${CONFIG_YAML}")"
|
|
17
18
|
ISSUE_SESSION_PREFIX="$(flow_resolve_issue_session_prefix "${CONFIG_YAML}")"
|
|
@@ -36,7 +37,9 @@ if [[ -n "$SESSION" ]]; then
|
|
|
36
37
|
ARGS+=(--session "$SESSION")
|
|
37
38
|
fi
|
|
38
39
|
|
|
39
|
-
|
|
40
|
+
AGENT_PROJECT_WORKTREE_ROOT="$WORKTREE_ROOT" \
|
|
41
|
+
F_LOSNING_WORKTREE_ROOT="$WORKTREE_ROOT" \
|
|
42
|
+
bash "${FLOW_TOOLS_DIR}/agent-project-cleanup-session" "${ARGS[@]}" >/dev/null
|
|
40
43
|
|
|
41
44
|
F_LOSNING_AGENT_REPO_ROOT="$AGENT_REPO_ROOT" \
|
|
42
45
|
F_LOSNING_RETAINED_REPO_ROOT="$RETAINED_REPO_ROOT" \
|
|
@@ -4,13 +4,14 @@ set -euo pipefail
|
|
|
4
4
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
5
5
|
FLOW_SKILL_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
|
6
6
|
HOME_DIR="${ACP_DASHBOARD_HOME_DIR:-${HOME:-}}"
|
|
7
|
-
SOURCE_HOME="${ACP_DASHBOARD_SOURCE_HOME
|
|
7
|
+
SOURCE_HOME="${ACP_DASHBOARD_SOURCE_HOME:-}"
|
|
8
8
|
RUNTIME_HOME="${ACP_DASHBOARD_RUNTIME_HOME:-${HOME_DIR}/.agent-runtime/runtime-home}"
|
|
9
9
|
PROFILE_REGISTRY_ROOT="${ACP_DASHBOARD_PROFILE_REGISTRY_ROOT:-${ACP_PROFILE_REGISTRY_ROOT:-${HOME_DIR}/.agent-runtime/control-plane/profiles}}"
|
|
10
10
|
HOST="${ACP_DASHBOARD_HOST:-127.0.0.1}"
|
|
11
11
|
PORT="${ACP_DASHBOARD_PORT:-8765}"
|
|
12
12
|
BASE_PATH="${ACP_DASHBOARD_PATH:-/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin}"
|
|
13
13
|
SYNC_SCRIPT="${ACP_DASHBOARD_SYNC_SCRIPT:-${FLOW_SKILL_DIR}/tools/bin/sync-shared-agent-home.sh}"
|
|
14
|
+
ENSURE_SYNC_SCRIPT="${ACP_DASHBOARD_ENSURE_SYNC_SCRIPT:-${FLOW_SKILL_DIR}/tools/bin/ensure-runtime-sync.sh}"
|
|
14
15
|
RUNTIME_SERVE_SCRIPT="${ACP_DASHBOARD_RUNTIME_SERVE_SCRIPT:-${RUNTIME_HOME}/skills/openclaw/agent-control-plane/tools/bin/serve-dashboard.sh}"
|
|
15
16
|
|
|
16
17
|
if [[ -z "${HOME_DIR}" ]]; then
|
|
@@ -23,12 +24,23 @@ export PATH="${BASE_PATH}"
|
|
|
23
24
|
export ACP_PROFILE_REGISTRY_ROOT="${PROFILE_REGISTRY_ROOT}"
|
|
24
25
|
export PYTHONDONTWRITEBYTECODE=1
|
|
25
26
|
|
|
26
|
-
if [[ ! -x "${SYNC_SCRIPT}" ]]; then
|
|
27
|
-
echo "dashboard launchd bootstrap missing sync
|
|
27
|
+
if [[ ! -x "${ENSURE_SYNC_SCRIPT}" && ! -x "${SYNC_SCRIPT}" ]]; then
|
|
28
|
+
echo "dashboard launchd bootstrap missing sync helper: ${ENSURE_SYNC_SCRIPT}" >&2
|
|
28
29
|
exit 65
|
|
29
30
|
fi
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
if [[ -x "${ENSURE_SYNC_SCRIPT}" ]]; then
|
|
33
|
+
ensure_args=(--runtime-home "${RUNTIME_HOME}" --quiet)
|
|
34
|
+
if [[ -n "${SOURCE_HOME}" ]]; then
|
|
35
|
+
ensure_args=(--source-home "${SOURCE_HOME}" "${ensure_args[@]}")
|
|
36
|
+
fi
|
|
37
|
+
bash "${ENSURE_SYNC_SCRIPT}" "${ensure_args[@]}"
|
|
38
|
+
else
|
|
39
|
+
if [[ -z "${SOURCE_HOME}" ]]; then
|
|
40
|
+
SOURCE_HOME="${FLOW_SKILL_DIR}"
|
|
41
|
+
fi
|
|
42
|
+
bash "${SYNC_SCRIPT}" "${SOURCE_HOME}" "${RUNTIME_HOME}" >/dev/null
|
|
43
|
+
fi
|
|
32
44
|
|
|
33
45
|
if [[ ! -x "${RUNTIME_SERVE_SCRIPT}" ]]; then
|
|
34
46
|
echo "dashboard launchd bootstrap missing runtime serve script: ${RUNTIME_SERVE_SCRIPT}" >&2
|