agent-control-plane 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pr-risk.sh +54 -10
- package/hooks/heartbeat-hooks.sh +70 -6
- package/package.json +1 -1
- package/tools/bin/agent-project-cleanup-session +10 -2
- package/tools/bin/agent-project-heartbeat-loop +29 -2
- package/tools/bin/agent-project-reconcile-issue-session +59 -2
- package/tools/bin/agent-project-reconcile-pr-session +104 -13
- package/tools/bin/agent-project-run-claude-session +19 -1
- package/tools/bin/agent-project-run-codex-session +1 -1
- package/tools/bin/agent-project-run-openclaw-session +200 -7
- package/tools/bin/agent-project-sync-anchor-repo +13 -2
- package/tools/bin/agent-project-worker-status +19 -14
- package/tools/bin/flow-shell-lib.sh +13 -7
- package/tools/bin/prepare-worktree.sh +3 -1
- package/tools/bin/provider-cooldown-state.sh +1 -1
- package/tools/bin/render-flow-config.sh +16 -1
- package/tools/bin/run-codex-task.sh +2 -2
- package/tools/bin/scaffold-profile.sh +2 -2
- package/tools/bin/start-issue-worker.sh +42 -10
- package/tools/dashboard/app.js +20 -2
- package/tools/dashboard/dashboard_snapshot.py +45 -0
|
@@ -18,6 +18,7 @@ Options:
|
|
|
18
18
|
--openclaw-model <id> Model id for the isolated OpenClaw agent
|
|
19
19
|
--openclaw-thinking <level> OpenClaw thinking level
|
|
20
20
|
--openclaw-timeout-seconds <secs> OpenClaw local-agent timeout
|
|
21
|
+
--openclaw-stall-seconds <secs> Fail when the agent produces no output for too long (0 disables)
|
|
21
22
|
--help Show this help
|
|
22
23
|
EOF
|
|
23
24
|
}
|
|
@@ -34,9 +35,11 @@ env_prefix=""
|
|
|
34
35
|
sandbox_subdir=".openclaw-artifacts"
|
|
35
36
|
reconcile_command=""
|
|
36
37
|
keep_agent="false"
|
|
37
|
-
openclaw_model="${ACP_OPENCLAW_MODEL:-${F_LOSNING_OPENCLAW_MODEL:-openrouter/
|
|
38
|
-
openclaw_thinking="${ACP_OPENCLAW_THINKING:-${F_LOSNING_OPENCLAW_THINKING:-
|
|
38
|
+
openclaw_model="${ACP_OPENCLAW_MODEL:-${F_LOSNING_OPENCLAW_MODEL:-openrouter/qwen/qwen3.6-plus-preview:free}}"
|
|
39
|
+
openclaw_thinking="${ACP_OPENCLAW_THINKING:-${F_LOSNING_OPENCLAW_THINKING:-low}}"
|
|
39
40
|
openclaw_timeout_seconds="${ACP_OPENCLAW_TIMEOUT_SECONDS:-${F_LOSNING_OPENCLAW_TIMEOUT_SECONDS:-900}}"
|
|
41
|
+
openclaw_stall_seconds="${ACP_OPENCLAW_STALL_SECONDS:-${F_LOSNING_OPENCLAW_STALL_SECONDS:-180}}"
|
|
42
|
+
openclaw_progress_heartbeat_seconds="${ACP_OPENCLAW_PROGRESS_HEARTBEAT_SECONDS:-${F_LOSNING_OPENCLAW_PROGRESS_HEARTBEAT_SECONDS:-30}}"
|
|
40
43
|
provided_openclaw_agent_id=""
|
|
41
44
|
provided_openclaw_session_id=""
|
|
42
45
|
provided_openclaw_agent_dir=""
|
|
@@ -64,6 +67,7 @@ while [[ $# -gt 0 ]]; do
|
|
|
64
67
|
--openclaw-model) openclaw_model="${2:-}"; shift 2 ;;
|
|
65
68
|
--openclaw-thinking) openclaw_thinking="${2:-}"; shift 2 ;;
|
|
66
69
|
--openclaw-timeout-seconds) openclaw_timeout_seconds="${2:-}"; shift 2 ;;
|
|
70
|
+
--openclaw-stall-seconds) openclaw_stall_seconds="${2:-}"; shift 2 ;;
|
|
67
71
|
--openclaw-agent-id) provided_openclaw_agent_id="${2:-}"; shift 2 ;;
|
|
68
72
|
--openclaw-session-id) provided_openclaw_session_id="${2:-}"; shift 2 ;;
|
|
69
73
|
--openclaw-agent-dir) provided_openclaw_agent_dir="${2:-}"; shift 2 ;;
|
|
@@ -90,6 +94,13 @@ esac
|
|
|
90
94
|
case "$openclaw_timeout_seconds" in
|
|
91
95
|
''|*[!0-9]*) echo "--openclaw-timeout-seconds must be numeric" >&2; exit 1 ;;
|
|
92
96
|
esac
|
|
97
|
+
case "$openclaw_stall_seconds" in
|
|
98
|
+
''|*[!0-9]*) echo "--openclaw-stall-seconds must be numeric" >&2; exit 1 ;;
|
|
99
|
+
esac
|
|
100
|
+
case "$openclaw_progress_heartbeat_seconds" in
|
|
101
|
+
''|*[!0-9]*) echo "OpenClaw progress heartbeat seconds must be numeric" >&2; exit 1 ;;
|
|
102
|
+
0) echo "OpenClaw progress heartbeat seconds must be greater than zero" >&2; exit 1 ;;
|
|
103
|
+
esac
|
|
93
104
|
|
|
94
105
|
if ! command -v openclaw >/dev/null 2>&1; then
|
|
95
106
|
echo "unable to resolve a runnable openclaw binary" >&2
|
|
@@ -154,6 +165,8 @@ printf -v openclaw_session_id_q '%q' "$openclaw_session_id"
|
|
|
154
165
|
printf -v openclaw_model_q '%q' "$openclaw_model"
|
|
155
166
|
printf -v openclaw_thinking_q '%q' "$openclaw_thinking"
|
|
156
167
|
printf -v openclaw_timeout_q '%q' "$openclaw_timeout_seconds"
|
|
168
|
+
printf -v openclaw_stall_q '%q' "$openclaw_stall_seconds"
|
|
169
|
+
printf -v openclaw_progress_heartbeat_q '%q' "$openclaw_progress_heartbeat_seconds"
|
|
157
170
|
printf -v keep_agent_q '%q' "$keep_agent"
|
|
158
171
|
|
|
159
172
|
{
|
|
@@ -180,6 +193,8 @@ printf -v keep_agent_q '%q' "$keep_agent"
|
|
|
180
193
|
printf 'OPENCLAW_MODEL=%s\n' "$openclaw_model_q"
|
|
181
194
|
printf 'OPENCLAW_THINKING=%s\n' "$openclaw_thinking_q"
|
|
182
195
|
printf 'OPENCLAW_TIMEOUT_SECONDS=%s\n' "$openclaw_timeout_q"
|
|
196
|
+
printf 'OPENCLAW_STALL_SECONDS=%s\n' "$openclaw_stall_q"
|
|
197
|
+
printf 'OPENCLAW_PROGRESS_HEARTBEAT_SECONDS=%s\n' "$openclaw_progress_heartbeat_q"
|
|
183
198
|
printf 'OPENCLAW_KEEP_AGENT=%s\n' "$keep_agent_q"
|
|
184
199
|
} >"$meta_file"
|
|
185
200
|
|
|
@@ -265,7 +280,7 @@ fi
|
|
|
265
280
|
|
|
266
281
|
reconcile_snippet=""
|
|
267
282
|
if [[ -n "$reconcile_command" ]]; then
|
|
268
|
-
printf -v delayed_reconcile_q '%q' "sleep 2; $reconcile_command"
|
|
283
|
+
printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
|
|
269
284
|
reconcile_snippet="nohup bash -lc ${delayed_reconcile_q} >> ${output_q} 2>&1 </dev/null &"
|
|
270
285
|
fi
|
|
271
286
|
|
|
@@ -281,6 +296,7 @@ sandbox_artifact_dir=${sandbox_artifact_dir_q}
|
|
|
281
296
|
sandbox_run_dir=${sandbox_run_dir_q}
|
|
282
297
|
artifact_dir=${artifact_dir_q}
|
|
283
298
|
run_dir=${artifact_dir_q}
|
|
299
|
+
task_kind=${task_kind_q}
|
|
284
300
|
worktree=${worktree_q}
|
|
285
301
|
prompt_file_path=${prompt_q}
|
|
286
302
|
openclaw_state_dir=${openclaw_state_dir_q}
|
|
@@ -291,6 +307,8 @@ openclaw_session_id=${openclaw_session_id_q}
|
|
|
291
307
|
openclaw_model=${openclaw_model_q}
|
|
292
308
|
openclaw_bin=${openclaw_bin_q}
|
|
293
309
|
openclaw_timeout=${openclaw_timeout_q}
|
|
310
|
+
openclaw_stall_seconds=${openclaw_stall_q}
|
|
311
|
+
openclaw_progress_heartbeat_seconds=${openclaw_progress_heartbeat_q}
|
|
294
312
|
openclaw_thinking=${openclaw_thinking_q}
|
|
295
313
|
keep_agent=${keep_agent_q}
|
|
296
314
|
openclaw_add_log="\${sandbox_run_dir}/openclaw-agents-add.log"
|
|
@@ -334,6 +352,9 @@ record_final_git_state() {
|
|
|
334
352
|
|
|
335
353
|
ensure_openclaw_workspace_excludes() {
|
|
336
354
|
local exclude_file line
|
|
355
|
+
if ! git -C ${worktree_q} rev-parse --git-dir >/dev/null 2>&1; then
|
|
356
|
+
return 0
|
|
357
|
+
fi
|
|
337
358
|
exclude_file="\$(git -C ${worktree_q} config --worktree --get core.excludesFile 2>/dev/null || true)"
|
|
338
359
|
if [[ -z "\${exclude_file}" ]]; then
|
|
339
360
|
exclude_file="\${sandbox_artifact_dir}/git-exclude"
|
|
@@ -357,6 +378,7 @@ IDENTITY.md
|
|
|
357
378
|
USER.md
|
|
358
379
|
HEARTBEAT.md
|
|
359
380
|
BOOTSTRAP.md
|
|
381
|
+
AGENTS.md
|
|
360
382
|
.agent-session.env
|
|
361
383
|
\$ACP_RUN_DIR
|
|
362
384
|
\$AGENT_PROJECT_RUN_DIR
|
|
@@ -558,6 +580,14 @@ classify_failure_reason() {
|
|
|
558
580
|
printf 'context-length-exceeded\n'
|
|
559
581
|
return 0
|
|
560
582
|
fi
|
|
583
|
+
if grep -Eiq 'stale-run no-agent-output-before-stall-threshold|no-agent-output-before-stall-threshold' "\${output_file}" 2>/dev/null; then
|
|
584
|
+
printf 'no-agent-output-before-stall-threshold\n'
|
|
585
|
+
return 0
|
|
586
|
+
fi
|
|
587
|
+
if grep -Eiq 'stale-run no-agent-progress-before-stall-threshold|no-agent-progress-before-stall-threshold' "\${output_file}" 2>/dev/null; then
|
|
588
|
+
printf 'no-agent-progress-before-stall-threshold\n'
|
|
589
|
+
return 0
|
|
590
|
+
fi
|
|
561
591
|
if grep -Eiq 'timeout|timed out|ETIMEDOUT|ECONNREFUSED' "\${output_file}" 2>/dev/null; then
|
|
562
592
|
printf 'timeout\n'
|
|
563
593
|
return 0
|
|
@@ -570,6 +600,7 @@ infer_result_from_output() {
|
|
|
570
600
|
local verification_file="\${sandbox_run_dir}/verification.jsonl"
|
|
571
601
|
# Host-side result file (always writable, never inside worktree)
|
|
572
602
|
local host_result_file="\${run_dir}/result.env"
|
|
603
|
+
local recovered_contract=""
|
|
573
604
|
local write_result=''
|
|
574
605
|
|
|
575
606
|
write_result() {
|
|
@@ -577,6 +608,38 @@ infer_result_from_output() {
|
|
|
577
608
|
printf '%b' "\$1" > "\${host_result_file}" 2>/dev/null || true
|
|
578
609
|
}
|
|
579
610
|
|
|
611
|
+
recover_result_contract_from_output() {
|
|
612
|
+
python3 - "\${output_file}" <<'PY'
|
|
613
|
+
import re
|
|
614
|
+
import sys
|
|
615
|
+
|
|
616
|
+
log_path = sys.argv[1]
|
|
617
|
+
try:
|
|
618
|
+
raw = open(log_path, "r", encoding="utf-8", errors="replace").read()
|
|
619
|
+
except Exception:
|
|
620
|
+
raise SystemExit(1)
|
|
621
|
+
|
|
622
|
+
matches = re.findall(r"Result file written:\s*([^\r\n]+)", raw, flags=re.IGNORECASE)
|
|
623
|
+
if not matches:
|
|
624
|
+
raise SystemExit(1)
|
|
625
|
+
|
|
626
|
+
line = matches[-1]
|
|
627
|
+
fields = {}
|
|
628
|
+
for key in ("OUTCOME", "ACTION", "DETAIL", "ISSUE_ID"):
|
|
629
|
+
match = re.search(rf"{key}=([A-Za-z0-9._/-]+)", line)
|
|
630
|
+
if match:
|
|
631
|
+
fields[key] = match.group(1).strip()
|
|
632
|
+
|
|
633
|
+
if "OUTCOME" not in fields or "ACTION" not in fields:
|
|
634
|
+
raise SystemExit(1)
|
|
635
|
+
|
|
636
|
+
for key in ("OUTCOME", "ACTION", "DETAIL", "ISSUE_ID"):
|
|
637
|
+
value = fields.get(key)
|
|
638
|
+
if value:
|
|
639
|
+
print(f"{key}={value}")
|
|
640
|
+
PY
|
|
641
|
+
}
|
|
642
|
+
|
|
580
643
|
# If sandbox result.env exists with implemented, validate it has verification
|
|
581
644
|
if [[ -f "\${result_file_path}" ]]; then
|
|
582
645
|
if grep -q 'OUTCOME=implemented' "\${result_file_path}" 2>/dev/null; then
|
|
@@ -589,6 +652,17 @@ infer_result_from_output() {
|
|
|
589
652
|
return 0
|
|
590
653
|
fi
|
|
591
654
|
|
|
655
|
+
if grep -Fq '[tools] exec failed: Provide a command to start.' "\${output_file}" 2>/dev/null; then
|
|
656
|
+
write_result 'OUTCOME=blocked\nACTION=host-comment-blocker\nDETAIL=worker-tool-exec-empty-command\n'
|
|
657
|
+
return 0
|
|
658
|
+
fi
|
|
659
|
+
|
|
660
|
+
recovered_contract="\$(recover_result_contract_from_output 2>/dev/null || true)"
|
|
661
|
+
if [[ -n "\${recovered_contract}" ]]; then
|
|
662
|
+
write_result "\${recovered_contract}"$'\n'
|
|
663
|
+
return 0
|
|
664
|
+
fi
|
|
665
|
+
|
|
592
666
|
# Check if there are actual code changes (not just artifact files)
|
|
593
667
|
local has_product_changes="no"
|
|
594
668
|
if git -C ${worktree_q} diff --name-only HEAD 2>/dev/null | grep -qvE '\.openclaw-artifacts/|\.md$' 2>/dev/null; then
|
|
@@ -640,6 +714,79 @@ infer_result_from_output() {
|
|
|
640
714
|
write_result 'OUTCOME=blocked\nACTION=host-comment-blocker\n'
|
|
641
715
|
}
|
|
642
716
|
|
|
717
|
+
synthesize_comment_artifact_from_output() {
|
|
718
|
+
local target_file=""
|
|
719
|
+
local result_file_path="\${sandbox_run_dir}/result.env"
|
|
720
|
+
|
|
721
|
+
if [[ ! -f "\${result_file_path}" ]] || ! grep -Eq '^ACTION=host-comment-' "\${result_file_path}" 2>/dev/null; then
|
|
722
|
+
return 0
|
|
723
|
+
fi
|
|
724
|
+
|
|
725
|
+
case "\${task_kind}" in
|
|
726
|
+
issue|task)
|
|
727
|
+
target_file="\${sandbox_run_dir}/issue-comment.md"
|
|
728
|
+
;;
|
|
729
|
+
pr)
|
|
730
|
+
target_file="\${sandbox_run_dir}/pr-comment.md"
|
|
731
|
+
;;
|
|
732
|
+
*)
|
|
733
|
+
return 0
|
|
734
|
+
;;
|
|
735
|
+
esac
|
|
736
|
+
|
|
737
|
+
[[ -n "\${target_file}" ]] || return 0
|
|
738
|
+
[[ ! -f "\${target_file}" ]] || return 0
|
|
739
|
+
|
|
740
|
+
python3 - "\${output_file}" "\${target_file}" <<'PY2'
|
|
741
|
+
import json
|
|
742
|
+
import os
|
|
743
|
+
import sys
|
|
744
|
+
|
|
745
|
+
log_path, target_path = sys.argv[1:3]
|
|
746
|
+
|
|
747
|
+
try:
|
|
748
|
+
raw = open(log_path, 'r', encoding='utf-8', errors='replace').read()
|
|
749
|
+
except Exception:
|
|
750
|
+
raise SystemExit(0)
|
|
751
|
+
|
|
752
|
+
decoder = json.JSONDecoder()
|
|
753
|
+
message = ''
|
|
754
|
+
idx = 0
|
|
755
|
+
while idx < len(raw):
|
|
756
|
+
start = raw.find('{', idx)
|
|
757
|
+
if start == -1:
|
|
758
|
+
break
|
|
759
|
+
try:
|
|
760
|
+
payload, end = decoder.raw_decode(raw, start)
|
|
761
|
+
except Exception:
|
|
762
|
+
idx = start + 1
|
|
763
|
+
continue
|
|
764
|
+
idx = end
|
|
765
|
+
if not isinstance(payload, dict):
|
|
766
|
+
continue
|
|
767
|
+
payloads = payload.get('payloads')
|
|
768
|
+
if not isinstance(payloads, list):
|
|
769
|
+
continue
|
|
770
|
+
parts = []
|
|
771
|
+
for item in payloads:
|
|
772
|
+
if not isinstance(item, dict):
|
|
773
|
+
continue
|
|
774
|
+
value = item.get('text')
|
|
775
|
+
if isinstance(value, str) and value.strip():
|
|
776
|
+
parts.append(value.rstrip())
|
|
777
|
+
if parts:
|
|
778
|
+
message = '\n\n'.join(parts).strip()
|
|
779
|
+
|
|
780
|
+
if not message:
|
|
781
|
+
raise SystemExit(0)
|
|
782
|
+
|
|
783
|
+
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
|
784
|
+
with open(target_path, 'w', encoding='utf-8') as handle:
|
|
785
|
+
handle.write(message)
|
|
786
|
+
handle.write('\n')
|
|
787
|
+
PY2
|
|
788
|
+
}
|
|
789
|
+
|
|
643
790
|
cleanup_agent() {
|
|
644
791
|
# --force required for non-interactive (tmux) sessions, otherwise delete waits for confirmation
|
|
645
792
|
"\${openclaw_bin}" agents delete "\${openclaw_agent_id}" --json --force >/dev/null 2>&1 || true
|
|
@@ -734,7 +881,7 @@ PY
|
|
|
734
881
|
}
|
|
735
882
|
|
|
736
883
|
run_openclaw_agent_command() {
|
|
737
|
-
python3 - "\${output_file}" "\${openclaw_timeout}" "\${openclaw_bin}" "\${openclaw_agent_id}" "\${openclaw_session_id}" "\${openclaw_thinking}" "\${prompt_file_path}" <<'PY'
|
|
884
|
+
python3 - "\${output_file}" "\${runner_state_file}" "\${openclaw_timeout}" "\${openclaw_stall_seconds}" "\${openclaw_progress_heartbeat_seconds}" "\${openclaw_bin}" "\${openclaw_agent_id}" "\${openclaw_session_id}" "\${openclaw_thinking}" "\${prompt_file_path}" <<'PY'
|
|
738
885
|
import os
|
|
739
886
|
import re
|
|
740
887
|
import selectors
|
|
@@ -743,7 +890,7 @@ import subprocess
|
|
|
743
890
|
import sys
|
|
744
891
|
import time
|
|
745
892
|
|
|
746
|
-
output_path, timeout_seconds_raw, openclaw_bin, agent_id, session_id, thinking, prompt_path = sys.argv[1:
|
|
893
|
+
output_path, runner_state_path, timeout_seconds_raw, stall_seconds_raw, heartbeat_seconds_raw, openclaw_bin, agent_id, session_id, thinking, prompt_path = sys.argv[1:11]
|
|
747
894
|
|
|
748
895
|
with open(prompt_path, "r", encoding="utf-8") as handle:
|
|
749
896
|
prompt = handle.read()
|
|
@@ -766,7 +913,13 @@ cmd = [
|
|
|
766
913
|
]
|
|
767
914
|
|
|
768
915
|
timeout_seconds = float(timeout_seconds_raw)
|
|
916
|
+
stall_seconds = float(stall_seconds_raw)
|
|
917
|
+
heartbeat_seconds = max(float(heartbeat_seconds_raw), 1.0)
|
|
769
918
|
hard_deadline = time.monotonic() + timeout_seconds + 15.0
|
|
919
|
+
started_at = time.monotonic()
|
|
920
|
+
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
921
|
+
seen_agent_output = False
|
|
922
|
+
last_agent_output_at = started_at
|
|
770
923
|
terminal_patterns = [
|
|
771
924
|
re.compile(r"Config was last written by a newer OpenClaw", re.I),
|
|
772
925
|
re.compile(r"invalid api key|authentication failed|unauthorized|provider api key|login required|please authenticate|api_key_invalid", re.I),
|
|
@@ -780,6 +933,25 @@ sel = selectors.DefaultSelector()
|
|
|
780
933
|
matched_terminal_error = False
|
|
781
934
|
tail = ""
|
|
782
935
|
|
|
936
|
+
def write_running_heartbeat() -> None:
|
|
937
|
+
updated_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
|
938
|
+
tmp_path = f"{runner_state_path}.tmp.{os.getpid()}"
|
|
939
|
+
with open(tmp_path, "w", encoding="utf-8") as handle:
|
|
940
|
+
handle.write("RUNNER_STATE=running\n")
|
|
941
|
+
handle.write(f"THREAD_ID={sh_quote(session_id)}\n")
|
|
942
|
+
handle.write("ATTEMPT=1\n")
|
|
943
|
+
handle.write("RESUME_COUNT=0\n")
|
|
944
|
+
handle.write("LAST_EXIT_CODE=''\n")
|
|
945
|
+
handle.write("LAST_FAILURE_REASON=''\n")
|
|
946
|
+
handle.write("LAST_TRIGGER_REASON=''\n")
|
|
947
|
+
handle.write("AUTH_WAIT_STARTED_AT=''\n")
|
|
948
|
+
handle.write("LAST_AUTH_FINGERPRINT=''\n")
|
|
949
|
+
handle.write(f"UPDATED_AT={sh_quote(updated_at)}\n")
|
|
950
|
+
os.replace(tmp_path, runner_state_path)
|
|
951
|
+
|
|
952
|
+
def sh_quote(value: str) -> str:
|
|
953
|
+
return "'" + value.replace("'", "'\"'\"'") + "'"
|
|
954
|
+
|
|
783
955
|
def terminate_process_group(process: subprocess.Popen) -> None:
|
|
784
956
|
try:
|
|
785
957
|
os.killpg(process.pid, signal.SIGTERM)
|
|
@@ -819,6 +991,24 @@ with open(output_path, "ab", buffering=0) as log_handle:
|
|
|
819
991
|
|
|
820
992
|
events = sel.select(timeout=0.2)
|
|
821
993
|
if not events:
|
|
994
|
+
if proc.poll() is None and not seen_agent_output and stall_seconds > 0 and (time.monotonic() - started_at) >= stall_seconds:
|
|
995
|
+
elapsed = int(time.monotonic() - started_at)
|
|
996
|
+
write_running_heartbeat()
|
|
997
|
+
log_handle.write(f"[openclaw] stale-run no-agent-output-before-stall-threshold elapsed={elapsed}s\n".encode("utf-8"))
|
|
998
|
+
terminate_process_group(proc)
|
|
999
|
+
break
|
|
1000
|
+
if proc.poll() is None and seen_agent_output and stall_seconds > 0 and (time.monotonic() - last_agent_output_at) >= stall_seconds:
|
|
1001
|
+
elapsed = int(time.monotonic() - started_at)
|
|
1002
|
+
idle_for = int(time.monotonic() - last_agent_output_at)
|
|
1003
|
+
write_running_heartbeat()
|
|
1004
|
+
log_handle.write(f"[openclaw] stale-run no-agent-progress-before-stall-threshold elapsed={elapsed}s idle={idle_for}s\n".encode("utf-8"))
|
|
1005
|
+
terminate_process_group(proc)
|
|
1006
|
+
break
|
|
1007
|
+
if proc.poll() is None and time.monotonic() >= next_heartbeat:
|
|
1008
|
+
elapsed = int(time.monotonic() - started_at)
|
|
1009
|
+
write_running_heartbeat()
|
|
1010
|
+
log_handle.write(f"[openclaw] heartbeat waiting-for-agent-output elapsed={elapsed}s\n".encode("utf-8"))
|
|
1011
|
+
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
822
1012
|
if proc.poll() is not None:
|
|
823
1013
|
break
|
|
824
1014
|
continue
|
|
@@ -831,6 +1021,9 @@ with open(output_path, "ab", buffering=0) as log_handle:
|
|
|
831
1021
|
log_handle.write(chunk)
|
|
832
1022
|
text = chunk.decode("utf-8", errors="replace")
|
|
833
1023
|
tail = (tail + text)[-8192:]
|
|
1024
|
+
next_heartbeat = time.monotonic() + heartbeat_seconds
|
|
1025
|
+
seen_agent_output = True
|
|
1026
|
+
last_agent_output_at = time.monotonic()
|
|
834
1027
|
|
|
835
1028
|
if not matched_terminal_error and any(pattern.search(tail) for pattern in terminal_patterns):
|
|
836
1029
|
matched_terminal_error = True
|
|
@@ -950,9 +1143,9 @@ while true; do
|
|
|
950
1143
|
break
|
|
951
1144
|
done
|
|
952
1145
|
recover_literal_runtime_artifacts
|
|
1146
|
+
infer_result_from_output
|
|
1147
|
+
synthesize_comment_artifact_from_output
|
|
953
1148
|
if [[ "\${status}" -eq 0 ]]; then
|
|
954
|
-
# Infer result.env from output if agent didn't create one
|
|
955
|
-
infer_result_from_output
|
|
956
1149
|
write_state succeeded "0" ""
|
|
957
1150
|
else
|
|
958
1151
|
if [[ -z "\${failure_reason}" ]]; then
|
|
@@ -16,6 +16,8 @@ canonical_root=""
|
|
|
16
16
|
anchor_root=""
|
|
17
17
|
remote_name="origin"
|
|
18
18
|
default_branch="main"
|
|
19
|
+
dirty_state_stashed="no"
|
|
20
|
+
dirty_stash_message=""
|
|
19
21
|
|
|
20
22
|
while [[ $# -gt 0 ]]; do
|
|
21
23
|
case "$1" in
|
|
@@ -103,8 +105,13 @@ git -C "$anchor_root" fetch "$remote_name" --prune >/dev/null
|
|
|
103
105
|
git -C "$anchor_root" worktree prune >/dev/null 2>&1 || true
|
|
104
106
|
|
|
105
107
|
if [[ -n "$(git -C "$anchor_root" status --porcelain --untracked-files=no)" ]]; then
|
|
106
|
-
|
|
107
|
-
|
|
108
|
+
dirty_stash_message="acp-anchor-sync-$(date -u +%Y%m%dT%H%M%SZ)"
|
|
109
|
+
git -C "$anchor_root" stash push --message "$dirty_stash_message" >/dev/null
|
|
110
|
+
if [[ -n "$(git -C "$anchor_root" status --porcelain --untracked-files=no)" ]]; then
|
|
111
|
+
echo "[agent-sync-anchor] anchor repo is dirty; refuse to reset: $anchor_root" >&2
|
|
112
|
+
exit 1
|
|
113
|
+
fi
|
|
114
|
+
dirty_state_stashed="yes"
|
|
108
115
|
fi
|
|
109
116
|
|
|
110
117
|
default_ref="${remote_name}/${default_branch}"
|
|
@@ -126,3 +133,7 @@ printf 'ANCHOR_ROOT=%s\n' "$anchor_root"
|
|
|
126
133
|
printf 'REMOTE=%s\n' "$remote_name"
|
|
127
134
|
printf 'DEFAULT_BRANCH=%s\n' "$default_branch"
|
|
128
135
|
printf 'ORIGIN_URL=%s\n' "$origin_url"
|
|
136
|
+
printf 'DIRTY_STATE_STASHED=%s\n' "$dirty_state_stashed"
|
|
137
|
+
if [[ "$dirty_state_stashed" == "yes" ]]; then
|
|
138
|
+
printf 'DIRTY_STASH_MESSAGE=%s\n' "$dirty_stash_message"
|
|
139
|
+
fi
|
|
@@ -87,23 +87,14 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
|
|
|
87
87
|
fi
|
|
88
88
|
fi
|
|
89
89
|
|
|
90
|
-
if [[ "$status" == "UNKNOWN" && -f "$result_file" ]]; then
|
|
91
|
-
# A worker that managed to persist result.env already completed its contract,
|
|
92
|
-
# even if the tmux session disappeared before the exit marker was flushed.
|
|
93
|
-
status="SUCCEEDED"
|
|
94
|
-
result_only_completion="yes"
|
|
95
|
-
fi
|
|
96
|
-
|
|
97
|
-
if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
|
|
98
|
-
if rg -qi "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file"; then
|
|
99
|
-
status="FAILED"
|
|
100
|
-
failure_reason="usage-limit"
|
|
101
|
-
fi
|
|
102
|
-
fi
|
|
103
|
-
|
|
104
90
|
if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
|
|
105
91
|
case "$runner_state" in
|
|
106
92
|
running|waiting-auth-refresh|switching-account)
|
|
93
|
+
# Tmux session is gone and runner never reached a terminal state.
|
|
94
|
+
# This detects crashes where the worker process died before updating
|
|
95
|
+
# runner.env or writing an exit marker.
|
|
96
|
+
# Check BEFORE stale result.env to avoid false SUCCEEDED when a prior
|
|
97
|
+
# cycle's result.env happens to exist.
|
|
107
98
|
status="FAILED"
|
|
108
99
|
if [[ -z "$failure_reason" ]]; then
|
|
109
100
|
failure_reason="runner-aborted-before-completion"
|
|
@@ -115,6 +106,20 @@ if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
|
|
|
115
106
|
esac
|
|
116
107
|
fi
|
|
117
108
|
|
|
109
|
+
if [[ "$status" == "UNKNOWN" && -f "$result_file" ]]; then
|
|
110
|
+
# A worker that managed to persist result.env already completed its contract,
|
|
111
|
+
# even if the tmux session disappeared before the exit marker was flushed.
|
|
112
|
+
status="SUCCEEDED"
|
|
113
|
+
result_only_completion="yes"
|
|
114
|
+
fi
|
|
115
|
+
|
|
116
|
+
if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
|
|
117
|
+
if rg -qi "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file"; then
|
|
118
|
+
status="FAILED"
|
|
119
|
+
failure_reason="usage-limit"
|
|
120
|
+
fi
|
|
121
|
+
fi
|
|
122
|
+
|
|
118
123
|
printf 'SESSION=%s\n' "$session"
|
|
119
124
|
printf 'STATUS=%s\n' "$status"
|
|
120
125
|
if [[ -f "$meta_file" ]]; then
|
|
@@ -144,6 +144,7 @@ flow_print_dir() {
|
|
|
144
144
|
resolve_flow_skill_dir() {
|
|
145
145
|
local script_path="${1:-}"
|
|
146
146
|
local candidate=""
|
|
147
|
+
local search_dir=""
|
|
147
148
|
local skill_name=""
|
|
148
149
|
|
|
149
150
|
for candidate in \
|
|
@@ -158,13 +159,18 @@ resolve_flow_skill_dir() {
|
|
|
158
159
|
done
|
|
159
160
|
|
|
160
161
|
if [[ -n "${script_path}" ]]; then
|
|
161
|
-
|
|
162
|
-
cd "$(dirname "${script_path}")
|
|
163
|
-
)" ||
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
162
|
+
search_dir="$(
|
|
163
|
+
cd "$(dirname "${script_path}")" 2>/dev/null && pwd -P
|
|
164
|
+
)" || search_dir=""
|
|
165
|
+
while [[ -n "${search_dir}" && "${search_dir}" != "/" ]]; do
|
|
166
|
+
if flow_is_skill_root "${search_dir}"; then
|
|
167
|
+
printf '%s\n' "${search_dir}"
|
|
168
|
+
return 0
|
|
169
|
+
fi
|
|
170
|
+
candidate="$(dirname "${search_dir}")"
|
|
171
|
+
[[ "${candidate}" != "${search_dir}" ]] || break
|
|
172
|
+
search_dir="${candidate}"
|
|
173
|
+
done
|
|
168
174
|
fi
|
|
169
175
|
|
|
170
176
|
if [[ -n "${SHARED_AGENT_HOME:-}" ]]; then
|
|
@@ -119,7 +119,9 @@ mkdir -p "$WORKTREE/.openclaw-artifacts"
|
|
|
119
119
|
configure_worktree_excludes "$WORKTREE"
|
|
120
120
|
|
|
121
121
|
if [[ -x "$SYNC_DEPENDENCY_BASELINE_SCRIPT" && "$DEPENDENCY_REAL" == "$CANONICAL_REAL" ]]; then
|
|
122
|
-
|
|
122
|
+
# Sync dependency baseline is non-fatal — worker can function without it
|
|
123
|
+
# (especially for repos with zero dependencies or external volume issues)
|
|
124
|
+
"$SYNC_DEPENDENCY_BASELINE_SCRIPT" >/dev/null 2>&1 || true
|
|
123
125
|
fi
|
|
124
126
|
|
|
125
127
|
if [[ "$LOCAL_WORKSPACE_INSTALL" == "true" ]]; then
|
|
@@ -12,7 +12,7 @@ Usage:
|
|
|
12
12
|
|
|
13
13
|
Examples:
|
|
14
14
|
provider-cooldown-state.sh get
|
|
15
|
-
provider-cooldown-state.sh openclaw openrouter/
|
|
15
|
+
provider-cooldown-state.sh openclaw openrouter/qwen/qwen3.6-plus-preview:free schedule provider-quota-limit
|
|
16
16
|
EOF
|
|
17
17
|
}
|
|
18
18
|
|
|
@@ -8,7 +8,22 @@ source "${SCRIPT_DIR}/flow-config-lib.sh"
|
|
|
8
8
|
FLOW_SKILL_DIR="$(resolve_flow_skill_dir "${BASH_SOURCE[0]}")"
|
|
9
9
|
PROFILE_REGISTRY_ROOT="$(resolve_flow_profile_registry_root)"
|
|
10
10
|
CONFIG_YAML="$(resolve_flow_config_yaml "${BASH_SOURCE[0]}")"
|
|
11
|
-
|
|
11
|
+
# Do NOT export execution env for the current profile here — render-flow-config
|
|
12
|
+
# is meant to render the SELECTED profile's config (via CONFIG_YAML), and exporting
|
|
13
|
+
# the ambient profile's vars into the shell causes config_or_env to silently override
|
|
14
|
+
# per-profile YAML with defaults from the current resident worker's own config.
|
|
15
|
+
# Also, ambient env vars from the shell are cleared below so they don't leak into
|
|
16
|
+
# profile-smoke or other callers.
|
|
17
|
+
for _clean in ACP_CODING_WORKER ACP_OPENCLAW_MODEL ACP_CLAUDE_MODEL \
|
|
18
|
+
ACP_CLAUDE_TIMEOUT_SECONDS ACP_CLAUDE_MAX_ATTEMPTS ACP_CLAUDE_RETRY_BACKOFF_SECONDS \
|
|
19
|
+
ACP_OPENCLAW_THINKING ACP_OPENCLAW_TIMEOUT_SECONDS \
|
|
20
|
+
F_LOSNING_CODING_WORKER F_LOSNING_OPENCLAW_MODEL F_LOSNING_CLAUDE_MODEL \
|
|
21
|
+
F_LOSNING_CLAUDE_TIMEOUT_SECONDS F_LOSNING_CLAUDE_MAX_ATTEMPTS F_LOSNING_CLAUDE_RETRY_BACKOFF_SECONDS \
|
|
22
|
+
F_LOSNING_OPENCLAW_THINKING F_LOSNING_OPENCLAW_TIMEOUT_SECONDS \
|
|
23
|
+
CODING_WORKER; do
|
|
24
|
+
unset "${_clean}" 2>/dev/null || true
|
|
25
|
+
done
|
|
26
|
+
unset _clean
|
|
12
27
|
AVAILABLE_PROFILES="$(flow_list_profile_ids "${FLOW_SKILL_DIR}" | paste -sd, -)"
|
|
13
28
|
INSTALLED_PROFILES="$(flow_list_installed_profile_ids | paste -sd, -)"
|
|
14
29
|
PROFILE_ID="$(flow_resolve_adapter_id "${CONFIG_YAML}")"
|
|
@@ -61,8 +61,8 @@ RESIDENT_OPENCLAW_AGENT_DIR="${ACP_RESIDENT_OPENCLAW_AGENT_DIR:-${F_LOSNING_RESI
|
|
|
61
61
|
RESIDENT_OPENCLAW_STATE_DIR="${ACP_RESIDENT_OPENCLAW_STATE_DIR:-${F_LOSNING_RESIDENT_OPENCLAW_STATE_DIR:-}}"
|
|
62
62
|
RESIDENT_OPENCLAW_CONFIG_PATH="${ACP_RESIDENT_OPENCLAW_CONFIG_PATH:-${F_LOSNING_RESIDENT_OPENCLAW_CONFIG_PATH:-}}"
|
|
63
63
|
# Set defaults if not set from yaml or env
|
|
64
|
-
OPENCLAW_MODEL="${OPENCLAW_MODEL:-${ACP_OPENCLAW_MODEL:-${F_LOSNING_OPENCLAW_MODEL:-openrouter/
|
|
65
|
-
OPENCLAW_THINKING="${OPENCLAW_THINKING:-${ACP_OPENCLAW_THINKING:-${F_LOSNING_OPENCLAW_THINKING:-
|
|
64
|
+
OPENCLAW_MODEL="${OPENCLAW_MODEL:-${ACP_OPENCLAW_MODEL:-${F_LOSNING_OPENCLAW_MODEL:-openrouter/qwen/qwen3.6-plus-preview:free}}}"
|
|
65
|
+
OPENCLAW_THINKING="${OPENCLAW_THINKING:-${ACP_OPENCLAW_THINKING:-${F_LOSNING_OPENCLAW_THINKING:-low}}}"
|
|
66
66
|
OPENCLAW_TIMEOUT_SECONDS="${OPENCLAW_TIMEOUT_SECONDS:-${ACP_OPENCLAW_TIMEOUT_SECONDS:-${F_LOSNING_OPENCLAW_TIMEOUT_SECONDS:-900}}}"
|
|
67
67
|
printf -v SESSION_Q '%q' "$SESSION"
|
|
68
68
|
printf -v CONFIG_YAML_Q '%q' "$CONFIG_YAML"
|
|
@@ -54,8 +54,8 @@ claude_effort="medium"
|
|
|
54
54
|
claude_timeout_seconds="900"
|
|
55
55
|
claude_max_attempts="3"
|
|
56
56
|
claude_retry_backoff_seconds="30"
|
|
57
|
-
openclaw_model="openrouter/
|
|
58
|
-
openclaw_thinking="
|
|
57
|
+
openclaw_model="openrouter/qwen/qwen3.6-plus-preview:free"
|
|
58
|
+
openclaw_thinking="low"
|
|
59
59
|
openclaw_timeout_seconds="600"
|
|
60
60
|
force="0"
|
|
61
61
|
|
|
@@ -386,6 +386,38 @@ const blockerComment = [...(issue.comments || [])]
|
|
|
386
386
|
),
|
|
387
387
|
);
|
|
388
388
|
|
|
389
|
+
const inferCommentReason = (bodyText) => {
|
|
390
|
+
const body = String(bodyText || '');
|
|
391
|
+
const marker = 'Failure reason:';
|
|
392
|
+
const markerIndex = body.search(/Failure reason:/i);
|
|
393
|
+
if (markerIndex !== -1) {
|
|
394
|
+
const backtick = String.fromCharCode(96);
|
|
395
|
+
const tail = body.slice(markerIndex + marker.length);
|
|
396
|
+
const firstQuoted = tail.split(backtick)[1];
|
|
397
|
+
if (firstQuoted) {
|
|
398
|
+
return firstQuoted.trim();
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
if (/^# Blocker: Verification requirements were not satisfied$/im.test(body)) {
|
|
402
|
+
return 'verification-guard-blocked';
|
|
403
|
+
}
|
|
404
|
+
if (/^# Blocker: (All checklist items already completed|Worker produced no publishable delta)$/im.test(body)) {
|
|
405
|
+
return 'no-publishable-commits';
|
|
406
|
+
}
|
|
407
|
+
if (/scope guard/i.test(body)) {
|
|
408
|
+
return 'scope-guard-blocked';
|
|
409
|
+
}
|
|
410
|
+
if (/^# Blocker: Provider quota is currently exhausted$/im.test(body)) {
|
|
411
|
+
return 'provider-quota-limit';
|
|
412
|
+
}
|
|
413
|
+
return '';
|
|
414
|
+
};
|
|
415
|
+
|
|
416
|
+
const effectiveLastReason =
|
|
417
|
+
lastReason && lastReason !== 'issue-worker-blocked'
|
|
418
|
+
? lastReason
|
|
419
|
+
: inferCommentReason(blockerComment?.body || '') || lastReason;
|
|
420
|
+
|
|
389
421
|
if (!blockerComment || !blockerComment.body) {
|
|
390
422
|
const fallbackLines = [
|
|
391
423
|
'',
|
|
@@ -393,13 +425,13 @@ if (!blockerComment || !blockerComment.body) {
|
|
|
393
425
|
'This issue is being retried after an `agent-blocked` stop.',
|
|
394
426
|
'- First resolve the prior blocker instead of repeating the same broad implementation path.',
|
|
395
427
|
];
|
|
396
|
-
if (
|
|
397
|
-
fallbackLines.push(
|
|
428
|
+
if (effectiveLastReason) {
|
|
429
|
+
fallbackLines.push('- Last recorded blocker: `' + effectiveLastReason + '`.');
|
|
398
430
|
}
|
|
399
431
|
if (attempts > 0) {
|
|
400
|
-
fallbackLines.push(
|
|
432
|
+
fallbackLines.push('- Blocked retries so far: ' + attempts + '.');
|
|
401
433
|
}
|
|
402
|
-
if (
|
|
434
|
+
if (effectiveLastReason === 'scope-guard-blocked' && attempts >= 2) {
|
|
403
435
|
fallbackLines.push(
|
|
404
436
|
'- This issue has already hit the scope guard multiple times. Do not attempt another broad multi-surface patch.',
|
|
405
437
|
`- Either ship one focused slice that stays under the scope guard, or create focused follow-up issues with \`bash "$FLOW_TOOLS_DIR/create-follow-up-issue.sh" --parent ${issue.number} --title "..." --body-file /tmp/follow-up.md\` and supersede the umbrella.`,
|
|
@@ -420,21 +452,21 @@ const lines = [
|
|
|
420
452
|
'- Address the blocker below before attempting a new implementation/publish cycle.',
|
|
421
453
|
];
|
|
422
454
|
|
|
423
|
-
if (
|
|
424
|
-
lines.push(
|
|
455
|
+
if (effectiveLastReason) {
|
|
456
|
+
lines.push('- Last recorded blocker: `' + effectiveLastReason + '`.');
|
|
425
457
|
}
|
|
426
458
|
if (attempts > 0) {
|
|
427
|
-
lines.push(
|
|
459
|
+
lines.push('- Blocked retries so far: ' + attempts + '.');
|
|
428
460
|
}
|
|
429
461
|
if (nextAttemptAt) {
|
|
430
|
-
lines.push(
|
|
462
|
+
lines.push('- Last scheduled retry target was ' + nextAttemptAt + '.');
|
|
431
463
|
}
|
|
432
|
-
if (
|
|
464
|
+
if (effectiveLastReason === 'scope-guard-blocked') {
|
|
433
465
|
lines.push('- Treat this as a scope problem first: narrow to one safe slice or decompose into focused follow-up issues.');
|
|
434
466
|
if (attempts >= 2) {
|
|
435
467
|
lines.push(`- Because the scope guard has already fired multiple times, do not retry the same umbrella patch. Use \`bash "$FLOW_TOOLS_DIR/create-follow-up-issue.sh" --parent ${issue.number} --title "..." --body-file /tmp/follow-up.md\` for the remaining slices, then supersede the umbrella if you covered the full decomposition.`);
|
|
436
468
|
}
|
|
437
|
-
} else if (
|
|
469
|
+
} else if (effectiveLastReason === 'verification-guard-blocked') {
|
|
438
470
|
lines.push('- Add the missing verification or shrink the touched surface before attempting another publish cycle.');
|
|
439
471
|
}
|
|
440
472
|
|