agent-control-plane 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -445,9 +445,9 @@ heartbeat_mark_issue_running() {
445
445
  local issue_id="${1:?issue id required}"
446
446
  local is_heavy="${2:-no}"
447
447
  if [[ "$is_heavy" == "yes" ]]; then
448
- bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running --add agent-e2e-heavy >/dev/null
448
+ bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running --add agent-e2e-heavy >/dev/null || true
449
449
  else
450
- bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running >/dev/null
450
+ bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running >/dev/null || true
451
451
  fi
452
452
  }
453
453
 
@@ -22,6 +22,7 @@ Commands:
22
22
  help Show this help
23
23
  version Print package version
24
24
  setup Guided setup flow for one repo profile
25
+ onboard Alias for setup
25
26
  sync Publish the packaged runtime into ~/.agent-runtime
26
27
  install Alias for sync
27
28
  init Scaffold and adopt a project profile
@@ -503,6 +504,20 @@ function createPromptInterface() {
503
504
  });
504
505
  }
505
506
 
507
/**
 * Print the introductory banner for the interactive setup wizard.
 *
 * Writes a framed title followed by a short explanation of how the prompts
 * behave. Each banner line is emitted with its own console.log call.
 */
function printWizardBanner() {
  const rule = "============================================================";
  const bannerLines = [
    rule,
    " Agent Control Plane — Setup Wizard",
    rule,
    "",
    "This wizard will guide you through setting up one repo profile.",
    "Press Enter at any prompt to accept the value shown in [brackets].",
    "",
  ];

  for (const line of bannerLines) {
    console.log(line);
  }
}
516
+
517
/**
 * Print a numbered step heading for the setup wizard.
 *
 * @param {number|string} step - Position of the current step.
 * @param {number|string} total - Total number of steps.
 * @param {string} title - Short label for the step.
 */
function printWizardStep(step, total, title) {
  const progress = `[${step}/${total}]`;
  // A leading newline separates the heading from the previous step's output.
  const heading = `\n${progress} ${title}`;
  console.log(heading);
}
520
+
506
521
  function question(rl, prompt) {
507
522
  return new Promise((resolve) => rl.question(prompt, resolve));
508
523
  }
@@ -1341,7 +1356,9 @@ function printSetupDryRunPlan(context, config, plan) {
1341
1356
  }
1342
1357
  console.log(`- GitHub auth step: ${plan.githubAuthAction.status}${plan.githubAuthAction.reason ? ` (${plan.githubAuthAction.reason})` : ""}`);
1343
1358
  console.log(`- runtime start: ${plan.runtimeStartAction.status}${plan.runtimeStartAction.reason ? ` (${plan.runtimeStartAction.reason})` : ""}`);
1344
- console.log(`- launchd install: ${plan.launchdAction.status}${plan.launchdAction.reason ? ` (${plan.launchdAction.reason})` : ""}`);
1359
+ if (process.platform === "darwin") {
1360
+ console.log(`- launchd install: ${plan.launchdAction.status}${plan.launchdAction.reason ? ` (${plan.launchdAction.reason})` : ""}`);
1361
+ }
1345
1362
  }
1346
1363
 
1347
1364
  function buildSetupResultPayload(params) {
@@ -1457,6 +1474,23 @@ async function maybeRunFinalSetupFixups(options, scopedContext, config, currentS
1457
1474
  console.log(`- ${issue}`);
1458
1475
  }
1459
1476
 
1477
+ // Always show actionable hints so operators know what to fix,
1478
+ // even when running non-interactively (--yes / --json / CI).
1479
+ if (!currentState.prereq.coreToolsOk) {
1480
+ const missing = currentState.prereq.missingRequired.join(", ");
1481
+ console.log(` Fix: install missing core tools (${missing})`);
1482
+ }
1483
+ if (!currentState.prereq.workerAvailable) {
1484
+ const worker = currentState.prereq.workerCommand;
1485
+ if (worker === "codex") console.log(" Fix: npm install -g @openai/codex && codex login");
1486
+ else if (worker === "openclaw") console.log(" Fix: npm install -g openclaw && openclaw setup");
1487
+ else if (worker === "claude") console.log(" Fix: npm install -g @anthropic-ai/claude-code && claude auth login");
1488
+ else console.log(` Fix: install ${worker} and add it to PATH`);
1489
+ }
1490
+ if (!currentState.prereq.ghAuthOk) {
1491
+ console.log(" Fix: run gh auth login");
1492
+ }
1493
+
1460
1494
  if (!options.interactive) {
1461
1495
  return {
1462
1496
  status: "skipped",
@@ -1597,19 +1631,20 @@ async function collectSetupConfig(options, context) {
1597
1631
  throw new Error("setup could not detect --repo-slug automatically; pass --repo-slug <owner/repo> or run interactively inside a git checkout with origin set");
1598
1632
  }
1599
1633
  } else {
1634
+ printWizardBanner();
1600
1635
  const rl = createPromptInterface();
1601
1636
  try {
1602
- console.log("ACP setup will guide one repo profile from install to operator-ready defaults.");
1603
- console.log("Press Enter to accept the suggested value shown in brackets.\n");
1637
+ printWizardStep(1, 4, "Project details");
1604
1638
 
1605
1639
  repoRoot = path.resolve(await promptText(rl, "Local repo root", detectedRepoRoot));
1606
1640
  repoSlug = await promptText(rl, "GitHub repo slug", repoSlug || "");
1607
1641
  profileId = sanitizeProfileId(await promptText(rl, "Profile id", profileId));
1608
- codingWorker = await promptText(rl, "Coding worker (codex|claude|openclaw)", codingWorker);
1609
1642
 
1610
- if (!["codex", "claude", "openclaw"].includes(codingWorker)) {
1611
- throw new Error(`unsupported coding worker: ${codingWorker}`);
1643
+ let workerInput = codingWorker;
1644
+ while (!["codex", "claude", "openclaw"].includes(workerInput)) {
1645
+ workerInput = await promptText(rl, "Coding worker (codex / claude / openclaw)", codingWorker || "openclaw");
1612
1646
  }
1647
+ codingWorker = workerInput;
1613
1648
  } finally {
1614
1649
  rl.close();
1615
1650
  }
@@ -1630,6 +1665,9 @@ async function collectSetupConfig(options, context) {
1630
1665
  prereq
1631
1666
  };
1632
1667
 
1668
+ if (options.interactive) {
1669
+ printWizardStep(2, 4, "Review plan");
1670
+ }
1633
1671
  renderSetupSummary(config);
1634
1672
 
1635
1673
  if (options.interactive) {
@@ -1641,7 +1679,7 @@ async function collectSetupConfig(options, context) {
1641
1679
  }
1642
1680
  const shouldContinue = await promptYesNo(rl, "Continue with these values", true);
1643
1681
  if (!shouldContinue) {
1644
- throw new Error("setup cancelled");
1682
+ return null;
1645
1683
  }
1646
1684
  if (options.startRuntime === null) {
1647
1685
  options.startRuntime = await promptYesNo(rl, "Start the runtime after setup", true);
@@ -1733,6 +1771,10 @@ async function runSetupFlow(forwardedArgs) {
1733
1771
 
1734
1772
  try {
1735
1773
  const config = await collectSetupConfig(options, context);
1774
+ if (config === null) {
1775
+ console.log("\nSetup cancelled. Run again when you are ready.");
1776
+ return 0;
1777
+ }
1736
1778
  if (options.dryRun) {
1737
1779
  const plan = buildSetupDryRunPlan(options, context, config);
1738
1780
  printSetupDryRunPlan(context, config, plan);
@@ -1869,6 +1911,10 @@ async function runSetupFlow(forwardedArgs) {
1869
1911
  return 1;
1870
1912
  }
1871
1913
 
1914
+ if (options.interactive) {
1915
+ printWizardStep(3, 4, "Prerequisites");
1916
+ }
1917
+
1872
1918
  let prereq = config.prereq;
1873
1919
  let dependencyInstall = await maybeInstallMissingDependencies(options, prereq);
1874
1920
  if (dependencyInstall.status === "failed") {
@@ -1886,6 +1932,35 @@ async function runSetupFlow(forwardedArgs) {
1886
1932
  let workerSetupStep = await maybeShowWorkerSetupGuide(options, prereq);
1887
1933
  prereq = collectPrereqStatus(config.codingWorker);
1888
1934
 
1935
+ // Check OpenRouter API key when openclaw is selected
1936
+ if (config.codingWorker === "openclaw" && !process.env.OPENROUTER_API_KEY) {
1937
+ console.log("\nOpenClaw requires an OpenRouter API key (OPENROUTER_API_KEY).");
1938
+ console.log("- Get a free key at: https://openrouter.ai/keys");
1939
+ if (options.interactive) {
1940
+ const rl = createPromptInterface();
1941
+ let apiKey = "";
1942
+ try {
1943
+ apiKey = (await promptText(rl, "OpenRouter API key (Enter to skip)", "")).trim();
1944
+ } finally {
1945
+ rl.close();
1946
+ }
1947
+ if (apiKey) {
1948
+ process.env.OPENROUTER_API_KEY = apiKey;
1949
+ console.log("API key set for this session.");
1950
+ console.log("To persist it, add the following to your shell profile (~/.zshrc or ~/.bashrc):");
1951
+ console.log(` export OPENROUTER_API_KEY=${JSON.stringify(apiKey)}`);
1952
+ } else {
1953
+ console.log("Skipped. Set OPENROUTER_API_KEY before starting the runtime.");
1954
+ }
1955
+ } else {
1956
+ console.log("Set OPENROUTER_API_KEY in your environment before starting the runtime.");
1957
+ }
1958
+ }
1959
+
1960
+ if (options.interactive) {
1961
+ printWizardStep(4, 4, "Install");
1962
+ }
1963
+
1889
1964
  const scopedContext = buildScopedContext(context, config.profileId);
1890
1965
  const anchorSync = buildAnchorSyncDecision(options, config.paths.sourceRepoRoot);
1891
1966
 
@@ -2047,12 +2122,45 @@ async function runSetupFlow(forwardedArgs) {
2047
2122
 
2048
2123
  if (options.json) {
2049
2124
  emitSetupJsonPayload(runPayload);
2125
+ } else if (options.interactive) {
2126
+ // Human-friendly summary for interactive terminal runs
2127
+ console.log("\n============================================================");
2128
+ console.log(" Setup complete!");
2129
+ console.log("============================================================");
2130
+ console.log(` Profile : ${config.profileId}`);
2131
+ console.log(` Repo : ${config.repoSlug}`);
2132
+ console.log(` Worker : ${config.codingWorker}`);
2133
+ console.log(` Runtime : ${context.runtimeHome}`);
2134
+
2135
+ const pendingItems = [];
2136
+ if (!prereq.ghAuthOk) pendingItems.push("GitHub CLI not authenticated — run: gh auth login");
2137
+ if (!prereq.workerAvailable) pendingItems.push(`${config.codingWorker} not found on PATH — install it before starting`);
2138
+ if (config.codingWorker === "openclaw" && !process.env.OPENROUTER_API_KEY) {
2139
+ pendingItems.push("OPENROUTER_API_KEY not set — required for openclaw workers");
2140
+ }
2141
+ if (anchorSync.status !== "ok") pendingItems.push(`Anchor repo sync deferred (${anchorSync.reason}) — fix git access and re-run setup`);
2142
+ if ((doctorKv.DOCTOR_STATUS || "") !== "ok") pendingItems.push(`Doctor check flagged issues — run: npx agent-control-plane@latest doctor`);
2143
+
2144
+ if (pendingItems.length > 0) {
2145
+ console.log("\n Pending items before starting:");
2146
+ for (const item of pendingItems) {
2147
+ console.log(` - ${item}`);
2148
+ }
2149
+ }
2150
+
2151
+ console.log("\n Next commands:");
2152
+ if (runtimeStartStatus !== "ok") {
2153
+ console.log(` npx agent-control-plane@latest runtime start --profile-id ${config.profileId}`);
2154
+ }
2155
+ console.log(` npx agent-control-plane@latest runtime status --profile-id ${config.profileId}`);
2156
+ console.log(` npx agent-control-plane@latest doctor`);
2157
+ console.log("");
2050
2158
  } else {
2159
+ // Machine-readable KV output for non-interactive / scripted runs
2051
2160
  console.log("\nSetup complete.");
2052
2161
  console.log(`- profile: ${config.profileId}`);
2053
2162
  console.log(`- repo: ${config.repoSlug}`);
2054
2163
  console.log(`- runtime home: ${context.runtimeHome}`);
2055
- console.log(`- next status command: npx agent-control-plane@latest runtime status --profile-id ${config.profileId}`);
2056
2164
 
2057
2165
  console.log(`SETUP_STATUS=ok`);
2058
2166
  console.log(`PROFILE_ID=${config.profileId}`);
@@ -2170,6 +2278,7 @@ async function main() {
2170
2278
  console.log(packageJson.version);
2171
2279
  return 0;
2172
2280
  case "setup":
2281
+ case "onboard":
2173
2282
  return runSetupFlow(forwardedArgs);
2174
2283
  case "sync":
2175
2284
  case "install":
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-control-plane",
3
- "version": "0.1.14",
3
+ "version": "0.1.16",
4
4
  "description": "Help a repo keep GitHub-driven coding agents running reliably without constant human babysitting",
5
5
  "homepage": "https://github.com/ducminhnguyen0319/agent-control-plane",
6
6
  "bugs": {
@@ -359,6 +359,16 @@ infer_issue_runtime_failure_from_log() {
359
359
  return 0
360
360
  fi
361
361
 
362
+ if grep -Eiq 'stale-run no-agent-output-before-stall-threshold|no-agent-output-before-stall-threshold' "${log_file}" 2>/dev/null; then
363
+ printf 'no-agent-output-before-stall-threshold\n'
364
+ return 0
365
+ fi
366
+
367
+ if grep -Eiq 'stale-run no-agent-progress-before-stall-threshold|no-agent-progress-before-stall-threshold' "${log_file}" 2>/dev/null; then
368
+ printf 'no-agent-progress-before-stall-threshold\n'
369
+ return 0
370
+ fi
371
+
362
372
  if grep -Eiq 'Ignoring invalid cwd .* No such file or directory|/tmp is absolute|Custom tool call output is missing' "${log_file}" 2>/dev/null; then
363
373
  printf 'worker-environment-blocked\n'
364
374
  return 0
@@ -987,6 +997,17 @@ if (explicitFailureReason) {
987
997
  reason = 'scope-guard-blocked';
988
998
  } else if (/^# Blocker: Provider quota is currently exhausted$/im.test(body)) {
989
999
  reason = 'provider-quota-limit';
1000
+ } else if (
1001
+ /blocked on external network access/i.test(body) &&
1002
+ (/What I ran:/i.test(body) ||
1003
+ /`pnpm audit`/i.test(body) ||
1004
+ /`gh issue view`/i.test(body)) &&
1005
+ (/failed with `ENOTFOUND`/i.test(body) ||
1006
+ /Exact failure:/i.test(body) ||
1007
+ /registry\.npmjs\.org/i.test(body) ||
1008
+ /api\.github\.com/i.test(body))
1009
+ ) {
1010
+ reason = 'worker-preflight-network-blocked';
990
1011
  } else if (
991
1012
  /blocked on external network access/i.test(body) ||
992
1013
  /could not perform a safe offline bump/i.test(body) ||
@@ -553,6 +553,31 @@ classify_pr_blocked_runtime_reason() {
553
553
  return 0
554
554
  fi
555
555
 
556
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
557
+ printf 'codex-stalled\n'
558
+ return 0
559
+ fi
560
+
561
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
562
+ printf 'agent-stalled\n'
563
+ return 0
564
+ fi
565
+
566
+ if [[ -f "$session_log_file" ]] && grep -Eiq 'provider-quota-limit|quota.*exhausted|rate.limit.*exceeded' "$session_log_file" 2>/dev/null; then
567
+ printf 'provider-quota-limit\n'
568
+ return 0
569
+ fi
570
+
571
+ if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
572
+ printf 'codex-stalled\n'
573
+ return 0
574
+ fi
575
+
576
+ if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
577
+ printf 'agent-stalled\n'
578
+ return 0
579
+ fi
580
+
556
581
  return 1
557
582
  }
558
583
 
@@ -618,6 +618,22 @@ classify_failure_reason() {
618
618
  fi
619
619
  }
620
620
 
621
# Decide whether a failure log chunk looks like a stall that happened right
# after startup.
#
# Arguments:
#   $1  log text to inspect (defaults to empty)
# Returns:
#   0 when the last 120 lines contain both a thread.started and a turn.started
#     event but no item.started, item.completed, or turn.completed event;
#   1 otherwise.
failure_chunk_indicates_startup_stall() {
  local chunk="${1:-}"
  local tail_window

  # Only the most recent 120 lines are considered.
  tail_window="$(tail -n 120 <<<"$chunk")"

  # Both start markers must be present.
  if ! grep -q '"type":"thread.started"' <<<"$tail_window"; then
    return 1
  fi
  if ! grep -q '"type":"turn.started"' <<<"$tail_window"; then
    return 1
  fi

  # Any item activity or a completed turn rules out a startup stall.
  grep -Eq '"type":"item\.(started|completed)"' <<<"$tail_window" && return 1
  grep -q '"type":"turn.completed"' <<<"$tail_window" && return 1

  return 0
}
636
+
621
637
  resume_prompt() {
622
638
  cat <<EOF
623
639
  The previous Codex exec turn in this same thread was interrupted because the host refreshed Codex authentication after a quota or auth failure.
@@ -729,7 +745,7 @@ run_resume_exec() {
729
745
  }
730
746
 
731
747
  attempt_run() {
732
- local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch
748
+ local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch failure_chunk startup_stall
733
749
 
734
750
  attempt=$((attempt + 1))
735
751
  last_quota_switch_status=""
@@ -750,8 +766,15 @@ attempt_run() {
750
766
  return 0
751
767
  fi
752
768
 
753
- reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
769
+ failure_chunk="$(new_output_since "$last_attempt_start_size")"
770
+ reason="$(classify_failure_reason "$failure_chunk")"
754
771
  last_failure_reason="${reason:-worker-exit-failed}"
772
+ startup_stall="no"
773
+ if [[ "$last_failure_reason" == "no-codex-output-before-stall-threshold" || "$last_failure_reason" == "no-codex-progress-before-stall-threshold" ]]; then
774
+ if failure_chunk_indicates_startup_stall "$failure_chunk"; then
775
+ startup_stall="yes"
776
+ fi
777
+ fi
755
778
 
756
779
  case "$last_failure_reason" in
757
780
  usage-limit|auth-failure|auth-401|account-banned)
@@ -796,6 +819,38 @@ attempt_run() {
796
819
  resume_count=$((resume_count + 1))
797
820
  return 2
798
821
  ;;
822
+ no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold)
823
+ if [[ "$startup_stall" == "yes" && $quota_autoswitch_attempt_count -lt $max_quota_autoswitch_attempts ]]; then
824
+ auth_before_switch="$(auth_fingerprint)"
825
+ quota_label_before_switch="$last_attempt_start_quota_label"
826
+ quota_switch_signature_before_switch="$(quota_switch_signature)"
827
+ last_auth_fingerprint="$auth_before_switch"
828
+ write_state "switching-account" "$last_failure_reason"
829
+ log_runner "startup-stall detected before first Codex tool activity; attempting Codex account rotation"
830
+ shell_flags_before_quota_switch="$-"
831
+ set +e
832
+ run_quota_autoswitch
833
+ quota_switch_result=$?
834
+ case "$shell_flags_before_quota_switch" in
835
+ *e*) set -e ;;
836
+ *) set +e ;;
837
+ esac
838
+ if [[ "$quota_switch_result" == "0" ]]; then
839
+ thread_id=""
840
+ auth_wait_started_at=""
841
+ write_state "running" ""
842
+ return 2
843
+ fi
844
+ if [[ "$quota_switch_result" == "10" ]]; then
845
+ log_runner "startup-stall rotation deferred until ${last_quota_next_retry_at:-unknown}"
846
+ last_failure_reason="quota-switch-deferred"
847
+ write_state "failed" "$last_failure_reason"
848
+ return 1
849
+ fi
850
+ fi
851
+ write_state "failed" "$last_failure_reason"
852
+ return 1
853
+ ;;
799
854
  *)
800
855
  write_state "failed" "$last_failure_reason"
801
856
  return 1
@@ -115,6 +115,7 @@ result_file="${artifact_dir}/result.env"
115
115
  runner_state_file="${artifact_dir}/runner.env"
116
116
  sandbox_artifact_dir="${worktree%/}/${sandbox_subdir}"
117
117
  sandbox_run_dir="${worktree%/}/${sandbox_subdir}/${session}"
118
+ retained_repo_root="${ACP_RETAINED_REPO_ROOT:-${F_LOSNING_RETAINED_REPO_ROOT:-}}"
118
119
  started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
119
120
  openclaw_bin="$(command -v openclaw)"
120
121
  default_openclaw_agent_id="$(
@@ -154,6 +155,7 @@ printf -v runner_state_q '%q' "$runner_state_file"
154
155
  printf -v branch_q '%q' "$branch_name"
155
156
  printf -v sandbox_artifact_dir_q '%q' "$sandbox_artifact_dir"
156
157
  printf -v sandbox_run_dir_q '%q' "$sandbox_run_dir"
158
+ printf -v retained_repo_root_q '%q' "$retained_repo_root"
157
159
  printf -v adapter_id_q '%q' "$adapter_id"
158
160
  printf -v started_at_q '%q' "$started_at"
159
161
  printf -v openclaw_bin_q '%q' "$openclaw_bin"
@@ -230,18 +232,21 @@ export AGENT_PROJECT_RUN_DIR=${sandbox_run_dir_q}
230
232
  export AGENT_PROJECT_HOST_RUN_DIR=${artifact_dir_q}
231
233
  export AGENT_PROJECT_RESULT_FILE=${sandbox_run_dir_q}/result.env
232
234
  export AGENT_PROJECT_OPENCLAW_BIN=${openclaw_bin_q}
235
+ export AGENT_PROJECT_RETAINED_REPO_ROOT=${retained_repo_root_q}
233
236
  export ACP_SESSION=${session_q}
234
237
  export ACP_RUN_DIR=${sandbox_run_dir_q}
235
238
  export ACP_HOST_RUN_DIR=${artifact_dir_q}
236
239
  export ACP_RESULT_FILE=${sandbox_run_dir_q}/result.env
237
240
  export ACP_OPENCLAW_BIN=${openclaw_bin_q}
238
241
  export ACP_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
242
+ export ACP_RETAINED_REPO_ROOT=${retained_repo_root_q}
239
243
  export F_LOSNING_SESSION=${session_q}
240
244
  export F_LOSNING_RUN_DIR=${sandbox_run_dir_q}
241
245
  export F_LOSNING_HOST_RUN_DIR=${artifact_dir_q}
242
246
  export F_LOSNING_RESULT_FILE=${sandbox_run_dir_q}/result.env
243
247
  export F_LOSNING_OPENCLAW_BIN=${openclaw_bin_q}
244
248
  export F_LOSNING_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
249
+ export F_LOSNING_RETAINED_REPO_ROOT=${retained_repo_root_q}
245
250
  export OPENCLAW_STATE_DIR=${openclaw_state_dir_q}
246
251
  export OPENCLAW_CONFIG_PATH=${openclaw_config_path_q}
247
252
  EOF
@@ -294,6 +299,7 @@ runner_state_file=${runner_state_q}
294
299
  output_file=${output_q}
295
300
  sandbox_artifact_dir=${sandbox_artifact_dir_q}
296
301
  sandbox_run_dir=${sandbox_run_dir_q}
302
+ retained_repo_root=${retained_repo_root_q}
297
303
  artifact_dir=${artifact_dir_q}
298
304
  run_dir=${artifact_dir_q}
299
305
  task_kind=${task_kind_q}
@@ -554,6 +560,45 @@ recover_literal_runtime_artifacts() {
554
560
  return 0
555
561
  }
556
562
 
563
# Recover run artifacts that ended up beneath the retained repo root instead
# of the sandbox run dir, then remove the stray directories.
#
# Reads retained_repo_root, worktree, sandbox_run_dir, artifact_dir and
# output_file from the enclosing script, and AGENT_PROJECT_SESSION from the
# environment. Always returns 0; every filesystem step is best-effort.
#
# NOTE: this function lives inside a generated-script template, so every
# dollar sign must stay backslash-escaped.
recover_retained_repo_artifact_leaks() {
  local retained_worktrees=""
  local leak_dir=""
  local worktree_leaf=""
  local session_id=""
  local candidate=""
  local found_any="no"

  # Nothing to recover unless a retained repo root is configured.
  if [[ -z "\${retained_repo_root}" ]]; then
    return 0
  fi

  worktree_leaf="\$(basename "\${worktree}")"
  session_id="\${AGENT_PROJECT_SESSION:-}"
  if [[ -z "\${session_id}" ]]; then
    return 0
  fi

  retained_worktrees="\${retained_repo_root%/}/worktrees"
  leak_dir="\${retained_worktrees}/\${worktree_leaf}/.openclaw-artifacts/\${session_id}"

  # Skip when there is no leaked directory, or when it is the real sandbox
  # run dir (which must not be deleted).
  [[ -d "\${leak_dir}" ]] || return 0
  [[ "\${leak_dir}" != "\${sandbox_run_dir}" ]] || return 0

  for candidate in result.env verification.jsonl issue-comment.md pr-comment.md; do
    [[ -f "\${leak_dir}/\${candidate}" ]] || continue
    cp "\${leak_dir}/\${candidate}" "\${sandbox_run_dir}/\${candidate}" 2>/dev/null || true
    cp "\${leak_dir}/\${candidate}" "\${artifact_dir}/\${candidate}" 2>/dev/null || true
    found_any="yes"
  done

  # Remove the leaked run dir, then prune parent directories only if they are
  # now empty (rmdir refuses to remove non-empty directories).
  rm -rf "\${leak_dir}" 2>/dev/null || true
  rmdir "\${retained_worktrees}/\${worktree_leaf}/.openclaw-artifacts" 2>/dev/null || true
  rmdir "\${retained_worktrees}/\${worktree_leaf}" 2>/dev/null || true
  rmdir "\${retained_worktrees}" 2>/dev/null || true

  if [[ "\${found_any}" == "yes" ]]; then
    printf '[openclaw] recovered retained-repo artifact leak: %s\n' "\${leak_dir}" >>"\${output_file}" 2>/dev/null || true
  fi

  return 0
}
601
+
557
602
  reset_sandbox_run_dir() {
558
603
  mkdir -p "\${sandbox_run_dir}"
559
604
  find "\${sandbox_run_dir}" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
@@ -1217,6 +1262,7 @@ while true; do
1217
1262
  break
1218
1263
  done
1219
1264
  recover_literal_runtime_artifacts
1265
+ recover_retained_repo_artifact_leaks
1220
1266
  infer_result_from_output
1221
1267
  synthesize_comment_artifact_from_output
1222
1268
  if [[ "\${status}" -eq 0 ]]; then
@@ -44,6 +44,36 @@ runner_state=""
44
44
  thread_id=""
45
45
  last_exit_code=""
46
46
 
47
# Infer a failure reason from the worker output log.
#
# Reads the global output_file. Prints exactly one reason token on stdout and
# returns 0 when a known signature is found; returns 1 (printing nothing) when
# the file is missing or no signature matches.
#
# Uses grep -E rather than ripgrep so detection does not depend on an optional
# tool being installed; when rg is absent every probe would fail and the
# function would silently report "no reason".
failure_reason_from_output() {
  [[ -f "$output_file" ]] || return 1

  if grep -Eiq "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file" 2>/dev/null; then
    printf 'usage-limit\n'
    return 0
  fi

  if grep -Eiq 'stale-run no-codex-output-before-stall-threshold|no-codex-output-before-stall-threshold' "$output_file" 2>/dev/null; then
    printf 'no-codex-output-before-stall-threshold\n'
    return 0
  fi

  if grep -Eiq 'stale-run no-codex-progress-before-stall-threshold|no-codex-progress-before-stall-threshold' "$output_file" 2>/dev/null; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  # Recover Codex startup stalls when the wrapper was archived before it could
  # flush a terminal runner.env state. This is intentionally narrow: the log
  # must show a turn started, but no tool activity or turn completion.
  if grep -Eq '"type":"turn.started"' "$output_file" 2>/dev/null \
    && ! grep -Eq '"type":"item.started"|"type":"item.completed"|"type":"turn.completed"' "$output_file" 2>/dev/null; then
    printf 'no-codex-progress-before-stall-threshold\n'
    return 0
  fi

  return 1
}
76
+
47
77
  if tmux has-session -t "$session" 2>/dev/null; then
48
78
  status="RUNNING"
49
79
  fi
@@ -87,6 +117,13 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
87
117
  fi
88
118
  fi
89
119
 
120
+ if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
121
+ failure_reason="$(failure_reason_from_output || true)"
122
+ if [[ -n "$failure_reason" ]]; then
123
+ status="FAILED"
124
+ fi
125
+ fi
126
+
90
127
  if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
91
128
  case "$runner_state" in
92
129
  running|waiting-auth-refresh|switching-account)
@@ -2039,6 +2039,7 @@ flow_export_execution_env() {
2039
2039
  local openclaw_model=""
2040
2040
  local openclaw_thinking=""
2041
2041
  local openclaw_timeout=""
2042
+ local openclaw_stall=""
2042
2043
 
2043
2044
  repo_id="$(flow_resolve_repo_id "${config_file}")"
2044
2045
  provider_quota_cooldowns="$(flow_resolve_provider_quota_cooldowns "${config_file}")"
@@ -2071,6 +2072,7 @@ flow_export_execution_env() {
2071
2072
  openclaw_model="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_MODEL")"
2072
2073
  openclaw_thinking="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_THINKING")"
2073
2074
  openclaw_timeout="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_TIMEOUT_SECONDS")"
2075
+ openclaw_stall="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_STALL_SECONDS")"
2074
2076
  else
2075
2077
  if [[ -n "${explicit_coding_worker}" ]]; then
2076
2078
  active_provider_selection_reason="env-override"
@@ -2087,6 +2089,7 @@ flow_export_execution_env() {
2087
2089
  openclaw_model="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL" "execution.openclaw.model" "")"
2088
2090
  openclaw_thinking="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING" "execution.openclaw.thinking" "")"
2089
2091
  openclaw_timeout="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS" "execution.openclaw.timeout_seconds" "")"
2092
+ openclaw_stall="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS" "execution.openclaw.stall_seconds" "")"
2090
2093
  fi
2091
2094
 
2092
2095
  if [[ -n "${coding_worker}" ]]; then
@@ -2167,6 +2170,10 @@ flow_export_execution_env() {
2167
2170
  export F_LOSNING_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
2168
2171
  export ACP_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
2169
2172
  fi
2173
+ if [[ -n "${openclaw_stall}" ]]; then
2174
+ export F_LOSNING_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
2175
+ export ACP_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
2176
+ fi
2170
2177
 
2171
2178
  flow_export_github_cli_auth_env "$(flow_resolve_repo_slug "${config_file}")"
2172
2179
  flow_export_project_env_aliases
@@ -52,6 +52,7 @@ flow_export_compat_env_aliases() {
52
52
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_MODEL ACP_OPENCLAW_MODEL
53
53
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_THINKING ACP_OPENCLAW_THINKING
54
54
  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_TIMEOUT_SECONDS ACP_OPENCLAW_TIMEOUT_SECONDS
55
+ flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_STALL_SECONDS ACP_OPENCLAW_STALL_SECONDS
55
56
  flow_export_env_alias_if_unset F_LOSNING_ALLOW_INFRA_CI_BYPASS ACP_ALLOW_INFRA_CI_BYPASS
56
57
  flow_export_env_alias_if_unset F_LOSNING_LOCAL_FIRST_PR_POLICY ACP_LOCAL_FIRST_PR_POLICY
57
58
  flow_export_env_alias_if_unset F_LOSNING_PR_RISK_CACHE_TTL_SECONDS ACP_PR_RISK_CACHE_TTL_SECONDS
@@ -100,6 +101,7 @@ flow_export_canonical_env_aliases() {
100
101
  flow_export_env_alias_if_unset ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL
101
102
  flow_export_env_alias_if_unset ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING
102
103
  flow_export_env_alias_if_unset ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS
104
+ flow_export_env_alias_if_unset ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS
103
105
  flow_export_env_alias_if_unset ACP_ALLOW_INFRA_CI_BYPASS F_LOSNING_ALLOW_INFRA_CI_BYPASS
104
106
  flow_export_env_alias_if_unset ACP_LOCAL_FIRST_PR_POLICY F_LOSNING_LOCAL_FIRST_PR_POLICY
105
107
  flow_export_env_alias_if_unset ACP_PR_RISK_CACHE_TTL_SECONDS F_LOSNING_PR_RISK_CACHE_TTL_SECONDS
@@ -361,7 +361,7 @@ print_status() {
361
361
  if [[ -n "${heartbeat}" || -n "${shared_loop}" || -n "${supervisor}" || "${controller_count}" != "0" || "${active_session_count}" != "0" ]]; then
362
362
  runtime_status="running"
363
363
  fi
364
- if [[ -z "${heartbeat}" && -z "${supervisor}" && ( -n "${shared_loop}" || "${controller_count}" != "0" || "${active_session_count}" != "0" ) ]]; then
364
+ if [[ -z "${heartbeat}" && -z "${supervisor}" && "${active_session_count}" == "0" && ( -n "${shared_loop}" || "${controller_count}" != "0" ) ]]; then
365
365
  runtime_status="partial"
366
366
  fi
367
367
 
@@ -54,6 +54,9 @@ rollback_labels_on_failure() {
54
54
  if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
55
55
  return 0
56
56
  fi
57
+ if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
58
+ rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
59
+ fi
57
60
  if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
58
61
  bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${ISSUE_ID}" --remove agent-running >/dev/null 2>&1 || true
59
62
  fi
@@ -33,6 +33,9 @@ rollback_labels_on_failure() {
33
33
  if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
34
34
  return 0
35
35
  fi
36
+ if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
37
+ rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
38
+ fi
36
39
  if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
37
40
  bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${PR_NUMBER}" --remove agent-running >/dev/null 2>&1 || true
38
41
  fi
@@ -32,6 +32,9 @@ rollback_labels_on_failure() {
32
32
  if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
33
33
  return 0
34
34
  fi
35
+ if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
36
+ rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
37
+ fi
35
38
  if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
36
39
  bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${PR_NUMBER}" --remove agent-running >/dev/null 2>&1 || true
37
40
  fi
@@ -204,6 +204,7 @@ controller_refresh_execution_context() {
204
204
  ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL \
205
205
  ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING \
206
206
  ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS \
207
+ ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS \
207
208
  ACP_ACTIVE_PROVIDER_POOL_NAME F_LOSNING_ACTIVE_PROVIDER_POOL_NAME \
208
209
  ACP_ACTIVE_PROVIDER_BACKEND F_LOSNING_ACTIVE_PROVIDER_BACKEND \
209
210
  ACP_ACTIVE_PROVIDER_MODEL F_LOSNING_ACTIVE_PROVIDER_MODEL \