npm - agent-control-plane - Versions diffs - 0.1.14 → 0.1.16 - Mend

agent-control-plane 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +250 -355
package/hooks/heartbeat-hooks.sh +2 -2
package/npm/bin/agent-control-plane.js +117 -8
package/package.json +1 -1
package/tools/bin/agent-project-reconcile-issue-session +21 -0
package/tools/bin/agent-project-reconcile-pr-session +25 -0
package/tools/bin/agent-project-run-codex-resilient +57 -2
package/tools/bin/agent-project-run-openclaw-session +46 -0
package/tools/bin/agent-project-worker-status +37 -0
package/tools/bin/flow-config-lib.sh +7 -0
package/tools/bin/flow-shell-lib.sh +2 -0
package/tools/bin/project-runtimectl.sh +1 -1
package/tools/bin/start-issue-worker.sh +3 -0
package/tools/bin/start-pr-fix-worker.sh +3 -0
package/tools/bin/start-pr-review-worker.sh +3 -0
package/tools/bin/start-resident-issue-loop.sh +1 -0
package/tools/dashboard/app.js +62 -0
package/tools/dashboard/dashboard_snapshot.py +53 -4
package/tools/dashboard/index.html +5 -1
package/tools/dashboard/styles.css +97 -20
package/tools/templates/pr-fix-template.md +6 -6
package/tools/templates/pr-merge-repair-template.md +6 -6

package/hooks/heartbeat-hooks.sh CHANGED

@@ -445,9 +445,9 @@ heartbeat_mark_issue_running() {
   local issue_id="${1:?issue id required}"
   local is_heavy="${2:-no}"
   if [[ "$is_heavy" == "yes" ]]; then
-    bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running --add agent-e2e-heavy >/dev/null
+    bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running --add agent-e2e-heavy >/dev/null || true
   else
-    bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running >/dev/null
+    bash "${FLOW_TOOLS_DIR}/agent-github-update-labels" --repo-slug "$REPO_SLUG" --number "$issue_id" --remove agent-ready --remove agent-blocked --add agent-running >/dev/null || true
   fi
 }

package/npm/bin/agent-control-plane.js CHANGED

@@ -22,6 +22,7 @@ Commands:
   help                 Show this help
   version              Print package version
   setup                Guided setup flow for one repo profile
+  onboard              Alias for setup
   sync                 Publish the packaged runtime into ~/.agent-runtime
   install              Alias for sync
   init                 Scaffold and adopt a project profile
@@ -503,6 +504,20 @@ function createPromptInterface() {
   });
 }
+function printWizardBanner() {
+  console.log("============================================================");
+  console.log("  Agent Control Plane — Setup Wizard");
+  console.log("============================================================");
+  console.log("");
+  console.log("This wizard will guide you through setting up one repo profile.");
+  console.log("Press Enter at any prompt to accept the value shown in [brackets].");
+  console.log("");
+}
+function printWizardStep(step, total, title) {
+  console.log(`\n[${step}/${total}] ${title}`);
+}
 function question(rl, prompt) {
   return new Promise((resolve) => rl.question(prompt, resolve));
 }
@@ -1341,7 +1356,9 @@ function printSetupDryRunPlan(context, config, plan) {
   }
   console.log(`- GitHub auth step: ${plan.githubAuthAction.status}${plan.githubAuthAction.reason ? ` (${plan.githubAuthAction.reason})` : ""}`);
   console.log(`- runtime start: ${plan.runtimeStartAction.status}${plan.runtimeStartAction.reason ? ` (${plan.runtimeStartAction.reason})` : ""}`);
-  console.log(`- launchd install: ${plan.launchdAction.status}${plan.launchdAction.reason ? ` (${plan.launchdAction.reason})` : ""}`);
+  if (process.platform === "darwin") {
+    console.log(`- launchd install: ${plan.launchdAction.status}${plan.launchdAction.reason ? ` (${plan.launchdAction.reason})` : ""}`);
+  }
 }
 function buildSetupResultPayload(params) {
@@ -1457,6 +1474,23 @@ async function maybeRunFinalSetupFixups(options, scopedContext, config, currentS
     console.log(`- ${issue}`);
   }
+  // Always show actionable hints so operators know what to fix,
+  // even when running non-interactively (--yes / --json / CI).
+  if (!currentState.prereq.coreToolsOk) {
+    const missing = currentState.prereq.missingRequired.join(", ");
+    console.log(`  Fix: install missing core tools (${missing})`);
+  }
+  if (!currentState.prereq.workerAvailable) {
+    const worker = currentState.prereq.workerCommand;
+    if (worker === "codex") console.log("  Fix: npm install -g @openai/codex && codex login");
+    else if (worker === "openclaw") console.log("  Fix: npm install -g openclaw && openclaw setup");
+    else if (worker === "claude") console.log("  Fix: npm install -g @anthropic-ai/claude-code && claude auth login");
+    else console.log(`  Fix: install ${worker} and add it to PATH`);
+  }
+  if (!currentState.prereq.ghAuthOk) {
+    console.log("  Fix: run gh auth login");
+  }
   if (!options.interactive) {
     return {
       status: "skipped",
@@ -1597,19 +1631,20 @@ async function collectSetupConfig(options, context) {
       throw new Error("setup could not detect --repo-slug automatically; pass --repo-slug <owner/repo> or run interactively inside a git checkout with origin set");
     }
   } else {
+    printWizardBanner();
     const rl = createPromptInterface();
     try {
-      console.log("ACP setup will guide one repo profile from install to operator-ready defaults.");
-      console.log("Press Enter to accept the suggested value shown in brackets.\n");
+      printWizardStep(1, 4, "Project details");
       repoRoot = path.resolve(await promptText(rl, "Local repo root", detectedRepoRoot));
       repoSlug = await promptText(rl, "GitHub repo slug", repoSlug || "");
       profileId = sanitizeProfileId(await promptText(rl, "Profile id", profileId));
-      codingWorker = await promptText(rl, "Coding worker (codex|claude|openclaw)", codingWorker);
-      if (!["codex", "claude", "openclaw"].includes(codingWorker)) {
-        throw new Error(`unsupported coding worker: ${codingWorker}`);
+      let workerInput = codingWorker;
+      while (!["codex", "claude", "openclaw"].includes(workerInput)) {
+        workerInput = await promptText(rl, "Coding worker (codex / claude / openclaw)", codingWorker || "openclaw");
       }
+      codingWorker = workerInput;
     } finally {
       rl.close();
     }
@@ -1630,6 +1665,9 @@ async function collectSetupConfig(options, context) {
     prereq
   };
+  if (options.interactive) {
+    printWizardStep(2, 4, "Review plan");
+  }
   renderSetupSummary(config);
   if (options.interactive) {
@@ -1641,7 +1679,7 @@ async function collectSetupConfig(options, context) {
       }
       const shouldContinue = await promptYesNo(rl, "Continue with these values", true);
       if (!shouldContinue) {
-        throw new Error("setup cancelled");
+        return null;
       }
       if (options.startRuntime === null) {
         options.startRuntime = await promptYesNo(rl, "Start the runtime after setup", true);
@@ -1733,6 +1771,10 @@ async function runSetupFlow(forwardedArgs) {
   try {
     const config = await collectSetupConfig(options, context);
+    if (config === null) {
+      console.log("\nSetup cancelled. Run again when you are ready.");
+      return 0;
+    }
     if (options.dryRun) {
       const plan = buildSetupDryRunPlan(options, context, config);
       printSetupDryRunPlan(context, config, plan);
@@ -1869,6 +1911,10 @@ async function runSetupFlow(forwardedArgs) {
       return 1;
     }
+    if (options.interactive) {
+      printWizardStep(3, 4, "Prerequisites");
+    }
     let prereq = config.prereq;
     let dependencyInstall = await maybeInstallMissingDependencies(options, prereq);
     if (dependencyInstall.status === "failed") {
@@ -1886,6 +1932,35 @@ async function runSetupFlow(forwardedArgs) {
     let workerSetupStep = await maybeShowWorkerSetupGuide(options, prereq);
     prereq = collectPrereqStatus(config.codingWorker);
+    // Check OpenRouter API key when openclaw is selected
+    if (config.codingWorker === "openclaw" && !process.env.OPENROUTER_API_KEY) {
+      console.log("\nOpenClaw requires an OpenRouter API key (OPENROUTER_API_KEY).");
+      console.log("- Get a free key at: https://openrouter.ai/keys");
+      if (options.interactive) {
+        const rl = createPromptInterface();
+        let apiKey = "";
+        try {
+          apiKey = (await promptText(rl, "OpenRouter API key (Enter to skip)", "")).trim();
+        } finally {
+          rl.close();
+        }
+        if (apiKey) {
+          process.env.OPENROUTER_API_KEY = apiKey;
+          console.log("API key set for this session.");
+          console.log("To persist it, add the following to your shell profile (~/.zshrc or ~/.bashrc):");
+          console.log(`  export OPENROUTER_API_KEY=${JSON.stringify(apiKey)}`);
+        } else {
+          console.log("Skipped. Set OPENROUTER_API_KEY before starting the runtime.");
+        }
+      } else {
+        console.log("Set OPENROUTER_API_KEY in your environment before starting the runtime.");
+      }
+    }
+    if (options.interactive) {
+      printWizardStep(4, 4, "Install");
+    }
     const scopedContext = buildScopedContext(context, config.profileId);
     const anchorSync = buildAnchorSyncDecision(options, config.paths.sourceRepoRoot);
@@ -2047,12 +2122,45 @@ async function runSetupFlow(forwardedArgs) {
     if (options.json) {
       emitSetupJsonPayload(runPayload);
+    } else if (options.interactive) {
+      // Human-friendly summary for interactive terminal runs
+      console.log("\n============================================================");
+      console.log("  Setup complete!");
+      console.log("============================================================");
+      console.log(`  Profile : ${config.profileId}`);
+      console.log(`  Repo    : ${config.repoSlug}`);
+      console.log(`  Worker  : ${config.codingWorker}`);
+      console.log(`  Runtime : ${context.runtimeHome}`);
+      const pendingItems = [];
+      if (!prereq.ghAuthOk) pendingItems.push("GitHub CLI not authenticated — run: gh auth login");
+      if (!prereq.workerAvailable) pendingItems.push(`${config.codingWorker} not found on PATH — install it before starting`);
+      if (config.codingWorker === "openclaw" && !process.env.OPENROUTER_API_KEY) {
+        pendingItems.push("OPENROUTER_API_KEY not set — required for openclaw workers");
+      }
+      if (anchorSync.status !== "ok") pendingItems.push(`Anchor repo sync deferred (${anchorSync.reason}) — fix git access and re-run setup`);
+      if ((doctorKv.DOCTOR_STATUS || "") !== "ok") pendingItems.push(`Doctor check flagged issues — run: npx agent-control-plane@latest doctor`);
+      if (pendingItems.length > 0) {
+        console.log("\n  Pending items before starting:");
+        for (const item of pendingItems) {
+          console.log(`    - ${item}`);
+        }
+      }
+      console.log("\n  Next commands:");
+      if (runtimeStartStatus !== "ok") {
+        console.log(`    npx agent-control-plane@latest runtime start --profile-id ${config.profileId}`);
+      }
+      console.log(`    npx agent-control-plane@latest runtime status --profile-id ${config.profileId}`);
+      console.log(`    npx agent-control-plane@latest doctor`);
+      console.log("");
     } else {
+      // Machine-readable KV output for non-interactive / scripted runs
       console.log("\nSetup complete.");
       console.log(`- profile: ${config.profileId}`);
       console.log(`- repo: ${config.repoSlug}`);
       console.log(`- runtime home: ${context.runtimeHome}`);
-      console.log(`- next status command: npx agent-control-plane@latest runtime status --profile-id ${config.profileId}`);
       console.log(`SETUP_STATUS=ok`);
       console.log(`PROFILE_ID=${config.profileId}`);
@@ -2170,6 +2278,7 @@ async function main() {
       console.log(packageJson.version);
       return 0;
     case "setup":
+    case "onboard":
       return runSetupFlow(forwardedArgs);
     case "sync":
     case "install":

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "agent-control-plane",
-  "version": "0.1.14",
+  "version": "0.1.16",
   "description": "Help a repo keep GitHub-driven coding agents running reliably without constant human babysitting",
   "homepage": "https://github.com/ducminhnguyen0319/agent-control-plane",
   "bugs": {

package/tools/bin/agent-project-reconcile-issue-session CHANGED

@@ -359,6 +359,16 @@ infer_issue_runtime_failure_from_log() {
     return 0
   fi
+  if grep -Eiq 'stale-run no-agent-output-before-stall-threshold|no-agent-output-before-stall-threshold' "${log_file}" 2>/dev/null; then
+    printf 'no-agent-output-before-stall-threshold\n'
+    return 0
+  fi
+  if grep -Eiq 'stale-run no-agent-progress-before-stall-threshold|no-agent-progress-before-stall-threshold' "${log_file}" 2>/dev/null; then
+    printf 'no-agent-progress-before-stall-threshold\n'
+    return 0
+  fi
   if grep -Eiq 'Ignoring invalid cwd .* No such file or directory|/tmp is absolute|Custom tool call output is missing' "${log_file}" 2>/dev/null; then
     printf 'worker-environment-blocked\n'
     return 0
@@ -987,6 +997,17 @@ if (explicitFailureReason) {
   reason = 'scope-guard-blocked';
 } else if (/^# Blocker: Provider quota is currently exhausted$/im.test(body)) {
   reason = 'provider-quota-limit';
+} else if (
+  /blocked on external network access/i.test(body) &&
+  (/What I ran:/i.test(body) ||
+    /`pnpm audit`/i.test(body) ||
+    /`gh issue view`/i.test(body)) &&
+  (/failed with `ENOTFOUND`/i.test(body) ||
+    /Exact failure:/i.test(body) ||
+    /registry\.npmjs\.org/i.test(body) ||
+    /api\.github\.com/i.test(body))
+) {
+  reason = 'worker-preflight-network-blocked';
 } else if (
   /blocked on external network access/i.test(body) ||
   /could not perform a safe offline bump/i.test(body) ||

package/tools/bin/agent-project-reconcile-pr-session CHANGED

@@ -553,6 +553,31 @@ classify_pr_blocked_runtime_reason() {
     return 0
   fi
+  if [[ -f "$session_log_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
+    printf 'codex-stalled\n'
+    return 0
+  fi
+  if [[ -f "$session_log_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$session_log_file" 2>/dev/null; then
+    printf 'agent-stalled\n'
+    return 0
+  fi
+  if [[ -f "$session_log_file" ]] && grep -Eiq 'provider-quota-limit|quota.*exhausted|rate.limit.*exceeded' "$session_log_file" 2>/dev/null; then
+    printf 'provider-quota-limit\n'
+    return 0
+  fi
+  if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
+    printf 'codex-stalled\n'
+    return 0
+  fi
+  if [[ -f "$pr_comment_file" ]] && grep -Eiq 'no-agent-output-before-stall-threshold|no-agent-progress-before-stall-threshold' "$pr_comment_file" 2>/dev/null; then
+    printf 'agent-stalled\n'
+    return 0
+  fi
   return 1
 }

package/tools/bin/agent-project-run-codex-resilient CHANGED

@@ -618,6 +618,22 @@ classify_failure_reason() {
   fi
 }
+failure_chunk_indicates_startup_stall() {
+  local chunk="${1:-}"
+  local recent_chunk
+  recent_chunk="$(tail -n 120 <<<"$chunk")"
+  grep -q '"type":"thread.started"' <<<"$recent_chunk" || return 1
+  grep -q '"type":"turn.started"' <<<"$recent_chunk" || return 1
+  if grep -Eq '"type":"item\.(started|completed)"' <<<"$recent_chunk"; then
+    return 1
+  fi
+  if grep -q '"type":"turn.completed"' <<<"$recent_chunk"; then
+    return 1
+  fi
+  return 0
+}
 resume_prompt() {
   cat <<EOF
 The previous Codex exec turn in this same thread was interrupted because the host refreshed Codex authentication after a quota or auth failure.
@@ -729,7 +745,7 @@ run_resume_exec() {
 }
 attempt_run() {
-  local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch
+  local reason auth_before_switch quota_label_before_switch quota_switch_signature_before_switch quota_switch_result shell_flags_before_quota_switch failure_chunk startup_stall
   attempt=$((attempt + 1))
   last_quota_switch_status=""
@@ -750,8 +766,15 @@ attempt_run() {
     return 0
   fi
-  reason="$(classify_failure_reason "$(new_output_since "$last_attempt_start_size")")"
+  failure_chunk="$(new_output_since "$last_attempt_start_size")"
+  reason="$(classify_failure_reason "$failure_chunk")"
   last_failure_reason="${reason:-worker-exit-failed}"
+  startup_stall="no"
+  if [[ "$last_failure_reason" == "no-codex-output-before-stall-threshold" || "$last_failure_reason" == "no-codex-progress-before-stall-threshold" ]]; then
+    if failure_chunk_indicates_startup_stall "$failure_chunk"; then
+      startup_stall="yes"
+    fi
+  fi
   case "$last_failure_reason" in
     usage-limit|auth-failure|auth-401|account-banned)
@@ -796,6 +819,38 @@ attempt_run() {
       resume_count=$((resume_count + 1))
       return 2
       ;;
+    no-codex-output-before-stall-threshold|no-codex-progress-before-stall-threshold)
+      if [[ "$startup_stall" == "yes" && $quota_autoswitch_attempt_count -lt $max_quota_autoswitch_attempts ]]; then
+        auth_before_switch="$(auth_fingerprint)"
+        quota_label_before_switch="$last_attempt_start_quota_label"
+        quota_switch_signature_before_switch="$(quota_switch_signature)"
+        last_auth_fingerprint="$auth_before_switch"
+        write_state "switching-account" "$last_failure_reason"
+        log_runner "startup-stall detected before first Codex tool activity; attempting Codex account rotation"
+        shell_flags_before_quota_switch="$-"
+        set +e
+        run_quota_autoswitch
+        quota_switch_result=$?
+        case "$shell_flags_before_quota_switch" in
+          *e*) set -e ;;
+          *) set +e ;;
+        esac
+        if [[ "$quota_switch_result" == "0" ]]; then
+          thread_id=""
+          auth_wait_started_at=""
+          write_state "running" ""
+          return 2
+        fi
+        if [[ "$quota_switch_result" == "10" ]]; then
+          log_runner "startup-stall rotation deferred until ${last_quota_next_retry_at:-unknown}"
+          last_failure_reason="quota-switch-deferred"
+          write_state "failed" "$last_failure_reason"
+          return 1
+        fi
+      fi
+      write_state "failed" "$last_failure_reason"
+      return 1
+      ;;
     *)
       write_state "failed" "$last_failure_reason"
       return 1

package/tools/bin/agent-project-run-openclaw-session CHANGED

@@ -115,6 +115,7 @@ result_file="${artifact_dir}/result.env"
 runner_state_file="${artifact_dir}/runner.env"
 sandbox_artifact_dir="${worktree%/}/${sandbox_subdir}"
 sandbox_run_dir="${worktree%/}/${sandbox_subdir}/${session}"
+retained_repo_root="${ACP_RETAINED_REPO_ROOT:-${F_LOSNING_RETAINED_REPO_ROOT:-}}"
 started_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
 openclaw_bin="$(command -v openclaw)"
 default_openclaw_agent_id="$(
@@ -154,6 +155,7 @@ printf -v runner_state_q '%q' "$runner_state_file"
 printf -v branch_q '%q' "$branch_name"
 printf -v sandbox_artifact_dir_q '%q' "$sandbox_artifact_dir"
 printf -v sandbox_run_dir_q '%q' "$sandbox_run_dir"
+printf -v retained_repo_root_q '%q' "$retained_repo_root"
 printf -v adapter_id_q '%q' "$adapter_id"
 printf -v started_at_q '%q' "$started_at"
 printf -v openclaw_bin_q '%q' "$openclaw_bin"
@@ -230,18 +232,21 @@ export AGENT_PROJECT_RUN_DIR=${sandbox_run_dir_q}
 export AGENT_PROJECT_HOST_RUN_DIR=${artifact_dir_q}
 export AGENT_PROJECT_RESULT_FILE=${sandbox_run_dir_q}/result.env
 export AGENT_PROJECT_OPENCLAW_BIN=${openclaw_bin_q}
+export AGENT_PROJECT_RETAINED_REPO_ROOT=${retained_repo_root_q}
 export ACP_SESSION=${session_q}
 export ACP_RUN_DIR=${sandbox_run_dir_q}
 export ACP_HOST_RUN_DIR=${artifact_dir_q}
 export ACP_RESULT_FILE=${sandbox_run_dir_q}/result.env
 export ACP_OPENCLAW_BIN=${openclaw_bin_q}
 export ACP_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
+export ACP_RETAINED_REPO_ROOT=${retained_repo_root_q}
 export F_LOSNING_SESSION=${session_q}
 export F_LOSNING_RUN_DIR=${sandbox_run_dir_q}
 export F_LOSNING_HOST_RUN_DIR=${artifact_dir_q}
 export F_LOSNING_RESULT_FILE=${sandbox_run_dir_q}/result.env
 export F_LOSNING_OPENCLAW_BIN=${openclaw_bin_q}
 export F_LOSNING_OPENCLAW_SESSION_ID=${openclaw_session_id_q}
+export F_LOSNING_RETAINED_REPO_ROOT=${retained_repo_root_q}
 export OPENCLAW_STATE_DIR=${openclaw_state_dir_q}
 export OPENCLAW_CONFIG_PATH=${openclaw_config_path_q}
 EOF
@@ -294,6 +299,7 @@ runner_state_file=${runner_state_q}
 output_file=${output_q}
 sandbox_artifact_dir=${sandbox_artifact_dir_q}
 sandbox_run_dir=${sandbox_run_dir_q}
+retained_repo_root=${retained_repo_root_q}
 artifact_dir=${artifact_dir_q}
 run_dir=${artifact_dir_q}
 task_kind=${task_kind_q}
@@ -554,6 +560,45 @@ recover_literal_runtime_artifacts() {
   return 0
 }
+recover_retained_repo_artifact_leaks() {
+  local retained_worktree_root=""
+  local leaked_run_dir=""
+  local worktree_name=""
+  local session_name=""
+  local artifact_name=""
+  local recovered="no"
+  [[ -n "\${retained_repo_root}" ]] || return 0
+  worktree_name="\$(basename "\${worktree}")"
+  session_name="\${AGENT_PROJECT_SESSION:-}"
+  [[ -n "\${session_name}" ]] || return 0
+  retained_worktree_root="\${retained_repo_root%/}/worktrees"
+  leaked_run_dir="\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts/\${session_name}"
+  if [[ ! -d "\${leaked_run_dir}" || "\${leaked_run_dir}" == "\${sandbox_run_dir}" ]]; then
+    return 0
+  fi
+  for artifact_name in result.env verification.jsonl issue-comment.md pr-comment.md; do
+    if [[ -f "\${leaked_run_dir}/\${artifact_name}" ]]; then
+      cp "\${leaked_run_dir}/\${artifact_name}" "\${sandbox_run_dir}/\${artifact_name}" 2>/dev/null || true
+      cp "\${leaked_run_dir}/\${artifact_name}" "\${artifact_dir}/\${artifact_name}" 2>/dev/null || true
+      recovered="yes"
+    fi
+  done
+  rm -rf "\${leaked_run_dir}" 2>/dev/null || true
+  rmdir "\${retained_worktree_root}/\${worktree_name}/.openclaw-artifacts" 2>/dev/null || true
+  rmdir "\${retained_worktree_root}/\${worktree_name}" 2>/dev/null || true
+  rmdir "\${retained_worktree_root}" 2>/dev/null || true
+  if [[ "\${recovered}" == "yes" ]]; then
+    printf '[openclaw] recovered retained-repo artifact leak: %s\n' "\${leaked_run_dir}" >>"\${output_file}" 2>/dev/null || true
+  fi
+  return 0
+}
 reset_sandbox_run_dir() {
   mkdir -p "\${sandbox_run_dir}"
   find "\${sandbox_run_dir}" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
@@ -1217,6 +1262,7 @@ while true; do
   break
 done
 recover_literal_runtime_artifacts
+recover_retained_repo_artifact_leaks
 infer_result_from_output
 synthesize_comment_artifact_from_output
 if [[ "\${status}" -eq 0 ]]; then

package/tools/bin/agent-project-worker-status CHANGED

@@ -44,6 +44,36 @@ runner_state=""
 thread_id=""
 last_exit_code=""
+failure_reason_from_output() {
+  [[ -f "$output_file" ]] || return 1
+  if rg -qi "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file"; then
+    printf 'usage-limit\n'
+    return 0
+  fi
+  if rg -qi 'stale-run no-codex-output-before-stall-threshold|no-codex-output-before-stall-threshold' "$output_file"; then
+    printf 'no-codex-output-before-stall-threshold\n'
+    return 0
+  fi
+  if rg -qi 'stale-run no-codex-progress-before-stall-threshold|no-codex-progress-before-stall-threshold' "$output_file"; then
+    printf 'no-codex-progress-before-stall-threshold\n'
+    return 0
+  fi
+  # Recover Codex startup stalls when the wrapper was archived before it could
+  # flush a terminal runner.env state. This is intentionally narrow: the log
+  # must show a turn started, but no tool activity or turn completion.
+  if rg -q '"type":"turn.started"' "$output_file" \
+    && ! rg -q '"type":"item.started"|"type":"item.completed"|"type":"turn.completed"' "$output_file"; then
+    printf 'no-codex-progress-before-stall-threshold\n'
+    return 0
+  fi
+  return 1
+}
 if tmux has-session -t "$session" 2>/dev/null; then
   status="RUNNING"
 fi
@@ -87,6 +117,13 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
   fi
 fi
+if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
+  failure_reason="$(failure_reason_from_output || true)"
+  if [[ -n "$failure_reason" ]]; then
+    status="FAILED"
+  fi
+fi
 if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
   case "$runner_state" in
     running|waiting-auth-refresh|switching-account)

package/tools/bin/flow-config-lib.sh CHANGED

@@ -2039,6 +2039,7 @@ flow_export_execution_env() {
   local openclaw_model=""
   local openclaw_thinking=""
   local openclaw_timeout=""
+  local openclaw_stall=""
   repo_id="$(flow_resolve_repo_id "${config_file}")"
   provider_quota_cooldowns="$(flow_resolve_provider_quota_cooldowns "${config_file}")"
@@ -2071,6 +2072,7 @@ flow_export_execution_env() {
     openclaw_model="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_MODEL")"
     openclaw_thinking="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_THINKING")"
     openclaw_timeout="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_TIMEOUT_SECONDS")"
+    openclaw_stall="$(flow_kv_get "${provider_pool_selection}" "OPENCLAW_STALL_SECONDS")"
   else
     if [[ -n "${explicit_coding_worker}" ]]; then
       active_provider_selection_reason="env-override"
@@ -2087,6 +2089,7 @@ flow_export_execution_env() {
     openclaw_model="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL" "execution.openclaw.model" "")"
     openclaw_thinking="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING" "execution.openclaw.thinking" "")"
     openclaw_timeout="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS" "execution.openclaw.timeout_seconds" "")"
+    openclaw_stall="$(flow_env_or_config "${config_file}" "ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS" "execution.openclaw.stall_seconds" "")"
   fi
   if [[ -n "${coding_worker}" ]]; then
@@ -2167,6 +2170,10 @@ flow_export_execution_env() {
     export F_LOSNING_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
     export ACP_OPENCLAW_TIMEOUT_SECONDS="${openclaw_timeout}"
   fi
+  if [[ -n "${openclaw_stall}" ]]; then
+    export F_LOSNING_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
+    export ACP_OPENCLAW_STALL_SECONDS="${openclaw_stall}"
+  fi
   flow_export_github_cli_auth_env "$(flow_resolve_repo_slug "${config_file}")"
   flow_export_project_env_aliases

package/tools/bin/flow-shell-lib.sh CHANGED

@@ -52,6 +52,7 @@ flow_export_compat_env_aliases() {
   flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_MODEL ACP_OPENCLAW_MODEL
   flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_THINKING ACP_OPENCLAW_THINKING
   flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_TIMEOUT_SECONDS ACP_OPENCLAW_TIMEOUT_SECONDS
+  flow_export_env_alias_if_unset F_LOSNING_OPENCLAW_STALL_SECONDS ACP_OPENCLAW_STALL_SECONDS
   flow_export_env_alias_if_unset F_LOSNING_ALLOW_INFRA_CI_BYPASS ACP_ALLOW_INFRA_CI_BYPASS
   flow_export_env_alias_if_unset F_LOSNING_LOCAL_FIRST_PR_POLICY ACP_LOCAL_FIRST_PR_POLICY
   flow_export_env_alias_if_unset F_LOSNING_PR_RISK_CACHE_TTL_SECONDS ACP_PR_RISK_CACHE_TTL_SECONDS
@@ -100,6 +101,7 @@ flow_export_canonical_env_aliases() {
   flow_export_env_alias_if_unset ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL
   flow_export_env_alias_if_unset ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING
   flow_export_env_alias_if_unset ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS
+  flow_export_env_alias_if_unset ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS
   flow_export_env_alias_if_unset ACP_ALLOW_INFRA_CI_BYPASS F_LOSNING_ALLOW_INFRA_CI_BYPASS
   flow_export_env_alias_if_unset ACP_LOCAL_FIRST_PR_POLICY F_LOSNING_LOCAL_FIRST_PR_POLICY
   flow_export_env_alias_if_unset ACP_PR_RISK_CACHE_TTL_SECONDS F_LOSNING_PR_RISK_CACHE_TTL_SECONDS

package/tools/bin/project-runtimectl.sh CHANGED

@@ -361,7 +361,7 @@ print_status() {
   if [[ -n "${heartbeat}" || -n "${shared_loop}" || -n "${supervisor}" || "${controller_count}" != "0" || "${active_session_count}" != "0" ]]; then
     runtime_status="running"
   fi
-  if [[ -z "${heartbeat}" && -z "${supervisor}" && ( -n "${shared_loop}" || "${controller_count}" != "0" || "${active_session_count}" != "0" ) ]]; then
+  if [[ -z "${heartbeat}" && -z "${supervisor}" && "${active_session_count}" == "0" && ( -n "${shared_loop}" || "${controller_count}" != "0" ) ]]; then
     runtime_status="partial"
   fi

package/tools/bin/start-issue-worker.sh CHANGED

@@ -54,6 +54,9 @@ rollback_labels_on_failure() {
   if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
     return 0
   fi
+  if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
+    rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
+  fi
   if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
     bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${ISSUE_ID}" --remove agent-running >/dev/null 2>&1 || true
   fi

package/tools/bin/start-pr-fix-worker.sh CHANGED

@@ -33,6 +33,9 @@ rollback_labels_on_failure() {
   if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
     return 0
   fi
+  if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
+    rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
+  fi
   if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
     bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${PR_NUMBER}" --remove agent-running >/dev/null 2>&1 || true
   fi

package/tools/bin/start-pr-review-worker.sh CHANGED

@@ -32,6 +32,9 @@ rollback_labels_on_failure() {
   if [[ "${label_rollback_armed}" != "yes" || "${launch_success}" == "yes" ]]; then
     return 0
   fi
+  if [[ -d "${RUN_DIR}" && ! -f "${RUN_DIR}/run.env" && ! -f "${RUN_DIR}/runner.env" && ! -f "${RUN_DIR}/result.env" ]]; then
+    rm -rf "${RUN_DIR}" >/dev/null 2>&1 || true
+  fi
   if [[ -x "${UPDATE_LABELS_BIN}" ]]; then
     bash "${UPDATE_LABELS_BIN}" --repo-slug "${REPO_SLUG}" --number "${PR_NUMBER}" --remove agent-running >/dev/null 2>&1 || true
   fi

package/tools/bin/start-resident-issue-loop.sh CHANGED

@@ -204,6 +204,7 @@ controller_refresh_execution_context() {
     ACP_OPENCLAW_MODEL F_LOSNING_OPENCLAW_MODEL \
     ACP_OPENCLAW_THINKING F_LOSNING_OPENCLAW_THINKING \
     ACP_OPENCLAW_TIMEOUT_SECONDS F_LOSNING_OPENCLAW_TIMEOUT_SECONDS \
+    ACP_OPENCLAW_STALL_SECONDS F_LOSNING_OPENCLAW_STALL_SECONDS \
     ACP_ACTIVE_PROVIDER_POOL_NAME F_LOSNING_ACTIVE_PROVIDER_POOL_NAME \
     ACP_ACTIVE_PROVIDER_BACKEND F_LOSNING_ACTIVE_PROVIDER_BACKEND \
     ACP_ACTIVE_PROVIDER_MODEL F_LOSNING_ACTIVE_PROVIDER_MODEL \