npm - @pushpalsdev/cli - Versions diffs - 1.1.37 → 1.1.38 - Mend

@pushpalsdev/cli 1.1.37 → 1.1.38

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.37",
+  "version": "1.1.38",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/.pushpals-remotebuddy-fallback.js CHANGED Viewed

@@ -6545,6 +6545,33 @@ function sanitizeForGitRef(value) {
   const text = value.trim().replace(/[^A-Za-z0-9._-]/g, "-");
   return text || "default";
 }
+function isSafeGitBranchName(value) {
+  const text = String(value ?? "").trim();
+  if (!text || text.length > 200)
+    return false;
+  if (text.startsWith("-") || text.startsWith("/") || text.endsWith("/"))
+    return false;
+  if (text.endsWith(".") || text.endsWith(".lock"))
+    return false;
+  if (text.includes("..") || text.includes("//") || text.includes("@{"))
+    return false;
+  return !/[\\\s~^:?*\[\]\x00-\x1F\x7F]/.test(text);
+}
+function normalizeConfiguredGitBranchName(value, fallback, label = "branch") {
+  const candidate = String(value ?? "").trim();
+  if (isSafeGitBranchName(candidate))
+    return candidate;
+  const safeFallback = isSafeGitBranchName(fallback) ? fallback : "main";
+  console.warn(`[RemoteBuddyAutonomousEngine] Ignoring unsafe ${label} ref ${JSON.stringify(candidate)}; using ${safeFallback}.`);
+  return safeFallback;
+}
+function normalizeConfiguredGitRemoteName(value, fallback = "origin") {
+  const candidate = String(value ?? "").trim();
+  if (/^[A-Za-z0-9._-]+$/.test(candidate) && !candidate.startsWith("-"))
+    return candidate;
+  console.warn(`[RemoteBuddyAutonomousEngine] Ignoring unsafe git remote ${JSON.stringify(candidate)}; using ${fallback}.`);
+  return fallback;
+}
 async function repoPreflight(repo) {
   const porcelain = await gitOutput(repo, ["status", "--porcelain"]);
   const mergeHead = await gitOutput(repo, ["rev-parse", "-q", "--verify", "MERGE_HEAD"]);
@@ -6594,9 +6621,9 @@ class RemoteBuddyAutonomousEngine {
     const safeSession = sanitizeForGitRef(this.sessionId).slice(0, 40);
     this.autonomyRepo = resolve4(this.repoRoot, ".worktrees", `remotebuddy-autonomy-${safeSession}`);
     this.autonomyBranch = `_remotebuddy/autonomy-${safeSession}`;
-    this.gitRemote = String(opts.config.sourceControlManager.remote || "origin").trim() || "origin";
-    this.integrationBranch = String(opts.config.sourceControlManager.mainBranch || "main_agents").trim() || "main_agents";
-    this.baseBranch = String(opts.config.sourceControlManager.baseBranch || "main").trim() || "main";
+    this.gitRemote = normalizeConfiguredGitRemoteName(String(opts.config.sourceControlManager.remote || "origin"), "origin");
+    this.integrationBranch = normalizeConfiguredGitBranchName(String(opts.config.sourceControlManager.mainBranch || "main_agents"), "main_agents", "integration branch");
+    this.baseBranch = normalizeConfiguredGitBranchName(String(opts.config.sourceControlManager.baseBranch || "main"), "main", "base branch");
     this.llm = opts.llm;
     this.comm = opts.comm;
     this.llmCfg = opts.config.remotebuddy.llm;

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -116,7 +116,10 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
 _BACKGROUND_NO_EDIT_WATCHDOG_S = 120
 _NO_EDIT_RECOVERY_WATCHDOG_S = 90
 _DEFAULT_NO_EDIT_RECHECK_S = 120
+_NO_EDIT_RECOVERY_RECHECK_S = 30
 _DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
+_DEFAULT_NO_EDIT_COMMAND_PROGRESS_CAP_S = 360
+_NO_EDIT_RECOVERY_COMMAND_PROGRESS_CAP_S = 120
 _DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
 _RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
 _DEFAULT_ROLLOUT_WATCHDOG_S = 300
@@ -124,6 +127,7 @@ _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
 _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
 _WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
 _BACKGROUND_ROLLOUT_WATCHDOG_S = 90
+_MIN_CODEX_RECOVERY_ATTEMPT_S = 120
 _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS = 10 * 60 * 1000
 _CODEX_STARTUP_ONLY_EVENT_TYPES = {"thread.started", "turn.started"}
@@ -609,11 +613,19 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
         "contract-level tests",
         "contract around",
         "contract coverage",
+        "focused contract coverage",
         "ranking contract",
         "regression coverage",
+        "focused coverage",
         "focused regression",
         "focused scenario",
         "targeted test",
+        "small deterministic",
+        "review-fix",
+        "review fix",
+        "rejected pr",
+        "must-fix",
+        "cleanup harness",
         "one-file",
         "one file",
         "single-file",
@@ -648,13 +660,16 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
         "contract-level tests",
         "contract around",
         "contract coverage",
+        "focused contract coverage",
         "ranking contract",
         "regression coverage",
+        "focused coverage",
         "focused regression",
         "test-only",
         "test only",
         "targeted test",
         "focused scenario",
+        "cleanup harness",
     )
     if not any(marker in text for marker in narrow_markers):
         return False
@@ -668,6 +683,13 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
     return not any(marker in text for marker in broad_markers)
+def _minimum_recovery_attempt_seconds(requested_timeout_s: Optional[int]) -> int:
+    if not requested_timeout_s or requested_timeout_s <= 0:
+        return _MIN_CODEX_RECOVERY_ATTEMPT_S
+    scaled_s = max(1, int(requested_timeout_s * 0.25))
+    return max(1, min(_MIN_CODEX_RECOVERY_ATTEMPT_S, scaled_s))
 def _resolve_task_reasoning_effort(
     configured_effort: str,
     prompt: str,
@@ -743,7 +765,10 @@ def _resolve_no_edit_watchdog_seconds(
     return max(floor_s, min(default_s, max(floor_s, communicate_timeout_s - 60)))
-def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> int:
+def _resolve_no_edit_recheck_seconds(
+    communicate_timeout_s: Optional[int],
+    recovery_attempt: int = 0,
+) -> int:
     raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S", "").strip()
     if raw:
         parsed = _to_positive_int(raw)
@@ -754,8 +779,13 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
         else:
             upper = max(1, (communicate_timeout_s or parsed + 1) - 1)
             return max(1, min(parsed, upper))
-    upper = max(1, (communicate_timeout_s or _DEFAULT_NO_EDIT_RECHECK_S + 1) - 1)
-    return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
+    default_s = (
+        _NO_EDIT_RECOVERY_RECHECK_S
+        if recovery_attempt > 0
+        else _DEFAULT_NO_EDIT_RECHECK_S
+    )
+    upper = max(1, (communicate_timeout_s or default_s + 1) - 1)
+    return max(1, min(default_s, upper))
 def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
@@ -779,6 +809,36 @@ def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int])
     return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
+def _resolve_no_edit_command_progress_cap_seconds(
+    communicate_timeout_s: Optional[int],
+    no_edit_command_grace_s: Optional[int],
+    recovery_attempt: int = 0,
+) -> Optional[int]:
+    if not communicate_timeout_s or no_edit_command_grace_s is None:
+        return None
+    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S", "").strip()
+    if raw:
+        if raw == "0":
+            return None
+        parsed = _to_positive_int(raw)
+        if parsed is None:
+            log.info(
+                "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S="
+                f"{raw!r}; using default command-progress cap."
+            )
+        else:
+            return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
+    default_s = (
+        _NO_EDIT_RECOVERY_COMMAND_PROGRESS_CAP_S
+        if recovery_attempt > 0
+        else _DEFAULT_NO_EDIT_COMMAND_PROGRESS_CAP_S
+    )
+    upper = max(1, communicate_timeout_s - 1)
+    return max(1, min(default_s, upper))
 def _resolve_startup_stall_watchdog_seconds(
     communicate_timeout_s: Optional[int],
     recovery_attempt: int = 0,
@@ -2091,6 +2151,118 @@ def _codex_changed_paths(repo: str, baseline_snapshot: Any) -> Tuple[List[str],
     return changed_paths, delta, effective
+def _safe_repo_relative_path(repo: str, path: str) -> Optional[Path]:
+    raw = str(path or "").replace("\\", "/").strip()
+    if not raw or raw.startswith("/") or re.match(r"^[A-Za-z]:", raw):
+        return None
+    parts = [part for part in raw.split("/") if part]
+    if not parts or any(part in ("..", ".") for part in parts):
+        return None
+    try:
+        repo_path = Path(repo).resolve()
+        candidate = (repo_path / Path(*parts)).resolve()
+        candidate.relative_to(repo_path)
+        return candidate
+    except Exception:
+        return None
+def _git_status_entries(repo: str) -> List[Tuple[str, str]]:
+    try:
+        proc = subprocess.run(
+            ["git", "status", "--porcelain"],
+            cwd=repo,
+            capture_output=True,
+            text=True,
+            timeout=20,
+            check=False,
+        )
+    except Exception:
+        return []
+    if proc.returncode != 0:
+        return []
+    entries: List[Tuple[str, str]] = []
+    for raw_line in proc.stdout.splitlines():
+        line = str(raw_line or "").rstrip("\r\n")
+        if len(line) < 4:
+            continue
+        status = line[:2]
+        path = line[3:].strip()
+        if " -> " in path:
+            path = path.split(" -> ", 1)[1].strip()
+        if path:
+            entries.append((status, path))
+    return entries
+def _restore_retry_baseline(repo: str, baseline_snapshot: Any, reason: str = "") -> bool:
+    _changed_paths, delta_paths, _effective_paths = _codex_changed_paths(repo, baseline_snapshot)
+    if not delta_paths:
+        return True
+    baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
+    unsafe_delta = [path for path in delta_paths if _safe_repo_relative_path(repo, path) is None]
+    if unsafe_delta:
+        log.info(
+            "Rollout recovery cannot safely restore worker sandbox baseline; unsafe changed paths: "
+            f"{_describe_publishable_paths(unsafe_delta)}"
+        )
+        return False
+    mutated_baseline_paths = [path for path in delta_paths if path in baseline_paths]
+    if mutated_baseline_paths:
+        log.info(
+            "Rollout recovery will not reset paths that were already dirty at baseline: "
+            f"{_describe_publishable_paths(mutated_baseline_paths)}"
+        )
+        return False
+    log.info(
+        "Restoring worker sandbox baseline before rollout recovery retry"
+        f"{f' ({reason})' if reason else ''}: {_describe_publishable_paths(delta_paths)}"
+    )
+    try:
+        subprocess.run(
+            ["git", "restore", "--staged", "--worktree", "--", *delta_paths],
+            cwd=repo,
+            capture_output=True,
+            text=True,
+            timeout=30,
+            check=False,
+        )
+    except Exception as exc:
+        log.info(f"Failed to run git restore for rollout recovery baseline: {exc}")
+        return False
+    delta_set = set(delta_paths)
+    for status, path in _git_status_entries(repo):
+        if status != "??":
+            continue
+        if path not in delta_set and not any(path.startswith(f"{delta.rstrip('/')}/") for delta in delta_set):
+            continue
+        candidate = _safe_repo_relative_path(repo, path)
+        if candidate is None:
+            return False
+        try:
+            if candidate.is_dir():
+                rmtree(candidate)
+            elif candidate.exists():
+                candidate.unlink()
+        except Exception as exc:
+            log.info(f"Failed to remove untracked rollout recovery path {path}: {exc}")
+            return False
+    _remaining_changed, remaining_delta, remaining_effective = _codex_changed_paths(
+        repo,
+        baseline_snapshot,
+    )
+    if remaining_delta:
+        log.info(
+            "Rollout recovery baseline restore left changed paths after cleanup: "
+            f"{_describe_publishable_paths(remaining_effective or remaining_delta)}"
+        )
+        return False
+    return True
 def _changed_path_top_level(path: str) -> str:
     raw = str(path or "").replace("\\", "/").strip()
     is_top_level_directory = raw.endswith("/")
@@ -2183,6 +2355,7 @@ def _run_codex_task(
     rollout_recovery_attempt: int = 0,
     model_override: Optional[str] = None,
     baseline_changes: Optional[List[str]] = None,
+    execution_deadline_monotonic: Optional[float] = None,
 ) -> Dict[str, Any]:
     global _ACTIVE_CHILD, _INTERRUPTED_SIGNAL
     _INTERRUPTED_SIGNAL = None
@@ -2242,7 +2415,39 @@ def _run_codex_task(
     )
     # JSON event output is noisy by default; prefer plain text + output-last-message.
     use_json = runtime_config.json_output
-    communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
+    requested_communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
+    recovery_depth = (
+        wrapper_recovery_attempt
+        + model_compatibility_recovery_attempt
+        + startup_stall_recovery_attempt
+        + no_edit_recovery_attempt
+        + rollout_recovery_attempt
+    )
+    communicate_timeout_s = requested_communicate_timeout_s
+    overall_deadline = execution_deadline_monotonic
+    if requested_communicate_timeout_s and requested_communicate_timeout_s > 0:
+        if overall_deadline is None:
+            overall_deadline = time.monotonic() + float(requested_communicate_timeout_s)
+        else:
+            remaining_s = int(max(0.0, overall_deadline - time.monotonic()))
+            min_attempt_s = (
+                _minimum_recovery_attempt_seconds(requested_communicate_timeout_s)
+                if recovery_depth > 0
+                else 1
+            )
+            if remaining_s < min_attempt_s:
+                return {
+                    "ok": False,
+                    "summary": "openai_codex recovery budget exhausted before retry",
+                    "stderr": (
+                        "Codex recovery was requested, but the shared executor budget had only "
+                        f"{remaining_s}s remaining (< {min_attempt_s}s). Stopping before a low-odds "
+                        "retry so ValidationGate/QualityGate can return a structured result."
+                    ),
+                    "exitCode": 124,
+                    "cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
+                }
+            communicate_timeout_s = max(1, min(requested_communicate_timeout_s, remaining_s))
     effective_supplemental_guidance = _augment_supplemental_guidance(supplemental_guidance)
     prompt = _build_instruction(instruction, effective_supplemental_guidance)
     reasoning_effort = _resolve_task_reasoning_effort(
@@ -2484,6 +2689,7 @@ def _run_codex_task(
             rollout_watchdog_reason = ""
             rollout_artifact_only_paths = ""
             rollout_watchdog_retryable = True
+            rollout_restore_before_retry = False
             command_policy_rejection_loop = False
             no_edit_watchdog_s = (
                 _resolve_no_edit_watchdog_seconds(
@@ -2494,8 +2700,16 @@ def _run_codex_task(
                 if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
                 else None
             )
-            no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
+            no_edit_recheck_s = _resolve_no_edit_recheck_seconds(
+                communicate_timeout_s,
+                recovery_attempt=recovery_depth,
+            )
             no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
+            no_edit_command_progress_cap_s = _resolve_no_edit_command_progress_cap_seconds(
+                communicate_timeout_s,
+                no_edit_command_grace_s,
+                recovery_attempt=recovery_depth,
+            )
             startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
                 communicate_timeout_s,
                 recovery_attempt=startup_stall_recovery_attempt,
@@ -2527,6 +2741,7 @@ def _run_codex_task(
             publishable_progress_seen_at: Optional[float] = None
             publishable_progress_finalized = False
             publishable_progress_paths: List[str] = []
+            first_no_edit_command_progress_at: Optional[float] = None
             while proc.poll() is None:
                 now = time.monotonic()
@@ -2593,17 +2808,50 @@ def _run_codex_task(
                             )
                         except Exception:
                             last_command_activity_at = 0.0
+                        command_progress_cap_reached = False
+                        command_progress_elapsed_s = 0
                         if command_event_count > 0 and no_edit_command_grace_s is not None:
+                            observed_command_progress_at = (
+                                last_command_activity_at if last_command_activity_at > 0 else now
+                            )
+                            if first_no_edit_command_progress_at is None:
+                                first_no_edit_command_progress_at = observed_command_progress_at
+                            if no_edit_command_progress_cap_s is not None:
+                                command_progress_cap_deadline = (
+                                    first_no_edit_command_progress_at
+                                    + float(no_edit_command_progress_cap_s)
+                                )
+                                command_progress_elapsed_s = int(
+                                    max(0.0, now - first_no_edit_command_progress_at)
+                                )
+                                if now >= command_progress_cap_deadline:
+                                    command_progress_cap_reached = True
                             command_grace_deadline = 0.0
                             if active_command_count > 0:
                                 # Do not kill while Codex is actively running a tool command; poll
-                                # again soon, but keep the total grace bounded by the hard cap below.
+                                # again soon, but keep endless read-only discovery bounded by the
+                                # command-progress cap above.
                                 command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
                             elif last_command_activity_at > 0:
                                 command_grace_deadline = last_command_activity_at + float(
                                     no_edit_command_grace_s
                                 )
-                            if command_grace_deadline > now:
+                            if (
+                                no_edit_command_progress_cap_s is not None
+                                and first_no_edit_command_progress_at is not None
+                            ):
+                                command_grace_deadline = min(
+                                    command_grace_deadline,
+                                    first_no_edit_command_progress_at
+                                    + float(no_edit_command_progress_cap_s),
+                                )
+                            if command_progress_cap_reached:
+                                log.info(
+                                    "No-edit watchdog observed Codex tool progress for "
+                                    f"{command_progress_elapsed_s}s without a publishable patch; "
+                                    "forcing patch-first recovery instead of waiting for the child timeout."
+                                )
+                            elif command_grace_deadline > now:
                                 no_edit_deadline = command_grace_deadline
                                 remaining_s = int(max(1.0, command_grace_deadline - now))
                                 command_detail = (
@@ -2680,7 +2928,8 @@ def _run_codex_task(
                                 "publishable-looking changed paths are broad/noisy for a small task: "
                                 f"{_describe_publishable_paths(effective_paths)}"
                             )
-                            rollout_watchdog_retryable = False
+                            rollout_watchdog_retryable = True
+                            rollout_restore_before_retry = True
                         else:
                             rollout_deadline = None
                     else:
@@ -2699,9 +2948,16 @@ def _run_codex_task(
                             if rollout_artifact_only_paths
                             else ""
                         )
+                        can_retry_rollout = (
+                            rollout_watchdog_retryable
+                            and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS
+                        )
                         action = (
+                            "Restoring worker sandbox baseline and retrying with stricter guidance."
+                            if rollout_restore_before_retry and can_retry_rollout
+                            else
                             "Retrying with course-correction guidance."
-                            if rollout_watchdog_retryable
+                            if can_retry_rollout
                             else "Failing fast instead of retrying on top of a broad/noisy diff."
                         )
                         log.info(
@@ -2779,6 +3035,27 @@ def _run_codex_task(
         if rollout_watchdog_fired:
             if rollout_watchdog_retryable and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
+                if rollout_restore_before_retry and not _restore_retry_baseline(
+                    repo,
+                    baseline_snapshot,
+                    rollout_watchdog_reason,
+                ):
+                    detail = (
+                        "Codex trajectory drifted into broad/noisy changes and the worker sandbox "
+                        "could not be restored safely for a clean recovery retry: "
+                        f"{rollout_watchdog_reason or 'broad/noisy changes'}."
+                    )
+                    if trace_excerpt:
+                        detail = f"{detail}\n{trace_excerpt}"
+                    return {
+                        "ok": False,
+                        "summary": "openai_codex rollout coach could not safely reset broad changes",
+                        "stdout": _truncate(stdout),
+                        "stderr": _truncate(f"{detail}\n{stderr}".strip()),
+                        "exitCode": 124,
+                        "usage": usage,
+                        "cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
+                    }
                 retry_guidance = [
                     *supplemental_guidance,
                     _build_rollout_recovery_guidance(
@@ -2798,6 +3075,7 @@ def _run_codex_task(
                     rollout_recovery_attempt=rollout_recovery_attempt + 1,
                     model_override=model_override,
                     baseline_changes=baseline_snapshot,
+                    execution_deadline_monotonic=overall_deadline,
                 )
             detail = (
                 "Codex trajectory remained off-track or too broad for safe recovery: "
@@ -2872,6 +3150,7 @@ def _run_codex_task(
                     rollout_recovery_attempt=rollout_recovery_attempt,
                     model_override=recovery_model or model_override,
                     baseline_changes=baseline_snapshot,
+                    execution_deadline_monotonic=overall_deadline,
                 )
                 retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
                 if retry_result.get("ok"):
@@ -2918,6 +3197,7 @@ def _run_codex_task(
                     rollout_recovery_attempt=rollout_recovery_attempt,
                     model_override=model_override,
                     baseline_changes=baseline_snapshot,
+                    execution_deadline_monotonic=overall_deadline,
                 )
             detail = "Codex spent too much of the execution budget without producing publishable file changes."
             if trace_excerpt:
@@ -3114,6 +3394,7 @@ def _run_codex_task(
                         rollout_recovery_attempt=rollout_recovery_attempt,
                         model_override=model_override,
                         baseline_changes=baseline_snapshot,
+                        execution_deadline_monotonic=overall_deadline,
                     )
                     retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
                     if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
@@ -3229,6 +3510,7 @@ def _run_codex_task(
                     rollout_recovery_attempt=rollout_recovery_attempt,
                     model_override=LEGACY_CODEX_MODEL_FALLBACK,
                     baseline_changes=baseline_snapshot,
+                    execution_deadline_monotonic=overall_deadline,
                 )
                 retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
                 if retry_result.get("ok"):

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -4,6 +4,7 @@ import re
 import json
 import subprocess
 import sys
+import time
 import unittest
 import tempfile
 from unittest import mock
@@ -47,6 +48,9 @@ from openai_codex_executor import (
     _repo_root_for_prompt_loading,
     _restore_repo_local_codex_files,
     _resolve_codex_command_prefix,
+    _resolve_no_edit_command_grace_seconds,
+    _resolve_no_edit_command_progress_cap_seconds,
+    _resolve_no_edit_recheck_seconds,
     _resolve_no_edit_watchdog_seconds,
     _resolve_rollout_watchdog_seconds,
     _resolve_startup_stall_watchdog_seconds,
@@ -1519,6 +1523,96 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertIn("Patched after later command progress", str(result.get("stdout") or ""))
         self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_command_progress_cap_forces_patch_first_recovery(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-command-progress-cap-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# command progress cap repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed command progress cap repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_command_progress_cap.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import json",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "prompt = sys.stdin.read()",
+                        "if 'No-edit watchdog recovery' in prompt:",
+                        "    Path('src').mkdir(exist_ok=True)",
+                        "    Path('src/capped-command-recovery.txt').write_text('patched after capped command progress\\n', encoding='utf-8')",
+                        "    if last_message_path:",
+                        "        Path(last_message_path).write_text('Patched after capped command progress.', encoding='utf-8')",
+                        "    print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after capped command progress.'}}), flush=True)",
+                        "    raise SystemExit(0)",
+                        "",
+                        "print(json.dumps({'type': 'thread.started'}), flush=True)",
+                        "print(json.dumps({'type': 'turn.started'}), flush=True)",
+                        "for index in range(8):",
+                        "    command_id = f'cmd-{index}'",
+                        "    print(json.dumps({'type': 'item.started', 'item': {'id': command_id, 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
+                        "    time.sleep(0.2)",
+                        "    print(json.dumps({'type': 'item.completed', 'item': {'id': command_id, 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0}}), flush=True)",
+                        "    time.sleep(0.8)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-command-progress-cap-test-key",
+                "WORKERPALS_OPENAI_CODEX_JSON": "true",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "12",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "3",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S": "3",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Add one focused patch after bounded command-backed discovery.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched after capped command progress", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
     def test_run_codex_task_finalizes_after_durable_publishable_progress(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-codex-durable-progress-") as temp_dir:
             repo = Path(temp_dir) / "repo"
@@ -1962,6 +2056,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(watchdog_s, 300)
+    def test_no_edit_recovery_attempt_uses_short_durable_recheck_and_command_cap(self) -> None:
+        env = {
+            "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "",
+            "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "",
+            "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_PROGRESS_CAP_S": "",
+        }
+        with mock.patch.dict(os.environ, env, clear=False):
+            first_recheck_s = _resolve_no_edit_recheck_seconds(750)
+            recovery_recheck_s = _resolve_no_edit_recheck_seconds(750, recovery_attempt=1)
+            command_grace_s = _resolve_no_edit_command_grace_seconds(750)
+            first_command_cap_s = _resolve_no_edit_command_progress_cap_seconds(
+                750,
+                command_grace_s,
+            )
+            recovery_command_cap_s = _resolve_no_edit_command_progress_cap_seconds(
+                750,
+                command_grace_s,
+                recovery_attempt=1,
+            )
+        self.assertEqual(first_recheck_s, 120)
+        self.assertEqual(recovery_recheck_s, 30)
+        self.assertEqual(first_command_cap_s, 360)
+        self.assertEqual(recovery_command_cap_s, 120)
+    def test_codex_recovery_attempt_refuses_exhausted_shared_deadline(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-exhausted-recovery-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# exhausted recovery repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed exhausted recovery repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_should_not_run.py"
+            stub_path.write_text(
+                "raise SystemExit('fake codex should not run when recovery budget is exhausted')\n",
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-exhausted-recovery-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "750",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Apply patch-first recovery.",
+                    [],
+                    no_edit_recovery_attempt=1,
+                    execution_deadline_monotonic=time.monotonic() - 1.0,
+                )
+        self.assertFalse(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 124)
+        self.assertIn("recovery budget exhausted", str(result.get("summary") or ""))
+        self.assertIn("Stopping before a low-odds retry", str(result.get("stderr") or ""))
     def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
         prompt = (
             "Restore exact score assertions for contract-level tests where score is part "
@@ -1972,6 +2146,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(watchdog_s, 180)
+    def test_rejected_pr_review_fix_prompt_uses_compact_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Rejected PR revision brief: Previous ReviewAgent score: 7.6 / 10. "
+            "Address reviewer must-fix items in the cleanup harness with focused coverage."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
     def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
         guidance = _build_no_edit_recovery_guidance(
             "item.completed | still inspecting",
@@ -2112,7 +2296,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
         self.assertIn("scripts/", str(result.get("stdout") or ""))
-    def test_run_codex_task_rollout_coach_fails_fast_on_broad_small_task_changes(self) -> None:
+    def test_run_codex_task_rollout_coach_resets_broad_small_task_changes_before_retry(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as temp_dir:
             repo = Path(temp_dir) / "repo"
             repo.mkdir(parents=True, exist_ok=True)
@@ -2149,7 +2333,22 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
                         "import sys",
                         "import time",
                         "",
-                        "sys.stdin.read()",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "prompt = sys.stdin.read()",
+                        "if 'Rollout coach recovery' in prompt:",
+                        "    Path('src').mkdir(exist_ok=True)",
+                        "    Path('src/narrow-rollout-recovery.txt').write_text('narrow recovery patch\\n', encoding='utf-8')",
+                        "    if last_message_path:",
+                        "        Path(last_message_path).write_text('Patched narrowly after broad rollout reset.', encoding='utf-8')",
+                        "    print('item.completed | Patched narrowly after broad rollout reset.', flush=True)",
+                        "    sys.exit(0)",
+                        "",
                         "for index in range(5):",
                         "    root = Path(f'area{index}')",
                         "    root.mkdir(exist_ok=True)",
@@ -2176,6 +2375,78 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
                     "Make a small low-risk repo-native patch.",
                     [],
                 )
+            area0_exists_after_retry = (repo / "area0").exists()
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched narrowly after broad rollout reset", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
+        self.assertFalse(area0_exists_after_retry)
+    def test_run_codex_task_rollout_coach_fails_after_repeated_broad_small_task_changes(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-repeat-noisy-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# repeated rollout noisy repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed repeated rollout noisy repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_rollout_repeat_noisy.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import sys",
+                        "import time",
+                        "",
+                        "sys.stdin.read()",
+                        "for index in range(5):",
+                        "    root = Path(f'area{index}')",
+                        "    root.mkdir(exist_ok=True)",
+                        "    (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
+                        "print('item.completed | Repeated broad edits for a small task.', flush=True)",
+                        "time.sleep(10)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-rollout-repeat-noisy-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
+                "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Make a small low-risk repo-native patch.",
+                    [],
+                )
         self.assertFalse(result.get("ok"), result)
         self.assertEqual(result.get("exitCode"), 124)

package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts CHANGED Viewed

@@ -34,9 +34,9 @@ interface GenericPythonExecutorConfig {
 }
 const BACKEND_TIMEOUT_RESULT_GRACE_MS = 30_000;
-const OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS = 180_000;
-const OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS = 600_000;
-const OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS = 600_000;
+const OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS = 240_000;
+const OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS = 720_000;
+const OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS = 540_000;
 function estimateTokensFromText(text: string): number {
   return Math.max(0, Math.ceil(String(text ?? "").length / 3));
@@ -161,7 +161,7 @@ export function resolveOpenAICodexValidationReserveMs(
       budgetMs,
       Math.max(
         OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS,
-        Math.min(OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, budgetMs * 0.35),
+        Math.min(OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, budgetMs * 0.5),
       ),
     ),
   );

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -201,6 +201,9 @@ const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
 const CRITIC_COMPACT_RETRY_MIN_REDUCTION_RATIO = 0.25;
 const MAX_DIAGNOSTIC_PATH_SAMPLES = 50;
 const MAX_DIAGNOSTIC_TEXT_CHARS = 8_000;
+const QUALITY_MIN_REVISION_BUDGET_MS = 120_000;
+const QUALITY_MAX_REVISION_BUDGET_MS = 420_000;
+const QUALITY_REVISION_BUDGET_RATIO = 0.25;
 export function qualityRevisionLoopUpperBound(policy: {
   maxAutoRevisions: number;
@@ -234,7 +237,13 @@ export function qualityRevisionBudgetDecision(opts: {
   const elapsedMs = Math.max(0, Number(opts.jobElapsedMs) || 0);
   const remainingBudgetMs = Math.max(0, Math.floor(executionBudgetMs - elapsedMs));
   const minimumRevisionBudgetMs = Math.floor(
-    Math.min(executionBudgetMs, Math.max(180_000, Math.min(600_000, executionBudgetMs * 0.35))),
+    Math.min(
+      executionBudgetMs,
+      Math.max(
+        QUALITY_MIN_REVISION_BUDGET_MS,
+        Math.min(QUALITY_MAX_REVISION_BUDGET_MS, executionBudgetMs * QUALITY_REVISION_BUDGET_RATIO),
+      ),
+    ),
   );
   return {
     shouldStart: remainingBudgetMs >= minimumRevisionBudgetMs,