npm - @pushpalsdev/cli - Versions diffs - 1.1.35 → 1.1.37 - Mend

@pushpalsdev/cli 1.1.35 → 1.1.37

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.35",
+  "version": "1.1.37",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -116,6 +116,7 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
 _BACKGROUND_NO_EDIT_WATCHDOG_S = 120
 _NO_EDIT_RECOVERY_WATCHDOG_S = 90
 _DEFAULT_NO_EDIT_RECHECK_S = 120
+_DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
 _DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
 _RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
 _DEFAULT_ROLLOUT_WATCHDOG_S = 300
@@ -757,6 +758,27 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
     return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
+def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
+    if not communicate_timeout_s:
+        return None
+    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S", "").strip()
+    if raw:
+        if raw == "0":
+            return None
+        parsed = _to_positive_int(raw)
+        if parsed is None:
+            log.info(
+                "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S="
+                f"{raw!r}; using default command-progress grace."
+            )
+        else:
+            return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
+    upper = max(1, communicate_timeout_s - 1)
+    return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
 def _resolve_startup_stall_watchdog_seconds(
     communicate_timeout_s: Optional[int],
     recovery_attempt: int = 0,
@@ -1339,10 +1361,94 @@ def _empty_codex_trace() -> Dict[str, Any]:
         "prompt_tokens": 0,
         "completion_tokens": 0,
         "total_tokens": 0,
+        "active_command_ids": [],
+        "command_event_count": 0,
+        "last_command_activity_at": None,
+        "last_command_summary": "",
     }
-def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, Any]) -> None:
+def _looks_like_codex_command_item(value: Any) -> bool:
+    if not isinstance(value, dict):
+        return False
+    type_text = " ".join(
+        str(value.get(key) or "")
+        for key in ("type", "item_type", "kind", "name", "tool_name")
+    ).lower()
+    if any(marker in type_text for marker in ("command_execution", "exec_command", "shell_command")):
+        return True
+    return any(key in value for key in ("command", "cmd", "exit_code", "aggregated_output"))
+def _record_codex_command_activity(
+    parsed: Dict[str, Any],
+    event_type: str,
+    trace: Dict[str, Any],
+    now: float,
+) -> None:
+    item = parsed.get("item")
+    command_source: Any = item if _looks_like_codex_command_item(item) else parsed
+    if not _looks_like_codex_command_item(command_source):
+        return
+    command_text = ""
+    if isinstance(command_source, dict):
+        for key in ("command", "cmd", "name"):
+            raw = command_source.get(key)
+            if isinstance(raw, str) and raw.strip():
+                command_text = _truncate_inline(raw.strip(), 160)
+                break
+    command_id = ""
+    if isinstance(command_source, dict):
+        command_id = str(
+            command_source.get("id")
+            or command_source.get("call_id")
+            or command_source.get("item_id")
+            or command_text
+            or "command"
+        ).strip()
+    command_id = command_id or "command"
+    active = trace.setdefault("active_command_ids", [])
+    if not isinstance(active, list):
+        active = []
+        trace["active_command_ids"] = active
+    status_text = ""
+    if isinstance(command_source, dict):
+        status_text = " ".join(
+            str(command_source.get(key) or "")
+            for key in ("status", "state", "outcome")
+        ).lower()
+    event_lower = event_type.lower()
+    completed = (
+        "completed" in event_lower
+        or "failed" in event_lower
+        or "error" in event_lower
+        or any(marker in status_text for marker in ("completed", "failed", "cancelled", "canceled", "exited"))
+    )
+    started = (
+        "started" in event_lower
+        or "updated" in event_lower
+        or any(marker in status_text for marker in ("running", "in_progress", "started"))
+    )
+    if completed:
+        trace["active_command_ids"] = [item for item in active if str(item) != command_id]
+    elif started and command_id not in active:
+        active.append(command_id)
+    trace["command_event_count"] = to_int(trace.get("command_event_count"), 0) + 1
+    trace["last_command_activity_at"] = float(now)
+    trace["last_command_summary"] = command_text or event_type
+def _record_live_codex_stdout_line(
+    line: str,
+    use_json: bool,
+    trace: Dict[str, Any],
+    now: Optional[float] = None,
+) -> None:
     stripped = line.strip()
     if not stripped:
         return
@@ -1369,6 +1475,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
             return
         if isinstance(parsed, dict):
+            observed_at = float(now if now is not None else time.monotonic())
             usage = _extract_usage_counts(parsed)
             if usage is not None:
                 trace["prompt_tokens"] = max(
@@ -1385,6 +1492,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
                 .strip()
                 or "event"
             )
+            _record_codex_command_activity(parsed, event_type, trace, observed_at)
             event_type_counts[event_type] = to_int(event_type_counts.get(event_type), 0) + 1
             summary = _summarize_json_event(parsed)
             # Reasoning can arrive under generic event types (for example item.updated).
@@ -1449,10 +1557,13 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
     prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
     completion_tokens = to_int(trace.get("completion_tokens"), 0)
     total_tokens = to_int(trace.get("total_tokens"), 0)
+    command_event_count = to_int(trace.get("command_event_count"), 0)
     if reasoning_events > 0:
         log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
     elif use_json and valid_json > 0:
         log.info("[codex] No reasoning-like events observed in this run.")
+    if command_event_count > 0:
+        log.info(f"[codex] Command execution event(s): {command_event_count}")
     if total_tokens > 0:
         log.info(
             f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
@@ -1473,6 +1584,7 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "total_tokens": total_tokens,
+        "command_event_count": command_event_count,
     }
@@ -2299,8 +2411,14 @@ def _run_codex_task(
                         if not line:
                             continue
                         with trace_lock:
-                            last_activity_at["ts"] = time.monotonic()
-                            _record_live_codex_stdout_line(line, use_json, stdout_trace_state)
+                            observed_at = time.monotonic()
+                            last_activity_at["ts"] = observed_at
+                            _record_live_codex_stdout_line(
+                                line,
+                                use_json,
+                                stdout_trace_state,
+                                observed_at,
+                            )
                 except Exception:
                     pass
                 finally:
@@ -2377,6 +2495,7 @@ def _run_codex_task(
                 else None
             )
             no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
+            no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
             startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
                 communicate_timeout_s,
                 recovery_attempt=startup_stall_recovery_attempt,
@@ -2405,6 +2524,9 @@ def _run_codex_task(
                 if rollout_watchdog_s is not None
                 else None
             )
+            publishable_progress_seen_at: Optional[float] = None
+            publishable_progress_finalized = False
+            publishable_progress_paths: List[str] = []
             while proc.poll() is None:
                 now = time.monotonic()
@@ -2457,6 +2579,44 @@ def _run_codex_task(
                                 "before startup-stall recovery."
                             )
                             continue
+                        command_event_count = to_int(live_trace.get("command_event_count"), 0)
+                        active_commands_raw = live_trace.get("active_command_ids")
+                        active_command_count = (
+                            len(active_commands_raw)
+                            if isinstance(active_commands_raw, list)
+                            else 0
+                        )
+                        last_command_activity_at = 0.0
+                        try:
+                            last_command_activity_at = float(
+                                live_trace.get("last_command_activity_at") or 0.0
+                            )
+                        except Exception:
+                            last_command_activity_at = 0.0
+                        if command_event_count > 0 and no_edit_command_grace_s is not None:
+                            command_grace_deadline = 0.0
+                            if active_command_count > 0:
+                                # Do not kill while Codex is actively running a tool command; poll
+                                # again soon, but keep the total grace bounded by the hard cap below.
+                                command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
+                            elif last_command_activity_at > 0:
+                                command_grace_deadline = last_command_activity_at + float(
+                                    no_edit_command_grace_s
+                                )
+                            if command_grace_deadline > now:
+                                no_edit_deadline = command_grace_deadline
+                                remaining_s = int(max(1.0, command_grace_deadline - now))
+                                command_detail = (
+                                    f"{active_command_count} active command(s)"
+                                    if active_command_count > 0
+                                    else "recent command completion"
+                                )
+                                log.info(
+                                    "No-edit watchdog observed Codex tool progress "
+                                    f"({command_detail}); allowing {remaining_s}s for a "
+                                    "publishable patch before recovery."
+                                )
+                                continue
                         no_edit_artifact_only_paths = _describe_non_publishable_paths(
                             changed_paths,
                             baseline_snapshot,
@@ -2478,6 +2638,22 @@ def _run_codex_task(
                             )
                         _terminate_active_child()
                         break
+                    if publishable_progress_seen_at is None:
+                        publishable_progress_seen_at = now
+                        publishable_progress_paths = list(effective_paths)
+                    elif _has_credible_shell_wrapper_progress(effective_paths):
+                        publishable_progress_paths = list(effective_paths)
+                        publishable_age_s = now - publishable_progress_seen_at
+                        if publishable_age_s >= float(no_edit_recheck_s):
+                            publishable_progress_finalized = True
+                            log.info(
+                                "No-edit watchdog observed durable publishable file changes "
+                                f"({_describe_publishable_paths(effective_paths)}) for "
+                                f"{int(publishable_age_s)}s; stopping Codex early so "
+                                "QualityGate/ValidationGate can use the remaining budget."
+                            )
+                            _terminate_active_child()
+                            break
                     no_edit_deadline = now + float(no_edit_recheck_s)
                     log.info(
                         "No-edit watchdog observed publishable-looking file changes "
@@ -2639,6 +2815,35 @@ def _run_codex_task(
                 "cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
             }
+        if publishable_progress_finalized:
+            changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
+            effective_paths = effective_paths or publishable_progress_paths
+            last_message = _read_text_if_exists(last_message_path)
+            log_git_status(repo, log)
+            prefix = (
+                "Codex produced durable publishable file changes. PushPals stopped the "
+                "Codex child early to preserve validation and revision budget; the normal "
+                "QualityGate/ValidationGate will catch any incomplete edit."
+            )
+            return {
+                "ok": True,
+                "summary": (
+                    "openai_codex stopped after durable publishable progress "
+                    f"({len(effective_paths)} file(s))"
+                ),
+                "stdout": _truncate(
+                    _build_success_stdout(
+                        effective_paths=effective_paths,
+                        last_message=last_message,
+                        trace_excerpt=trace_excerpt,
+                        prefix=prefix,
+                    )
+                ),
+                "stderr": _truncate(stderr),
+                "exitCode": 0,
+                "usage": usage,
+            }
         if no_edit_watchdog_fired:
             startup_stall = _codex_trace_is_startup_stall(stdout_trace)
             if startup_stall and startup_stall_recovery_attempt < _MAX_STARTUP_STALL_RECOVERY_ATTEMPTS:

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -1347,6 +1347,259 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
         self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_no_edit_watchdog_allows_command_backed_discovery(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-command-grace-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# command grace repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed command grace repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_no_edit_command_grace.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import json",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "sys.stdin.read()",
+                        "print(json.dumps({'type': 'thread.started'}), flush=True)",
+                        "print(json.dumps({'type': 'turn.started'}), flush=True)",
+                        "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'in_progress'}}), flush=True)",
+                        "time.sleep(1.4)",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# command grace repo'}}), flush=True)",
+                        "time.sleep(1.6)",
+                        "Path('src').mkdir(exist_ok=True)",
+                        "Path('src/command-grace.txt').write_text('patched after command-backed discovery\\n', encoding='utf-8')",
+                        "if last_message_path:",
+                        "    Path(last_message_path).write_text('Patched after command-backed discovery.', encoding='utf-8')",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after command-backed discovery.'}}), flush=True)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-no-edit-command-grace-test-key",
+                "WORKERPALS_OPENAI_CODEX_JSON": "true",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Add one focused contract assertion after inspecting the hinted test.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched after command-backed discovery", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_no_edit_watchdog_extends_after_later_command_progress(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-late-command-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# late command grace repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed late command repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_late_command_grace.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import json",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "sys.stdin.read()",
+                        "print(json.dumps({'type': 'thread.started'}), flush=True)",
+                        "print(json.dumps({'type': 'turn.started'}), flush=True)",
+                        "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
+                        "time.sleep(0.2)",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0}}), flush=True)",
+                        "time.sleep(2.2)",
+                        "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'in_progress'}}), flush=True)",
+                        "time.sleep(0.2)",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'completed', 'exit_code': 0}}), flush=True)",
+                        "time.sleep(2.0)",
+                        "Path('src').mkdir(exist_ok=True)",
+                        "Path('src/late-command-grace.txt').write_text('patched after later command progress\\n', encoding='utf-8')",
+                        "if last_message_path:",
+                        "    Path(last_message_path).write_text('Patched after later command progress.', encoding='utf-8')",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after later command progress.'}}), flush=True)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-no-edit-late-command-test-key",
+                "WORKERPALS_OPENAI_CODEX_JSON": "true",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "3",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Add one focused contract assertion after a later targeted read.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched after later command progress", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_finalizes_after_durable_publishable_progress(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-durable-progress-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# durable progress repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed durable progress repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_durable_progress.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import json",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "sys.stdin.read()",
+                        "print(json.dumps({'type': 'thread.started'}), flush=True)",
+                        "print(json.dumps({'type': 'turn.started'}), flush=True)",
+                        "Path('src').mkdir(exist_ok=True)",
+                        "Path('src/durable-progress.txt').write_text('durable patch\\n', encoding='utf-8')",
+                        "if last_message_path:",
+                        "    Path(last_message_path).write_text('Created durable patch and kept thinking.', encoding='utf-8')",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Created durable patch and kept thinking.'}}), flush=True)",
+                        "time.sleep(10)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-durable-progress-test-key",
+                "WORKERPALS_OPENAI_CODEX_JSON": "true",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Make a focused patch and stop once it is durable.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("stopped after durable publishable progress", str(result.get("summary") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
     def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
             repo = Path(temp_dir) / "repo"

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -335,6 +335,22 @@ export function shouldSkipCriticForDeterministicValidationRevision(opts: {
   return opts.validationRuns.some(isDeterministicFastValidationFailure);
 }
+export function shouldSkipCriticToPreserveRevisionBudget(opts: {
+  deterministicRequiresRevision: boolean;
+  remainingBudgetMs: number;
+  minimumRevisionBudgetMs: number;
+  criticTimeoutMs: number;
+  criticTimeoutBehavior: "skip" | "retry_once" | "block" | string;
+}): boolean {
+  if (!opts.deterministicRequiresRevision) return false;
+  const remainingBudgetMs = Math.max(0, Math.floor(opts.remainingBudgetMs));
+  const minimumRevisionBudgetMs = Math.max(0, Math.floor(opts.minimumRevisionBudgetMs));
+  const criticTimeoutMs = Math.max(0, Math.floor(opts.criticTimeoutMs));
+  const criticAttempts = opts.criticTimeoutBehavior === "retry_once" ? 2 : 1;
+  const criticWorstCaseMs = criticTimeoutMs * criticAttempts;
+  return remainingBudgetMs < minimumRevisionBudgetMs + criticWorstCaseMs;
+}
 export function workerAttemptRolloutScore(params: {
   executorElapsedMs: number;
   qualityElapsedMs: number;
@@ -7975,11 +7991,23 @@ export async function executeJob(
         validationOutsideTaskScope,
         validationRuns: quality.validationRuns,
       });
+    const preCriticRevisionBudget = qualityRevisionBudgetDecision({
+      jobElapsedMs: Date.now() - jobStartedAt,
+      executionBudgetMs,
+    });
+    const skipCriticForRevisionBudget = shouldSkipCriticToPreserveRevisionBudget({
+      deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
+      remainingBudgetMs: preCriticRevisionBudget.remainingBudgetMs,
+      minimumRevisionBudgetMs: preCriticRevisionBudget.minimumRevisionBudgetMs,
+      criticTimeoutMs: resolveQualityCriticTimeoutMs(runtimeConfig),
+      criticTimeoutBehavior: resolveQualityCriticTimeoutBehavior(runtimeConfig),
+    });
     const critic =
       quality.skipped ||
       !qualityGatePolicy.criticGateEnabled ||
       skipCriticAfterExecutorTimeout ||
-      skipCriticForDeterministicValidationRevision
+      skipCriticForDeterministicValidationRevision ||
+      skipCriticForRevisionBudget
         ? null
         : executor === "openai_codex"
           ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -8020,6 +8048,11 @@ export async function executeJob(
         "stdout",
         "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
       );
+    } else if (skipCriticForRevisionBudget) {
+      onLog?.(
+        "stdout",
+        `[CriticGate] Skipping critic because deterministic quality already requires revision and remaining budget (${preCriticRevisionBudget.remainingBudgetMs}ms) must be reserved for the next worker turn.`,
+      );
     }
     const rolloutScore = workerAttemptRolloutScore({
       executorElapsedMs,