npm - @pushpalsdev/cli - Versions diffs - 1.1.34 → 1.1.36 - Mend

@pushpalsdev/cli 1.1.34 → 1.1.36

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.34",
+  "version": "1.1.36",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -116,6 +116,9 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
 _BACKGROUND_NO_EDIT_WATCHDOG_S = 120
 _NO_EDIT_RECOVERY_WATCHDOG_S = 90
 _DEFAULT_NO_EDIT_RECHECK_S = 120
+_DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
+_DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
+_RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
 _DEFAULT_ROLLOUT_WATCHDOG_S = 300
 _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
 _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
@@ -755,6 +758,65 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
     return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
+def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
+    if not communicate_timeout_s:
+        return None
+    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S", "").strip()
+    if raw:
+        if raw == "0":
+            return None
+        parsed = _to_positive_int(raw)
+        if parsed is None:
+            log.info(
+                "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S="
+                f"{raw!r}; using default command-progress grace."
+            )
+        else:
+            return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
+    upper = max(1, communicate_timeout_s - 1)
+    return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
+def _resolve_startup_stall_watchdog_seconds(
+    communicate_timeout_s: Optional[int],
+    recovery_attempt: int = 0,
+) -> Optional[int]:
+    if not communicate_timeout_s:
+        return None
+    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S", "").strip()
+    if raw:
+        if raw == "0":
+            return None
+        parsed = _to_positive_int(raw)
+        if parsed is None:
+            log.info(
+                "Invalid WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S="
+                f"{raw!r}; using default startup-stall watchdog."
+            )
+        else:
+            return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
+    default_s = (
+        _RECOVERY_STARTUP_STALL_WATCHDOG_S
+        if recovery_attempt > 0
+        else _DEFAULT_STARTUP_STALL_WATCHDOG_S
+    )
+    floor_s = 60
+    return max(floor_s, min(default_s, max(floor_s, communicate_timeout_s - 1)))
+def _startup_stall_recovery_model(current_model: str) -> str:
+    normalized = str(current_model or "").strip()
+    if not normalized:
+        return LEGACY_CODEX_MODEL_FALLBACK
+    if normalized.lower() == LEGACY_CODEX_MODEL_FALLBACK.lower():
+        return normalized
+    return LEGACY_CODEX_MODEL_FALLBACK
 def _looks_like_web_review_prompt(prompt: str) -> bool:
     text = str(prompt or "").lower()
     return "repo-native web review" in text or "web review path" in text
@@ -1299,10 +1361,94 @@ def _empty_codex_trace() -> Dict[str, Any]:
         "prompt_tokens": 0,
         "completion_tokens": 0,
         "total_tokens": 0,
+        "active_command_ids": [],
+        "command_event_count": 0,
+        "last_command_activity_at": None,
+        "last_command_summary": "",
     }
-def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, Any]) -> None:
+def _looks_like_codex_command_item(value: Any) -> bool:
+    if not isinstance(value, dict):
+        return False
+    type_text = " ".join(
+        str(value.get(key) or "")
+        for key in ("type", "item_type", "kind", "name", "tool_name")
+    ).lower()
+    if any(marker in type_text for marker in ("command_execution", "exec_command", "shell_command")):
+        return True
+    return any(key in value for key in ("command", "cmd", "exit_code", "aggregated_output"))
+def _record_codex_command_activity(
+    parsed: Dict[str, Any],
+    event_type: str,
+    trace: Dict[str, Any],
+    now: float,
+) -> None:
+    item = parsed.get("item")
+    command_source: Any = item if _looks_like_codex_command_item(item) else parsed
+    if not _looks_like_codex_command_item(command_source):
+        return
+    command_text = ""
+    if isinstance(command_source, dict):
+        for key in ("command", "cmd", "name"):
+            raw = command_source.get(key)
+            if isinstance(raw, str) and raw.strip():
+                command_text = _truncate_inline(raw.strip(), 160)
+                break
+    command_id = ""
+    if isinstance(command_source, dict):
+        command_id = str(
+            command_source.get("id")
+            or command_source.get("call_id")
+            or command_source.get("item_id")
+            or command_text
+            or "command"
+        ).strip()
+    command_id = command_id or "command"
+    active = trace.setdefault("active_command_ids", [])
+    if not isinstance(active, list):
+        active = []
+        trace["active_command_ids"] = active
+    status_text = ""
+    if isinstance(command_source, dict):
+        status_text = " ".join(
+            str(command_source.get(key) or "")
+            for key in ("status", "state", "outcome")
+        ).lower()
+    event_lower = event_type.lower()
+    completed = (
+        "completed" in event_lower
+        or "failed" in event_lower
+        or "error" in event_lower
+        or any(marker in status_text for marker in ("completed", "failed", "cancelled", "canceled", "exited"))
+    )
+    started = (
+        "started" in event_lower
+        or "updated" in event_lower
+        or any(marker in status_text for marker in ("running", "in_progress", "started"))
+    )
+    if completed:
+        trace["active_command_ids"] = [item for item in active if str(item) != command_id]
+    elif started and command_id not in active:
+        active.append(command_id)
+    trace["command_event_count"] = to_int(trace.get("command_event_count"), 0) + 1
+    trace["last_command_activity_at"] = float(now)
+    trace["last_command_summary"] = command_text or event_type
+def _record_live_codex_stdout_line(
+    line: str,
+    use_json: bool,
+    trace: Dict[str, Any],
+    now: Optional[float] = None,
+) -> None:
     stripped = line.strip()
     if not stripped:
         return
@@ -1329,6 +1475,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
             return
         if isinstance(parsed, dict):
+            observed_at = float(now if now is not None else time.monotonic())
             usage = _extract_usage_counts(parsed)
             if usage is not None:
                 trace["prompt_tokens"] = max(
@@ -1345,6 +1492,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
                 .strip()
                 or "event"
             )
+            _record_codex_command_activity(parsed, event_type, trace, observed_at)
             event_type_counts[event_type] = to_int(event_type_counts.get(event_type), 0) + 1
             summary = _summarize_json_event(parsed)
             # Reasoning can arrive under generic event types (for example item.updated).
@@ -1409,10 +1557,13 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
     prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
     completion_tokens = to_int(trace.get("completion_tokens"), 0)
     total_tokens = to_int(trace.get("total_tokens"), 0)
+    command_event_count = to_int(trace.get("command_event_count"), 0)
     if reasoning_events > 0:
         log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
     elif use_json and valid_json > 0:
         log.info("[codex] No reasoning-like events observed in this run.")
+    if command_event_count > 0:
+        log.info(f"[codex] Command execution event(s): {command_event_count}")
     if total_tokens > 0:
         log.info(
             f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
@@ -1433,6 +1584,7 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "total_tokens": total_tokens,
+        "command_event_count": command_event_count,
     }
@@ -2259,8 +2411,14 @@ def _run_codex_task(
                         if not line:
                             continue
                         with trace_lock:
-                            last_activity_at["ts"] = time.monotonic()
-                            _record_live_codex_stdout_line(line, use_json, stdout_trace_state)
+                            observed_at = time.monotonic()
+                            last_activity_at["ts"] = observed_at
+                            _record_live_codex_stdout_line(
+                                line,
+                                use_json,
+                                stdout_trace_state,
+                                observed_at,
+                            )
                 except Exception:
                     pass
                 finally:
@@ -2337,6 +2495,16 @@ def _run_codex_task(
                 else None
             )
             no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
+            no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
+            startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
+                communicate_timeout_s,
+                recovery_attempt=startup_stall_recovery_attempt,
+            )
+            startup_stall_deadline = (
+                started_at + float(startup_stall_watchdog_s)
+                if startup_stall_watchdog_s is not None
+                else None
+            )
             rollout_watchdog_s = (
                 _resolve_rollout_watchdog_seconds(
                     prompt,
@@ -2351,6 +2519,11 @@ def _run_codex_task(
                 if no_edit_watchdog_s is not None
                 else None
             )
+            no_edit_command_grace_cap_deadline = (
+                started_at + float(no_edit_watchdog_s + no_edit_command_grace_s)
+                if no_edit_watchdog_s is not None and no_edit_command_grace_s is not None
+                else None
+            )
             rollout_deadline = (
                 started_at + float(rollout_watchdog_s)
                 if rollout_watchdog_s is not None
@@ -2364,9 +2537,93 @@ def _run_codex_task(
                     _terminate_active_child()
                     break
+                if startup_stall_deadline is not None and now >= startup_stall_deadline:
+                    with trace_lock:
+                        live_trace = dict(stdout_trace_state)
+                        summaries = stdout_trace_state.get("summaries")
+                        if isinstance(summaries, list):
+                            live_trace["summaries"] = list(summaries)
+                    if _codex_trace_is_startup_stall(live_trace):
+                        changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
+                        if not effective_paths:
+                            no_edit_artifact_only_paths = _describe_non_publishable_paths(
+                                changed_paths,
+                                baseline_snapshot,
+                            )
+                            no_edit_watchdog_fired = True
+                            elapsed_s = int(max(0.0, now - started_at))
+                            log.info(
+                                f"Startup-stall watchdog fired after {elapsed_s}s with no assistant/tool progress."
+                            )
+                            _terminate_active_child()
+                            break
+                    startup_stall_deadline = None
                 if no_edit_deadline is not None and now >= no_edit_deadline:
                     changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
                     if not effective_paths:
+                        with trace_lock:
+                            live_trace = dict(stdout_trace_state)
+                            summaries = stdout_trace_state.get("summaries")
+                            if isinstance(summaries, list):
+                                live_trace["summaries"] = list(summaries)
+                        startup_only = _codex_trace_is_startup_stall(live_trace)
+                        if (
+                            startup_only
+                            and startup_stall_deadline is not None
+                            and now < startup_stall_deadline
+                        ):
+                            no_edit_deadline = startup_stall_deadline
+                            remaining_s = int(max(1.0, startup_stall_deadline - now))
+                            log.info(
+                                "No-edit watchdog observed only Codex startup events; "
+                                f"allowing {remaining_s}s for first assistant/tool progress "
+                                "before startup-stall recovery."
+                            )
+                            continue
+                        command_event_count = to_int(live_trace.get("command_event_count"), 0)
+                        active_commands_raw = live_trace.get("active_command_ids")
+                        active_command_count = (
+                            len(active_commands_raw)
+                            if isinstance(active_commands_raw, list)
+                            else 0
+                        )
+                        last_command_activity_at = 0.0
+                        try:
+                            last_command_activity_at = float(
+                                live_trace.get("last_command_activity_at") or 0.0
+                            )
+                        except Exception:
+                            last_command_activity_at = 0.0
+                        if command_event_count > 0 and no_edit_command_grace_s is not None:
+                            command_grace_deadline = 0.0
+                            if active_command_count > 0:
+                                # Do not kill while Codex is actively running a tool command; poll
+                                # again soon, but keep the total grace bounded by the hard cap below.
+                                command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
+                            elif last_command_activity_at > 0:
+                                command_grace_deadline = last_command_activity_at + float(
+                                    no_edit_command_grace_s
+                                )
+                            if no_edit_command_grace_cap_deadline is not None:
+                                command_grace_deadline = min(
+                                    command_grace_deadline,
+                                    no_edit_command_grace_cap_deadline,
+                                )
+                            if command_grace_deadline > now:
+                                no_edit_deadline = command_grace_deadline
+                                remaining_s = int(max(1.0, command_grace_deadline - now))
+                                command_detail = (
+                                    f"{active_command_count} active command(s)"
+                                    if active_command_count > 0
+                                    else "recent command completion"
+                                )
+                                log.info(
+                                    "No-edit watchdog observed Codex tool progress "
+                                    f"({command_detail}); allowing {remaining_s}s for a "
+                                    "publishable patch before recovery."
+                                )
+                                continue
                         no_edit_artifact_only_paths = _describe_non_publishable_paths(
                             changed_paths,
                             baseline_snapshot,
@@ -2377,9 +2634,15 @@ def _run_codex_task(
                             if no_edit_artifact_only_paths
                             else ""
                         )
-                        log.info(
-                            f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
-                        )
+                        if startup_only:
+                            elapsed_s = int(max(0.0, now - started_at))
+                            log.info(
+                                f"Startup-stall watchdog fired after {elapsed_s}s with no assistant/tool progress."
+                            )
+                        else:
+                            log.info(
+                                f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
+                            )
                         _terminate_active_child()
                         break
                     no_edit_deadline = now + float(no_edit_recheck_s)
@@ -2550,9 +2813,15 @@ def _run_codex_task(
                     *supplemental_guidance,
                     _build_startup_stall_recovery_guidance(trace_excerpt),
                 ]
+                recovery_model = _startup_stall_recovery_model(model)
+                recovery_detail = (
+                    f" using fallback model {recovery_model!r}"
+                    if recovery_model and recovery_model != model
+                    else ""
+                )
                 log.warning(
                     "Codex emitted only startup events before the no-edit watchdog; "
-                    "restarting Codex once before classifying the job terminally."
+                    f"restarting Codex once{recovery_detail} before classifying the job terminally."
                 )
                 retry_result = _run_codex_task(
                     repo,
@@ -2563,7 +2832,7 @@ def _run_codex_task(
                     startup_stall_recovery_attempt=startup_stall_recovery_attempt + 1,
                     no_edit_recovery_attempt=no_edit_recovery_attempt,
                     rollout_recovery_attempt=rollout_recovery_attempt,
-                    model_override=model_override,
+                    model_override=recovery_model or model_override,
                     baseline_changes=baseline_snapshot,
                 )
                 retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -49,6 +49,7 @@ from openai_codex_executor import (
     _resolve_codex_command_prefix,
     _resolve_no_edit_watchdog_seconds,
     _resolve_rollout_watchdog_seconds,
+    _resolve_startup_stall_watchdog_seconds,
     _unwrap_shell_wrapper_command,
     _usage_from_trace_or_estimate,
 )
@@ -372,6 +373,63 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
             self.assertEqual(task.repo, str(repo.resolve()))
             self.assertEqual(task.instruction, "Make one small publishable change")
+    def test_parse_payload_accepts_positional_payload_file_path(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-payload-file-positional-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "kind": "task.execute",
+                "repo": str(repo),
+                "params": {"instruction": "Recover from a direct-worker payload handoff"},
+            }
+            encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
+            payload_file = Path(temp_dir) / "payload.b64"
+            payload_file.write_text(encoded, encoding="utf-8")
+            task = parse_task_execute_payload(
+                ["executor", str(payload_file)],
+                logger=Logger("[test]"),
+            )
+            self.assertEqual(task.kind, "task.execute")
+            self.assertEqual(task.repo, str(repo.resolve()))
+            self.assertEqual(task.instruction, "Recover from a direct-worker payload handoff")
+    def test_parse_payload_accepts_unpadded_base64_payload(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-payload-unpadded-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "kind": "task.execute",
+                "repo": str(repo),
+                "params": {"instruction": "Accept wrapper-normalized payload padding"},
+            }
+            encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
+            unpadded = encoded.rstrip("=")
+            task = parse_task_execute_payload(["executor", unpadded], logger=Logger("[test]"))
+            self.assertEqual(task.kind, "task.execute")
+            self.assertEqual(task.repo, str(repo.resolve()))
+            self.assertEqual(task.instruction, "Accept wrapper-normalized payload padding")
+    def test_parse_payload_accepts_raw_json_payload(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-payload-raw-json-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "kind": "task.execute",
+                "repo": str(repo),
+                "params": {"instruction": "Accept raw JSON from a recovery wrapper"},
+            }
+            raw_json = json.dumps(payload)
+            task = parse_task_execute_payload(["executor", raw_json], logger=Logger("[test]"))
+            self.assertEqual(task.kind, "task.execute")
+            self.assertEqual(task.repo, str(repo.resolve()))
+            self.assertEqual(task.instruction, "Accept raw JSON from a recovery wrapper")
     def test_parse_payload_prefers_helper_tests_for_visual_derivation_tasks(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-visual-guidance-") as temp_dir:
             repo = Path(temp_dir) / "repo"
@@ -1091,13 +1149,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
                         "",
                         "argv = sys.argv[1:]",
                         "last_message_path = None",
+                        "model = ''",
                         "for index, arg in enumerate(argv):",
                         "    if arg == '--output-last-message' and index + 1 < len(argv):",
                         "        last_message_path = argv[index + 1]",
+                        "    if arg == '-m' and index + 1 < len(argv):",
+                        "        model = argv[index + 1]",
                         "        break",
                         "",
                         "prompt = sys.stdin.read()",
-                        "if 'Codex startup-stall recovery' in prompt:",
+                        "if 'Codex startup-stall recovery' in prompt and model == 'gpt-5.4':",
                         "    Path('src').mkdir(exist_ok=True)",
                         "    Path('src/startup-stall-recovered.txt').write_text('patched after restart\\n', encoding='utf-8')",
                         "    if last_message_path:",
@@ -1119,7 +1180,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
                 "OPENAI_API_KEY": "pushpals-startup-stall-test-key",
                 "WORKERPALS_OPENAI_CODEX_JSON": "true",
                 "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
-                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
+                "WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "1",
                 "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
             }
             with mock.patch.dict(os.environ, env_overrides, clear=False):
@@ -1189,6 +1251,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
                 "WORKERPALS_OPENAI_CODEX_JSON": "true",
                 "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
                 "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "1",
                 "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
             }
             with mock.patch.dict(os.environ, env_overrides, clear=False):
@@ -1284,6 +1347,90 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
         self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_no_edit_watchdog_allows_command_backed_discovery(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-command-grace-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# command grace repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed command grace repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_no_edit_command_grace.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import json",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "sys.stdin.read()",
+                        "print(json.dumps({'type': 'thread.started'}), flush=True)",
+                        "print(json.dumps({'type': 'turn.started'}), flush=True)",
+                        "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'in_progress'}}), flush=True)",
+                        "time.sleep(1.4)",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# command grace repo'}}), flush=True)",
+                        "time.sleep(1.6)",
+                        "Path('src').mkdir(exist_ok=True)",
+                        "Path('src/command-grace.txt').write_text('patched after command-backed discovery\\n', encoding='utf-8')",
+                        "if last_message_path:",
+                        "    Path(last_message_path).write_text('Patched after command-backed discovery.', encoding='utf-8')",
+                        "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after command-backed discovery.'}}), flush=True)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-no-edit-command-grace-test-key",
+                "WORKERPALS_OPENAI_CODEX_JSON": "true",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Add one focused contract assertion after inspecting the hinted test.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched after command-backed discovery", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
     def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
             repo = Path(temp_dir) / "repo"
@@ -1587,6 +1734,31 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(watchdog_s, 180)
+    def test_startup_stall_watchdog_allows_slower_first_response_than_no_edit_watchdog(self) -> None:
+        with mock.patch.dict(
+            os.environ,
+            {"WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": ""},
+            clear=False,
+        ):
+            watchdog_s = _resolve_startup_stall_watchdog_seconds(1200)
+            recovery_watchdog_s = _resolve_startup_stall_watchdog_seconds(
+                1200,
+                recovery_attempt=1,
+            )
+        self.assertEqual(watchdog_s, 210)
+        self.assertEqual(recovery_watchdog_s, 150)
+    def test_explicit_startup_stall_watchdog_override_is_bounded(self) -> None:
+        with mock.patch.dict(
+            os.environ,
+            {"WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "500"},
+            clear=False,
+        ):
+            watchdog_s = _resolve_startup_stall_watchdog_seconds(120)
+        self.assertEqual(watchdog_s, 119)
     def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
         prompt = (
             "Harden the opportunity graph contract around autonomous delivery-loop failure signals. "

package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py CHANGED Viewed

@@ -155,14 +155,39 @@ def fail(summary: str, stderr: Optional[str] = None, exit_code: int = 1) -> int:
     return exit_code
-def decode_payload(raw: str) -> Dict[str, Any]:
-    decoded = base64.b64decode(raw).decode("utf-8")
-    payload = json.loads(decoded)
+def _parse_payload_json(raw: str) -> Dict[str, Any]:
+    payload = json.loads(raw)
     if not isinstance(payload, dict):
         raise ValueError("payload must be a JSON object")
     return payload
+def decode_payload(raw: str) -> Dict[str, Any]:
+    stripped = str(raw or "").strip()
+    if not stripped:
+        raise ValueError("empty job payload")
+    # Direct workers normally receive a file-backed base64 payload, but this
+    # parser intentionally accepts the safe adjacent encodings too. That keeps
+    # executor startup resilient if an outer wrapper normalizes padding, uses
+    # url-safe base64, or hands through raw JSON during recovery.
+    if stripped.startswith("{"):
+        return _parse_payload_json(stripped)
+    compact = "".join(stripped.split())
+    padded = compact + ("=" * ((4 - len(compact) % 4) % 4))
+    decode_errors: List[str] = []
+    for decoder in (base64.b64decode, base64.urlsafe_b64decode):
+        try:
+            decoded = decoder(padded).decode("utf-8")
+            return _parse_payload_json(decoded)
+        except Exception as exc:
+            decode_errors.append(str(exc))
+    detail = "; ".join(error for error in decode_errors if error) or "unknown decode error"
+    raise ValueError(f"invalid base64/JSON job payload: {detail}")
 def read_encoded_payload_arg(argv: List[str]) -> str:
     if len(argv) < 2:
         raise ValueError("missing base64 job payload")
@@ -174,6 +199,13 @@ def read_encoded_payload_arg(argv: List[str]) -> str:
         return path.read_text(encoding="utf-8").strip()
     if mode == "--payload-stdin":
         return sys.stdin.read().strip()
+    if len(mode) < 4096:
+        try:
+            path = Path(mode).expanduser()
+            if path.is_file():
+                return path.read_text(encoding="utf-8").strip()
+        except OSError:
+            pass
     return mode

package/runtime/sandbox/apps/workerpals/src/docker_executor.ts CHANGED Viewed

@@ -1918,8 +1918,6 @@ export class DockerExecutor {
   private matchesRetryablePattern(text: string): boolean {
     const transientPatterns: RegExp[] = [
-      /\bstalled before first response\b/i,
-      /\bstartup stall\b/i,
       /warm .*runtime/i,
       /failed to start warm container/i,
       /docker execution error/i,