npm - @pushpalsdev/cli - Versions diffs - 1.1.12 → 1.1.14 - Mend

@pushpalsdev/cli 1.1.12 → 1.1.14

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (7)

package/dist/pushpals-cli.js CHANGED Viewed

@@ -5308,22 +5308,32 @@ function shouldSuppressCliSessionJobLogLine(line) {
     return true;
   if (/^(___RESULT___|__PUSHPALS_OH_RESULT__)\b/.test(text))
     return true;
-  if (/^\[DockerExecutor\]\s+Linked worktree dependency artifact/i.test(text))
+  if (/^\[DockerExecutor\]\s+(?:Linked worktree dependency artifact|Capped job timeout|Extended job timeout)/i.test(text)) {
     return true;
-  if (/^\[OpenAICodexExecutor\]\s+(?:Planner guidance|Codex auth mode|ChatGPT auth mode|Starting codex exec|codex exec finished|Codex JSON stream captured|Codex stdout captured|No reasoning-like|Reasoning-like event|Usage observed|Temporarily masked repo-local)/i.test(text)) {
+  }
+  if (/^\[JobRunner\]\s+Starting job\b/i.test(text))
+    return true;
+  if (/^\[QualityGate\]\s+(?:Policy:|Gates:)/i.test(text))
+    return true;
+  if (/^\[(?:Openai_codex|OpenHands|Miniswe)Executor\]\s+(?:Spawning\b|Timeout reached\b|Still running\b|Process did not exit after graceful timeout termination\b)/i.test(text)) {
+    return true;
+  }
+  if (/^\[OpenAICodexExecutor\]\s+(?:Planner guidance|Codex auth mode|ChatGPT auth mode|Starting codex exec|codex exec finished|Codex JSON stream captured|Codex stdout captured|No reasoning-like|Reasoning-like event|Usage observed|Temporarily masked repo-local|Timeout reached after|Process did not exit after graceful timeout termination)/i.test(text)) {
     return true;
   }
   if (/^\[OpenAICodexExecutor\]\s+codex exec still running\b/i.test(text))
     return true;
+  if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:No reasoning-like|Reasoning-like|turn\.failed|turn\.completed|error\s+\|)/i.test(text)) {
+    return true;
+  }
   if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:thread|turn)\.started\b/i.test(text)) {
     return true;
   }
   if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.started\b/i.test(text))
     return true;
-  if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.completed\s*$/i.test(text))
-    return true;
-  if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.updated\s*$/i.test(text))
+  if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.(?:completed|updated)\b/i.test(text)) {
     return true;
+  }
   if (/^\[OpenAICodexExecutor\]\s+\[stderr\].*codex_core::tools::router: error=exec_command failed/i.test(text)) {
     return true;
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.12",
+  "version": "1.1.14",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -103,6 +103,9 @@ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
 _MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
 _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
 _MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
+_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
+_DEFAULT_NO_EDIT_WATCHDOG_S = 480
+_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
 def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -559,12 +562,99 @@ def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig, model: str = DEF
     return default_effort
+def _looks_like_small_task_prompt(prompt: str) -> bool:
+    text = str(prompt or "").lower()
+    small_markers = (
+        "risk=low",
+        "small scoped",
+        "small or medium repo tasks",
+        "compact",
+        "low-risk",
+        "low risk",
+        "route-entry",
+        "first-entry",
+        "home shell",
+        "startup shell",
+        "shell polish",
+        "visual/affordance",
+    )
+    heavy_markers = (
+        "merge-conflict",
+        "merge conflict",
+        "rebase",
+        "broad refactor",
+        "migration",
+        "security",
+        "architecture",
+        "deep debug",
+    )
+    return any(marker in text for marker in small_markers) and not any(
+        marker in text for marker in heavy_markers
+    )
+def _resolve_task_reasoning_effort(
+    configured_effort: str,
+    prompt: str,
+    model: str = DEFAULT_CODEX_MODEL,
+) -> str:
+    effort = configured_effort if configured_effort in _VALID_REASONING_EFFORTS else "high"
+    if not _looks_like_small_task_prompt(prompt):
+        return effort
+    if effort == "xhigh":
+        log.info(
+            f"Routing compact task on model {model!r} from reasoning_effort='xhigh' to 'high' for faster convergence."
+        )
+        return "high"
+    return effort
 def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) -> int:
     interval = to_int(config.progress_log_interval_s, 30)
     # Avoid noisy logs (<30s) and stale logs (>120s).
     return max(30, min(120, interval))
+def _resolve_no_edit_watchdog_seconds(
+    prompt: str,
+    communicate_timeout_s: Optional[int],
+) -> Optional[int]:
+    if not communicate_timeout_s:
+        return None
+    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S", "").strip()
+    if raw:
+        if raw == "0":
+            return None
+        parsed = _to_positive_int(raw)
+        if parsed is None:
+            log.info(
+                f"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S={raw!r}; using default no-edit watchdog."
+            )
+        else:
+            return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
+    if communicate_timeout_s < 600:
+        return None
+    default_s = _SMALL_TASK_NO_EDIT_WATCHDOG_S if _looks_like_small_task_prompt(prompt) else _DEFAULT_NO_EDIT_WATCHDOG_S
+    return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
+def _build_no_edit_recovery_guidance(trace_excerpt: str) -> str:
+    lines = [
+        "No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
+        "Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
+        "Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
+        "Use existing tests or a narrow helper/style assertion; do not create broad React Native mocks or a new full render harness for a compact shell/visual polish task.",
+        "Run at most one focused fast validation check before final diff review; let PushPals ValidationGate own long required/browser validation.",
+    ]
+    if trace_excerpt:
+        lines.append("Previous Codex event trace excerpt:")
+        lines.append(trace_excerpt)
+    return "\n".join(lines)
 def _normalize_auth_mode(raw: str) -> str:
     lowered = (raw or "").strip().lower()
     aliases = {
@@ -1506,6 +1596,7 @@ def _run_codex_task(
     *,
     wrapper_recovery_attempt: int = 0,
     model_compatibility_recovery_attempt: int = 0,
+    no_edit_recovery_attempt: int = 0,
     model_override: Optional[str] = None,
     baseline_changes: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
@@ -1567,10 +1658,14 @@ def _run_codex_task(
     )
     # JSON event output is noisy by default; prefer plain text + output-last-message.
     use_json = runtime_config.json_output
-    reasoning_effort = _resolve_reasoning_effort(runtime_config, model)
     communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
     effective_supplemental_guidance = _augment_supplemental_guidance(supplemental_guidance)
     prompt = _build_instruction(instruction, effective_supplemental_guidance)
+    reasoning_effort = _resolve_task_reasoning_effort(
+        _resolve_reasoning_effort(runtime_config, model),
+        prompt,
+        model,
+    )
     baseline_snapshot = list(baseline_changes) if baseline_changes is not None else summarize_git_changes(repo)
     with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
@@ -1793,7 +1888,18 @@ def _run_codex_task(
             )
             next_progress_at = started_at + float(progress_interval_s)
             timed_out = False
+            no_edit_watchdog_fired = False
             command_policy_rejection_loop = False
+            no_edit_watchdog_s = (
+                _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
+                if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
+                else None
+            )
+            no_edit_deadline = (
+                started_at + float(no_edit_watchdog_s)
+                if no_edit_watchdog_s is not None
+                else None
+            )
             while proc.poll() is None:
                 now = time.monotonic()
@@ -1802,6 +1908,17 @@ def _run_codex_task(
                     _terminate_active_child()
                     break
+                if no_edit_deadline is not None and now >= no_edit_deadline:
+                    _, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
+                    if not effective_paths:
+                        no_edit_watchdog_fired = True
+                        log.info(
+                            f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes; retrying with patch-first guidance."
+                        )
+                        _terminate_active_child()
+                        break
+                    no_edit_deadline = None
                 with trace_lock:
                     wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
                 if wrapper_rejections >= 3:
@@ -1869,6 +1986,34 @@ def _run_codex_task(
                         continue
                     rejected_shell_wrappers.append(text)
+        if no_edit_watchdog_fired:
+            if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
+                retry_guidance = [
+                    *supplemental_guidance,
+                    _build_no_edit_recovery_guidance(trace_excerpt),
+                ]
+                return _run_codex_task(
+                    repo,
+                    instruction,
+                    retry_guidance,
+                    wrapper_recovery_attempt=wrapper_recovery_attempt,
+                    model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
+                    no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
+                    model_override=model_override,
+                    baseline_changes=baseline_snapshot,
+                )
+            detail = "Codex spent too much of the execution budget without producing publishable file changes."
+            if trace_excerpt:
+                detail = f"{detail}\n{trace_excerpt}"
+            return {
+                "ok": False,
+                "summary": "openai_codex made no publishable changes before the no-edit watchdog",
+                "stdout": _truncate(stdout),
+                "stderr": _truncate(f"{detail}\n{stderr}".strip()),
+                "exitCode": 124,
+                "usage": usage,
+            }
         if timed_out:
             detail = (
                 f"codex exec timed out after {communicate_timeout_s}s"
@@ -1877,6 +2022,34 @@ def _run_codex_task(
             )
             if trace_excerpt:
                 detail = f"{detail}\n{trace_excerpt}"
+            _, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
+            if effective_paths:
+                last_message = _read_text_if_exists(last_message_path)
+                log_git_status(repo, log)
+                prefix = (
+                    "Codex reached the execution timeout after producing publishable file "
+                    "changes. Returning the partial patch to QualityGate/ValidationGate "
+                    "instead of discarding it; any incomplete edit will be caught by the "
+                    "normal gates or revision loop."
+                )
+                return {
+                    "ok": True,
+                    "summary": (
+                        f"openai_codex timed out after modifying {len(effective_paths)} "
+                        "publishable file(s)"
+                    ),
+                    "stdout": _truncate(
+                        _build_success_stdout(
+                            effective_paths=effective_paths,
+                            last_message=last_message,
+                            trace_excerpt=trace_excerpt,
+                            prefix=prefix,
+                        )
+                    ),
+                    "stderr": _truncate(f"{detail}\n{stderr}".strip()),
+                    "exitCode": 0,
+                    "usage": usage,
+                }
             return {
                 "ok": False,
                 "summary": "openai_codex execution timed out",
@@ -1975,6 +2148,7 @@ def _run_codex_task(
                         ],
                         wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
                         model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
+                        no_edit_recovery_attempt=no_edit_recovery_attempt,
                         model_override=model_override,
                         baseline_changes=baseline_snapshot,
                     )
@@ -2057,6 +2231,7 @@ def _run_codex_task(
                     effective_supplemental_guidance,
                     wrapper_recovery_attempt=wrapper_recovery_attempt,
                     model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
+                    no_edit_recovery_attempt=no_edit_recovery_attempt,
                     model_override=LEGACY_CODEX_MODEL_FALLBACK,
                     baseline_changes=baseline_snapshot,
                 )

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -30,6 +30,7 @@ from openai_codex_executor import (
     _build_wrapper_recovery_guidance,
     _run_codex_task,
     _resolve_reasoning_effort,
+    _resolve_task_reasoning_effort,
     _build_instruction,
     _collect_disallowed_shell_wrapper_rejections,
     _codex_changed_paths,
@@ -202,6 +203,24 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         )
         self.assertEqual(_resolve_reasoning_effort(cfg, model="gpt-6-preview"), "xhigh")
+    def test_task_reasoning_effort_routes_compact_shell_tasks_to_high(self) -> None:
+        prompt = (
+            "Task planning contract from PushPals:\n"
+            "- Planning summary: intent=code_change, risk=low, priority=normal\n"
+            "- Route-entry/shell task rule: inspect the hinted route wrapper, then patch the owner.\n"
+        )
+        self.assertEqual(_resolve_task_reasoning_effort("xhigh", prompt, "gpt-5.5"), "high")
+        self.assertEqual(_resolve_task_reasoning_effort("high", prompt, "gpt-5.5"), "high")
+        self.assertEqual(
+            _resolve_task_reasoning_effort(
+                "xhigh",
+                "Merge-conflict rebase task with risk=low wording in reviewer text.",
+                "gpt-5.5",
+            ),
+            "xhigh",
+        )
     def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
         import executor_base
@@ -344,6 +363,43 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
             self.assertIn("prefer pure helper/state/style-prop tests", guidance)
             self.assertIn("full React Native/component render regression", guidance)
+    def test_parse_payload_adds_route_shell_convergence_guidance(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-shell-guidance-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "kind": "task.execute",
+                "repo": str(repo),
+                "params": {
+                    "instruction": (
+                        "Polish the first-entry shell. Start with app/_layout.tsx and "
+                        "app/index.tsx, then tighten the home/settings route-entry affordance."
+                    ),
+                    "schemaVersion": 2,
+                    "planning": {
+                        "intent": "code_change",
+                        "riskLevel": "low",
+                        "queuePriority": "normal",
+                        "queueWaitBudgetMs": 90_000,
+                        "executionBudgetMs": 1_200_000,
+                        "finalizationBudgetMs": 120_000,
+                        "scope": {"readAnywhere": True, "writeAllowed": True},
+                        "targetPaths": ["app/_layout.tsx", "app/index.tsx"],
+                        "acceptanceCriteria": ["Home shell feels coherent with the match UI"],
+                    },
+                },
+            }
+            encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
+            task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
+            guidance = "\n".join(task.supplemental_guidance)
+            self.assertIn("Route-entry/shell task rule", guidance)
+            self.assertIn("route is thin", guidance)
+            self.assertIn("Do not keep re-reading navigation topology", guidance)
+            self.assertIn("missing test infrastructure", guidance)
+            self.assertIn("make one small visual/affordance patch", guidance)
     def test_detects_codex_workaround_signals(self) -> None:
         signal = _detect_codex_workaround_signal(
             "Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
@@ -610,6 +666,226 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertIn("src/", str(result.get("stdout") or ""))
         self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
+    def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# timeout changed repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed timeout changed repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_timeout_changed.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "sys.stdin.read()",
+                        "Path('src').mkdir(exist_ok=True)",
+                        "Path('src/timeout-patch.txt').write_text('changed before timeout\\n', encoding='utf-8')",
+                        "if last_message_path:",
+                        "    Path(last_message_path).write_text('Made a small patch before timeout.', encoding='utf-8')",
+                        "print('item.completed | Made a small patch before timeout.', flush=True)",
+                        "time.sleep(5)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-timeout-changed-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Create a small file, then continue thinking too long.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("timed out after modifying", str(result.get("summary") or ""))
+        self.assertIn("partial patch", str(result.get("stdout") or "").lower())
+        self.assertIn("src/", str(result.get("stdout") or ""))
+        self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
+    def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# no edit watchdog repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed no-edit watchdog repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_no_edit_watchdog.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "from pathlib import Path",
+                        "import sys",
+                        "import time",
+                        "",
+                        "argv = sys.argv[1:]",
+                        "last_message_path = None",
+                        "for index, arg in enumerate(argv):",
+                        "    if arg == '--output-last-message' and index + 1 < len(argv):",
+                        "        last_message_path = argv[index + 1]",
+                        "        break",
+                        "",
+                        "prompt = sys.stdin.read()",
+                        "if 'No-edit watchdog recovery' in prompt:",
+                        "    Path('src').mkdir(exist_ok=True)",
+                        "    Path('src/no-edit-retry.txt').write_text('patched on retry\\n', encoding='utf-8')",
+                        "    if last_message_path:",
+                        "        Path(last_message_path).write_text('Patched immediately after no-edit recovery.', encoding='utf-8')",
+                        "    print('item.completed | Patched immediately after no-edit recovery.', flush=True)",
+                        "    sys.exit(0)",
+                        "",
+                        "print('item.completed | Still inspecting route wrappers.', flush=True)",
+                        "time.sleep(10)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-no-edit-watchdog-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Polish the first-entry home shell with a compact visual patch.",
+                    [],
+                )
+        self.assertTrue(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 0)
+        self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
+        self.assertIn("src/", str(result.get("stdout") or ""))
+    def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
+        with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
+            repo = Path(temp_dir) / "repo"
+            repo.mkdir(parents=True, exist_ok=True)
+            (repo / "README.md").write_text("# no edit watchdog failure repo\n", encoding="utf-8")
+            subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "config", "user.name", "PushPals Test"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(
+                ["git", "config", "user.email", "pushpals-tests@example.com"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
+            subprocess.run(
+                ["git", "commit", "-m", "chore: seed no-edit watchdog failure repo"],
+                cwd=repo,
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+            stub_path = Path(temp_dir) / "fake_codex_no_edit_watchdog_fail.py"
+            stub_path.write_text(
+                "\n".join(
+                    [
+                        "import sys",
+                        "import time",
+                        "",
+                        "sys.stdin.read()",
+                        "print('item.completed | Still inspecting, no patch yet.', flush=True)",
+                        "time.sleep(10)",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            env_overrides = {
+                "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
+                "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
+                "OPENAI_API_KEY": "pushpals-no-edit-watchdog-fail-test-key",
+                "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
+                "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
+                "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
+            }
+            with mock.patch.dict(os.environ, env_overrides, clear=False):
+                result = _run_codex_task(
+                    str(repo),
+                    "Polish the first-entry home shell with a compact visual patch.",
+                    [],
+                )
+        self.assertFalse(result.get("ok"), result)
+        self.assertEqual(result.get("exitCode"), 124)
+        self.assertIn("no publishable changes", str(result.get("summary") or ""))
     def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
         with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
             repo = Path(temp_dir) / "repo"

package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py CHANGED Viewed

@@ -787,14 +787,58 @@ def _looks_like_visual_derivation_task(params: Dict[str, Any]) -> bool:
     return any(marker in text for marker in visual_markers)
+def _looks_like_route_shell_task(params: Dict[str, Any]) -> bool:
+    text = _joined_task_text(params)
+    shell_markers = (
+        "route-entry",
+        "route entry",
+        "first-entry",
+        "first entry",
+        "startup shell",
+        "home shell",
+        "entry route",
+        "shell/navigation",
+        "app/_layout",
+        "app/index",
+        "homescreen",
+        "home screen",
+        "settingsscreen",
+        "settings screen",
+        "shopscreen",
+        "shop screen",
+        "help",
+        "game-over",
+        "game over",
+        "match-start",
+        "match start",
+        "return affordance",
+    )
+    return any(marker in text for marker in shell_markers)
 def _build_efficiency_guidance(params: Dict[str, Any]) -> str:
     lines: List[str] = [
         "Worker speed/convergence contract from PushPals:",
         "- Target useful completion in roughly 20 minutes for small or medium repo tasks; optimize for the smallest coherent patch over exhaustive exploration.",
-        "- Phase soft budgets: discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
-        "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state coverage.",
+        "- Phase soft budgets: discovery <= 3m for small scoped tasks and <= 5m otherwise, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
+        "- No-edit checkpoint: if you have not made a patch after identifying the behavior-owning file, stop discovering and edit that file now. Do not spend the execution budget proving every adjacent assumption first.",
+        "- Discovery command budget: for compact tasks, use at most 5-8 targeted read/search commands before editing. If that is not enough, state the blocker and patch the best behavior owner rather than widening discovery.",
     ]
-    if _looks_like_visual_derivation_task(params):
+    route_shell_task = _looks_like_route_shell_task(params)
+    visual_task = _looks_like_visual_derivation_task(params)
+    if route_shell_task or visual_task:
+        lines.append(
+            "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state/style coverage.",
+        )
+    if route_shell_task:
+        lines.extend(
+            [
+                "- Route-entry/shell task rule: inspect the hinted route wrapper, then move immediately to the behavior-owning shell component when the route is thin. Do not keep re-reading navigation topology once the owner is found.",
+                "- Compact shell polish rule: make one small visual/affordance patch before chasing missing test infrastructure. If a referenced React Native mock or app/__tests__ path is absent, use existing nearby tests or a focused style/helper assertion instead of creating a broad render harness.",
+                "- Shell task deadline: by the first clear owner hypothesis, choose the home/settings/shop/help/game-over surface and patch it; ValidationGate can run long browser checks after your focused validation.",
+            ]
+        )
+    if visual_task:
         lines.extend(
             [
                 "- Visual/rendering task rule: prefer pure helper/state/style-prop tests for derived visual cues. Use a full React Native/component render regression only if the repo already has a stable harness for that exact surface.",
@@ -809,6 +853,7 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
     if not isinstance(planning, dict):
         return ""
+    compact_task = _looks_like_route_shell_task(params) or _looks_like_visual_derivation_task(params)
     lines: List[str] = ["Task planning contract from PushPals:"]
     intent = to_single_line(planning.get("intent"), 80)
     risk = to_single_line(planning.get("riskLevel"), 80)
@@ -856,17 +901,29 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
         forbidden = _string_list(scope.get("forbiddenGlobs"), limit=8)
         _append_list_guidance(lines, "Forbidden path hints", forbidden)
-    _append_list_guidance(lines, "Target path hints", _string_list(planning.get("targetPaths"), limit=12))
+    _append_list_guidance(
+        lines,
+        "Target path hints",
+        _string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
+    )
     discovery = planning.get("discovery")
     if isinstance(discovery, dict):
         _append_list_guidance(
             lines,
             "Suggested discovery commands",
-            _string_list(discovery.get("ripgrepQueries"), limit=8),
+            _string_list(discovery.get("ripgrepQueries"), limit=4 if compact_task else 8),
+        )
+        _append_list_guidance(
+            lines,
+            "Likely directories",
+            _string_list(discovery.get("likelyDirs"), limit=4 if compact_task else 8),
+        )
+        _append_list_guidance(
+            lines,
+            "Search keywords",
+            _string_list(discovery.get("keywords"), limit=8 if compact_task else 12),
         )
-        _append_list_guidance(lines, "Likely directories", _string_list(discovery.get("likelyDirs"), limit=8))
-        _append_list_guidance(lines, "Search keywords", _string_list(discovery.get("keywords"), limit=12))
     _append_list_guidance(
         lines,

package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts CHANGED Viewed

@@ -28,6 +28,8 @@ interface GenericPythonExecutorConfig {
   capTimeoutToExecutionBudget?: boolean;
 }
+const BACKEND_TIMEOUT_RESULT_GRACE_MS = 30_000;
 function estimateTokensFromText(text: string): number {
   return Math.max(0, Math.ceil(String(text ?? "").length / 3));
 }
@@ -123,6 +125,7 @@ function resolveRuntimeSettings(
 export function resolveGenericPythonExecutorTimeoutMs(params: {
   configuredTimeoutMs: number;
   executionBudgetMs?: number | null;
+  finalizationBudgetMs?: number | null;
   capTimeoutToExecutionBudget?: boolean;
 }): number {
   const configuredTimeoutMs = Math.max(10_000, Math.floor(params.configuredTimeoutMs));
@@ -130,12 +133,49 @@ export function resolveGenericPythonExecutorTimeoutMs(params: {
     typeof params.executionBudgetMs === "number" && Number.isFinite(params.executionBudgetMs)
       ? Math.max(10_000, Math.floor(params.executionBudgetMs))
       : null;
+  const finalizationBudgetMs =
+    typeof params.finalizationBudgetMs === "number" && Number.isFinite(params.finalizationBudgetMs)
+      ? Math.max(0, Math.floor(params.finalizationBudgetMs))
+      : 0;
   if (executionBudgetMs != null && params.capTimeoutToExecutionBudget !== false) {
-    return Math.min(configuredTimeoutMs, executionBudgetMs);
+    return Math.min(configuredTimeoutMs, executionBudgetMs + finalizationBudgetMs);
   }
   return configuredTimeoutMs;
 }
+export function resolveGenericPythonExecutorChildTimeoutMs(params: {
+  backendName: string;
+  hostTimeoutMs: number;
+  executionBudgetMs?: number | null;
+}): number | null {
+  const hostTimeoutMs = Math.max(10_000, Math.floor(params.hostTimeoutMs));
+  if (params.backendName !== "openai_codex") return null;
+  const executionBudgetMs =
+    typeof params.executionBudgetMs === "number" && Number.isFinite(params.executionBudgetMs)
+      ? Math.max(10_000, Math.floor(params.executionBudgetMs))
+      : null;
+  const childBudgetMs =
+    executionBudgetMs == null ? hostTimeoutMs : Math.min(hostTimeoutMs, executionBudgetMs);
+  const graceMs = Math.min(
+    BACKEND_TIMEOUT_RESULT_GRACE_MS,
+    Math.max(2_000, Math.floor(childBudgetMs / 10)),
+  );
+  return Math.max(1_000, childBudgetMs - graceMs);
+}
+export function resolveGenericPythonExecutorChildTimeoutEnv(params: {
+  backendName: string;
+  hostTimeoutMs: number;
+  executionBudgetMs?: number | null;
+}): Record<string, string> {
+  const childTimeoutMs = resolveGenericPythonExecutorChildTimeoutMs(params);
+  if (childTimeoutMs == null) return {};
+  return {
+    WORKERPALS_OPENAI_CODEX_TIMEOUT_MS: String(childTimeoutMs),
+    WORKERPALS_OPENAI_CODEX_TIMEOUT_S: String(Math.max(1, Math.floor(childTimeoutMs / 1000))),
+  };
+}
 function toSnakeConfigKey(key: string): string {
   return key.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
 }
@@ -144,6 +184,7 @@ function formatGenericPythonExecutorTimeoutDetail(
   config: GenericPythonExecutorConfig,
   configuredTimeoutMs: number,
   executionBudgetMs: number | null,
+  finalizationBudgetMs: number | null,
   timeoutMs: number,
 ): string {
   const configPath = `workerpals.${toSnakeConfigKey(config.timeoutConfigKey)}`;
@@ -154,7 +195,11 @@ function formatGenericPythonExecutorTimeoutDetail(
     return `${configPath}=${configuredTimeoutMs}ms; planning executionBudgetMs=${executionBudgetMs}ms ignored by backend opt-out`;
   }
   if (timeoutMs < configuredTimeoutMs) {
-    return `${configPath}=${configuredTimeoutMs}ms capped by planning executionBudgetMs=${executionBudgetMs}ms`;
+    const finalizationDetail =
+      finalizationBudgetMs && finalizationBudgetMs > 0
+        ? ` + finalizationBudgetMs=${finalizationBudgetMs}ms`
+        : "";
+    return `${configPath}=${configuredTimeoutMs}ms capped by planning executionBudgetMs=${executionBudgetMs}ms${finalizationDetail}`;
   }
   return `${configPath}=${configuredTimeoutMs}ms within planning executionBudgetMs=${executionBudgetMs}ms`;
 }
@@ -190,15 +235,21 @@ export function createGenericPythonExecutor(
       typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
         ? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
         : null;
+    const finalizationBudgetMs =
+      typeof budgets?.finalizationBudgetMs === "number" && Number.isFinite(budgets.finalizationBudgetMs)
+        ? Math.max(0, Math.floor(budgets.finalizationBudgetMs))
+        : null;
     const timeoutMs = resolveGenericPythonExecutorTimeoutMs({
       configuredTimeoutMs,
       executionBudgetMs,
+      finalizationBudgetMs,
       capTimeoutToExecutionBudget: config.capTimeoutToExecutionBudget,
     });
     const timeoutDetail = formatGenericPythonExecutorTimeoutDetail(
       config,
       configuredTimeoutMs,
       executionBudgetMs,
+      finalizationBudgetMs,
       timeoutMs,
     );
     const payloadBase64 = Buffer.from(
@@ -210,6 +261,11 @@ export function createGenericPythonExecutor(
       "utf-8",
     ).toString("base64");
     const args = [pythonBin, scriptPath, payloadBase64];
+    const childTimeoutEnv = resolveGenericPythonExecutorChildTimeoutEnv({
+      backendName,
+      hostTimeoutMs: timeoutMs,
+      executionBudgetMs,
+    });
     onLog?.(
       "stdout",
@@ -229,6 +285,7 @@ export function createGenericPythonExecutor(
         stderr: "pipe",
         env: {
           ...buildWorkerSandboxWritableEnv(repo),
+          ...childTimeoutEnv,
           PUSHPALS_REPO_PATH: repo,
           PUSHPALS_ASSIGNED_REPO_ROOT: repo,
           PYTHONIOENCODING: "utf-8",

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -1145,6 +1145,21 @@ export function isLongRunningBrowserValidationCommand(command: string): boolean
   );
 }
+export function isParallelSafeFastValidationCommand(repo: string, command: string): boolean {
+  if (isLongRunningBrowserValidationCommand(command)) return false;
+  if (shouldEnsurePlaywrightBrowserRuntime(repo, command)) return false;
+  const tokens = tokenizeValidationCommandArgv(command);
+  if (!tokens || tokens.length === 0) return false;
+  const lower = tokens.map((token) => token.toLowerCase());
+  if (lower[0] !== "bun") return false;
+  if (lower[1] === "test") return true;
+  if (lower[1] === "x" && lower[2] === "tsc") return true;
+  if (lower[1] === "run" && ["lint", "typecheck", "test", "test:unit"].includes(lower[2] ?? "")) {
+    return true;
+  }
+  return false;
+}
 function readPackageJson(repo: string): {
   scripts?: Record<string, unknown>;
   dependencies?: Record<string, unknown>;
@@ -3116,7 +3131,71 @@ async function runDeterministicQualityGate(
       );
     }
     const playwrightBrowserRuntimeReadyTargets = new Set<string>();
-    for (const command of commandsToRun) {
+    for (let commandIndex = 0; commandIndex < commandsToRun.length; ) {
+      const parallelBatch: string[] = [];
+      while (
+        commandIndex + parallelBatch.length < commandsToRun.length &&
+        parallelBatch.length < 3
+      ) {
+        const candidate = commandsToRun[commandIndex + parallelBatch.length];
+        if (!isParallelSafeFastValidationCommand(repo, candidate)) break;
+        parallelBatch.push(candidate);
+      }
+      if (parallelBatch.length > 1) {
+        onLog?.(
+          "stdout",
+          `[ValidationGate] Running fast validation batch in parallel: ${parallelBatch.join(" | ")}`,
+        );
+        const batchRuns = await Promise.all(
+          parallelBatch.map(async (command) => {
+            const commandMissingTools = requirementsForValidationCommand(
+              toolchainPlan,
+              command,
+            ).filter((requirement) =>
+              missingToolRequirements.some((missing) => missing.tool === requirement.tool),
+            );
+            if (commandMissingTools.length > 0) {
+              const stderr = `Validation skipped before execution because required tool(s) are missing: ${formatMissingToolRequirements(
+                commandMissingTools,
+              )}.`;
+              return {
+                run: {
+                  step: command,
+                  command,
+                  ok: false,
+                  exitCode: 127,
+                  stdout: "",
+                  stderr,
+                  elapsedMs: 1,
+                } satisfies ValidationExecutionResult,
+                stream: "stderr" as const,
+                summary: `[ValidationGate] Validation skipped (missing toolchain): ${command}`,
+              };
+            }
+            const run = await runValidationCommand(
+              repo,
+              command,
+              resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
+              outputPolicy,
+            );
+            const digest = run.ok ? "" : extractValidationFailureDigest(run);
+            return {
+              run,
+              stream: (run.ok ? "stdout" : "stderr") as "stdout" | "stderr",
+              summary: `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`,
+            };
+          }),
+        );
+        for (const { run, stream, summary } of batchRuns) {
+          validationRuns.push(run);
+          onLog?.(stream, summary);
+        }
+        commandIndex += parallelBatch.length;
+        continue;
+      }
+      const command = commandsToRun[commandIndex];
+      commandIndex += 1;
       const commandMissingTools = requirementsForValidationCommand(toolchainPlan, command).filter(
         (requirement) =>
           missingToolRequirements.some((missing) => missing.tool === requirement.tool),
@@ -6665,6 +6744,7 @@ export async function executeJob(
   const previousValidationFailureDigests = new Map<string, string>();
   const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
   while (revisionAttempt <= qualityRevisionLoopMax) {
+    const attemptStartedAt = Date.now();
     const attemptParams: Record<string, unknown> = { ...normalizedParams };
     if (revisionHint) {
       attemptParams.qualityRevisionHint = revisionHint;
@@ -6683,6 +6763,7 @@ export async function executeJob(
     }
     let result: Awaited<ReturnType<typeof runExecutor>> | null = null;
     let mergeConflictPass = 0;
+    let executorElapsedMs = 0;
     while (true) {
       const currentResult = await runExecutor(
         kind,
@@ -6751,6 +6832,7 @@ export async function executeJob(
         exitCode: 4,
       };
     }
+    executorElapsedMs = Date.now() - attemptStartedAt;
     const preQualityStatus = await git(repo, ["status", "--porcelain"]);
     const preQualityChangedPaths = preQualityStatus.ok
@@ -6799,6 +6881,7 @@ export async function executeJob(
       };
     }
+    const qualityStartedAt = Date.now();
     const quality = await runDeterministicQualityGate(
       repo,
       attemptParams,
@@ -6810,6 +6893,15 @@ export async function executeJob(
         revisionAttempt,
       },
     );
+    const qualityElapsedMs = Date.now() - qualityStartedAt;
+    const validationCommandElapsedMs = quality.validationRuns.reduce(
+      (total, run) => total + Math.max(0, Number(run.elapsedMs) || 0),
+      0,
+    );
+    onLog?.(
+      "stdout",
+      `[JobRunner] Performance summary: attempt=${revisionAttempt}, executor=${executorElapsedMs}ms, quality=${qualityElapsedMs}ms, validation_commands=${quality.validationRuns.length}, validation_command_time=${validationCommandElapsedMs}ms, changed_files=${quality.changedPaths.length}`,
+    );
     let browserRepairPacket = buildBrowserValidationRepairPacket(
       quality.validationRuns,
       previousValidationFailureDigests,