@pushpalsdev/cli 1.1.11 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,6 +103,9 @@ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
103
103
  _MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
104
104
  _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
105
105
  _MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
106
+ _MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
107
+ _DEFAULT_NO_EDIT_WATCHDOG_S = 480
108
+ _SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
106
109
 
107
110
 
108
111
  def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -559,12 +562,99 @@ def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig, model: str = DEF
559
562
  return default_effort
560
563
 
561
564
 
565
+ def _looks_like_small_task_prompt(prompt: str) -> bool:
566
+ text = str(prompt or "").lower()
567
+ small_markers = (
568
+ "risk=low",
569
+ "small scoped",
570
+ "small or medium repo tasks",
571
+ "compact",
572
+ "low-risk",
573
+ "low risk",
574
+ "route-entry",
575
+ "first-entry",
576
+ "home shell",
577
+ "startup shell",
578
+ "shell polish",
579
+ "visual/affordance",
580
+ )
581
+ heavy_markers = (
582
+ "merge-conflict",
583
+ "merge conflict",
584
+ "rebase",
585
+ "broad refactor",
586
+ "migration",
587
+ "security",
588
+ "architecture",
589
+ "deep debug",
590
+ )
591
+ return any(marker in text for marker in small_markers) and not any(
592
+ marker in text for marker in heavy_markers
593
+ )
594
+
595
+
596
def _resolve_task_reasoning_effort(
    configured_effort: str,
    prompt: str,
    model: str = DEFAULT_CODEX_MODEL,
) -> str:
    """Pick the reasoning effort for one task, capping compact tasks at "high".

    Args:
        configured_effort: Effort resolved from configuration. Values outside
            ``_VALID_REASONING_EFFORTS`` fall back to ``"high"``.
        prompt: Full task prompt, classified by ``_looks_like_small_task_prompt``.
        model: Model name; only used in the log message.

    Returns:
        ``"high"`` when the effort would be ``"xhigh"`` and the prompt looks
        like a small task; otherwise the (validated) configured effort.
    """
    if configured_effort in _VALID_REASONING_EFFORTS:
        effort = configured_effort
    else:
        effort = "high"

    downgrade = _looks_like_small_task_prompt(prompt) and effort == "xhigh"
    if not downgrade:
        return effort

    # Only the xhigh -> high transition is logged; lower efforts pass through untouched.
    log.info(
        f"Routing compact task on model {model!r} from reasoning_effort='xhigh' to 'high' for faster convergence."
    )
    return "high"
610
+
611
+
562
612
  def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) -> int:
563
613
  interval = to_int(config.progress_log_interval_s, 30)
564
614
  # Avoid noisy logs (<30s) and stale logs (>120s).
565
615
  return max(30, min(120, interval))
566
616
 
567
617
 
618
+ def _resolve_no_edit_watchdog_seconds(
619
+ prompt: str,
620
+ communicate_timeout_s: Optional[int],
621
+ ) -> Optional[int]:
622
+ if not communicate_timeout_s:
623
+ return None
624
+
625
+ raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S", "").strip()
626
+ if raw:
627
+ if raw == "0":
628
+ return None
629
+ parsed = _to_positive_int(raw)
630
+ if parsed is None:
631
+ log.info(
632
+ f"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S={raw!r}; using default no-edit watchdog."
633
+ )
634
+ else:
635
+ return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
636
+
637
+ if communicate_timeout_s < 600:
638
+ return None
639
+
640
+ default_s = _SMALL_TASK_NO_EDIT_WATCHDOG_S if _looks_like_small_task_prompt(prompt) else _DEFAULT_NO_EDIT_WATCHDOG_S
641
+ return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
642
+
643
+
644
+ def _build_no_edit_recovery_guidance(trace_excerpt: str) -> str:
645
+ lines = [
646
+ "No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
647
+ "Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
648
+ "Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
649
+ "Use existing tests or a narrow helper/style assertion; do not create broad React Native mocks or a new full render harness for a compact shell/visual polish task.",
650
+ "Run at most one focused fast validation check before final diff review; let PushPals ValidationGate own long required/browser validation.",
651
+ ]
652
+ if trace_excerpt:
653
+ lines.append("Previous Codex event trace excerpt:")
654
+ lines.append(trace_excerpt)
655
+ return "\n".join(lines)
656
+
657
+
568
658
  def _normalize_auth_mode(raw: str) -> str:
569
659
  lowered = (raw or "").strip().lower()
570
660
  aliases = {
@@ -1506,6 +1596,7 @@ def _run_codex_task(
1506
1596
  *,
1507
1597
  wrapper_recovery_attempt: int = 0,
1508
1598
  model_compatibility_recovery_attempt: int = 0,
1599
+ no_edit_recovery_attempt: int = 0,
1509
1600
  model_override: Optional[str] = None,
1510
1601
  baseline_changes: Optional[List[str]] = None,
1511
1602
  ) -> Dict[str, Any]:
@@ -1567,10 +1658,14 @@ def _run_codex_task(
1567
1658
  )
1568
1659
  # JSON event output is noisy by default; prefer plain text + output-last-message.
1569
1660
  use_json = runtime_config.json_output
1570
- reasoning_effort = _resolve_reasoning_effort(runtime_config, model)
1571
1661
  communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
1572
1662
  effective_supplemental_guidance = _augment_supplemental_guidance(supplemental_guidance)
1573
1663
  prompt = _build_instruction(instruction, effective_supplemental_guidance)
1664
+ reasoning_effort = _resolve_task_reasoning_effort(
1665
+ _resolve_reasoning_effort(runtime_config, model),
1666
+ prompt,
1667
+ model,
1668
+ )
1574
1669
  baseline_snapshot = list(baseline_changes) if baseline_changes is not None else summarize_git_changes(repo)
1575
1670
 
1576
1671
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
@@ -1793,7 +1888,18 @@ def _run_codex_task(
1793
1888
  )
1794
1889
  next_progress_at = started_at + float(progress_interval_s)
1795
1890
  timed_out = False
1891
+ no_edit_watchdog_fired = False
1796
1892
  command_policy_rejection_loop = False
1893
+ no_edit_watchdog_s = (
1894
+ _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
1895
+ if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS
1896
+ else None
1897
+ )
1898
+ no_edit_deadline = (
1899
+ started_at + float(no_edit_watchdog_s)
1900
+ if no_edit_watchdog_s is not None
1901
+ else None
1902
+ )
1797
1903
 
1798
1904
  while proc.poll() is None:
1799
1905
  now = time.monotonic()
@@ -1802,6 +1908,17 @@ def _run_codex_task(
1802
1908
  _terminate_active_child()
1803
1909
  break
1804
1910
 
1911
+ if no_edit_deadline is not None and now >= no_edit_deadline:
1912
+ _, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
1913
+ if not effective_paths:
1914
+ no_edit_watchdog_fired = True
1915
+ log.info(
1916
+ f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes; retrying with patch-first guidance."
1917
+ )
1918
+ _terminate_active_child()
1919
+ break
1920
+ no_edit_deadline = None
1921
+
1805
1922
  with trace_lock:
1806
1923
  wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
1807
1924
  if wrapper_rejections >= 3:
@@ -1869,6 +1986,34 @@ def _run_codex_task(
1869
1986
  continue
1870
1987
  rejected_shell_wrappers.append(text)
1871
1988
 
1989
+ if no_edit_watchdog_fired:
1990
+ if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
1991
+ retry_guidance = [
1992
+ *supplemental_guidance,
1993
+ _build_no_edit_recovery_guidance(trace_excerpt),
1994
+ ]
1995
+ return _run_codex_task(
1996
+ repo,
1997
+ instruction,
1998
+ retry_guidance,
1999
+ wrapper_recovery_attempt=wrapper_recovery_attempt,
2000
+ model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
2001
+ no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
2002
+ model_override=model_override,
2003
+ baseline_changes=baseline_snapshot,
2004
+ )
2005
+ detail = "Codex spent too much of the execution budget without producing publishable file changes."
2006
+ if trace_excerpt:
2007
+ detail = f"{detail}\n{trace_excerpt}"
2008
+ return {
2009
+ "ok": False,
2010
+ "summary": "openai_codex made no publishable changes before the no-edit watchdog",
2011
+ "stdout": _truncate(stdout),
2012
+ "stderr": _truncate(f"{detail}\n{stderr}".strip()),
2013
+ "exitCode": 124,
2014
+ "usage": usage,
2015
+ }
2016
+
1872
2017
  if timed_out:
1873
2018
  detail = (
1874
2019
  f"codex exec timed out after {communicate_timeout_s}s"
@@ -1877,6 +2022,34 @@ def _run_codex_task(
1877
2022
  )
1878
2023
  if trace_excerpt:
1879
2024
  detail = f"{detail}\n{trace_excerpt}"
2025
+ _, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2026
+ if effective_paths:
2027
+ last_message = _read_text_if_exists(last_message_path)
2028
+ log_git_status(repo, log)
2029
+ prefix = (
2030
+ "Codex reached the execution timeout after producing publishable file "
2031
+ "changes. Returning the partial patch to QualityGate/ValidationGate "
2032
+ "instead of discarding it; any incomplete edit will be caught by the "
2033
+ "normal gates or revision loop."
2034
+ )
2035
+ return {
2036
+ "ok": True,
2037
+ "summary": (
2038
+ f"openai_codex timed out after modifying {len(effective_paths)} "
2039
+ "publishable file(s)"
2040
+ ),
2041
+ "stdout": _truncate(
2042
+ _build_success_stdout(
2043
+ effective_paths=effective_paths,
2044
+ last_message=last_message,
2045
+ trace_excerpt=trace_excerpt,
2046
+ prefix=prefix,
2047
+ )
2048
+ ),
2049
+ "stderr": _truncate(f"{detail}\n{stderr}".strip()),
2050
+ "exitCode": 0,
2051
+ "usage": usage,
2052
+ }
1880
2053
  return {
1881
2054
  "ok": False,
1882
2055
  "summary": "openai_codex execution timed out",
@@ -1975,6 +2148,7 @@ def _run_codex_task(
1975
2148
  ],
1976
2149
  wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
1977
2150
  model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
2151
+ no_edit_recovery_attempt=no_edit_recovery_attempt,
1978
2152
  model_override=model_override,
1979
2153
  baseline_changes=baseline_snapshot,
1980
2154
  )
@@ -2057,6 +2231,7 @@ def _run_codex_task(
2057
2231
  effective_supplemental_guidance,
2058
2232
  wrapper_recovery_attempt=wrapper_recovery_attempt,
2059
2233
  model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
2234
+ no_edit_recovery_attempt=no_edit_recovery_attempt,
2060
2235
  model_override=LEGACY_CODEX_MODEL_FALLBACK,
2061
2236
  baseline_changes=baseline_snapshot,
2062
2237
  )
@@ -30,6 +30,7 @@ from openai_codex_executor import (
30
30
  _build_wrapper_recovery_guidance,
31
31
  _run_codex_task,
32
32
  _resolve_reasoning_effort,
33
+ _resolve_task_reasoning_effort,
33
34
  _build_instruction,
34
35
  _collect_disallowed_shell_wrapper_rejections,
35
36
  _codex_changed_paths,
@@ -202,6 +203,24 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
202
203
  )
203
204
  self.assertEqual(_resolve_reasoning_effort(cfg, model="gpt-6-preview"), "xhigh")
204
205
 
206
def test_task_reasoning_effort_routes_compact_shell_tasks_to_high(self) -> None:
    # A low-risk route-entry prompt should cap "xhigh" at "high" and leave "high" alone.
    compact_prompt = (
        "Task planning contract from PushPals:\n"
        "- Planning summary: intent=code_change, risk=low, priority=normal\n"
        "- Route-entry/shell task rule: inspect the hinted route wrapper, then patch the owner.\n"
    )
    for configured in ("xhigh", "high"):
        self.assertEqual(
            _resolve_task_reasoning_effort(configured, compact_prompt, "gpt-5.5"),
            "high",
        )

    # Heavy markers (merge-conflict / rebase) win over an incidental "risk=low".
    heavy_prompt = "Merge-conflict rebase task with risk=low wording in reviewer text."
    routed = _resolve_task_reasoning_effort("xhigh", heavy_prompt, "gpt-5.5")
    self.assertEqual(routed, "xhigh")
223
+
205
224
  def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
206
225
  import executor_base
207
226
 
@@ -344,6 +363,43 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
344
363
  self.assertIn("prefer pure helper/state/style-prop tests", guidance)
345
364
  self.assertIn("full React Native/component render regression", guidance)
346
365
 
366
def test_parse_payload_adds_route_shell_convergence_guidance(self) -> None:
    # Parsing a task.execute payload for a first-entry/route-entry shell task
    # should yield supplemental guidance containing the route-shell phrases.
    expected_fragments = (
        "Route-entry/shell task rule",
        "route is thin",
        "Do not keep re-reading navigation topology",
        "missing test infrastructure",
        "make one small visual/affordance patch",
    )
    with tempfile.TemporaryDirectory(prefix="pushpals-shell-guidance-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)

        planning = {
            "intent": "code_change",
            "riskLevel": "low",
            "queuePriority": "normal",
            "queueWaitBudgetMs": 90_000,
            "executionBudgetMs": 1_200_000,
            "finalizationBudgetMs": 120_000,
            "scope": {"readAnywhere": True, "writeAllowed": True},
            "targetPaths": ["app/_layout.tsx", "app/index.tsx"],
            "acceptanceCriteria": ["Home shell feels coherent with the match UI"],
        }
        params = {
            "instruction": (
                "Polish the first-entry shell. Start with app/_layout.tsx and "
                "app/index.tsx, then tighten the home/settings route-entry affordance."
            ),
            "schemaVersion": 2,
            "planning": planning,
        }
        payload = {"kind": "task.execute", "repo": str(repo), "params": params}

        # The payload is passed as base64-encoded JSON in the second argv slot.
        raw_json = json.dumps(payload)
        encoded = base64.b64encode(raw_json.encode("utf-8")).decode("ascii")

        task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
        guidance = "\n".join(task.supplemental_guidance)

        for fragment in expected_fragments:
            self.assertIn(fragment, guidance)
402
+
347
403
  def test_detects_codex_workaround_signals(self) -> None:
348
404
  signal = _detect_codex_workaround_signal(
349
405
  "Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
@@ -610,6 +666,163 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
610
666
  self.assertIn("src/", str(result.get("stdout") or ""))
611
667
  self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
612
668
 
669
+ def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
670
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
671
+ repo = Path(temp_dir) / "repo"
672
+ repo.mkdir(parents=True, exist_ok=True)
673
+ (repo / "README.md").write_text("# timeout changed repo\n", encoding="utf-8")
674
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
675
+ subprocess.run(
676
+ ["git", "config", "user.name", "PushPals Test"],
677
+ cwd=repo,
678
+ check=True,
679
+ capture_output=True,
680
+ text=True,
681
+ )
682
+ subprocess.run(
683
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
684
+ cwd=repo,
685
+ check=True,
686
+ capture_output=True,
687
+ text=True,
688
+ )
689
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
690
+ subprocess.run(
691
+ ["git", "commit", "-m", "chore: seed timeout changed repo"],
692
+ cwd=repo,
693
+ check=True,
694
+ capture_output=True,
695
+ text=True,
696
+ )
697
+
698
+ stub_path = Path(temp_dir) / "fake_codex_timeout_changed.py"
699
+ stub_path.write_text(
700
+ "\n".join(
701
+ [
702
+ "from pathlib import Path",
703
+ "import sys",
704
+ "import time",
705
+ "",
706
+ "argv = sys.argv[1:]",
707
+ "last_message_path = None",
708
+ "for index, arg in enumerate(argv):",
709
+ " if arg == '--output-last-message' and index + 1 < len(argv):",
710
+ " last_message_path = argv[index + 1]",
711
+ " break",
712
+ "",
713
+ "sys.stdin.read()",
714
+ "Path('src').mkdir(exist_ok=True)",
715
+ "Path('src/timeout-patch.txt').write_text('changed before timeout\\n', encoding='utf-8')",
716
+ "if last_message_path:",
717
+ " Path(last_message_path).write_text('Made a small patch before timeout.', encoding='utf-8')",
718
+ "print('item.completed | Made a small patch before timeout.', flush=True)",
719
+ "time.sleep(5)",
720
+ ]
721
+ ),
722
+ encoding="utf-8",
723
+ )
724
+
725
+ env_overrides = {
726
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
727
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
728
+ "OPENAI_API_KEY": "pushpals-timeout-changed-test-key",
729
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
730
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
731
+ }
732
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
733
+ result = _run_codex_task(
734
+ str(repo),
735
+ "Create a small file, then continue thinking too long.",
736
+ [],
737
+ )
738
+
739
+ self.assertTrue(result.get("ok"), result)
740
+ self.assertEqual(result.get("exitCode"), 0)
741
+ self.assertIn("timed out after modifying", str(result.get("summary") or ""))
742
+ self.assertIn("partial patch", str(result.get("stdout") or "").lower())
743
+ self.assertIn("src/", str(result.get("stdout") or ""))
744
+ self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
745
+
746
+ def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
747
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
748
+ repo = Path(temp_dir) / "repo"
749
+ repo.mkdir(parents=True, exist_ok=True)
750
+ (repo / "README.md").write_text("# no edit watchdog repo\n", encoding="utf-8")
751
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
752
+ subprocess.run(
753
+ ["git", "config", "user.name", "PushPals Test"],
754
+ cwd=repo,
755
+ check=True,
756
+ capture_output=True,
757
+ text=True,
758
+ )
759
+ subprocess.run(
760
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
761
+ cwd=repo,
762
+ check=True,
763
+ capture_output=True,
764
+ text=True,
765
+ )
766
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
767
+ subprocess.run(
768
+ ["git", "commit", "-m", "chore: seed no-edit watchdog repo"],
769
+ cwd=repo,
770
+ check=True,
771
+ capture_output=True,
772
+ text=True,
773
+ )
774
+
775
+ stub_path = Path(temp_dir) / "fake_codex_no_edit_watchdog.py"
776
+ stub_path.write_text(
777
+ "\n".join(
778
+ [
779
+ "from pathlib import Path",
780
+ "import sys",
781
+ "import time",
782
+ "",
783
+ "argv = sys.argv[1:]",
784
+ "last_message_path = None",
785
+ "for index, arg in enumerate(argv):",
786
+ " if arg == '--output-last-message' and index + 1 < len(argv):",
787
+ " last_message_path = argv[index + 1]",
788
+ " break",
789
+ "",
790
+ "prompt = sys.stdin.read()",
791
+ "if 'No-edit watchdog recovery' in prompt:",
792
+ " Path('src').mkdir(exist_ok=True)",
793
+ " Path('src/no-edit-retry.txt').write_text('patched on retry\\n', encoding='utf-8')",
794
+ " if last_message_path:",
795
+ " Path(last_message_path).write_text('Patched immediately after no-edit recovery.', encoding='utf-8')",
796
+ " print('item.completed | Patched immediately after no-edit recovery.', flush=True)",
797
+ " sys.exit(0)",
798
+ "",
799
+ "print('item.completed | Still inspecting route wrappers.', flush=True)",
800
+ "time.sleep(10)",
801
+ ]
802
+ ),
803
+ encoding="utf-8",
804
+ )
805
+
806
+ env_overrides = {
807
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
808
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
809
+ "OPENAI_API_KEY": "pushpals-no-edit-watchdog-test-key",
810
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
811
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
812
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
813
+ }
814
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
815
+ result = _run_codex_task(
816
+ str(repo),
817
+ "Polish the first-entry home shell with a compact visual patch.",
818
+ [],
819
+ )
820
+
821
+ self.assertTrue(result.get("ok"), result)
822
+ self.assertEqual(result.get("exitCode"), 0)
823
+ self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
824
+ self.assertIn("src/", str(result.get("stdout") or ""))
825
+
613
826
  def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
614
827
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
615
828
  repo = Path(temp_dir) / "repo"
@@ -63,6 +63,5 @@ export const OPENAI_CODEX_BACKEND: DockerBackendSpec = {
63
63
  scriptPath: resolve(import.meta.dir, "openai_codex", "openai_codex_executor.py"),
64
64
  pythonConfigKey: "openaiCodexPython",
65
65
  timeoutConfigKey: "openaiCodexTimeoutMs",
66
- capTimeoutToExecutionBudget: false,
67
66
  }),
68
67
  };
@@ -787,14 +787,58 @@ def _looks_like_visual_derivation_task(params: Dict[str, Any]) -> bool:
787
787
  return any(marker in text for marker in visual_markers)
788
788
 
789
789
 
790
def _looks_like_route_shell_task(params: Dict[str, Any]) -> bool:
    """Return True when the task text points at a route-entry / app-shell surface.

    The joined task text (from ``_joined_task_text``) is scanned for shell
    markers. Multi-word and path-like markers are matched as plain
    substrings. The generic word "help" is matched only when it is not part
    of a longer alphabetic word, so "helper", "helpful" or "helps" no longer
    classify an unrelated task as shell work, while "help", "help screen",
    "help-screen", "help_screen" and "helpscreen" still do.

    Args:
        params: Task parameters forwarded to ``_joined_task_text``.

    Returns:
        True if any shell marker is present in the task text.
    """
    # Function-scope import so this block does not depend on a module-level `re` import.
    import re

    text = _joined_task_text(params)
    shell_markers = (
        "route-entry",
        "route entry",
        "first-entry",
        "first entry",
        "startup shell",
        "home shell",
        "entry route",
        "shell/navigation",
        "app/_layout",
        "app/index",
        "homescreen",
        "home screen",
        "settingsscreen",
        "settings screen",
        "shopscreen",
        "shop screen",
        # Explicit marker: the standalone-"help" pattern below rejects this
        # spelling because "help" is followed by a letter.
        "helpscreen",
        "game-over",
        "game over",
        "match-start",
        "match start",
        "return affordance",
    )
    if any(marker in text for marker in shell_markers):
        return True

    # "help" as a bare substring is too broad (it matches "helper"), so
    # require that no lowercase letter touches it on either side.
    # NOTE(review): assumes _joined_task_text lower-cases its output, as the
    # all-lowercase markers suggest — confirm against that helper.
    return re.search(r"(?<![a-z])help(?![a-z])", text) is not None
817
+
818
+
790
819
  def _build_efficiency_guidance(params: Dict[str, Any]) -> str:
791
820
  lines: List[str] = [
792
821
  "Worker speed/convergence contract from PushPals:",
793
822
  "- Target useful completion in roughly 20 minutes for small or medium repo tasks; optimize for the smallest coherent patch over exhaustive exploration.",
794
- "- Phase soft budgets: discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
795
- "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state coverage.",
823
+ "- Phase soft budgets: discovery <= 3m for small scoped tasks and <= 5m otherwise, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
824
+ "- No-edit checkpoint: if you have not made a patch after identifying the behavior-owning file, stop discovering and edit that file now. Do not spend the execution budget proving every adjacent assumption first.",
825
+ "- Discovery command budget: for compact tasks, use at most 5-8 targeted read/search commands before editing. If that is not enough, state the blocker and patch the best behavior owner rather than widening discovery.",
796
826
  ]
797
- if _looks_like_visual_derivation_task(params):
827
+ route_shell_task = _looks_like_route_shell_task(params)
828
+ visual_task = _looks_like_visual_derivation_task(params)
829
+ if route_shell_task or visual_task:
830
+ lines.append(
831
+ "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state/style coverage.",
832
+ )
833
+ if route_shell_task:
834
+ lines.extend(
835
+ [
836
+ "- Route-entry/shell task rule: inspect the hinted route wrapper, then move immediately to the behavior-owning shell component when the route is thin. Do not keep re-reading navigation topology once the owner is found.",
837
+ "- Compact shell polish rule: make one small visual/affordance patch before chasing missing test infrastructure. If a referenced React Native mock or app/__tests__ path is absent, use existing nearby tests or a focused style/helper assertion instead of creating a broad render harness.",
838
+ "- Shell task deadline: by the first clear owner hypothesis, choose the home/settings/shop/help/game-over surface and patch it; ValidationGate can run long browser checks after your focused validation.",
839
+ ]
840
+ )
841
+ if visual_task:
798
842
  lines.extend(
799
843
  [
800
844
  "- Visual/rendering task rule: prefer pure helper/state/style-prop tests for derived visual cues. Use a full React Native/component render regression only if the repo already has a stable harness for that exact surface.",
@@ -809,6 +853,7 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
809
853
  if not isinstance(planning, dict):
810
854
  return ""
811
855
 
856
+ compact_task = _looks_like_route_shell_task(params) or _looks_like_visual_derivation_task(params)
812
857
  lines: List[str] = ["Task planning contract from PushPals:"]
813
858
  intent = to_single_line(planning.get("intent"), 80)
814
859
  risk = to_single_line(planning.get("riskLevel"), 80)
@@ -856,17 +901,29 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
856
901
  forbidden = _string_list(scope.get("forbiddenGlobs"), limit=8)
857
902
  _append_list_guidance(lines, "Forbidden path hints", forbidden)
858
903
 
859
- _append_list_guidance(lines, "Target path hints", _string_list(planning.get("targetPaths"), limit=12))
904
+ _append_list_guidance(
905
+ lines,
906
+ "Target path hints",
907
+ _string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
908
+ )
860
909
 
861
910
  discovery = planning.get("discovery")
862
911
  if isinstance(discovery, dict):
863
912
  _append_list_guidance(
864
913
  lines,
865
914
  "Suggested discovery commands",
866
- _string_list(discovery.get("ripgrepQueries"), limit=8),
915
+ _string_list(discovery.get("ripgrepQueries"), limit=4 if compact_task else 8),
916
+ )
917
+ _append_list_guidance(
918
+ lines,
919
+ "Likely directories",
920
+ _string_list(discovery.get("likelyDirs"), limit=4 if compact_task else 8),
921
+ )
922
+ _append_list_guidance(
923
+ lines,
924
+ "Search keywords",
925
+ _string_list(discovery.get("keywords"), limit=8 if compact_task else 12),
867
926
  )
868
- _append_list_guidance(lines, "Likely directories", _string_list(discovery.get("likelyDirs"), limit=8))
869
- _append_list_guidance(lines, "Search keywords", _string_list(discovery.get("keywords"), limit=12))
870
927
 
871
928
  _append_list_guidance(
872
929
  lines,