@pushpalsdev/cli 1.1.18 → 1.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -104,8 +104,13 @@ _MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
104
104
  _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
105
105
  _MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
106
106
  _MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
107
+ _MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
107
108
  _DEFAULT_NO_EDIT_WATCHDOG_S = 480
108
109
  _SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
110
+ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
111
+ _DEFAULT_ROLLOUT_WATCHDOG_S = 300
112
+ _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
113
+ _WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
109
114
 
110
115
 
111
116
  def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -577,6 +582,11 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
577
582
  "startup shell",
578
583
  "shell polish",
579
584
  "visual/affordance",
585
+ "repo-native web review",
586
+ "web review path",
587
+ "browser smoke",
588
+ "web delivery",
589
+ "navigation trustworthy",
580
590
  )
581
591
  heavy_markers = (
582
592
  "merge-conflict",
@@ -637,18 +647,142 @@ def _resolve_no_edit_watchdog_seconds(
637
647
  if communicate_timeout_s < 600:
638
648
  return None
639
649
 
640
- default_s = _SMALL_TASK_NO_EDIT_WATCHDOG_S if _looks_like_small_task_prompt(prompt) else _DEFAULT_NO_EDIT_WATCHDOG_S
650
+ prompt_text = str(prompt or "").lower()
651
+ if "repo-native web review" in prompt_text or "web review path" in prompt_text:
652
+ default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
653
+ else:
654
+ default_s = (
655
+ _SMALL_TASK_NO_EDIT_WATCHDOG_S
656
+ if _looks_like_small_task_prompt(prompt)
657
+ else _DEFAULT_NO_EDIT_WATCHDOG_S
658
+ )
641
659
  return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
642
660
 
643
661
 
644
- def _build_no_edit_recovery_guidance(trace_excerpt: str) -> str:
662
+ def _looks_like_web_review_prompt(prompt: str) -> bool:
663
+ text = str(prompt or "").lower()
664
+ return "repo-native web review" in text or "web review path" in text
665
+
666
+
667
def _resolve_rollout_watchdog_seconds(
    prompt: str,
    communicate_timeout_s: Optional[int],
    no_edit_watchdog_s: Optional[int],
) -> Optional[int]:
    """Pick how many seconds to wait before the rollout watchdog inspects a run.

    Args:
        prompt: Task prompt; selects the web-review, small-task, or default
            rollout budget.
        communicate_timeout_s: Overall execution timeout. The watchdog is
            disabled (``None``) when this is missing or below 600 seconds.
        no_edit_watchdog_s: Resolved no-edit watchdog, if any. The default
            rollout budget is capped at 60 seconds before it, with a floor
            of 90 seconds.

    Returns:
        The watchdog delay in seconds, or ``None`` when the watchdog is off.

    ``WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S`` overrides the defaults:
    ``"0"`` disables the watchdog, a value accepted by ``_to_positive_int``
    is clamped to ``[1, communicate_timeout_s - 1]``, and anything else is
    logged and ignored.
    """
    if not communicate_timeout_s:
        return None
    if communicate_timeout_s < 600:
        return None

    override = os.environ.get("WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S", "").strip()
    if override == "0":
        return None
    if override:
        override_s = _to_positive_int(override)
        if override_s is not None:
            ceiling = max(1, communicate_timeout_s - 1)
            return max(1, min(override_s, ceiling))
        log.info(
            f"Invalid WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S={override!r}; using default rollout watchdog."
        )

    # The web-review check runs first; the small-task check only runs when it fails.
    budget_s = (
        _WEB_REVIEW_ROLLOUT_WATCHDOG_S
        if _looks_like_web_review_prompt(prompt)
        else _SMALL_TASK_ROLLOUT_WATCHDOG_S
        if _looks_like_small_task_prompt(prompt)
        else _DEFAULT_ROLLOUT_WATCHDOG_S
    )
    if no_edit_watchdog_s is not None:
        budget_s = min(budget_s, max(90, no_edit_watchdog_s - 60))
    upper_bound = max(90, communicate_timeout_s - 60)
    return max(90, min(budget_s, upper_bound))
696
+
697
+
698
+ def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: List[str]) -> str:
699
+ delta = [p for p in changed_paths if p not in baseline_snapshot]
700
+ inspected = delta if delta else changed_paths
701
+ non_publishable = [p for p in inspected if not _is_publishable_changed_path(p)]
702
+ if not non_publishable:
703
+ return ""
704
+ listed = ", ".join(non_publishable[:8])
705
+ if len(non_publishable) > 8:
706
+ listed += ", ..."
707
+ return listed
708
+
709
+
710
+ def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
645
711
  lines = [
646
712
  "No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
647
713
  "Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
714
+ "Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
648
715
  "Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
716
+ "If a hinted test path is absent, do not invent PushPals/autonomy-specific files in the user repo. Add repo-native coverage beside existing tests, or make a tiny behavior/script patch with no new broad harness.",
649
717
  "Use existing tests or a narrow helper/style assertion; do not create broad React Native mocks or a new full render harness for a compact shell/visual polish task.",
650
718
  "Run at most one focused fast validation check before final diff review; let PushPals ValidationGate own long required/browser validation.",
651
719
  ]
720
+ if artifact_only_paths:
721
+ lines.append(f"Only non-publishable artifact paths changed so far: {artifact_only_paths}.")
722
+ if trace_excerpt:
723
+ lines.append("Previous Codex event trace excerpt:")
724
+ lines.append(trace_excerpt)
725
+ return "\n".join(lines)
726
+
727
+
728
+ def _trace_summaries_text(trace: Dict[str, Any]) -> str:
729
+ summaries = trace.get("summaries")
730
+ if not isinstance(summaries, list):
731
+ return ""
732
+ return "\n".join(str(item or "") for item in summaries[-80:]).lower()
733
+
734
+
735
+ def _detect_offtrack_rollout(trace: Dict[str, Any], artifact_only_paths: str = "") -> str:
736
+ text = _trace_summaries_text(trace)
737
+ if artifact_only_paths:
738
+ return f"only non-publishable artifact paths changed: {artifact_only_paths}"
739
+ if not text:
740
+ return ""
741
+ checks: List[Tuple[str, re.Pattern[str]]] = [
742
+ (
743
+ "the worker is spending time on missing hinted files or absent repo scaffolding",
744
+ re.compile(
745
+ r"(not present|not found|no existing|no .* directory|missing .* checkout|not listed in the checkout|checkout is much smaller|hinted .* absent)",
746
+ re.I,
747
+ ),
748
+ ),
749
+ (
750
+ "the worker is drifting into broad test-harness or React Native mock repair",
751
+ re.compile(
752
+ r"(full[- ]?(surface|render)|test harness repair|react native mock|broad .*mock|shared mock|adding .*mock helper|full component render)",
753
+ re.I,
754
+ ),
755
+ ),
756
+ (
757
+ "the worker is about to add PushPals/autonomy internals to a user repo",
758
+ re.compile(
759
+ r"(_layout\.autonomy|queue_health|workerpal|remotebuddy|reviewagent|pushpals-internal|no autonomy module)",
760
+ re.I,
761
+ ),
762
+ ),
763
+ ]
764
+ for reason, pattern in checks:
765
+ if pattern.search(text):
766
+ return reason
767
+ return ""
768
+
769
+
770
+ def _build_rollout_recovery_guidance(
771
+ reason: str,
772
+ trace_excerpt: str,
773
+ artifact_only_paths: str = "",
774
+ ) -> str:
775
+ lines = [
776
+ "Rollout coach recovery: the previous Codex trajectory looked unlikely to produce a publishable, repo-native patch inside the budget.",
777
+ f"Detected off-track signal: {reason or 'no publishable progress despite concerning trace signals'}.",
778
+ "Do not continue the same exploration path. Start from the prior findings and make the smallest publishable edit first.",
779
+ "If the requested or hinted file/path is absent, treat it as a stale hint: choose an existing repo-native owner or existing test nearby instead of creating PushPals/autonomy-specific scaffolding.",
780
+ "For web review or shell-validation work, prefer an existing browser/e2e script, route shell, or navigation surface over generic autonomy infrastructure.",
781
+ "Avoid broad React Native render harnesses and shared mock expansion unless the repo already has that stable infrastructure and the task explicitly asks for it.",
782
+ "After the first patch, run one focused fast check or stop with a concise final update so ValidationGate can run the expensive suite.",
783
+ ]
784
+ if artifact_only_paths:
785
+ lines.append(f"Only non-publishable artifact paths changed so far: {artifact_only_paths}.")
652
786
  if trace_excerpt:
653
787
  lines.append("Previous Codex event trace excerpt:")
654
788
  lines.append(trace_excerpt)
@@ -1597,6 +1731,7 @@ def _run_codex_task(
1597
1731
  wrapper_recovery_attempt: int = 0,
1598
1732
  model_compatibility_recovery_attempt: int = 0,
1599
1733
  no_edit_recovery_attempt: int = 0,
1734
+ rollout_recovery_attempt: int = 0,
1600
1735
  model_override: Optional[str] = None,
1601
1736
  baseline_changes: Optional[List[str]] = None,
1602
1737
  ) -> Dict[str, Any]:
@@ -1889,17 +2024,35 @@ def _run_codex_task(
1889
2024
  next_progress_at = started_at + float(progress_interval_s)
1890
2025
  timed_out = False
1891
2026
  no_edit_watchdog_fired = False
2027
+ no_edit_artifact_only_paths = ""
2028
+ rollout_watchdog_fired = False
2029
+ rollout_watchdog_reason = ""
2030
+ rollout_artifact_only_paths = ""
1892
2031
  command_policy_rejection_loop = False
1893
2032
  no_edit_watchdog_s = (
1894
2033
  _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
1895
2034
  if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
1896
2035
  else None
1897
2036
  )
2037
+ rollout_watchdog_s = (
2038
+ _resolve_rollout_watchdog_seconds(
2039
+ prompt,
2040
+ communicate_timeout_s,
2041
+ no_edit_watchdog_s,
2042
+ )
2043
+ if rollout_recovery_attempt <= _MAX_ROLLOUT_RECOVERY_ATTEMPTS
2044
+ else None
2045
+ )
1898
2046
  no_edit_deadline = (
1899
2047
  started_at + float(no_edit_watchdog_s)
1900
2048
  if no_edit_watchdog_s is not None
1901
2049
  else None
1902
2050
  )
2051
+ rollout_deadline = (
2052
+ started_at + float(rollout_watchdog_s)
2053
+ if rollout_watchdog_s is not None
2054
+ else None
2055
+ )
1903
2056
 
1904
2057
  while proc.poll() is None:
1905
2058
  now = time.monotonic()
@@ -1909,16 +2062,54 @@ def _run_codex_task(
1909
2062
  break
1910
2063
 
1911
2064
  if no_edit_deadline is not None and now >= no_edit_deadline:
1912
- _, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2065
+ changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
1913
2066
  if not effective_paths:
2067
+ no_edit_artifact_only_paths = _describe_non_publishable_paths(
2068
+ changed_paths,
2069
+ baseline_snapshot,
2070
+ )
1914
2071
  no_edit_watchdog_fired = True
2072
+ artifact_detail = (
2073
+ f" Artifact-only dirty paths: {no_edit_artifact_only_paths}."
2074
+ if no_edit_artifact_only_paths
2075
+ else ""
2076
+ )
1915
2077
  log.info(
1916
- f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes; retrying with patch-first guidance."
2078
+ f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
1917
2079
  )
1918
2080
  _terminate_active_child()
1919
2081
  break
1920
2082
  no_edit_deadline = None
1921
2083
 
2084
+ if rollout_deadline is not None and now >= rollout_deadline:
2085
+ changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2086
+ if not effective_paths:
2087
+ with trace_lock:
2088
+ live_trace = dict(stdout_trace_state)
2089
+ summaries = stdout_trace_state.get("summaries")
2090
+ if isinstance(summaries, list):
2091
+ live_trace["summaries"] = list(summaries)
2092
+ rollout_artifact_only_paths = _describe_non_publishable_paths(
2093
+ changed_paths,
2094
+ baseline_snapshot,
2095
+ )
2096
+ rollout_watchdog_reason = _detect_offtrack_rollout(
2097
+ live_trace,
2098
+ rollout_artifact_only_paths,
2099
+ )
2100
+ if rollout_watchdog_reason:
2101
+ rollout_watchdog_fired = True
2102
+ artifact_detail = (
2103
+ f" Artifact-only dirty paths: {rollout_artifact_only_paths}."
2104
+ if rollout_artifact_only_paths
2105
+ else ""
2106
+ )
2107
+ log.info(
2108
+ f"Rollout coach fired after {int(rollout_watchdog_s or 0)}s: {rollout_watchdog_reason}.{artifact_detail} Retrying with course-correction guidance."
2109
+ )
2110
+ _terminate_active_child()
2111
+ break
2112
+
1922
2113
  with trace_lock:
1923
2114
  wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
1924
2115
  if wrapper_rejections >= 3:
@@ -1986,11 +2177,50 @@ def _run_codex_task(
1986
2177
  continue
1987
2178
  rejected_shell_wrappers.append(text)
1988
2179
 
2180
+ if rollout_watchdog_fired:
2181
+ if rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
2182
+ retry_guidance = [
2183
+ *supplemental_guidance,
2184
+ _build_rollout_recovery_guidance(
2185
+ rollout_watchdog_reason,
2186
+ trace_excerpt,
2187
+ rollout_artifact_only_paths,
2188
+ ),
2189
+ ]
2190
+ return _run_codex_task(
2191
+ repo,
2192
+ instruction,
2193
+ retry_guidance,
2194
+ wrapper_recovery_attempt=wrapper_recovery_attempt,
2195
+ model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
2196
+ no_edit_recovery_attempt=no_edit_recovery_attempt,
2197
+ rollout_recovery_attempt=rollout_recovery_attempt + 1,
2198
+ model_override=model_override,
2199
+ baseline_changes=baseline_snapshot,
2200
+ )
2201
+ detail = (
2202
+ "Codex trajectory remained off-track after rollout coach recovery: "
2203
+ f"{rollout_watchdog_reason or 'no publishable progress'}."
2204
+ )
2205
+ if trace_excerpt:
2206
+ detail = f"{detail}\n{trace_excerpt}"
2207
+ return {
2208
+ "ok": False,
2209
+ "summary": "openai_codex rollout coach could not recover publishable progress",
2210
+ "stdout": _truncate(stdout),
2211
+ "stderr": _truncate(f"{detail}\n{stderr}".strip()),
2212
+ "exitCode": 124,
2213
+ "usage": usage,
2214
+ }
2215
+
1989
2216
  if no_edit_watchdog_fired:
1990
2217
  if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
1991
2218
  retry_guidance = [
1992
2219
  *supplemental_guidance,
1993
- _build_no_edit_recovery_guidance(trace_excerpt),
2220
+ _build_no_edit_recovery_guidance(
2221
+ trace_excerpt,
2222
+ no_edit_artifact_only_paths,
2223
+ ),
1994
2224
  ]
1995
2225
  return _run_codex_task(
1996
2226
  repo,
@@ -1999,6 +2229,7 @@ def _run_codex_task(
1999
2229
  wrapper_recovery_attempt=wrapper_recovery_attempt,
2000
2230
  model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
2001
2231
  no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
2232
+ rollout_recovery_attempt=rollout_recovery_attempt,
2002
2233
  model_override=model_override,
2003
2234
  baseline_changes=baseline_snapshot,
2004
2235
  )
@@ -2050,9 +2281,20 @@ def _run_codex_task(
2050
2281
  "exitCode": 0,
2051
2282
  "usage": usage,
2052
2283
  }
2284
+ changed_paths, _, _ = _codex_changed_paths(repo, baseline_snapshot)
2285
+ artifact_only_paths = _describe_non_publishable_paths(changed_paths, baseline_snapshot)
2286
+ if artifact_only_paths:
2287
+ detail = (
2288
+ f"{detail}\nOnly non-publishable artifact paths changed before timeout: "
2289
+ f"{artifact_only_paths}."
2290
+ )
2053
2291
  return {
2054
2292
  "ok": False,
2055
- "summary": "openai_codex execution timed out",
2293
+ "summary": (
2294
+ "openai_codex timed out without publishable changes"
2295
+ if artifact_only_paths
2296
+ else "openai_codex execution timed out"
2297
+ ),
2056
2298
  "stdout": _truncate(stdout),
2057
2299
  "stderr": _truncate(f"{detail}\n{stderr}".strip()),
2058
2300
  "exitCode": 124,
@@ -2149,6 +2391,7 @@ def _run_codex_task(
2149
2391
  wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
2150
2392
  model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
2151
2393
  no_edit_recovery_attempt=no_edit_recovery_attempt,
2394
+ rollout_recovery_attempt=rollout_recovery_attempt,
2152
2395
  model_override=model_override,
2153
2396
  baseline_changes=baseline_snapshot,
2154
2397
  )
@@ -2232,6 +2475,7 @@ def _run_codex_task(
2232
2475
  wrapper_recovery_attempt=wrapper_recovery_attempt,
2233
2476
  model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
2234
2477
  no_edit_recovery_attempt=no_edit_recovery_attempt,
2478
+ rollout_recovery_attempt=rollout_recovery_attempt,
2235
2479
  model_override=LEGACY_CODEX_MODEL_FALLBACK,
2236
2480
  baseline_changes=baseline_snapshot,
2237
2481
  )
@@ -32,8 +32,12 @@ from openai_codex_executor import (
32
32
  _resolve_reasoning_effort,
33
33
  _resolve_task_reasoning_effort,
34
34
  _build_instruction,
35
+ _build_no_edit_recovery_guidance,
36
+ _build_rollout_recovery_guidance,
35
37
  _collect_disallowed_shell_wrapper_rejections,
36
38
  _codex_changed_paths,
39
+ _describe_non_publishable_paths,
40
+ _detect_offtrack_rollout,
37
41
  _detect_codex_workaround_signal,
38
42
  _extract_usage_counts,
39
43
  _load_prompt_template,
@@ -41,6 +45,8 @@ from openai_codex_executor import (
41
45
  _repo_root_for_prompt_loading,
42
46
  _restore_repo_local_codex_files,
43
47
  _resolve_codex_command_prefix,
48
+ _resolve_no_edit_watchdog_seconds,
49
+ _resolve_rollout_watchdog_seconds,
44
50
  _unwrap_shell_wrapper_command,
45
51
  _usage_from_trace_or_estimate,
46
52
  )
@@ -925,6 +931,223 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
925
931
  self.assertGreaterEqual(len(delta), 2)
926
932
  self.assertEqual(effective, [])
927
933
 
934
def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
    # The baseline already holds the source file, so only the two artifact
    # paths are new and should be named in the summary.
    baseline = ["src/real-change.ts"]
    artifacts = ["node_modules/react/index.js", "outputs/data/runtime.log"]

    summary = _describe_non_publishable_paths([*artifacts, "src/real-change.ts"], baseline)

    for artifact in artifacts:
        self.assertIn(artifact, summary)
    self.assertNotIn("src/real-change.ts", summary)
945
+
946
def test_web_review_tasks_use_faster_no_edit_watchdog(self) -> None:
    # The no-edit override variable is set to an empty string for the call;
    # the test expects the 240s web-review value with a 1200s timeout.
    blank_override = {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}
    prompt = "Strengthen the repo-native web review path with a compact repo-native patch."

    with mock.patch.dict(os.environ, blank_override, clear=False):
        resolved_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)

    self.assertEqual(resolved_s, 240)
954
+
955
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
    guidance = _build_no_edit_recovery_guidance(
        "item.completed | still inspecting",
        "node_modules, outputs/data/runtime.log",
    )

    self.assertIn("node_modules", guidance)
    # "node_modules" also appears in the fixed guidance text, so additionally
    # check a path that can only come from the artifact_only_paths argument.
    self.assertIn("outputs/data/runtime.log", guidance)
    self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
    self.assertIn("Previous Codex event trace excerpt", guidance)
964
+
965
def test_rollout_watchdog_is_earlier_than_web_review_no_edit_watchdog(self) -> None:
    # Blank both overrides: the test asserts the default no-edit value as well
    # as the rollout value, so an ambient no-edit override must not leak in.
    blank_overrides = {
        "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "",
        "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "",
    }
    with mock.patch.dict(os.environ, blank_overrides, clear=False):
        no_edit_s = _resolve_no_edit_watchdog_seconds(
            "Strengthen the repo-native web review path.",
            1200,
        )
        rollout_s = _resolve_rollout_watchdog_seconds(
            "Strengthen the repo-native web review path.",
            1200,
            no_edit_s,
        )

    self.assertEqual(no_edit_s, 240)
    self.assertEqual(rollout_s, 180)
979
+
980
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
    trace = {
        "summaries": [
            "item.completed | The requested test path is not present in this checkout.",
            "item.completed | I am checking the React Native test surface before choosing assertion style.",
        ],
    }

    self.assertIn("missing hinted files", _detect_offtrack_rollout(trace))

    # The trace above only matches the missing-path pattern, so harness drift
    # is exercised with its own trace.
    harness_trace = {
        "summaries": [
            "item.completed | Building a shared mock for React Native before rendering.",
        ],
    }

    self.assertIn("broad test-harness", _detect_offtrack_rollout(harness_trace))
989
+
990
def test_rollout_recovery_guidance_points_to_repo_native_patch(self) -> None:
    # Build guidance with a reason, a trace excerpt, and an artifact path,
    # then check that each expected fragment is present.
    reason = "the worker is spending time on missing hinted files"
    trace_excerpt = "Codex event trace:\n- missing test path"

    guidance = _build_rollout_recovery_guidance(reason, trace_excerpt, "node_modules")

    for fragment in ("Rollout coach recovery", "stale hint", "repo-native", "node_modules"):
        self.assertIn(fragment, guidance)
1001
+
1002
def test_run_codex_task_retries_once_when_rollout_coach_fires(self) -> None:
    # Stand-in for the Codex binary. When the prompt on stdin contains
    # "Rollout coach recovery" it writes a file under scripts/ and exits 0;
    # otherwise it prints two summaries and sleeps for 10 seconds.
    stub_source = "\n".join(
        [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'Rollout coach recovery' in prompt:",
            "    Path('scripts').mkdir(exist_ok=True)",
            "    Path('scripts/web-review-path.txt').write_text('repo-native patch\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text('Patched after rollout coach guidance.', encoding='utf-8')",
            "    print('item.completed | Patched after rollout coach guidance.', flush=True)",
            "    sys.exit(0)",
            "",
            "print('item.completed | The requested test path is not present in this checkout.', flush=True)",
            "print('item.completed | I am checking the React Native test surface before choosing assertion style.', flush=True)",
            "time.sleep(10)",
        ]
    )
    # Commands that seed a one-commit git repository containing README.md.
    git_steps = (
        ["git", "init"],
        ["git", "config", "user.name", "PushPals Test"],
        ["git", "config", "user.email", "pushpals-tests@example.com"],
        ["git", "add", "README.md"],
        ["git", "commit", "-m", "chore: seed rollout coach repo"],
    )

    with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-coach-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# rollout coach repo\n", encoding="utf-8")
        for step in git_steps:
            subprocess.run(step, cwd=repo, check=True, capture_output=True, text=True)

        stub_path = workspace / "fake_codex_rollout_coach.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        # Rollout watchdog override is 1s and the no-edit override is 10s;
        # presumably the rollout coach therefore fires first on the initial run.
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-rollout-coach-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
            "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Strengthen the repo-native web review path.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 0)
        stdout_text = str(result.get("stdout") or "")
        self.assertIn("Patched after rollout coach guidance", stdout_text)
        self.assertIn("scripts/", stdout_text)
1083
+
1084
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
    # Stand-in for the Codex binary: it writes one file under node_modules,
    # prints a summary, and sleeps for 10 seconds.
    stub_source = "\n".join(
        [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "Path('node_modules').mkdir(exist_ok=True)",
            "Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
            "print('item.completed | Touched dependency artifact only.', flush=True)",
            "time.sleep(10)",
        ]
    )
    # Commands that seed a one-commit git repository containing README.md.
    git_steps = (
        ["git", "init"],
        ["git", "config", "user.name", "PushPals Test"],
        ["git", "config", "user.email", "pushpals-tests@example.com"],
        ["git", "add", "README.md"],
        ["git", "commit", "-m", "chore: seed artifact timeout repo"],
    )

    with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# artifact timeout repo\n", encoding="utf-8")
        for step in git_steps:
            subprocess.run(step, cwd=repo, check=True, capture_output=True, text=True)

        stub_path = workspace / "fake_codex_artifact_timeout.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        # The timeout override is 1s while the stub sleeps for 10s, and the
        # no-edit watchdog override is "0".
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-artifact-timeout-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Strengthen the repo-native web review path.",
                [],
            )

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        self.assertIn("without publishable changes", str(result.get("summary") or ""))
        self.assertIn("node_modules", str(result.get("stderr") or ""))
1150
+
928
1151
  def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
929
1152
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
930
1153
  repo = Path(temp_dir) / "repo"
@@ -906,6 +906,15 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
906
906
  "Target path hints",
907
907
  _string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
908
908
  )
909
+ _append_list_guidance(
910
+ lines,
911
+ "Repo hint preflight diagnostics",
912
+ _string_list(planning.get("repoHintDiagnostics"), limit=8),
913
+ )
914
+ if _string_list(planning.get("repoHintDiagnostics"), limit=1):
915
+ lines.append(
916
+ "- If a hinted path is absent, treat it as stale guidance unless the task explicitly asks to create that path; prefer an existing repo-native owner or nearby test."
917
+ )
909
918
 
910
919
  discovery = planning.get("discovery")
911
920
  if isinstance(discovery, dict):