@pushpalsdev/cli 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,10 +36,12 @@ from openai_codex_executor import (
36
36
  _build_rollout_recovery_guidance,
37
37
  _collect_disallowed_shell_wrapper_rejections,
38
38
  _codex_changed_paths,
39
+ _capture_git_change_snapshot,
39
40
  _describe_non_publishable_paths,
40
41
  _detect_offtrack_rollout,
41
42
  _detect_codex_workaround_signal,
42
43
  _extract_usage_counts,
44
+ _has_credible_shell_wrapper_progress,
43
45
  _load_prompt_template,
44
46
  _mask_repo_local_codex_files,
45
47
  _repo_root_for_prompt_loading,
@@ -672,6 +674,130 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
672
674
  self.assertIn("src/", str(result.get("stdout") or ""))
673
675
  self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
674
676
 
677
def test_shell_wrapper_progress_guard_rejects_broad_noisy_path_sets(self) -> None:
    """Pin which path sets `_has_credible_shell_wrapper_progress` accepts.

    One three-path set (two files under ``src/`` plus one under ``docs/``)
    must be accepted.  Three other sets must be rejected: nine files in a
    single directory, five files spread over five top-level directories,
    and five directory-only entries.
    NOTE(review): the exact thresholds live in the helper, not in this test.
    """
    compact_paths = [
        "src/change.ts",
        "src/change.test.ts",
        "docs/change.md",
    ]
    many_files_one_dir = [f"src/generated-{index}.ts" for index in range(9)]
    many_top_level_dirs = [
        "app/main.ts",
        "components/card.tsx",
        "docs/readme.md",
        "scripts/check.ts",
        "tests/card.test.ts",
    ]
    directory_only_entries = [f"area{index}/" for index in range(5)]

    self.assertTrue(_has_credible_shell_wrapper_progress(compact_paths))
    self.assertFalse(_has_credible_shell_wrapper_progress(many_files_one_dir))
    self.assertFalse(_has_credible_shell_wrapper_progress(many_top_level_dirs))
    self.assertFalse(_has_credible_shell_wrapper_progress(directory_only_entries))
708
+
709
def test_run_codex_task_recovers_instead_of_handing_noisy_wrapper_diff_to_gates(self) -> None:
    """Expect a recovery run when a shell-wrapper loop leaves nine rewritten files.

    The stub Codex binary rewrites the nine tracked ``noisy-*.txt`` files,
    prints three shell-wrapper rejection lines to stderr and then sleeps.
    When the prompt contains ``Command-router recovery:`` it instead writes
    ``src/recovered.txt``, fills the ``--output-last-message`` file and
    exits 0.  The assertions require both recovery messages in stdout and
    no mention of ``ValidationGate/CriticGate``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# wrapper noisy test\n", encoding="utf-8")
        for index in range(9):
            (repo / f"noisy-{index}.txt").write_text("baseline\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", ".")
        git("commit", "-m", "chore: seed wrapper noisy repo")

        stub_path = Path(temp_dir) / "fake_codex_wrapper_noisy.py"
        # The stub is executed as a real Python script, so nested blocks
        # must carry real block indentation inside these string literals.
        stub_path.write_text(
            "\n".join(
                [
                    "from pathlib import Path",
                    "import sys",
                    "import time",
                    "",
                    "argv = sys.argv[1:]",
                    "last_message_path = None",
                    "for index, arg in enumerate(argv):",
                    "    if arg == '--output-last-message' and index + 1 < len(argv):",
                    "        last_message_path = argv[index + 1]",
                    "        break",
                    "",
                    "prompt = sys.stdin.read()",
                    "if 'Command-router recovery:' in prompt:",
                    "    Path('src').mkdir(exist_ok=True)",
                    "    Path('src/recovered.txt').write_text('direct recovery\\n', encoding='utf-8')",
                    "    if last_message_path:",
                    "        Path(last_message_path).write_text(",
                    "            'Recovered after noisy shell-wrapper path detection using direct commands.',",
                    "            encoding='utf-8',",
                    "        )",
                    "    print('item.completed | Recovered with direct-command guidance.', flush=True)",
                    "    sys.exit(0)",
                    "",
                    "for index in range(9):",
                    "    Path(f'noisy-{index}.txt').write_text('noisy path\\n', encoding='utf-8')",
                    "for line in (",
                    "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
                    "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
                    "    'error=exec_command failed for `/bin/bash -lc \\'sed -n 1,40p README.md\\'`: CreateProcess { message: \"Rejected\" }',",
                    "):",
                    "    print(line, file=sys.stderr, flush=True)",
                    "time.sleep(10)",
                ]
            ),
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-wrapper-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Recover from a shell-wrapper loop after noisy repo changes.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        stdout = str(result.get("stdout") or "")
        self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", stdout)
        self.assertIn("Recovered after noisy shell-wrapper path detection", stdout)
        self.assertNotIn("ValidationGate/CriticGate", stdout)
800
+
675
801
  def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
676
802
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
677
803
  repo = Path(temp_dir) / "repo"
@@ -749,6 +875,145 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
749
875
  self.assertIn("src/", str(result.get("stdout") or ""))
750
876
  self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
751
877
 
878
def test_run_codex_task_rejects_broad_timeout_partial_patch(self) -> None:
    """Check that a timeout after edits under five new directories is a failure.

    The stub Codex process writes ``changed.txt`` under ``area0`` .. ``area4``
    and then sleeps past the 1-second timeout set in the environment.  The
    result must be non-ok with exit code 124, and the summary/stderr must
    carry the broad/noisy wording and name ``area0``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-noisy-") as scratch:
        repo = Path(scratch) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# timeout noisy repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed timeout noisy repo")

        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "for index in range(5):",
            " root = Path(f'area{index}')",
            " root.mkdir(exist_ok=True)",
            " (root / 'changed.txt').write_text('broad change before timeout\\n', encoding='utf-8')",
            "print('item.completed | Touched a broad set of files before timeout.', flush=True)",
            "time.sleep(5)",
        ]
        stub_path = Path(scratch) / "fake_codex_timeout_noisy.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        codex_env = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, codex_env, clear=False):
            result = _run_codex_task(
                str(repo),
                "Create a broad unfocused patch, then continue thinking too long.",
                [],
            )

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        summary_text = str(result.get("summary") or "")
        self.assertIn("broad/noisy publishable-looking changes", summary_text)
        stderr_text = str(result.get("stderr") or "")
        self.assertIn("too broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
947
+
948
def test_run_codex_task_timeout_ignores_broad_dirty_baseline(self) -> None:
    """Check that files already dirty before the run are not blamed on a timeout.

    Five untracked ``area*/changed.txt`` files are created after the seed
    commit and before the task starts.  The stub Codex process changes
    nothing and sleeps past the 1-second timeout.  The result must be a
    plain timeout (exit code 124, "execution timed out") with no
    broad/noisy wording in the summary or stderr.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-dirty-baseline-") as scratch:
        repo = Path(scratch) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# timeout dirty baseline repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed timeout dirty baseline repo")

        # Dirty the worktree before the task runs.
        for area_number in range(5):
            area_dir = repo / f"area{area_number}"
            area_dir.mkdir(exist_ok=True)
            (area_dir / "changed.txt").write_text("pre-existing dirty change\n", encoding="utf-8")

        stub_lines = [
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "print('item.completed | Still thinking without changing baseline files.', flush=True)",
            "time.sleep(5)",
        ]
        stub_path = Path(scratch) / "fake_codex_timeout_dirty_baseline.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        codex_env = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-dirty-baseline-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, codex_env, clear=False):
            result = _run_codex_task(
                str(repo),
                "Make a compact scoped patch, then continue thinking too long.",
                [],
            )

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        summary_text = str(result.get("summary") or "")
        self.assertIn("execution timed out", summary_text)
        self.assertNotIn("broad/noisy", summary_text)
        self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
1016
+
752
1017
  def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
753
1018
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
754
1019
  repo = Path(temp_dir) / "repo"
@@ -892,6 +1157,95 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
892
1157
  self.assertEqual(result.get("exitCode"), 124)
893
1158
  self.assertIn("no publishable changes", str(result.get("summary") or ""))
894
1159
 
1160
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
    """Expect a retry when publishable progress appears and then disappears.

    On the first run the stub creates ``src/transient-progress.txt``,
    deletes it after 1.4 seconds, leaves only ``node_modules/linked.txt``
    behind and keeps sleeping.  When the prompt contains
    ``No-edit watchdog recovery`` it instead writes
    ``src/no-edit-recheck-retry.txt`` and exits 0.  The watchdog and recheck
    intervals are both set to 1 second.  The task must end ok with exit
    code 0 and report the retry message and ``src/`` in stdout.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# no edit recheck repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed no-edit recheck repo")

        stub_path = Path(temp_dir) / "fake_codex_no_edit_recheck.py"
        # The stub is executed as a real Python script, so nested blocks
        # must carry real block indentation inside these string literals.
        stub_path.write_text(
            "\n".join(
                [
                    "from pathlib import Path",
                    "import sys",
                    "import time",
                    "",
                    "argv = sys.argv[1:]",
                    "last_message_path = None",
                    "for index, arg in enumerate(argv):",
                    "    if arg == '--output-last-message' and index + 1 < len(argv):",
                    "        last_message_path = argv[index + 1]",
                    "        break",
                    "",
                    "prompt = sys.stdin.read()",
                    "if 'No-edit watchdog recovery' in prompt:",
                    "    Path('src').mkdir(exist_ok=True)",
                    "    Path('src/no-edit-recheck-retry.txt').write_text('patched after recheck\\n', encoding='utf-8')",
                    "    if last_message_path:",
                    "        Path(last_message_path).write_text('Patched after transient no-edit recheck.', encoding='utf-8')",
                    "    print('item.completed | Patched after transient no-edit recheck.', flush=True)",
                    "    sys.exit(0)",
                    "",
                    "Path('src').mkdir(exist_ok=True)",
                    "transient = Path('src/transient-progress.txt')",
                    "transient.write_text('temporary progress\\n', encoding='utf-8')",
                    "print('item.completed | Created transient publishable progress.', flush=True)",
                    "time.sleep(1.4)",
                    "transient.unlink()",
                    "Path('node_modules').mkdir(exist_ok=True)",
                    "Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
                    "print('item.completed | Lost patch while still thinking.', flush=True)",
                    "time.sleep(10)",
                ]
            ),
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-no-edit-recheck-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Polish the first-entry home shell with a compact visual patch.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 0)
        self.assertIn("Patched after transient no-edit recheck", str(result.get("stdout") or ""))
        self.assertIn("src/", str(result.get("stdout") or ""))
1248
+
895
1249
  def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
896
1250
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
897
1251
  repo = Path(temp_dir) / "repo"
@@ -931,6 +1285,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
931
1285
  self.assertGreaterEqual(len(delta), 2)
932
1286
  self.assertEqual(effective, [])
933
1287
 
1288
def test_codex_changed_paths_ignores_publishable_paths_dirty_at_baseline(self) -> None:
    """Check that edits present when the baseline is captured are not counted.

    ``README.md`` and ``src/existing.ts`` are committed, then modified, and
    only then is the baseline snapshot taken.  With nothing changed
    afterwards, ``README.md`` must still appear in the changed paths while
    both the delta and the effective list are empty.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-") as scratch:
        repo = Path(scratch) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        readme = repo / "README.md"
        source_file = repo / "src" / "existing.ts"
        readme.write_text("# dirty baseline repo\n", encoding="utf-8")
        (repo / "src").mkdir()
        source_file.write_text("export const value = 1;\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", ".")
        git("commit", "-m", "chore: seed dirty baseline repo")

        # Dirty both tracked files before the snapshot is taken.
        readme.write_text("# dirty baseline repo\n\npre-existing edit\n", encoding="utf-8")
        source_file.write_text("export const value = 2;\n", encoding="utf-8")
        baseline = _capture_git_change_snapshot(str(repo))

        changed_paths, delta, effective = _codex_changed_paths(str(repo), baseline)

        self.assertIn("README.md", changed_paths)
        self.assertEqual(delta, [])
        self.assertEqual(effective, [])
1327
+
1328
def test_codex_changed_paths_counts_worker_edits_to_dirty_baseline_paths(self) -> None:
    """Check that a further edit to an already-dirty file is still counted.

    ``README.md`` is committed and modified, the baseline snapshot is taken,
    and then the file is modified once more.  Both the delta and the
    effective list must be exactly ``["README.md"]``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-mutated-") as scratch:
        repo = Path(scratch) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        readme = repo / "README.md"
        readme.write_text("# dirty baseline mutation repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed dirty baseline mutation repo")

        # First edit lands before the snapshot, second edit after it.
        readme.write_text("# dirty baseline mutation repo\n\npre-existing edit\n", encoding="utf-8")
        baseline = _capture_git_change_snapshot(str(repo))
        readme.write_text(
            "# dirty baseline mutation repo\n\npre-existing edit\nworker edit\n",
            encoding="utf-8",
        )

        _, delta, effective = _codex_changed_paths(str(repo), baseline)

        self.assertEqual(delta, ["README.md"])
        self.assertEqual(effective, ["README.md"])
1367
+
934
1368
  def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
935
1369
  changed_paths = [
936
1370
  "node_modules/react/index.js",
@@ -1081,6 +1515,77 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1081
1515
  self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
1082
1516
  self.assertIn("scripts/", str(result.get("stdout") or ""))
1083
1517
 
1518
def test_run_codex_task_rollout_coach_fails_fast_on_broad_small_task_changes(self) -> None:
    """Check that broad edits under a 1-second rollout watchdog end in failure.

    The stub Codex process writes ``changed.txt`` under ``area0`` .. ``area4``
    and then sleeps for 10 seconds.  The overall timeout is 700 seconds, so
    the failure is expected well before it.  The result must be non-ok with
    exit code 124, mention "rollout coach" in the summary, and carry
    "broad/noisy" and ``area0`` in stderr.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as scratch:
        repo = Path(scratch) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# rollout noisy repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command shares the same subprocess options.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed rollout noisy repo")

        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "for index in range(5):",
            " root = Path(f'area{index}')",
            " root.mkdir(exist_ok=True)",
            " (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
            "print('item.completed | Made broad edits for a supposedly small task.', flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(scratch) / "fake_codex_rollout_noisy.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        codex_env = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-rollout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
            "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, codex_env, clear=False):
            result = _run_codex_task(
                str(repo),
                "Make a small low-risk repo-native patch.",
                [],
            )

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        self.assertIn("rollout coach", str(result.get("summary") or ""))
        stderr_text = str(result.get("stderr") or "")
        self.assertIn("broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
1588
+
1084
1589
  def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
1085
1590
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
1086
1591
  repo = Path(temp_dir) / "repo"
@@ -18,6 +18,74 @@ export interface JobPublishBlockedInfo {
18
18
  stage: "sync" | "push";
19
19
  }
20
20
 
21
/**
 * Diagnostic record for one attempt at a job.
 *
 * Only `attempt` is required. Every other field is optional, and all of
 * them except `metadata` may also be `null`.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the code that produces these records.
 */
export interface JobDiagnosticAttempt {
  attempt: number;

  // Who/what ran the attempt.
  workerId?: string | null;
  backend?: string | null;
  model?: string | null;

  // Timing.
  startedAt?: string | null;
  finishedAt?: string | null;
  durationMs?: number | null;

  // Outcome.
  terminalReason?: string | null;
  exitCode?: number | null;

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
33
+
34
/**
 * Diagnostic record describing how a job ended.
 *
 * Every field is optional. The string and number fields may also be
 * `null`; `watchdogFired`, `changedPathSample` and `metadata` may not.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the code that produces these records.
 */
export interface JobTerminalDiagnostics {
  // Classification of the terminal state.
  failureClass?: string | null;
  terminalStage?: string | null;
  executorBackend?: string | null;
  summary?: string | null;

  // Watchdog / timeout details.
  watchdogFired?: boolean;
  timeoutMs?: number | null;

  // Change counts and a sample of changed paths.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  changedPathSample?: string[];

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
46
+
47
/**
 * Diagnostic record for one timed phase of a job.
 *
 * `phase`, `startedAt`, `finishedAt` and `durationMs` are required;
 * `attempt`, `outcome` and `metadata` are optional.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the code that produces these records.
 */
export interface JobPhaseSpanDiagnostics {
  attempt?: number | null;
  phase: string;

  // Timing (all required).
  startedAt: string;
  finishedAt: string;
  durationMs: number;

  outcome?: string | null;

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
56
+
57
/**
 * Diagnostic record for one validation command run.
 *
 * `command` and `passed` are required; every other field is optional,
 * and all of those except `metadata` may also be `null`.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the code that produces these records.
 */
export interface JobValidationRunDiagnostics {
  attempt?: number | null;
  command: string;

  // Result of the run.
  exitCode?: number | null;
  durationMs?: number | null;
  passed: boolean;
  failureClass?: string | null;

  // Captured output tails.
  stdoutTail?: string | null;
  stderrTail?: string | null;

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
68
+
69
/**
 * Diagnostic record for one captured view of a job's changes.
 *
 * Every field is optional. The scalar fields may also be `null`; the
 * array fields and `metadata` may not.
 * NOTE(review): field meanings are inferred from their names; confirm
 * against the code that produces these records.
 */
export interface JobPatchSnapshotDiagnostics {
  attempt?: number | null;
  phase?: string | null;

  // Change counts.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;

  // Path samples.
  changedPathSample?: string[];
  topLevelDirs?: string[];

  capturedAt?: string | null;

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
79
+
80
/**
 * Container that groups the per-job diagnostic records.
 *
 * Every field is optional: four lists of records, one terminal record,
 * and a free-form `metadata` map.
 */
export interface JobDiagnostics {
  attempts?: JobDiagnosticAttempt[];
  terminal?: JobTerminalDiagnostics;
  phaseSpans?: JobPhaseSpanDiagnostics[];
  validationRuns?: JobValidationRunDiagnostics[];
  patchSnapshots?: JobPatchSnapshotDiagnostics[];

  // Free-form extra data.
  metadata?: Record<string, unknown>;
}
88
+
21
89
  export interface JobResult {
22
90
  ok: boolean;
23
91
  summary: string;
@@ -26,4 +94,5 @@ export interface JobResult {
26
94
  exitCode?: number;
27
95
  usage?: JobTokenUsage;
28
96
  publishBlocked?: JobPublishBlockedInfo;
97
+ diagnostics?: JobDiagnostics;
29
98
  }