@pushpalsdev/cli 1.1.21 → 1.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +288 -31
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +505 -0
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +334 -19
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
|
@@ -36,10 +36,12 @@ from openai_codex_executor import (
|
|
|
36
36
|
_build_rollout_recovery_guidance,
|
|
37
37
|
_collect_disallowed_shell_wrapper_rejections,
|
|
38
38
|
_codex_changed_paths,
|
|
39
|
+
_capture_git_change_snapshot,
|
|
39
40
|
_describe_non_publishable_paths,
|
|
40
41
|
_detect_offtrack_rollout,
|
|
41
42
|
_detect_codex_workaround_signal,
|
|
42
43
|
_extract_usage_counts,
|
|
44
|
+
_has_credible_shell_wrapper_progress,
|
|
43
45
|
_load_prompt_template,
|
|
44
46
|
_mask_repo_local_codex_files,
|
|
45
47
|
_repo_root_for_prompt_loading,
|
|
@@ -672,6 +674,130 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
672
674
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
673
675
|
self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
|
|
674
676
|
|
|
677
|
+
def test_shell_wrapper_progress_guard_rejects_broad_noisy_path_sets(self) -> None:
    """Check which changed-path sets the shell-wrapper progress guard accepts.

    One compact three-path set is expected to be accepted; three broader
    sets are expected to be rejected.
    """
    # Three paths spread over two top-level directories: expected to pass.
    compact_paths = ["src/change.ts", "src/change.test.ts", "docs/change.md"]
    self.assertTrue(_has_credible_shell_wrapper_progress(compact_paths))

    # Nine files, all under the same directory.
    many_files = [f"src/generated-{index}.ts" for index in range(9)]
    self.assertFalse(_has_credible_shell_wrapper_progress(many_files))

    # Five files, each under a different top-level directory.
    scattered_files = [
        "app/main.ts",
        "components/card.tsx",
        "docs/readme.md",
        "scripts/check.ts",
        "tests/card.test.ts",
    ]
    self.assertFalse(_has_credible_shell_wrapper_progress(scattered_files))

    # Five directory-style entries (trailing slash).
    directory_entries = [f"area{index}/" for index in range(5)]
    self.assertFalse(_has_credible_shell_wrapper_progress(directory_entries))
|
|
708
|
+
|
|
709
|
+
def test_run_codex_task_recovers_instead_of_handing_noisy_wrapper_diff_to_gates(self) -> None:
    """Run a fake Codex that dirties nine files and hits shell-wrapper rejections.

    On its first invocation the stub rewrites nine tracked ``noisy-*.txt``
    files, prints three rejected ``/bin/bash -lc`` errors to stderr and then
    sleeps. When the prompt contains ``Command-router recovery:`` the stub
    instead writes ``src/recovered.txt``, records a last message and exits 0.
    The test expects an ok result whose stdout carries both recovery messages
    and does not mention ``ValidationGate/CriticGate``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# wrapper noisy test\n", encoding="utf-8")
        for index in range(9):
            (repo / f"noisy-{index}.txt").write_text("baseline\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command must succeed; output is captured, not shown.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", ".")
        git("commit", "-m", "chore: seed wrapper noisy repo")

        # The stub is written to disk and executed as a Python script, so the
        # nested bodies need deeper indentation than their `if`/`for` headers.
        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'Command-router recovery:' in prompt:",
            "    Path('src').mkdir(exist_ok=True)",
            "    Path('src/recovered.txt').write_text('direct recovery\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text(",
            "            'Recovered after noisy shell-wrapper path detection using direct commands.',",
            "            encoding='utf-8',",
            "        )",
            "    print('item.completed | Recovered with direct-command guidance.', flush=True)",
            "    sys.exit(0)",
            "",
            "for index in range(9):",
            "    Path(f'noisy-{index}.txt').write_text('noisy path\\n', encoding='utf-8')",
            "for line in (",
            "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'sed -n 1,40p README.md\\'`: CreateProcess { message: \"Rejected\" }',",
            "):",
            "    print(line, file=sys.stderr, flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_wrapper_noisy.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-wrapper-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Recover from a shell-wrapper loop after noisy repo changes.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        stdout = str(result.get("stdout") or "")
        self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", stdout)
        self.assertIn("Recovered after noisy shell-wrapper path detection", stdout)
        self.assertNotIn("ValidationGate/CriticGate", stdout)
|
|
800
|
+
|
|
675
801
|
def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
|
|
676
802
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
|
|
677
803
|
repo = Path(temp_dir) / "repo"
|
|
@@ -749,6 +875,145 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
749
875
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
750
876
|
self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
|
|
751
877
|
|
|
878
|
+
def test_run_codex_task_rejects_broad_timeout_partial_patch(self) -> None:
    """Time out a fake Codex that touched five top-level areas first.

    The stub creates ``area0`` .. ``area4`` with one file each, prints a
    completion line and then sleeps past the 1-second timeout. The test
    expects a failed result with exit code 124 whose summary and stderr call
    the change set broad/noisy and whose stderr names ``area0``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# timeout noisy repo\n", encoding="utf-8")

        # Seed a one-commit repository; each git command must succeed.
        seed_commands = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed timeout noisy repo"],
        )
        for command in seed_commands:
            subprocess.run(command, cwd=repo, check=True, capture_output=True, text=True)

        # Stub script text is reproduced exactly as it appears in this listing.
        stub_source = "\n".join(
            [
                "from pathlib import Path",
                "import sys",
                "import time",
                "",
                "sys.stdin.read()",
                "for index in range(5):",
                " root = Path(f'area{index}')",
                " root.mkdir(exist_ok=True)",
                " (root / 'changed.txt').write_text('broad change before timeout\\n', encoding='utf-8')",
                "print('item.completed | Touched a broad set of files before timeout.', flush=True)",
                "time.sleep(5)",
            ]
        )
        stub_path = Path(temp_dir) / "fake_codex_timeout_noisy.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Create a broad unfocused patch, then continue thinking too long.",
                [],
            )

        summary = str(result.get("summary") or "")
        stderr = str(result.get("stderr") or "")
        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        self.assertIn("broad/noisy publishable-looking changes", summary)
        self.assertIn("too broad/noisy", stderr)
        self.assertIn("area0", stderr)
|
|
947
|
+
|
|
948
|
+
def test_run_codex_task_timeout_ignores_broad_dirty_baseline(self) -> None:
    """Time out a fake Codex in a repo that was already broadly dirty.

    Five untracked ``areaN/changed.txt`` files exist before the task starts
    and the stub never touches the worktree. The test expects a plain
    timeout (exit code 124) with no broad/noisy wording in summary or stderr.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-dirty-baseline-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# timeout dirty baseline repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed timeout dirty baseline repo")

        # Dirty the worktree after the commit and before the task runs.
        for index in range(5):
            area_dir = repo / f"area{index}"
            area_dir.mkdir(exist_ok=True)
            (area_dir / "changed.txt").write_text("pre-existing dirty change\n", encoding="utf-8")

        stub_lines = [
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "print('item.completed | Still thinking without changing baseline files.', flush=True)",
            "time.sleep(5)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_timeout_dirty_baseline.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-dirty-baseline-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Make a compact scoped patch, then continue thinking too long.",
                [],
            )

        summary = str(result.get("summary") or "")
        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        self.assertIn("execution timed out", summary)
        self.assertNotIn("broad/noisy", summary)
        self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
|
|
1016
|
+
|
|
752
1017
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
753
1018
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
754
1019
|
repo = Path(temp_dir) / "repo"
|
|
@@ -892,6 +1157,95 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
892
1157
|
self.assertEqual(result.get("exitCode"), 124)
|
|
893
1158
|
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
894
1159
|
|
|
1160
|
+
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
    """Run a fake Codex whose only publishable file disappears again.

    On its first invocation the stub creates ``src/transient-progress.txt``,
    deletes it 1.4 seconds later, writes ``node_modules/linked.txt`` and then
    sleeps. When the prompt contains ``No-edit watchdog recovery`` the stub
    instead writes ``src/no-edit-recheck-retry.txt``, records a last message
    and exits 0. With the no-edit watchdog and its recheck both set to one
    second, the test expects an ok result (exit code 0) whose stdout carries
    the retry message and mentions ``src/``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# no edit recheck repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every seeding command must succeed; output is captured, not shown.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed no-edit recheck repo")

        # The stub is written to disk and executed as a Python script, so the
        # nested bodies need deeper indentation than their `if`/`for` headers.
        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'No-edit watchdog recovery' in prompt:",
            "    Path('src').mkdir(exist_ok=True)",
            "    Path('src/no-edit-recheck-retry.txt').write_text('patched after recheck\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text('Patched after transient no-edit recheck.', encoding='utf-8')",
            "    print('item.completed | Patched after transient no-edit recheck.', flush=True)",
            "    sys.exit(0)",
            "",
            "Path('src').mkdir(exist_ok=True)",
            "transient = Path('src/transient-progress.txt')",
            "transient.write_text('temporary progress\\n', encoding='utf-8')",
            "print('item.completed | Created transient publishable progress.', flush=True)",
            "time.sleep(1.4)",
            "transient.unlink()",
            "Path('node_modules').mkdir(exist_ok=True)",
            "Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
            "print('item.completed | Lost patch while still thinking.', flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_no_edit_recheck.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-no-edit-recheck-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Polish the first-entry home shell with a compact visual patch.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 0)
        self.assertIn("Patched after transient no-edit recheck", str(result.get("stdout") or ""))
        self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1248
|
+
|
|
895
1249
|
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
|
|
896
1250
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
|
|
897
1251
|
repo = Path(temp_dir) / "repo"
|
|
@@ -931,6 +1285,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
931
1285
|
self.assertGreaterEqual(len(delta), 2)
|
|
932
1286
|
self.assertEqual(effective, [])
|
|
933
1287
|
|
|
1288
|
+
def test_codex_changed_paths_ignores_publishable_paths_dirty_at_baseline(self) -> None:
    """Paths already dirty when the baseline is captured yield no delta.

    Two committed files are modified before the snapshot is taken and nothing
    changes afterwards. ``README.md`` is still expected in the changed-path
    list, while both the delta and the effective list are expected to be
    empty.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        readme = repo / "README.md"
        existing_source = repo / "src" / "existing.ts"

        readme.write_text("# dirty baseline repo\n", encoding="utf-8")
        (repo / "src").mkdir()
        existing_source.write_text("export const value = 1;\n", encoding="utf-8")

        seed_commands = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "."],
            ["git", "commit", "-m", "chore: seed dirty baseline repo"],
        )
        for command in seed_commands:
            subprocess.run(command, cwd=repo, check=True, capture_output=True, text=True)

        # Both edits land before the baseline snapshot is captured.
        readme.write_text("# dirty baseline repo\n\npre-existing edit\n", encoding="utf-8")
        existing_source.write_text("export const value = 2;\n", encoding="utf-8")
        baseline = _capture_git_change_snapshot(str(repo))

        changed_paths, delta, effective = _codex_changed_paths(str(repo), baseline)

        self.assertIn("README.md", changed_paths)
        self.assertEqual(delta, [])
        self.assertEqual(effective, [])
|
|
1327
|
+
|
|
1328
|
+
def test_codex_changed_paths_counts_worker_edits_to_dirty_baseline_paths(self) -> None:
    """A further edit to an already-dirty path shows up in the delta.

    ``README.md`` is modified once before the baseline snapshot and once more
    after it. Both the delta and the effective list are expected to be
    exactly ``["README.md"]``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-mutated-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        readme = repo / "README.md"
        readme.write_text("# dirty baseline mutation repo\n", encoding="utf-8")

        def git(*args: str) -> None:
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed dirty baseline mutation repo")

        # First edit: part of the dirty baseline.
        readme.write_text("# dirty baseline mutation repo\n\npre-existing edit\n", encoding="utf-8")
        baseline = _capture_git_change_snapshot(str(repo))

        # Second edit: made after the snapshot, standing in for worker output.
        readme.write_text(
            "# dirty baseline mutation repo\n\npre-existing edit\nworker edit\n",
            encoding="utf-8",
        )

        _, delta, effective = _codex_changed_paths(str(repo), baseline)

        self.assertEqual(delta, ["README.md"])
        self.assertEqual(effective, ["README.md"])
|
|
1367
|
+
|
|
934
1368
|
def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
|
|
935
1369
|
changed_paths = [
|
|
936
1370
|
"node_modules/react/index.js",
|
|
@@ -1081,6 +1515,77 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1081
1515
|
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
1082
1516
|
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
1083
1517
|
|
|
1518
|
+
def test_run_codex_task_rollout_coach_fails_fast_on_broad_small_task_changes(self) -> None:
    """Broad edits for a 'small' task fail with a rollout-coach summary.

    The stub creates ``area0`` .. ``area4`` with one file each and then
    sleeps. The overall timeout is 700 seconds and the rollout watchdog is
    one second. The test expects a failed result with exit code 124, a
    summary mentioning the rollout coach, and stderr that says broad/noisy
    and names ``area0``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# rollout noisy repo\n", encoding="utf-8")

        seed_commands = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed rollout noisy repo"],
        )
        for command in seed_commands:
            subprocess.run(command, cwd=repo, check=True, capture_output=True, text=True)

        # Stub script text is reproduced exactly as it appears in this listing.
        stub_source = "\n".join(
            [
                "from pathlib import Path",
                "import sys",
                "import time",
                "",
                "sys.stdin.read()",
                "for index in range(5):",
                " root = Path(f'area{index}')",
                " root.mkdir(exist_ok=True)",
                " (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
                "print('item.completed | Made broad edits for a supposedly small task.', flush=True)",
                "time.sleep(10)",
            ]
        )
        stub_path = Path(temp_dir) / "fake_codex_rollout_noisy.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-rollout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
            "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Make a small low-risk repo-native patch.",
                [],
            )

        stderr = str(result.get("stderr") or "")
        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        self.assertIn("rollout coach", str(result.get("summary") or ""))
        self.assertIn("broad/noisy", stderr)
        self.assertIn("area0", stderr)
|
|
1588
|
+
|
|
1084
1589
|
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1085
1590
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
1086
1591
|
repo = Path(temp_dir) / "repo"
|
|
@@ -18,6 +18,74 @@ export interface JobPublishBlockedInfo {
|
|
|
18
18
|
stage: "sync" | "push";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Diagnostics for one attempt of a job.
 *
 * Only `attempt` is required. Every other scalar field may be omitted or set
 * to `null`.
 */
export interface JobDiagnosticAttempt {
  /** Attempt number. NOTE(review): 0- vs 1-based is not visible here; confirm with the producer. */
  attempt: number;

  // Executor identity.
  workerId?: string | null;
  backend?: string | null;
  model?: string | null;

  // Timing. NOTE(review): timestamp string format is not stated in this file; confirm.
  startedAt?: string | null;
  finishedAt?: string | null;
  durationMs?: number | null;

  // Outcome.
  terminalReason?: string | null;
  exitCode?: number | null;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Diagnostics describing how a job ended.
 *
 * Every field is optional; the scalar string/number fields may also be
 * `null`.
 */
export interface JobTerminalDiagnostics {
  // Classification of the terminal state.
  failureClass?: string | null;
  terminalStage?: string | null;
  executorBackend?: string | null;
  summary?: string | null;

  // Watchdog / timeout information.
  watchdogFired?: boolean;
  timeoutMs?: number | null;

  // Worktree change counts and a sample of changed paths.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  changedPathSample?: Array<string>;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Timing record for one phase of a job.
 *
 * `phase`, `startedAt`, `finishedAt` and `durationMs` are required; the
 * remaining fields are optional.
 */
export interface JobPhaseSpanDiagnostics {
  /** Attempt this span belongs to, when known. */
  attempt?: number | null;

  /** Phase name. */
  phase: string;

  // Required timing. NOTE(review): timestamp string format is not stated in this file; confirm.
  startedAt: string;
  finishedAt: string;
  durationMs: number;

  outcome?: string | null;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Result record for one validation command run.
 *
 * `command` and `passed` are required; every other field is optional.
 */
export interface JobValidationRunDiagnostics {
  /** Attempt this run belongs to, when known. */
  attempt?: number | null;

  /** The command that was run. */
  command: string;

  exitCode?: number | null;
  durationMs?: number | null;

  /** Whether the run passed. */
  passed: boolean;

  failureClass?: string | null;

  // Output tails. NOTE(review): truncation length is not defined in this file.
  stdoutTail?: string | null;
  stderrTail?: string | null;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Snapshot of worktree changes captured during a job.
 *
 * Every field is optional.
 */
export interface JobPatchSnapshotDiagnostics {
  // Where in the job the snapshot was taken.
  attempt?: number | null;
  phase?: string | null;

  // Change counts.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;

  // Path samples.
  changedPathSample?: Array<string>;
  topLevelDirs?: Array<string>;

  // NOTE(review): timestamp string format is not stated in this file; confirm.
  capturedAt?: string | null;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Container that groups the per-job diagnostics records.
 *
 * Every section is optional.
 */
export interface JobDiagnostics {
  /** Per-attempt records. */
  attempts?: Array<JobDiagnosticAttempt>;

  /** Terminal-state record. */
  terminal?: JobTerminalDiagnostics;

  /** Per-phase timing records. */
  phaseSpans?: Array<JobPhaseSpanDiagnostics>;

  /** Validation command run records. */
  validationRuns?: Array<JobValidationRunDiagnostics>;

  /** Worktree change snapshots. */
  patchSnapshots?: Array<JobPatchSnapshotDiagnostics>;

  /** Extra key/value data; values are `unknown` and must be narrowed before use. */
  metadata?: Record<string, unknown>;
}
|
|
88
|
+
|
|
21
89
|
export interface JobResult {
|
|
22
90
|
ok: boolean;
|
|
23
91
|
summary: string;
|
|
@@ -26,4 +94,5 @@ export interface JobResult {
|
|
|
26
94
|
exitCode?: number;
|
|
27
95
|
usage?: JobTokenUsage;
|
|
28
96
|
publishBlocked?: JobPublishBlockedInfo;
|
|
97
|
+
diagnostics?: JobDiagnostics;
|
|
29
98
|
}
|