@pushpalsdev/cli 1.1.20 → 1.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +161 -24
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +355 -0
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +45 -3
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +557 -57
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/merge_conflict_job.ts +9 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
|
@@ -40,6 +40,7 @@ from openai_codex_executor import (
|
|
|
40
40
|
_detect_offtrack_rollout,
|
|
41
41
|
_detect_codex_workaround_signal,
|
|
42
42
|
_extract_usage_counts,
|
|
43
|
+
_has_credible_shell_wrapper_progress,
|
|
43
44
|
_load_prompt_template,
|
|
44
45
|
_mask_repo_local_codex_files,
|
|
45
46
|
_repo_root_for_prompt_loading,
|
|
@@ -672,6 +673,130 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
672
673
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
673
674
|
self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
|
|
674
675
|
|
|
676
|
+
def test_shell_wrapper_progress_guard_rejects_broad_noisy_path_sets(self) -> None:
    """Check which changed-path sets the shell-wrapper progress guard accepts.

    One small path set (three files under ``src/`` and ``docs/``) must be
    accepted; three broader sets must be rejected: nine files in a single
    directory, five files spread over five top-level directories, and five
    bare directory entries.
    """
    focused_paths = [
        "src/change.ts",
        "src/change.test.ts",
        "docs/change.md",
    ]
    self.assertTrue(_has_credible_shell_wrapper_progress(focused_paths))

    many_generated_files = [f"src/generated-{index}.ts" for index in range(9)]
    self.assertFalse(_has_credible_shell_wrapper_progress(many_generated_files))

    files_across_many_roots = [
        "app/main.ts",
        "components/card.tsx",
        "docs/readme.md",
        "scripts/check.ts",
        "tests/card.test.ts",
    ]
    self.assertFalse(_has_credible_shell_wrapper_progress(files_across_many_roots))

    many_directory_entries = [f"area{index}/" for index in range(5)]
    self.assertFalse(_has_credible_shell_wrapper_progress(many_directory_entries))
|
|
707
|
+
|
|
708
|
+
def test_run_codex_task_recovers_instead_of_handing_noisy_wrapper_diff_to_gates(self) -> None:
    """Run ``_run_codex_task`` against a stub that makes noisy edits, then recovers.

    The stub Codex binary has two modes, selected by the prompt on stdin:

    * Without ``Command-router recovery:`` in the prompt it rewrites the nine
      tracked ``noisy-N.txt`` files, prints three ``exec_command failed``
      rejection lines to stderr, and sleeps.
    * With that marker it writes ``src/recovered.txt``, writes the
      ``--output-last-message`` file when a path was passed, prints a
      completion line and exits 0.

    The test expects an ok result whose stdout contains both recovery
    messages and does not mention ``ValidationGate/CriticGate``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# wrapper noisy test\n", encoding="utf-8")
        for index in range(9):
            (repo / f"noisy-{index}.txt").write_text("baseline\n", encoding="utf-8")

        # Seed a committed baseline so the stub's rewrites show up as changes.
        for git_command in (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "."],
            ["git", "commit", "-m", "chore: seed wrapper noisy repo"],
        ):
            subprocess.run(git_command, cwd=repo, check=True, capture_output=True, text=True)

        # Nested statements carry explicit four-space indentation: the stub is
        # executed as a real Python script, so a block body that sits level
        # with its header would make it fail with IndentationError.
        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'Command-router recovery:' in prompt:",
            "    Path('src').mkdir(exist_ok=True)",
            "    Path('src/recovered.txt').write_text('direct recovery\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text(",
            "            'Recovered after noisy shell-wrapper path detection using direct commands.',",
            "            encoding='utf-8',",
            "        )",
            "    print('item.completed | Recovered with direct-command guidance.', flush=True)",
            "    sys.exit(0)",
            "",
            "for index in range(9):",
            "    Path(f'noisy-{index}.txt').write_text('noisy path\\n', encoding='utf-8')",
            "for line in (",
            "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'sed -n 1,40p README.md\\'`: CreateProcess { message: \"Rejected\" }',",
            "):",
            "    print(line, file=sys.stderr, flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_wrapper_noisy.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            # Run the stub with the current interpreter in place of the Codex binary.
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-wrapper-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Recover from a shell-wrapper loop after noisy repo changes.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        stdout = str(result.get("stdout") or "")
        self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", stdout)
        self.assertIn("Recovered after noisy shell-wrapper path detection", stdout)
        self.assertNotIn("ValidationGate/CriticGate", stdout)
|
|
799
|
+
|
|
675
800
|
def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
|
|
676
801
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
|
|
677
802
|
repo = Path(temp_dir) / "repo"
|
|
@@ -749,6 +874,76 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
749
874
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
750
875
|
self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
|
|
751
876
|
|
|
877
|
+
def test_run_codex_task_rejects_broad_timeout_partial_patch(self) -> None:
    """Expect a timeout failure when the stub leaves a broad set of edits behind.

    The stub Codex binary creates ``changed.txt`` in five new top-level
    directories (``area0`` .. ``area4``), prints a completion line and then
    sleeps past the one-second timeout configured below. The result must be
    not-ok with exit code 124, and its summary/stderr must carry the
    broad/noisy wording and name ``area0``.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-noisy-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# timeout noisy repo\n", encoding="utf-8")

        # Commit README.md as the baseline the stub's edits are measured against.
        git_setup = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed timeout noisy repo"],
        )
        for command in git_setup:
            subprocess.run(command, cwd=repo, check=True, capture_output=True, text=True)

        stub_source = "\n".join(
            (
                "from pathlib import Path",
                "import sys",
                "import time",
                "",
                "sys.stdin.read()",
                "for index in range(5):",
                " root = Path(f'area{index}')",
                " root.mkdir(exist_ok=True)",
                " (root / 'changed.txt').write_text('broad change before timeout\\n', encoding='utf-8')",
                "print('item.completed | Touched a broad set of files before timeout.', flush=True)",
                "time.sleep(5)",
            )
        )
        stub_path = workspace / "fake_codex_timeout_noisy.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        codex_command = json.dumps([sys.executable, str(stub_path)])
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": codex_command,
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        task_prompt = "Create a broad unfocused patch, then continue thinking too long."
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(str(repo), task_prompt, [])

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        summary_text = str(result.get("summary") or "")
        self.assertIn("broad/noisy publishable-looking changes", summary_text)
        stderr_text = str(result.get("stderr") or "")
        self.assertIn("too broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
|
|
946
|
+
|
|
752
947
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
753
948
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
754
949
|
repo = Path(temp_dir) / "repo"
|
|
@@ -892,6 +1087,95 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
892
1087
|
self.assertEqual(result.get("exitCode"), 124)
|
|
893
1088
|
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
894
1089
|
|
|
1090
|
+
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
    """Run ``_run_codex_task`` against a stub whose first edit disappears again.

    The stub Codex binary has two modes, selected by the prompt on stdin:

    * Without ``No-edit watchdog recovery`` in the prompt it creates
      ``src/transient-progress.txt``, waits 1.4 seconds, deletes that file,
      writes ``node_modules/linked.txt`` and then sleeps.
    * With that marker it writes ``src/no-edit-recheck-retry.txt``, writes the
      ``--output-last-message`` file when a path was passed, prints a
      completion line and exits 0.

    The test expects an ok result with exit code 0 whose stdout contains the
    retry's message and a ``src/`` path.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# no edit recheck repo\n", encoding="utf-8")

        # Commit README.md as the baseline the stub's edits are measured against.
        for git_command in (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed no-edit recheck repo"],
        ):
            subprocess.run(git_command, cwd=repo, check=True, capture_output=True, text=True)

        # Nested statements carry explicit four-space indentation: the stub is
        # executed as a real Python script, so a block body that sits level
        # with its header would make it fail with IndentationError.
        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'No-edit watchdog recovery' in prompt:",
            "    Path('src').mkdir(exist_ok=True)",
            "    Path('src/no-edit-recheck-retry.txt').write_text('patched after recheck\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text('Patched after transient no-edit recheck.', encoding='utf-8')",
            "    print('item.completed | Patched after transient no-edit recheck.', flush=True)",
            "    sys.exit(0)",
            "",
            "Path('src').mkdir(exist_ok=True)",
            "transient = Path('src/transient-progress.txt')",
            "transient.write_text('temporary progress\\n', encoding='utf-8')",
            "print('item.completed | Created transient publishable progress.', flush=True)",
            "time.sleep(1.4)",
            "transient.unlink()",
            "Path('node_modules').mkdir(exist_ok=True)",
            "Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
            "print('item.completed | Lost patch while still thinking.', flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_no_edit_recheck.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            # Run the stub with the current interpreter in place of the Codex binary.
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-no-edit-recheck-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Polish the first-entry home shell with a compact visual patch.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 0)
        self.assertIn("Patched after transient no-edit recheck", str(result.get("stdout") or ""))
        self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1178
|
+
|
|
895
1179
|
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
|
|
896
1180
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
|
|
897
1181
|
repo = Path(temp_dir) / "repo"
|
|
@@ -1081,6 +1365,77 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1081
1365
|
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
1082
1366
|
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
1083
1367
|
|
|
1368
|
+
def test_run_codex_task_rollout_coach_fails_fast_on_broad_small_task_changes(self) -> None:
    """Expect a rollout-coach failure when a "small" task produces broad edits.

    The stub Codex binary creates ``changed.txt`` in five new top-level
    directories (``area0`` .. ``area4``), prints a completion line and then
    sleeps. With a one-second rollout watchdog and a 700-second overall
    timeout configured below, the result must be not-ok with exit code 124,
    mention ``rollout coach`` in the summary, and carry the broad/noisy
    wording plus ``area0`` in stderr.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# rollout noisy repo\n", encoding="utf-8")

        # Commit README.md as the baseline the stub's edits are measured against.
        git_setup = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed rollout noisy repo"],
        )
        for command in git_setup:
            subprocess.run(command, cwd=repo, check=True, capture_output=True, text=True)

        stub_source = "\n".join(
            (
                "from pathlib import Path",
                "import sys",
                "import time",
                "",
                "sys.stdin.read()",
                "for index in range(5):",
                " root = Path(f'area{index}')",
                " root.mkdir(exist_ok=True)",
                " (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
                "print('item.completed | Made broad edits for a supposedly small task.', flush=True)",
                "time.sleep(10)",
            )
        )
        stub_path = workspace / "fake_codex_rollout_noisy.py"
        stub_path.write_text(stub_source, encoding="utf-8")

        codex_command = json.dumps([sys.executable, str(stub_path)])
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": codex_command,
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-rollout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
            "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        task_prompt = "Make a small low-risk repo-native patch."
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(str(repo), task_prompt, [])

        self.assertFalse(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 124)
        summary_text = str(result.get("summary") or "")
        self.assertIn("rollout coach", summary_text)
        stderr_text = str(result.get("stderr") or "")
        self.assertIn("broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
|
|
1438
|
+
|
|
1084
1439
|
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1085
1440
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
1086
1441
|
repo = Path(temp_dir) / "repo"
|
|
@@ -29,6 +29,9 @@ interface GenericPythonExecutorConfig {
|
|
|
29
29
|
}
|
|
30
30
|
|
|
31
31
|
const BACKEND_TIMEOUT_RESULT_GRACE_MS = 30_000;
|
|
32
|
+
// Bounds used by resolveOpenAICodexValidationReserveMs, all in milliseconds.
// Lower bound for the validation reserve: 3 minutes.
const OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS = 3 * 60 * 1_000;
// Upper bound for the proportional share of the budget: 10 minutes.
const OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS = 10 * 60 * 1_000;
// Portion of the budget the reserve may never eat into: 10 minutes.
const OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS = 10 * 60 * 1_000;
|
|
32
35
|
|
|
33
36
|
function estimateTokensFromText(text: string): number {
|
|
34
37
|
return Math.max(0, Math.ceil(String(text ?? "").length / 3));
|
|
@@ -143,6 +146,27 @@ export function resolveGenericPythonExecutorTimeoutMs(params: {
|
|
|
143
146
|
return configuredTimeoutMs;
|
|
144
147
|
}
|
|
145
148
|
|
|
149
|
+
/**
 * Compute how many milliseconds of an execution budget to hold back as a
 * validation reserve.
 *
 * The reserve is 35% of the budget, clamped between
 * OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS and
 * OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, never more than the budget itself,
 * and never more than what remains after subtracting
 * OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS from the budget.
 *
 * @param executionBudgetMs Total budget; the value is floored and raised to
 *   at least 10,000 before use.
 * @returns The reserve as a non-negative integer, or 0 when the budget is
 *   missing, not a number, or not finite.
 */
export function resolveOpenAICodexValidationReserveMs(
  executionBudgetMs: number | null | undefined,
): number {
  if (typeof executionBudgetMs !== "number") {
    return 0;
  }
  if (!Number.isFinite(executionBudgetMs)) {
    return 0;
  }

  const flooredBudgetMs = Math.max(10_000, Math.floor(executionBudgetMs));

  // Proportional share, clamped first from above and then from below.
  const cappedShareMs = Math.min(OPENAI_CODEX_MAX_VALIDATION_RESERVE_MS, flooredBudgetMs * 0.35);
  const boundedShareMs = Math.max(OPENAI_CODEX_MIN_VALIDATION_RESERVE_MS, cappedShareMs);
  const desiredReserveMs = Math.floor(Math.min(flooredBudgetMs, boundedShareMs));

  // Whatever is left once the minimum primary-turn budget is set aside.
  const reserveCeilingMs = Math.max(
    0,
    flooredBudgetMs - OPENAI_CODEX_MIN_PRIMARY_TURN_BUDGET_MS,
  );

  const reserveMs = Math.min(desiredReserveMs, reserveCeilingMs);
  return Math.max(0, reserveMs);
}
|
|
169
|
+
|
|
146
170
|
export function resolveGenericPythonExecutorChildTimeoutMs(params: {
|
|
147
171
|
backendName: string;
|
|
148
172
|
hostTimeoutMs: number;
|
|
@@ -154,8 +178,11 @@ export function resolveGenericPythonExecutorChildTimeoutMs(params: {
|
|
|
154
178
|
typeof params.executionBudgetMs === "number" && Number.isFinite(params.executionBudgetMs)
|
|
155
179
|
? Math.max(10_000, Math.floor(params.executionBudgetMs))
|
|
156
180
|
: null;
|
|
181
|
+
const validationReserveMs = resolveOpenAICodexValidationReserveMs(executionBudgetMs);
|
|
157
182
|
const childBudgetMs =
|
|
158
|
-
executionBudgetMs == null
|
|
183
|
+
executionBudgetMs == null
|
|
184
|
+
? hostTimeoutMs
|
|
185
|
+
: Math.min(hostTimeoutMs, Math.max(1_000, executionBudgetMs - validationReserveMs));
|
|
159
186
|
const graceMs = Math.min(
|
|
160
187
|
BACKEND_TIMEOUT_RESULT_GRACE_MS,
|
|
161
188
|
Math.max(2_000, Math.floor(childBudgetMs / 10)),
|
|
@@ -305,15 +332,30 @@ export function createGenericPythonExecutor(
|
|
|
305
332
|
"utf-8",
|
|
306
333
|
).toString("base64");
|
|
307
334
|
const args = [pythonBin, scriptPath, payloadBase64];
|
|
308
|
-
const
|
|
335
|
+
const childTimeoutMs = resolveGenericPythonExecutorChildTimeoutMs({
|
|
309
336
|
backendName,
|
|
310
337
|
hostTimeoutMs: timeoutMs,
|
|
311
338
|
executionBudgetMs,
|
|
312
339
|
});
|
|
340
|
+
const childTimeoutEnv =
|
|
341
|
+
childTimeoutMs == null
|
|
342
|
+
? {}
|
|
343
|
+
: {
|
|
344
|
+
WORKERPALS_OPENAI_CODEX_TIMEOUT_MS: String(childTimeoutMs),
|
|
345
|
+
WORKERPALS_OPENAI_CODEX_TIMEOUT_S: String(
|
|
346
|
+
Math.max(1, Math.floor(childTimeoutMs / 1000)),
|
|
347
|
+
),
|
|
348
|
+
};
|
|
349
|
+
const childTimeoutDetail =
|
|
350
|
+
childTimeoutMs != null
|
|
351
|
+
? `; codex_child_timeout=${childTimeoutMs}ms; reserved_validation_budget=${resolveOpenAICodexValidationReserveMs(
|
|
352
|
+
executionBudgetMs,
|
|
353
|
+
)}ms`
|
|
354
|
+
: "";
|
|
313
355
|
|
|
314
356
|
onLog?.(
|
|
315
357
|
"stdout",
|
|
316
|
-
`[${backendLabel}Executor] Spawning ${backendName} executor (timeout=${timeoutMs}ms; ${timeoutDetail})`,
|
|
358
|
+
`[${backendLabel}Executor] Spawning ${backendName} executor (timeout=${timeoutMs}ms; ${timeoutDetail}${childTimeoutDetail})`,
|
|
317
359
|
);
|
|
318
360
|
|
|
319
361
|
try {
|
|
@@ -18,6 +18,74 @@ export interface JobPublishBlockedInfo {
|
|
|
18
18
|
stage: "sync" | "push";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Diagnostic record describing one attempt of a job.
 *
 * Only `attempt` is required. Every other field is optional, and all of them
 * except `metadata` may also be explicitly `null`.
 */
export interface JobDiagnosticAttempt {
  attempt: number;
  workerId?: string | null;
  backend?: string | null;
  model?: string | null;
  // NOTE(review): timestamp format is not fixed by this type — confirm with the producer.
  startedAt?: string | null;
  finishedAt?: string | null;
  // Presumably milliseconds, going by the `Ms` suffix — confirm.
  durationMs?: number | null;
  terminalReason?: string | null;
  exitCode?: number | null;
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Diagnostic record describing how a job terminated.
 *
 * Every field is optional; the string and number fields may also be
 * explicitly `null`.
 */
export interface JobTerminalDiagnostics {
  failureClass?: string | null;
  terminalStage?: string | null;
  executorBackend?: string | null;
  summary?: string | null;
  watchdogFired?: boolean;
  // Presumably milliseconds, going by the `Ms` suffix — confirm.
  timeoutMs?: number | null;
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  // NOTE(review): the name suggests a truncated subset of changed paths — confirm with the producer.
  changedPathSample?: string[];
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Diagnostic record for one timed phase of a job.
 *
 * `phase`, `startedAt`, `finishedAt` and `durationMs` are required; the
 * remaining fields are optional.
 */
export interface JobPhaseSpanDiagnostics {
  attempt?: number | null;
  phase: string;
  // NOTE(review): timestamp format is not fixed by this type — confirm with the producer.
  startedAt: string;
  finishedAt: string;
  // Presumably milliseconds, going by the `Ms` suffix — confirm.
  durationMs: number;
  outcome?: string | null;
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Diagnostic record for one validation command run.
 *
 * `command` and `passed` are required; the remaining fields are optional.
 */
export interface JobValidationRunDiagnostics {
  attempt?: number | null;
  command: string;
  exitCode?: number | null;
  // Presumably milliseconds, going by the `Ms` suffix — confirm.
  durationMs?: number | null;
  passed: boolean;
  failureClass?: string | null;
  // NOTE(review): the `Tail` suffix suggests only the end of each stream is kept — confirm.
  stdoutTail?: string | null;
  stderrTail?: string | null;
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Diagnostic record describing the changed paths captured at one point in a job.
 *
 * Every field is optional.
 */
export interface JobPatchSnapshotDiagnostics {
  attempt?: number | null;
  phase?: string | null;
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  // NOTE(review): the name suggests a truncated subset of changed paths — confirm with the producer.
  changedPathSample?: string[];
  topLevelDirs?: string[];
  // NOTE(review): timestamp format is not fixed by this type — confirm with the producer.
  capturedAt?: string | null;
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Container that groups the diagnostic records for a job.
 *
 * Every field is optional: a list of attempts, a single terminal record,
 * lists of phase spans, validation runs and patch snapshots, and a free-form
 * metadata map.
 */
export interface JobDiagnostics {
  attempts?: JobDiagnosticAttempt[];
  terminal?: JobTerminalDiagnostics;
  phaseSpans?: JobPhaseSpanDiagnostics[];
  validationRuns?: JobValidationRunDiagnostics[];
  patchSnapshots?: JobPatchSnapshotDiagnostics[];
  // Free-form extra data; the value types are not constrained.
  metadata?: Record<string, unknown>;
}
|
|
88
|
+
|
|
21
89
|
export interface JobResult {
|
|
22
90
|
ok: boolean;
|
|
23
91
|
summary: string;
|
|
@@ -26,4 +94,5 @@ export interface JobResult {
|
|
|
26
94
|
exitCode?: number;
|
|
27
95
|
usage?: JobTokenUsage;
|
|
28
96
|
publishBlocked?: JobPublishBlockedInfo;
|
|
97
|
+
diagnostics?: JobDiagnostics;
|
|
29
98
|
}
|