@pushpalsdev/cli 1.1.21 → 1.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +161 -24
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +355 -0
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +334 -19
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
|
@@ -40,6 +40,7 @@ from openai_codex_executor import (
|
|
|
40
40
|
_detect_offtrack_rollout,
|
|
41
41
|
_detect_codex_workaround_signal,
|
|
42
42
|
_extract_usage_counts,
|
|
43
|
+
_has_credible_shell_wrapper_progress,
|
|
43
44
|
_load_prompt_template,
|
|
44
45
|
_mask_repo_local_codex_files,
|
|
45
46
|
_repo_root_for_prompt_loading,
|
|
@@ -672,6 +673,130 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
672
673
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
673
674
|
self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
|
|
674
675
|
|
|
676
|
+
def test_shell_wrapper_progress_guard_rejects_broad_noisy_path_sets(self) -> None:
    # Accepted case: three changed files spread over src/ and docs/.
    focused_paths = ["src/change.ts", "src/change.test.ts", "docs/change.md"]
    self.assertTrue(_has_credible_shell_wrapper_progress(focused_paths))

    # Rejected case: nine generated files, all under src/.
    many_generated_files = [f"src/generated-{index}.ts" for index in range(9)]
    self.assertFalse(_has_credible_shell_wrapper_progress(many_generated_files))

    # Rejected case: five files, each under a different top-level directory.
    scattered_files = [
        "app/main.ts",
        "components/card.tsx",
        "docs/readme.md",
        "scripts/check.ts",
        "tests/card.test.ts",
    ]
    self.assertFalse(_has_credible_shell_wrapper_progress(scattered_files))

    # Rejected case: five distinct directory entries (trailing slash).
    scattered_dirs = [f"area{index}/" for index in range(5)]
    self.assertFalse(_has_credible_shell_wrapper_progress(scattered_dirs))
|
|
707
|
+
|
|
708
|
+
def test_run_codex_task_recovers_instead_of_handing_noisy_wrapper_diff_to_gates(self) -> None:
    # Scenario: the fake Codex binary rewrites nine tracked files, prints
    # three shell-wrapper rejection errors on stderr and then sleeps. When
    # its stdin prompt contains "Command-router recovery:" it instead writes
    # src/recovered.txt, records a last message and exits 0. The run is
    # expected to succeed through that recovery path, and stdout must not
    # mention the ValidationGate/CriticGate hand-off.
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-noisy-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# wrapper noisy test\n", encoding="utf-8")
        # Seed the files the stub later overwrites so they show up as
        # modifications of tracked files.
        for index in range(9):
            (repo / f"noisy-{index}.txt").write_text("baseline\n", encoding="utf-8")
        subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
        subprocess.run(
            ["git", "config", "user.name", "PushPals Test"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )
        subprocess.run(
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )
        subprocess.run(["git", "add", "."], cwd=repo, check=True, capture_output=True, text=True)
        subprocess.run(
            ["git", "commit", "-m", "chore: seed wrapper noisy repo"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )

        stub_path = Path(temp_dir) / "fake_codex_wrapper_noisy.py"
        # Indentation inside these string literals is significant: the lines
        # are joined into a Python script that is executed as the fake Codex
        # binary, so nested blocks must be indented consistently.
        stub_path.write_text(
            "\n".join(
                [
                    "from pathlib import Path",
                    "import sys",
                    "import time",
                    "",
                    "argv = sys.argv[1:]",
                    "last_message_path = None",
                    "for index, arg in enumerate(argv):",
                    "    if arg == '--output-last-message' and index + 1 < len(argv):",
                    "        last_message_path = argv[index + 1]",
                    "        break",
                    "",
                    "prompt = sys.stdin.read()",
                    "if 'Command-router recovery:' in prompt:",
                    "    Path('src').mkdir(exist_ok=True)",
                    "    Path('src/recovered.txt').write_text('direct recovery\\n', encoding='utf-8')",
                    "    if last_message_path:",
                    "        Path(last_message_path).write_text(",
                    "            'Recovered after noisy shell-wrapper path detection using direct commands.',",
                    "            encoding='utf-8',",
                    "        )",
                    "    print('item.completed | Recovered with direct-command guidance.', flush=True)",
                    "    sys.exit(0)",
                    "",
                    "for index in range(9):",
                    "    Path(f'noisy-{index}.txt').write_text('noisy path\\n', encoding='utf-8')",
                    "for line in (",
                    "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
                    "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
                    "    'error=exec_command failed for `/bin/bash -lc \\'sed -n 1,40p README.md\\'`: CreateProcess { message: \"Rejected\" }',",
                    "):",
                    "    print(line, file=sys.stderr, flush=True)",
                    "time.sleep(10)",
                ]
            ),
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-wrapper-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Recover from a shell-wrapper loop after noisy repo changes.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        stdout = str(result.get("stdout") or "")
        self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", stdout)
        self.assertIn("Recovered after noisy shell-wrapper path detection", stdout)
        self.assertNotIn("ValidationGate/CriticGate", stdout)
|
|
799
|
+
|
|
675
800
|
def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
|
|
676
801
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
|
|
677
802
|
repo = Path(temp_dir) / "repo"
|
|
@@ -749,6 +874,76 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
749
874
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
750
875
|
self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
|
|
751
876
|
|
|
877
|
+
def test_run_codex_task_rejects_broad_timeout_partial_patch(self) -> None:
    # Scenario: the fake Codex binary creates changed.txt under five new
    # top-level directories (area0..area4) and then sleeps past the 1 second
    # timeout. The run is expected to fail with exit code 124 and to report
    # the change set as broad/noisy, naming area0 in stderr.
    seed_commands = (
        ["git", "init"],
        ["git", "config", "user.name", "PushPals Test"],
        ["git", "config", "user.email", "pushpals-tests@example.com"],
        ["git", "add", "README.md"],
        ["git", "commit", "-m", "chore: seed timeout noisy repo"],
    )
    fake_codex_lines = [
        "from pathlib import Path",
        "import sys",
        "import time",
        "",
        "sys.stdin.read()",
        "for index in range(5):",
        " root = Path(f'area{index}')",
        " root.mkdir(exist_ok=True)",
        " (root / 'changed.txt').write_text('broad change before timeout\\n', encoding='utf-8')",
        "print('item.completed | Touched a broad set of files before timeout.', flush=True)",
        "time.sleep(5)",
    ]

    with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-noisy-") as workdir:
        repo_dir = Path(workdir) / "repo"
        repo_dir.mkdir(parents=True, exist_ok=True)
        (repo_dir / "README.md").write_text("# timeout noisy repo\n", encoding="utf-8")
        for git_command in seed_commands:
            subprocess.run(git_command, cwd=repo_dir, check=True, capture_output=True, text=True)

        fake_codex = Path(workdir) / "fake_codex_timeout_noisy.py"
        fake_codex.write_text("\n".join(fake_codex_lines), encoding="utf-8")

        codex_env = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(fake_codex)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-timeout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, codex_env, clear=False):
            outcome = _run_codex_task(
                str(repo_dir),
                "Create a broad unfocused patch, then continue thinking too long.",
                [],
            )

        self.assertFalse(outcome.get("ok"), outcome)
        self.assertEqual(outcome.get("exitCode"), 124)
        summary_text = str(outcome.get("summary") or "")
        self.assertIn("broad/noisy publishable-looking changes", summary_text)
        stderr_text = str(outcome.get("stderr") or "")
        self.assertIn("too broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
|
|
946
|
+
|
|
752
947
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
753
948
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
754
949
|
repo = Path(temp_dir) / "repo"
|
|
@@ -892,6 +1087,95 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
892
1087
|
self.assertEqual(result.get("exitCode"), 124)
|
|
893
1088
|
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
894
1089
|
|
|
1090
|
+
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
    # Scenario: on a normal prompt the fake Codex binary creates
    # src/transient-progress.txt, deletes it 1.4 seconds later, leaves only
    # node_modules/linked.txt behind and keeps sleeping. When its stdin
    # prompt contains "No-edit watchdog recovery" it instead writes
    # src/no-edit-recheck-retry.txt and exits 0. With the no-edit watchdog
    # and recheck both set to 1 second, the run is expected to end
    # successfully with the retry's patch reported in stdout.
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# no edit recheck repo\n", encoding="utf-8")
        subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
        subprocess.run(
            ["git", "config", "user.name", "PushPals Test"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )
        subprocess.run(
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )
        subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
        subprocess.run(
            ["git", "commit", "-m", "chore: seed no-edit recheck repo"],
            cwd=repo,
            check=True,
            capture_output=True,
            text=True,
        )

        stub_path = Path(temp_dir) / "fake_codex_no_edit_recheck.py"
        # Indentation inside these string literals is significant: the lines
        # are joined into a Python script that is executed as the fake Codex
        # binary, so nested blocks must be indented consistently.
        stub_path.write_text(
            "\n".join(
                [
                    "from pathlib import Path",
                    "import sys",
                    "import time",
                    "",
                    "argv = sys.argv[1:]",
                    "last_message_path = None",
                    "for index, arg in enumerate(argv):",
                    "    if arg == '--output-last-message' and index + 1 < len(argv):",
                    "        last_message_path = argv[index + 1]",
                    "        break",
                    "",
                    "prompt = sys.stdin.read()",
                    "if 'No-edit watchdog recovery' in prompt:",
                    "    Path('src').mkdir(exist_ok=True)",
                    "    Path('src/no-edit-recheck-retry.txt').write_text('patched after recheck\\n', encoding='utf-8')",
                    "    if last_message_path:",
                    "        Path(last_message_path).write_text('Patched after transient no-edit recheck.', encoding='utf-8')",
                    "    print('item.completed | Patched after transient no-edit recheck.', flush=True)",
                    "    sys.exit(0)",
                    "",
                    "Path('src').mkdir(exist_ok=True)",
                    "transient = Path('src/transient-progress.txt')",
                    "transient.write_text('temporary progress\\n', encoding='utf-8')",
                    "print('item.completed | Created transient publishable progress.', flush=True)",
                    "time.sleep(1.4)",
                    "transient.unlink()",
                    "Path('node_modules').mkdir(exist_ok=True)",
                    "Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
                    "print('item.completed | Lost patch while still thinking.', flush=True)",
                    "time.sleep(10)",
                ]
            ),
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-no-edit-recheck-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Polish the first-entry home shell with a compact visual patch.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        self.assertEqual(result.get("exitCode"), 0)
        self.assertIn("Patched after transient no-edit recheck", str(result.get("stdout") or ""))
        self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1178
|
+
|
|
895
1179
|
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
|
|
896
1180
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
|
|
897
1181
|
repo = Path(temp_dir) / "repo"
|
|
@@ -1081,6 +1365,77 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1081
1365
|
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
1082
1366
|
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
1083
1367
|
|
|
1368
|
+
def test_run_codex_task_rollout_coach_fails_fast_on_broad_small_task_changes(self) -> None:
    # Scenario: for a task described as small and low-risk, the fake Codex
    # binary creates changed.txt under five new top-level directories
    # (area0..area4) and then sleeps. With the rollout watchdog set to
    # 1 second the run is expected to fail with exit code 124, mention the
    # rollout coach in the summary and flag the changes as broad/noisy.
    seed_commands = (
        ["git", "init"],
        ["git", "config", "user.name", "PushPals Test"],
        ["git", "config", "user.email", "pushpals-tests@example.com"],
        ["git", "add", "README.md"],
        ["git", "commit", "-m", "chore: seed rollout noisy repo"],
    )
    fake_codex_lines = [
        "from pathlib import Path",
        "import sys",
        "import time",
        "",
        "sys.stdin.read()",
        "for index in range(5):",
        " root = Path(f'area{index}')",
        " root.mkdir(exist_ok=True)",
        " (root / 'changed.txt').write_text('broad rollout change\\n', encoding='utf-8')",
        "print('item.completed | Made broad edits for a supposedly small task.', flush=True)",
        "time.sleep(10)",
    ]

    with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-noisy-") as workdir:
        repo_dir = Path(workdir) / "repo"
        repo_dir.mkdir(parents=True, exist_ok=True)
        (repo_dir / "README.md").write_text("# rollout noisy repo\n", encoding="utf-8")
        for git_command in seed_commands:
            subprocess.run(git_command, cwd=repo_dir, check=True, capture_output=True, text=True)

        fake_codex = Path(workdir) / "fake_codex_rollout_noisy.py"
        fake_codex.write_text("\n".join(fake_codex_lines), encoding="utf-8")

        codex_env = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(fake_codex)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-rollout-noisy-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
            "WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, codex_env, clear=False):
            outcome = _run_codex_task(
                str(repo_dir),
                "Make a small low-risk repo-native patch.",
                [],
            )

        self.assertFalse(outcome.get("ok"), outcome)
        self.assertEqual(outcome.get("exitCode"), 124)
        self.assertIn("rollout coach", str(outcome.get("summary") or ""))
        stderr_text = str(outcome.get("stderr") or "")
        self.assertIn("broad/noisy", stderr_text)
        self.assertIn("area0", stderr_text)
|
|
1438
|
+
|
|
1084
1439
|
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1085
1440
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
1086
1441
|
repo = Path(temp_dir) / "repo"
|
|
@@ -18,6 +18,74 @@ export interface JobPublishBlockedInfo {
|
|
|
18
18
|
stage: "sync" | "push";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Diagnostic record describing one attempt at a job.
 *
 * Only `attempt` is required. Every other field is optional, and all of them
 * except `metadata` may also be explicitly `null`.
 */
export interface JobDiagnosticAttempt {
  /** Attempt number (required). */
  attempt: number;

  // Who/what ran the attempt.
  workerId?: string | null;
  backend?: string | null;
  model?: string | null;

  // Timing; timestamps are carried as strings (format not specified here).
  startedAt?: string | null;
  finishedAt?: string | null;
  durationMs?: number | null;

  // How the attempt ended.
  terminalReason?: string | null;
  exitCode?: number | null;

  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Diagnostics describing the terminal state of a job.
 *
 * Every field is optional. The string and numeric fields may also be
 * explicitly `null`.
 */
export interface JobTerminalDiagnostics {
  // Classification of the terminal outcome.
  failureClass?: string | null;
  terminalStage?: string | null;
  executorBackend?: string | null;
  summary?: string | null;

  // Timeout information.
  watchdogFired?: boolean;
  timeoutMs?: number | null;

  // Summary of changed paths at termination.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  changedPathSample?: string[];

  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Timing record for one named phase of a job.
 *
 * `phase`, `startedAt`, `finishedAt` and `durationMs` are required; the
 * remaining fields are optional.
 */
export interface JobPhaseSpanDiagnostics {
  /** Attempt this span belongs to, when known. */
  attempt?: number | null;
  /** Phase name (required). */
  phase: string;

  // Timing; timestamps are carried as strings (format not specified here).
  startedAt: string;
  finishedAt: string;
  durationMs: number;

  outcome?: string | null;

  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Record of one validation command run.
 *
 * `command` and `passed` are required; the remaining fields are optional.
 */
export interface JobValidationRunDiagnostics {
  /** Attempt this run belongs to, when known. */
  attempt?: number | null;
  /** The command that was run (required). */
  command: string;

  // Result of the run.
  exitCode?: number | null;
  durationMs?: number | null;
  /** Whether the run passed (required). */
  passed: boolean;
  failureClass?: string | null;

  // Captured output tails.
  stdoutTail?: string | null;
  stderrTail?: string | null;

  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Snapshot of a job's changed paths at some point in its run.
 *
 * Every field is optional.
 */
export interface JobPatchSnapshotDiagnostics {
  // Where in the job the snapshot was taken.
  attempt?: number | null;
  phase?: string | null;

  // Summary of changed paths.
  publishableFileCount?: number | null;
  artifactOnlyPathCount?: number | null;
  changedPathSample?: string[];
  topLevelDirs?: string[];

  /** Capture time, carried as a string (format not specified here). */
  capturedAt?: string | null;

  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Container for all diagnostics attached to a job result.
 *
 * Every section is optional, so an empty object is a valid value.
 */
export interface JobDiagnostics {
  /** Per-attempt records. */
  attempts?: JobDiagnosticAttempt[];
  /** Terminal-state record. */
  terminal?: JobTerminalDiagnostics;
  /** Per-phase timing records. */
  phaseSpans?: JobPhaseSpanDiagnostics[];
  /** Validation command records. */
  validationRuns?: JobValidationRunDiagnostics[];
  /** Changed-path snapshots. */
  patchSnapshots?: JobPatchSnapshotDiagnostics[];
  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
88
|
+
|
|
21
89
|
export interface JobResult {
|
|
22
90
|
ok: boolean;
|
|
23
91
|
summary: string;
|
|
@@ -26,4 +94,5 @@ export interface JobResult {
|
|
|
26
94
|
exitCode?: number;
|
|
27
95
|
usage?: JobTokenUsage;
|
|
28
96
|
publishBlocked?: JobPublishBlockedInfo;
|
|
97
|
+
diagnostics?: JobDiagnostics;
|
|
29
98
|
}
|
|
@@ -18,7 +18,7 @@ import { homedir } from "os";
|
|
|
18
18
|
import { isAbsolute, relative, resolve } from "path";
|
|
19
19
|
import { loadPushPalsConfig } from "shared";
|
|
20
20
|
import { resolveExecutor, type WorkerpalsRuntimeConfig } from "./common/executor_backend.js";
|
|
21
|
-
import type { ExecutorBackend } from "./common/types.js";
|
|
21
|
+
import type { ExecutorBackend, JobDiagnostics } from "./common/types.js";
|
|
22
22
|
import { computeTimeoutWarningWindow, DEFAULT_DOCKER_TIMEOUT_MS } from "./timeout_policy.js";
|
|
23
23
|
import {
|
|
24
24
|
BACKEND_DOCKER_PASSTHROUGH_ENV,
|
|
@@ -237,6 +237,7 @@ export interface DockerJobResult {
|
|
|
237
237
|
branch: string;
|
|
238
238
|
sha: string;
|
|
239
239
|
};
|
|
240
|
+
diagnostics?: JobDiagnostics;
|
|
240
241
|
}
|
|
241
242
|
|
|
242
243
|
export interface Job {
|
|
@@ -247,6 +248,37 @@ export interface Job {
|
|
|
247
248
|
sessionId: string;
|
|
248
249
|
}
|
|
249
250
|
|
|
251
|
+
/**
 * Convert an arbitrary value into a short, trimmed diagnostic string.
 *
 * `null` and `undefined` are treated as the empty string; any other value is
 * converted with `String(...)`. Leading and trailing whitespace is removed.
 *
 * @param value    Value to stringify.
 * @param maxChars Maximum length of the returned text (default 1000). Longer
 *                 text is cut to its first `maxChars` characters; no ellipsis
 *                 is appended.
 * @returns The trimmed (and possibly truncated) text, or `null` when nothing
 *          but whitespace remains.
 */
function compactDockerDiagnosticText(value: unknown, maxChars = 1000): string | null {
  // A single trim() strips the same whitespace set as a trailing `\s+$`
  // replace followed by trim(), without the regex's quadratic backtracking
  // on long whitespace runs that are not at the end of the string.
  const text = String(value ?? "").trim();
  if (text.length === 0) {
    return null;
  }
  return text.length <= maxChars ? text : text.slice(0, maxChars);
}
|
|
256
|
+
|
|
257
|
+
/**
 * Build the diagnostics attached to a Docker job result that did not come
 * from a structured worker result.
 *
 * Only the `terminal` section is populated. Its stage is always "docker",
 * `watchdogFired` mirrors `context.timedOutByDocker`, and its metadata always
 * records `structuredResult: false` plus the elapsed time, exit code and
 * timeout flag.
 *
 * @param summary      Human-readable summary; stored trimmed and length-capped.
 * @param context      Timing information for the Docker run.
 * @param exitCode     Exit code of the job process.
 * @param failureClass Failure classification label.
 * @param metadata     Extra metadata entries. They are spread last, so a key
 *                     that matches a built-in entry overrides it.
 * @returns Diagnostics containing only a `terminal` record.
 */
function dockerFallbackDiagnostics(
  summary: string,
  context: { timedOutByDocker: boolean; elapsedMs: number; timeoutMs: number },
  exitCode: number,
  failureClass: string,
  metadata: Record<string, unknown> = {},
): JobDiagnostics {
  const compactSummary = compactDockerDiagnosticText(summary);
  const terminalMetadata: Record<string, unknown> = {
    structuredResult: false,
    elapsedMs: context.elapsedMs,
    exitCode,
    timedOutByDocker: context.timedOutByDocker,
    ...metadata,
  };
  const diagnostics: JobDiagnostics = {
    terminal: {
      failureClass,
      terminalStage: "docker",
      summary: compactSummary,
      watchdogFired: context.timedOutByDocker,
      timeoutMs: context.timeoutMs,
      metadata: terminalMetadata,
    },
  };
  return diagnostics;
}
|
|
281
|
+
|
|
250
282
|
function readPositiveNumber(value: unknown): number | null {
|
|
251
283
|
const parsed =
|
|
252
284
|
typeof value === "number"
|
|
@@ -264,6 +296,10 @@ function maybeRecord(value: unknown): Record<string, unknown> | null {
|
|
|
264
296
|
: null;
|
|
265
297
|
}
|
|
266
298
|
|
|
299
|
+
/**
 * Type guard that narrows a value to `ReadableStream<Uint8Array>`.
 *
 * The runtime check is only `instanceof ReadableStream`; the chunk type is
 * asserted by the signature and is not inspected.
 */
function isReadableByteStream(value: unknown): value is ReadableStream<Uint8Array> {
  const isStream = value instanceof ReadableStream;
  return isStream;
}
|
|
302
|
+
|
|
267
303
|
function collectValidationCommandHints(params: Record<string, unknown>): string[] {
|
|
268
304
|
const planning = maybeRecord(params.planning);
|
|
269
305
|
const values: unknown[] = [
|
|
@@ -1280,10 +1316,15 @@ export class DockerExecutor {
|
|
|
1280
1316
|
const stderrLines: string[] = [];
|
|
1281
1317
|
|
|
1282
1318
|
try {
|
|
1319
|
+
const stdout = proc.stdout;
|
|
1320
|
+
const stderr = proc.stderr;
|
|
1321
|
+
if (!isReadableByteStream(stdout) || !isReadableByteStream(stderr)) {
|
|
1322
|
+
throw new Error("docker exec stdout/stderr pipes were not available");
|
|
1323
|
+
}
|
|
1283
1324
|
await Promise.all([
|
|
1284
1325
|
this.writeJobSpecToStdin(proc, base64Spec),
|
|
1285
|
-
this.readStream(
|
|
1286
|
-
this.readStream(
|
|
1326
|
+
this.readStream(stdout, "stdout", onLog, stdoutLines),
|
|
1327
|
+
this.readStream(stderr, "stderr", onLog, stderrLines),
|
|
1287
1328
|
]);
|
|
1288
1329
|
} catch (err) {
|
|
1289
1330
|
try {
|
|
@@ -1343,7 +1384,7 @@ export class DockerExecutor {
|
|
|
1343
1384
|
throw new Error("docker exec stdin pipe was not available");
|
|
1344
1385
|
}
|
|
1345
1386
|
const bytes = new TextEncoder().encode(base64Spec);
|
|
1346
|
-
if (
|
|
1387
|
+
if (stdin instanceof WritableStream) {
|
|
1347
1388
|
const writer = stdin.getWriter();
|
|
1348
1389
|
try {
|
|
1349
1390
|
await writer.write(bytes);
|
|
@@ -1359,12 +1400,17 @@ export class DockerExecutor {
|
|
|
1359
1400
|
return;
|
|
1360
1401
|
}
|
|
1361
1402
|
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1403
|
+
const nodeStdin = stdin as {
|
|
1404
|
+
write?: (chunk: Uint8Array | string) => unknown;
|
|
1405
|
+
end?: () => unknown;
|
|
1406
|
+
flush?: () => unknown;
|
|
1407
|
+
};
|
|
1408
|
+
if (typeof nodeStdin.write === "function" && typeof nodeStdin.end === "function") {
|
|
1409
|
+
await nodeStdin.write(bytes);
|
|
1410
|
+
if (typeof nodeStdin.flush === "function") {
|
|
1411
|
+
await nodeStdin.flush();
|
|
1366
1412
|
}
|
|
1367
|
-
await
|
|
1413
|
+
await nodeStdin.end();
|
|
1368
1414
|
return;
|
|
1369
1415
|
}
|
|
1370
1416
|
|
|
@@ -1648,44 +1694,57 @@ export class DockerExecutor {
|
|
|
1648
1694
|
`Malformed ___RESULT___ payload: ${sentinelParseError || "unknown parse error"}`,
|
|
1649
1695
|
];
|
|
1650
1696
|
if (stderr) details.push(stderr);
|
|
1697
|
+
const summary = `Worker returned malformed structured result after ${context.elapsedMs}ms`;
|
|
1651
1698
|
return {
|
|
1652
1699
|
ok: false,
|
|
1653
|
-
summary
|
|
1700
|
+
summary,
|
|
1654
1701
|
stdout,
|
|
1655
1702
|
stderr: details.join("\n"),
|
|
1656
1703
|
exitCode,
|
|
1704
|
+
diagnostics: dockerFallbackDiagnostics(summary, context, exitCode, "malformed_structured_result", {
|
|
1705
|
+
sentinelParseError,
|
|
1706
|
+
}),
|
|
1657
1707
|
};
|
|
1658
1708
|
}
|
|
1659
1709
|
|
|
1660
1710
|
// No sentinel found, return generic result.
|
|
1661
1711
|
if (context.timedOutByDocker) {
|
|
1712
|
+
const summary = `Job timed out in Docker executor after ${context.elapsedMs}ms (limit ${context.timeoutMs}ms; terminated before structured result).`;
|
|
1662
1713
|
return {
|
|
1663
1714
|
ok: false,
|
|
1664
|
-
summary
|
|
1715
|
+
summary,
|
|
1665
1716
|
stdout,
|
|
1666
1717
|
stderr,
|
|
1667
1718
|
exitCode,
|
|
1719
|
+
diagnostics: dockerFallbackDiagnostics(summary, context, exitCode, "timeout"),
|
|
1668
1720
|
};
|
|
1669
1721
|
}
|
|
1670
1722
|
if (exitCode === 143 || exitCode === 137) {
|
|
1723
|
+
const summary = `Job process was terminated (exit ${exitCode}) after ${context.elapsedMs}ms before structured result was produced.`;
|
|
1671
1724
|
return {
|
|
1672
1725
|
ok: false,
|
|
1673
|
-
summary
|
|
1726
|
+
summary,
|
|
1674
1727
|
stdout,
|
|
1675
1728
|
stderr,
|
|
1676
1729
|
exitCode,
|
|
1730
|
+
diagnostics: dockerFallbackDiagnostics(summary, context, exitCode, "terminated"),
|
|
1677
1731
|
};
|
|
1678
1732
|
}
|
|
1679
1733
|
|
|
1734
|
+
const summary =
|
|
1735
|
+
exitCode === 0
|
|
1736
|
+
? `Job completed in ${context.elapsedMs}ms`
|
|
1737
|
+
: `Job failed (exit ${exitCode}, elapsed ${context.elapsedMs}ms)`;
|
|
1680
1738
|
return {
|
|
1681
1739
|
ok: exitCode === 0,
|
|
1682
|
-
summary
|
|
1683
|
-
exitCode === 0
|
|
1684
|
-
? `Job completed in ${context.elapsedMs}ms`
|
|
1685
|
-
: `Job failed (exit ${exitCode}, elapsed ${context.elapsedMs}ms)`,
|
|
1740
|
+
summary,
|
|
1686
1741
|
stdout,
|
|
1687
1742
|
stderr,
|
|
1688
1743
|
exitCode,
|
|
1744
|
+
diagnostics:
|
|
1745
|
+
exitCode === 0
|
|
1746
|
+
? undefined
|
|
1747
|
+
: dockerFallbackDiagnostics(summary, context, exitCode, "no_structured_result"),
|
|
1689
1748
|
};
|
|
1690
1749
|
}
|
|
1691
1750
|
|