@pushpalsdev/cli 1.1.26 → 1.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +98 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +153 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +1 -1
package/package.json
CHANGED
|
@@ -106,6 +106,7 @@ _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
|
106
106
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
107
107
|
_MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS = 8
|
|
108
108
|
_MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
109
|
+
# Upper bound on startup-stall restarts: a retry is attempted only while
# `startup_stall_recovery_attempt` is strictly below this value.
_MAX_STARTUP_STALL_RECOVERY_ATTEMPTS = 1
|
|
109
110
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
110
111
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
111
112
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
@@ -121,6 +122,7 @@ _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
|
121
122
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
122
123
|
_BACKGROUND_ROLLOUT_WATCHDOG_S = 90
|
|
123
124
|
_NO_PUBLISHABLE_FAILURE_COOLDOWN_MS = 10 * 60 * 1000
|
|
125
|
+
# Codex event types that are not counted as work progress when a trace is
# inspected for a startup stall; any other event type counts as progress.
_CODEX_STARTUP_ONLY_EVENT_TYPES = {"thread.started", "turn.started"}
|
|
124
126
|
|
|
125
127
|
|
|
126
128
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -862,6 +864,19 @@ def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: st
|
|
|
862
864
|
return "\n".join(lines)
|
|
863
865
|
|
|
864
866
|
|
|
867
|
+
def _build_startup_stall_recovery_guidance(trace_excerpt: str) -> str:
|
|
868
|
+
lines = [
|
|
869
|
+
"Codex startup-stall recovery: the previous Codex subprocess started but emitted no assistant, tool, or reasoning progress before the watchdog.",
|
|
870
|
+
"Treat this as a fresh execution with a patch-first contract. After at most one narrow read of the hinted owner, make the smallest publishable edit.",
|
|
871
|
+
"Do not spend this recovery attempt re-reading broad repository topology or validating before an edit exists.",
|
|
872
|
+
"If the hinted path is absent, choose the nearest existing repo-native owner or test rather than creating unrelated scaffolding.",
|
|
873
|
+
]
|
|
874
|
+
if trace_excerpt:
|
|
875
|
+
lines.append("Previous Codex event trace excerpt:")
|
|
876
|
+
lines.append(trace_excerpt)
|
|
877
|
+
return "\n".join(lines)
|
|
878
|
+
|
|
879
|
+
|
|
865
880
|
def _trace_summaries_text(trace: Dict[str, Any]) -> str:
|
|
866
881
|
summaries = trace.get("summaries")
|
|
867
882
|
if not isinstance(summaries, list):
|
|
@@ -869,6 +884,36 @@ def _trace_summaries_text(trace: Dict[str, Any]) -> str:
|
|
|
869
884
|
return "\n".join(str(item or "") for item in summaries[-80:]).lower()
|
|
870
885
|
|
|
871
886
|
|
|
887
|
+
def _codex_trace_has_work_progress(trace: Dict[str, Any]) -> bool:
    """Report whether ``trace`` records anything beyond startup-only events.

    Progress is detected from any of three fields of ``trace``:

    * ``reasoning_events`` parses to a positive integer;
    * ``event_type_counts`` (a dict) has a positive count for an event type
      outside ``_CODEX_STARTUP_ONLY_EVENT_TYPES``;
    * ``summaries`` (a list) has a non-blank entry whose text before the
      first ``|`` is outside ``_CODEX_STARTUP_ONLY_EVENT_TYPES``.

    Returns ``False`` when none of these hold.
    """
    if to_int(trace.get("reasoning_events"), 0) > 0:
        return True

    counts_by_type = trace.get("event_type_counts")
    if isinstance(counts_by_type, dict):
        non_startup_counts = (
            count
            for name, count in counts_by_type.items()
            if str(name or "").strip() not in _CODEX_STARTUP_ONLY_EVENT_TYPES
        )
        if any(to_int(count, 0) > 0 for count in non_startup_counts):
            return True

    summary_entries = trace.get("summaries")
    if not isinstance(summary_entries, list):
        return False

    for entry in summary_entries:
        text = str(entry or "").strip()
        # Each summary is treated as "<event type>|<rest>"; blank entries are ignored.
        if text and text.split("|", 1)[0].strip() not in _CODEX_STARTUP_ONLY_EVENT_TYPES:
            return True

    return False
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def _codex_trace_is_startup_stall(trace: Dict[str, Any]) -> bool:
    """Return ``True`` when ``trace`` shows neither token usage nor work progress.

    A positive ``total_tokens`` value rules out a startup stall immediately;
    otherwise the verdict is the negation of
    ``_codex_trace_has_work_progress(trace)``.
    """
    used_tokens = to_int(trace.get("total_tokens"), 0) > 0
    return not (used_tokens or _codex_trace_has_work_progress(trace))
|
|
915
|
+
|
|
916
|
+
|
|
872
917
|
def _detect_offtrack_rollout(trace: Dict[str, Any], artifact_only_paths: str = "") -> str:
|
|
873
918
|
text = _trace_summaries_text(trace)
|
|
874
919
|
if artifact_only_paths:
|
|
@@ -1981,6 +2026,7 @@ def _run_codex_task(
|
|
|
1981
2026
|
*,
|
|
1982
2027
|
wrapper_recovery_attempt: int = 0,
|
|
1983
2028
|
model_compatibility_recovery_attempt: int = 0,
|
|
2029
|
+
startup_stall_recovery_attempt: int = 0,
|
|
1984
2030
|
no_edit_recovery_attempt: int = 0,
|
|
1985
2031
|
rollout_recovery_attempt: int = 0,
|
|
1986
2032
|
model_override: Optional[str] = None,
|
|
@@ -2475,6 +2521,7 @@ def _run_codex_task(
|
|
|
2475
2521
|
retry_guidance,
|
|
2476
2522
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2477
2523
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2524
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2478
2525
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2479
2526
|
rollout_recovery_attempt=rollout_recovery_attempt + 1,
|
|
2480
2527
|
model_override=model_override,
|
|
@@ -2497,6 +2544,54 @@ def _run_codex_task(
|
|
|
2497
2544
|
}
|
|
2498
2545
|
|
|
2499
2546
|
if no_edit_watchdog_fired:
|
|
2547
|
+
startup_stall = _codex_trace_is_startup_stall(stdout_trace)
|
|
2548
|
+
if startup_stall and startup_stall_recovery_attempt < _MAX_STARTUP_STALL_RECOVERY_ATTEMPTS:
|
|
2549
|
+
retry_guidance = [
|
|
2550
|
+
*supplemental_guidance,
|
|
2551
|
+
_build_startup_stall_recovery_guidance(trace_excerpt),
|
|
2552
|
+
]
|
|
2553
|
+
log.warning(
|
|
2554
|
+
"Codex emitted only startup events before the no-edit watchdog; "
|
|
2555
|
+
"restarting Codex once before classifying the job terminally."
|
|
2556
|
+
)
|
|
2557
|
+
retry_result = _run_codex_task(
|
|
2558
|
+
repo,
|
|
2559
|
+
instruction,
|
|
2560
|
+
retry_guidance,
|
|
2561
|
+
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2562
|
+
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2563
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt + 1,
|
|
2564
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2565
|
+
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2566
|
+
model_override=model_override,
|
|
2567
|
+
baseline_changes=baseline_snapshot,
|
|
2568
|
+
)
|
|
2569
|
+
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
2570
|
+
if retry_result.get("ok"):
|
|
2571
|
+
recovered_stdout = str(retry_result.get("stdout") or "").strip()
|
|
2572
|
+
retry_result["stdout"] = _truncate(
|
|
2573
|
+
(
|
|
2574
|
+
"Recovered after the first Codex subprocess stalled before emitting "
|
|
2575
|
+
f"assistant/tool progress.\n\n{recovered_stdout}"
|
|
2576
|
+
).strip()
|
|
2577
|
+
)
|
|
2578
|
+
return retry_result
|
|
2579
|
+
if startup_stall:
|
|
2580
|
+
detail = (
|
|
2581
|
+
"Codex subprocess started but did not emit assistant, tool, reasoning, "
|
|
2582
|
+
"or usage progress before the startup watchdog."
|
|
2583
|
+
)
|
|
2584
|
+
if trace_excerpt:
|
|
2585
|
+
detail = f"{detail}\n{trace_excerpt}"
|
|
2586
|
+
return {
|
|
2587
|
+
"ok": False,
|
|
2588
|
+
"summary": "openai_codex stalled before first response",
|
|
2589
|
+
"stdout": _truncate(stdout),
|
|
2590
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2591
|
+
"exitCode": 124,
|
|
2592
|
+
"usage": usage,
|
|
2593
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2594
|
+
}
|
|
2500
2595
|
if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
|
|
2501
2596
|
retry_guidance = [
|
|
2502
2597
|
*supplemental_guidance,
|
|
@@ -2511,6 +2606,7 @@ def _run_codex_task(
|
|
|
2511
2606
|
retry_guidance,
|
|
2512
2607
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2513
2608
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2609
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2514
2610
|
no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
|
|
2515
2611
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2516
2612
|
model_override=model_override,
|
|
@@ -2706,6 +2802,7 @@ def _run_codex_task(
|
|
|
2706
2802
|
],
|
|
2707
2803
|
wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
|
|
2708
2804
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2805
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2709
2806
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2710
2807
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2711
2808
|
model_override=model_override,
|
|
@@ -2820,6 +2917,7 @@ def _run_codex_task(
|
|
|
2820
2917
|
effective_supplemental_guidance,
|
|
2821
2918
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2822
2919
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
|
|
2920
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2823
2921
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2824
2922
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2825
2923
|
model_override=LEGACY_CODEX_MODEL_FALLBACK,
|
|
@@ -1029,6 +1029,159 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1029
1029
|
self.assertNotIn("broad/noisy", str(result.get("summary") or ""))
|
|
1030
1030
|
self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
|
|
1031
1031
|
|
|
1032
|
+
def test_run_codex_task_retries_once_when_codex_stalls_before_first_response(self) -> None:
    # Scenario: the fake Codex binary prints only `thread.started` and
    # `turn.started` and then sleeps, unless its prompt contains the
    # startup-stall recovery guidance, in which case it writes a file under
    # `src/` and exits 0. The run is expected to succeed via the retry.
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-startup-stall-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# startup stall repo\n", encoding="utf-8")

        # Seed a git repository with a single commit containing README.md.
        git_commands = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed startup stall repo"],
        )
        for git_command in git_commands:
            subprocess.run(git_command, cwd=repo, check=True, capture_output=True, text=True)

        # Source of the stand-in Codex executable, one entry per line.
        stub_source_lines = [
            "from pathlib import Path",
            "import json",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "if 'Codex startup-stall recovery' in prompt:",
            "    Path('src').mkdir(exist_ok=True)",
            "    Path('src/startup-stall-recovered.txt').write_text('patched after restart\\n', encoding='utf-8')",
            "    if last_message_path:",
            "        Path(last_message_path).write_text('Patched after Codex startup-stall recovery.', encoding='utf-8')",
            "    print(json.dumps({'type': 'item.completed', 'message': 'Patched after Codex startup-stall recovery.'}), flush=True)",
            "    sys.exit(0)",
            "",
            "print(json.dumps({'type': 'thread.started'}), flush=True)",
            "print(json.dumps({'type': 'turn.started'}), flush=True)",
            "time.sleep(10)",
        ]
        stub_path = workspace / "fake_codex_startup_stall.py"
        stub_path.write_text("\n".join(stub_source_lines), encoding="utf-8")

        # Point the executor at the stub and shorten the watchdog-related
        # settings so the stalled first attempt is cut off quickly.
        codex_command = json.dumps([sys.executable, str(stub_path)])
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": codex_command,
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-startup-stall-test-key",
            "WORKERPALS_OPENAI_CODEX_JSON": "true",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Rename one misleading test fixture constant and update the related assertions.",
                [],
            )

    self.assertTrue(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 0)
    stdout = str(result.get("stdout") or "")
    for expected_fragment in (
        "Recovered after the first Codex subprocess stalled",
        "Patched after Codex startup-stall recovery",
        "src/",
    ):
        self.assertIn(expected_fragment, stdout)
|
|
1116
|
+
|
|
1117
|
+
def test_run_codex_task_reports_startup_stall_when_restart_also_never_responds(self) -> None:
    # Scenario: the fake Codex binary always prints only `thread.started` and
    # `turn.started` and then sleeps, so every attempt stalls. The run is
    # expected to fail with the dedicated startup-stall summary, exit code
    # 124 and a 600000 ms cooldown.
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-startup-stall-fail-") as temp_dir:
        workspace = Path(temp_dir)
        repo = workspace / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# startup stall failure repo\n", encoding="utf-8")

        # Seed a git repository with a single commit containing README.md.
        git_commands = (
            ["git", "init"],
            ["git", "config", "user.name", "PushPals Test"],
            ["git", "config", "user.email", "pushpals-tests@example.com"],
            ["git", "add", "README.md"],
            ["git", "commit", "-m", "chore: seed startup stall failure repo"],
        )
        for git_command in git_commands:
            subprocess.run(git_command, cwd=repo, check=True, capture_output=True, text=True)

        # Source of the stand-in Codex executable, one entry per line.
        stub_source_lines = [
            "import json",
            "import sys",
            "import time",
            "",
            "sys.stdin.read()",
            "print(json.dumps({'type': 'thread.started'}), flush=True)",
            "print(json.dumps({'type': 'turn.started'}), flush=True)",
            "time.sleep(10)",
        ]
        stub_path = workspace / "fake_codex_startup_stall_fail.py"
        stub_path.write_text("\n".join(stub_source_lines), encoding="utf-8")

        # Point the executor at the stub and shorten the watchdog-related
        # settings so each stalled attempt is cut off quickly.
        codex_command = json.dumps([sys.executable, str(stub_path)])
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": codex_command,
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-startup-stall-fail-test-key",
            "WORKERPALS_OPENAI_CODEX_JSON": "true",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Rename one misleading test fixture constant and update the related assertions.",
                [],
            )

    self.assertFalse(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 124)
    self.assertEqual(result.get("summary"), "openai_codex stalled before first response")
    # The stall must not be reported through the "no publishable" failure wording.
    summary_text = str(result.get("summary") or "").lower()
    self.assertNotIn("no publishable", summary_text)
    self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1184
|
+
|
|
1032
1185
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
1033
1186
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
1034
1187
|
repo = Path(temp_dir) / "repo"
|
|
@@ -450,7 +450,7 @@ function mergeWorkerDiagnostics(
|
|
|
450
450
|
function inferWorkerTerminalFailureClass(result: JobResult): string {
|
|
451
451
|
if (result.ok) return "success";
|
|
452
452
|
const text = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
|
|
453
|
-
if (/timed out|timeout|signal 15|terminated|exit 143|exit 137/.test(text)) return "timeout";
|
|
453
|
+
if (/timed out|timeout|signal 15|terminated|exit 143|exit 137|stalled before first response|startup stall/.test(text)) return "timeout";
|
|
454
454
|
if (/no publishable|non-publishable|node_modules/.test(text)) return "artifact_only_no_publishable_patch";
|
|
455
455
|
if (/validationgate|validation/.test(text)) return "validation";
|
|
456
456
|
if (/scopegate|scope/.test(text)) return "scope";
|