@pushpalsdev/cli 1.1.18 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +159 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +250 -6
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +223 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +9 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +450 -5
|
@@ -32,8 +32,12 @@ from openai_codex_executor import (
|
|
|
32
32
|
_resolve_reasoning_effort,
|
|
33
33
|
_resolve_task_reasoning_effort,
|
|
34
34
|
_build_instruction,
|
|
35
|
+
_build_no_edit_recovery_guidance,
|
|
36
|
+
_build_rollout_recovery_guidance,
|
|
35
37
|
_collect_disallowed_shell_wrapper_rejections,
|
|
36
38
|
_codex_changed_paths,
|
|
39
|
+
_describe_non_publishable_paths,
|
|
40
|
+
_detect_offtrack_rollout,
|
|
37
41
|
_detect_codex_workaround_signal,
|
|
38
42
|
_extract_usage_counts,
|
|
39
43
|
_load_prompt_template,
|
|
@@ -41,6 +45,8 @@ from openai_codex_executor import (
|
|
|
41
45
|
_repo_root_for_prompt_loading,
|
|
42
46
|
_restore_repo_local_codex_files,
|
|
43
47
|
_resolve_codex_command_prefix,
|
|
48
|
+
_resolve_no_edit_watchdog_seconds,
|
|
49
|
+
_resolve_rollout_watchdog_seconds,
|
|
44
50
|
_unwrap_shell_wrapper_command,
|
|
45
51
|
_usage_from_trace_or_estimate,
|
|
46
52
|
)
|
|
@@ -925,6 +931,223 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
925
931
|
self.assertGreaterEqual(len(delta), 2)
|
|
926
932
|
self.assertEqual(effective, [])
|
|
927
933
|
|
|
934
|
+
def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
|
|
935
|
+
changed_paths = [
|
|
936
|
+
"node_modules/react/index.js",
|
|
937
|
+
"outputs/data/runtime.log",
|
|
938
|
+
"src/real-change.ts",
|
|
939
|
+
]
|
|
940
|
+
summary = _describe_non_publishable_paths(changed_paths, ["src/real-change.ts"])
|
|
941
|
+
|
|
942
|
+
self.assertIn("node_modules/react/index.js", summary)
|
|
943
|
+
self.assertIn("outputs/data/runtime.log", summary)
|
|
944
|
+
self.assertNotIn("src/real-change.ts", summary)
|
|
945
|
+
|
|
946
|
+
def test_web_review_tasks_use_faster_no_edit_watchdog(self) -> None:
|
|
947
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
948
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(
|
|
949
|
+
"Strengthen the repo-native web review path with a compact repo-native patch.",
|
|
950
|
+
1200,
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
self.assertEqual(watchdog_s, 240)
|
|
954
|
+
|
|
955
|
+
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
956
|
+
guidance = _build_no_edit_recovery_guidance(
|
|
957
|
+
"item.completed | still inspecting",
|
|
958
|
+
"node_modules, outputs/data/runtime.log",
|
|
959
|
+
)
|
|
960
|
+
|
|
961
|
+
self.assertIn("node_modules", guidance)
|
|
962
|
+
self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
|
|
963
|
+
self.assertIn("Previous Codex event trace excerpt", guidance)
|
|
964
|
+
|
|
965
|
+
def test_rollout_watchdog_is_earlier_than_web_review_no_edit_watchdog(self) -> None:
|
|
966
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
|
|
967
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(
|
|
968
|
+
"Strengthen the repo-native web review path.",
|
|
969
|
+
1200,
|
|
970
|
+
)
|
|
971
|
+
rollout_s = _resolve_rollout_watchdog_seconds(
|
|
972
|
+
"Strengthen the repo-native web review path.",
|
|
973
|
+
1200,
|
|
974
|
+
no_edit_s,
|
|
975
|
+
)
|
|
976
|
+
|
|
977
|
+
self.assertEqual(no_edit_s, 240)
|
|
978
|
+
self.assertEqual(rollout_s, 180)
|
|
979
|
+
|
|
980
|
+
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
|
|
981
|
+
trace = {
|
|
982
|
+
"summaries": [
|
|
983
|
+
"item.completed | The requested test path is not present in this checkout.",
|
|
984
|
+
"item.completed | I am checking the React Native test surface before choosing assertion style.",
|
|
985
|
+
],
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
self.assertIn("missing hinted files", _detect_offtrack_rollout(trace))
|
|
989
|
+
|
|
990
|
+
def test_rollout_recovery_guidance_points_to_repo_native_patch(self) -> None:
|
|
991
|
+
guidance = _build_rollout_recovery_guidance(
|
|
992
|
+
"the worker is spending time on missing hinted files",
|
|
993
|
+
"Codex event trace:\n- missing test path",
|
|
994
|
+
"node_modules",
|
|
995
|
+
)
|
|
996
|
+
|
|
997
|
+
self.assertIn("Rollout coach recovery", guidance)
|
|
998
|
+
self.assertIn("stale hint", guidance)
|
|
999
|
+
self.assertIn("repo-native", guidance)
|
|
1000
|
+
self.assertIn("node_modules", guidance)
|
|
1001
|
+
|
|
1002
|
+
def test_run_codex_task_retries_once_when_rollout_coach_fires(self) -> None:
|
|
1003
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-coach-") as temp_dir:
|
|
1004
|
+
repo = Path(temp_dir) / "repo"
|
|
1005
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1006
|
+
(repo / "README.md").write_text("# rollout coach repo\n", encoding="utf-8")
|
|
1007
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1008
|
+
subprocess.run(
|
|
1009
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1010
|
+
cwd=repo,
|
|
1011
|
+
check=True,
|
|
1012
|
+
capture_output=True,
|
|
1013
|
+
text=True,
|
|
1014
|
+
)
|
|
1015
|
+
subprocess.run(
|
|
1016
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1017
|
+
cwd=repo,
|
|
1018
|
+
check=True,
|
|
1019
|
+
capture_output=True,
|
|
1020
|
+
text=True,
|
|
1021
|
+
)
|
|
1022
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1023
|
+
subprocess.run(
|
|
1024
|
+
["git", "commit", "-m", "chore: seed rollout coach repo"],
|
|
1025
|
+
cwd=repo,
|
|
1026
|
+
check=True,
|
|
1027
|
+
capture_output=True,
|
|
1028
|
+
text=True,
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
stub_path = Path(temp_dir) / "fake_codex_rollout_coach.py"
|
|
1032
|
+
stub_path.write_text(
|
|
1033
|
+
"\n".join(
|
|
1034
|
+
[
|
|
1035
|
+
"from pathlib import Path",
|
|
1036
|
+
"import sys",
|
|
1037
|
+
"import time",
|
|
1038
|
+
"",
|
|
1039
|
+
"argv = sys.argv[1:]",
|
|
1040
|
+
"last_message_path = None",
|
|
1041
|
+
"for index, arg in enumerate(argv):",
|
|
1042
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
1043
|
+
" last_message_path = argv[index + 1]",
|
|
1044
|
+
" break",
|
|
1045
|
+
"",
|
|
1046
|
+
"prompt = sys.stdin.read()",
|
|
1047
|
+
"if 'Rollout coach recovery' in prompt:",
|
|
1048
|
+
" Path('scripts').mkdir(exist_ok=True)",
|
|
1049
|
+
" Path('scripts/web-review-path.txt').write_text('repo-native patch\\n', encoding='utf-8')",
|
|
1050
|
+
" if last_message_path:",
|
|
1051
|
+
" Path(last_message_path).write_text('Patched after rollout coach guidance.', encoding='utf-8')",
|
|
1052
|
+
" print('item.completed | Patched after rollout coach guidance.', flush=True)",
|
|
1053
|
+
" sys.exit(0)",
|
|
1054
|
+
"",
|
|
1055
|
+
"print('item.completed | The requested test path is not present in this checkout.', flush=True)",
|
|
1056
|
+
"print('item.completed | I am checking the React Native test surface before choosing assertion style.', flush=True)",
|
|
1057
|
+
"time.sleep(10)",
|
|
1058
|
+
]
|
|
1059
|
+
),
|
|
1060
|
+
encoding="utf-8",
|
|
1061
|
+
)
|
|
1062
|
+
|
|
1063
|
+
env_overrides = {
|
|
1064
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1065
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1066
|
+
"OPENAI_API_KEY": "pushpals-rollout-coach-test-key",
|
|
1067
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
|
|
1068
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
|
|
1069
|
+
"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
|
|
1070
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1071
|
+
}
|
|
1072
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1073
|
+
result = _run_codex_task(
|
|
1074
|
+
str(repo),
|
|
1075
|
+
"Strengthen the repo-native web review path.",
|
|
1076
|
+
[],
|
|
1077
|
+
)
|
|
1078
|
+
|
|
1079
|
+
self.assertTrue(result.get("ok"), result)
|
|
1080
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
1081
|
+
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
1082
|
+
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
1083
|
+
|
|
1084
|
+
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1085
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
1086
|
+
repo = Path(temp_dir) / "repo"
|
|
1087
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1088
|
+
(repo / "README.md").write_text("# artifact timeout repo\n", encoding="utf-8")
|
|
1089
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1090
|
+
subprocess.run(
|
|
1091
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1092
|
+
cwd=repo,
|
|
1093
|
+
check=True,
|
|
1094
|
+
capture_output=True,
|
|
1095
|
+
text=True,
|
|
1096
|
+
)
|
|
1097
|
+
subprocess.run(
|
|
1098
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1099
|
+
cwd=repo,
|
|
1100
|
+
check=True,
|
|
1101
|
+
capture_output=True,
|
|
1102
|
+
text=True,
|
|
1103
|
+
)
|
|
1104
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1105
|
+
subprocess.run(
|
|
1106
|
+
["git", "commit", "-m", "chore: seed artifact timeout repo"],
|
|
1107
|
+
cwd=repo,
|
|
1108
|
+
check=True,
|
|
1109
|
+
capture_output=True,
|
|
1110
|
+
text=True,
|
|
1111
|
+
)
|
|
1112
|
+
|
|
1113
|
+
stub_path = Path(temp_dir) / "fake_codex_artifact_timeout.py"
|
|
1114
|
+
stub_path.write_text(
|
|
1115
|
+
"\n".join(
|
|
1116
|
+
[
|
|
1117
|
+
"from pathlib import Path",
|
|
1118
|
+
"import sys",
|
|
1119
|
+
"import time",
|
|
1120
|
+
"",
|
|
1121
|
+
"sys.stdin.read()",
|
|
1122
|
+
"Path('node_modules').mkdir(exist_ok=True)",
|
|
1123
|
+
"Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
|
|
1124
|
+
"print('item.completed | Touched dependency artifact only.', flush=True)",
|
|
1125
|
+
"time.sleep(10)",
|
|
1126
|
+
]
|
|
1127
|
+
),
|
|
1128
|
+
encoding="utf-8",
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
env_overrides = {
|
|
1132
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1133
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1134
|
+
"OPENAI_API_KEY": "pushpals-artifact-timeout-test-key",
|
|
1135
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
|
|
1136
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
|
|
1137
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1138
|
+
}
|
|
1139
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1140
|
+
result = _run_codex_task(
|
|
1141
|
+
str(repo),
|
|
1142
|
+
"Strengthen the repo-native web review path.",
|
|
1143
|
+
[],
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
self.assertFalse(result.get("ok"), result)
|
|
1147
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
1148
|
+
self.assertIn("without publishable changes", str(result.get("summary") or ""))
|
|
1149
|
+
self.assertIn("node_modules", str(result.get("stderr") or ""))
|
|
1150
|
+
|
|
928
1151
|
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
|
|
929
1152
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
|
|
930
1153
|
repo = Path(temp_dir) / "repo"
|
|
@@ -906,6 +906,15 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
|
|
|
906
906
|
"Target path hints",
|
|
907
907
|
_string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
|
|
908
908
|
)
|
|
909
|
+
_append_list_guidance(
|
|
910
|
+
lines,
|
|
911
|
+
"Repo hint preflight diagnostics",
|
|
912
|
+
_string_list(planning.get("repoHintDiagnostics"), limit=8),
|
|
913
|
+
)
|
|
914
|
+
if _string_list(planning.get("repoHintDiagnostics"), limit=1):
|
|
915
|
+
lines.append(
|
|
916
|
+
"- If a hinted path is absent, treat it as stale guidance unless the task explicitly asks to create that path; prefer an existing repo-native owner or nearby test."
|
|
917
|
+
)
|
|
909
918
|
|
|
910
919
|
discovery = planning.get("discovery")
|
|
911
920
|
if isinstance(discovery, dict):
|