@pushpalsdev/cli 1.1.17 → 1.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +159 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +250 -6
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +223 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +9 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +47 -20
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +450 -5
|
@@ -32,8 +32,12 @@ from openai_codex_executor import (
|
|
|
32
32
|
_resolve_reasoning_effort,
|
|
33
33
|
_resolve_task_reasoning_effort,
|
|
34
34
|
_build_instruction,
|
|
35
|
+
_build_no_edit_recovery_guidance,
|
|
36
|
+
_build_rollout_recovery_guidance,
|
|
35
37
|
_collect_disallowed_shell_wrapper_rejections,
|
|
36
38
|
_codex_changed_paths,
|
|
39
|
+
_describe_non_publishable_paths,
|
|
40
|
+
_detect_offtrack_rollout,
|
|
37
41
|
_detect_codex_workaround_signal,
|
|
38
42
|
_extract_usage_counts,
|
|
39
43
|
_load_prompt_template,
|
|
@@ -41,6 +45,8 @@ from openai_codex_executor import (
|
|
|
41
45
|
_repo_root_for_prompt_loading,
|
|
42
46
|
_restore_repo_local_codex_files,
|
|
43
47
|
_resolve_codex_command_prefix,
|
|
48
|
+
_resolve_no_edit_watchdog_seconds,
|
|
49
|
+
_resolve_rollout_watchdog_seconds,
|
|
44
50
|
_unwrap_shell_wrapper_command,
|
|
45
51
|
_usage_from_trace_or_estimate,
|
|
46
52
|
)
|
|
@@ -925,6 +931,223 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
925
931
|
self.assertGreaterEqual(len(delta), 2)
|
|
926
932
|
self.assertEqual(effective, [])
|
|
927
933
|
|
|
934
|
+
def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
|
|
935
|
+
changed_paths = [
|
|
936
|
+
"node_modules/react/index.js",
|
|
937
|
+
"outputs/data/runtime.log",
|
|
938
|
+
"src/real-change.ts",
|
|
939
|
+
]
|
|
940
|
+
summary = _describe_non_publishable_paths(changed_paths, ["src/real-change.ts"])
|
|
941
|
+
|
|
942
|
+
self.assertIn("node_modules/react/index.js", summary)
|
|
943
|
+
self.assertIn("outputs/data/runtime.log", summary)
|
|
944
|
+
self.assertNotIn("src/real-change.ts", summary)
|
|
945
|
+
|
|
946
|
+
def test_web_review_tasks_use_faster_no_edit_watchdog(self) -> None:
|
|
947
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
948
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(
|
|
949
|
+
"Strengthen the repo-native web review path with a compact repo-native patch.",
|
|
950
|
+
1200,
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
self.assertEqual(watchdog_s, 240)
|
|
954
|
+
|
|
955
|
+
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
956
|
+
guidance = _build_no_edit_recovery_guidance(
|
|
957
|
+
"item.completed | still inspecting",
|
|
958
|
+
"node_modules, outputs/data/runtime.log",
|
|
959
|
+
)
|
|
960
|
+
|
|
961
|
+
self.assertIn("node_modules", guidance)
|
|
962
|
+
self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
|
|
963
|
+
self.assertIn("Previous Codex event trace excerpt", guidance)
|
|
964
|
+
|
|
965
|
+
def test_rollout_watchdog_is_earlier_than_web_review_no_edit_watchdog(self) -> None:
|
|
966
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
|
|
967
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(
|
|
968
|
+
"Strengthen the repo-native web review path.",
|
|
969
|
+
1200,
|
|
970
|
+
)
|
|
971
|
+
rollout_s = _resolve_rollout_watchdog_seconds(
|
|
972
|
+
"Strengthen the repo-native web review path.",
|
|
973
|
+
1200,
|
|
974
|
+
no_edit_s,
|
|
975
|
+
)
|
|
976
|
+
|
|
977
|
+
self.assertEqual(no_edit_s, 240)
|
|
978
|
+
self.assertEqual(rollout_s, 180)
|
|
979
|
+
|
|
980
|
+
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
|
|
981
|
+
trace = {
|
|
982
|
+
"summaries": [
|
|
983
|
+
"item.completed | The requested test path is not present in this checkout.",
|
|
984
|
+
"item.completed | I am checking the React Native test surface before choosing assertion style.",
|
|
985
|
+
],
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
self.assertIn("missing hinted files", _detect_offtrack_rollout(trace))
|
|
989
|
+
|
|
990
|
+
def test_rollout_recovery_guidance_points_to_repo_native_patch(self) -> None:
|
|
991
|
+
guidance = _build_rollout_recovery_guidance(
|
|
992
|
+
"the worker is spending time on missing hinted files",
|
|
993
|
+
"Codex event trace:\n- missing test path",
|
|
994
|
+
"node_modules",
|
|
995
|
+
)
|
|
996
|
+
|
|
997
|
+
self.assertIn("Rollout coach recovery", guidance)
|
|
998
|
+
self.assertIn("stale hint", guidance)
|
|
999
|
+
self.assertIn("repo-native", guidance)
|
|
1000
|
+
self.assertIn("node_modules", guidance)
|
|
1001
|
+
|
|
1002
|
+
def test_run_codex_task_retries_once_when_rollout_coach_fires(self) -> None:
|
|
1003
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-rollout-coach-") as temp_dir:
|
|
1004
|
+
repo = Path(temp_dir) / "repo"
|
|
1005
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1006
|
+
(repo / "README.md").write_text("# rollout coach repo\n", encoding="utf-8")
|
|
1007
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1008
|
+
subprocess.run(
|
|
1009
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1010
|
+
cwd=repo,
|
|
1011
|
+
check=True,
|
|
1012
|
+
capture_output=True,
|
|
1013
|
+
text=True,
|
|
1014
|
+
)
|
|
1015
|
+
subprocess.run(
|
|
1016
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1017
|
+
cwd=repo,
|
|
1018
|
+
check=True,
|
|
1019
|
+
capture_output=True,
|
|
1020
|
+
text=True,
|
|
1021
|
+
)
|
|
1022
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1023
|
+
subprocess.run(
|
|
1024
|
+
["git", "commit", "-m", "chore: seed rollout coach repo"],
|
|
1025
|
+
cwd=repo,
|
|
1026
|
+
check=True,
|
|
1027
|
+
capture_output=True,
|
|
1028
|
+
text=True,
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
stub_path = Path(temp_dir) / "fake_codex_rollout_coach.py"
|
|
1032
|
+
stub_path.write_text(
|
|
1033
|
+
"\n".join(
|
|
1034
|
+
[
|
|
1035
|
+
"from pathlib import Path",
|
|
1036
|
+
"import sys",
|
|
1037
|
+
"import time",
|
|
1038
|
+
"",
|
|
1039
|
+
"argv = sys.argv[1:]",
|
|
1040
|
+
"last_message_path = None",
|
|
1041
|
+
"for index, arg in enumerate(argv):",
|
|
1042
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
1043
|
+
" last_message_path = argv[index + 1]",
|
|
1044
|
+
" break",
|
|
1045
|
+
"",
|
|
1046
|
+
"prompt = sys.stdin.read()",
|
|
1047
|
+
"if 'Rollout coach recovery' in prompt:",
|
|
1048
|
+
" Path('scripts').mkdir(exist_ok=True)",
|
|
1049
|
+
" Path('scripts/web-review-path.txt').write_text('repo-native patch\\n', encoding='utf-8')",
|
|
1050
|
+
" if last_message_path:",
|
|
1051
|
+
" Path(last_message_path).write_text('Patched after rollout coach guidance.', encoding='utf-8')",
|
|
1052
|
+
" print('item.completed | Patched after rollout coach guidance.', flush=True)",
|
|
1053
|
+
" sys.exit(0)",
|
|
1054
|
+
"",
|
|
1055
|
+
"print('item.completed | The requested test path is not present in this checkout.', flush=True)",
|
|
1056
|
+
"print('item.completed | I am checking the React Native test surface before choosing assertion style.', flush=True)",
|
|
1057
|
+
"time.sleep(10)",
|
|
1058
|
+
]
|
|
1059
|
+
),
|
|
1060
|
+
encoding="utf-8",
|
|
1061
|
+
)
|
|
1062
|
+
|
|
1063
|
+
env_overrides = {
|
|
1064
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1065
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1066
|
+
"OPENAI_API_KEY": "pushpals-rollout-coach-test-key",
|
|
1067
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "700",
|
|
1068
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "10",
|
|
1069
|
+
"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": "1",
|
|
1070
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1071
|
+
}
|
|
1072
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1073
|
+
result = _run_codex_task(
|
|
1074
|
+
str(repo),
|
|
1075
|
+
"Strengthen the repo-native web review path.",
|
|
1076
|
+
[],
|
|
1077
|
+
)
|
|
1078
|
+
|
|
1079
|
+
self.assertTrue(result.get("ok"), result)
|
|
1080
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
1081
|
+
self.assertIn("Patched after rollout coach guidance", str(result.get("stdout") or ""))
|
|
1082
|
+
self.assertIn("scripts/", str(result.get("stdout") or ""))
|
|
1083
|
+
|
|
1084
|
+
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1085
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
1086
|
+
repo = Path(temp_dir) / "repo"
|
|
1087
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1088
|
+
(repo / "README.md").write_text("# artifact timeout repo\n", encoding="utf-8")
|
|
1089
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1090
|
+
subprocess.run(
|
|
1091
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1092
|
+
cwd=repo,
|
|
1093
|
+
check=True,
|
|
1094
|
+
capture_output=True,
|
|
1095
|
+
text=True,
|
|
1096
|
+
)
|
|
1097
|
+
subprocess.run(
|
|
1098
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1099
|
+
cwd=repo,
|
|
1100
|
+
check=True,
|
|
1101
|
+
capture_output=True,
|
|
1102
|
+
text=True,
|
|
1103
|
+
)
|
|
1104
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1105
|
+
subprocess.run(
|
|
1106
|
+
["git", "commit", "-m", "chore: seed artifact timeout repo"],
|
|
1107
|
+
cwd=repo,
|
|
1108
|
+
check=True,
|
|
1109
|
+
capture_output=True,
|
|
1110
|
+
text=True,
|
|
1111
|
+
)
|
|
1112
|
+
|
|
1113
|
+
stub_path = Path(temp_dir) / "fake_codex_artifact_timeout.py"
|
|
1114
|
+
stub_path.write_text(
|
|
1115
|
+
"\n".join(
|
|
1116
|
+
[
|
|
1117
|
+
"from pathlib import Path",
|
|
1118
|
+
"import sys",
|
|
1119
|
+
"import time",
|
|
1120
|
+
"",
|
|
1121
|
+
"sys.stdin.read()",
|
|
1122
|
+
"Path('node_modules').mkdir(exist_ok=True)",
|
|
1123
|
+
"Path('node_modules/linked.txt').write_text('artifact only\\n', encoding='utf-8')",
|
|
1124
|
+
"print('item.completed | Touched dependency artifact only.', flush=True)",
|
|
1125
|
+
"time.sleep(10)",
|
|
1126
|
+
]
|
|
1127
|
+
),
|
|
1128
|
+
encoding="utf-8",
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
env_overrides = {
|
|
1132
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1133
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1134
|
+
"OPENAI_API_KEY": "pushpals-artifact-timeout-test-key",
|
|
1135
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
|
|
1136
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
|
|
1137
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1138
|
+
}
|
|
1139
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1140
|
+
result = _run_codex_task(
|
|
1141
|
+
str(repo),
|
|
1142
|
+
"Strengthen the repo-native web review path.",
|
|
1143
|
+
[],
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
self.assertFalse(result.get("ok"), result)
|
|
1147
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
1148
|
+
self.assertIn("without publishable changes", str(result.get("summary") or ""))
|
|
1149
|
+
self.assertIn("node_modules", str(result.get("stderr") or ""))
|
|
1150
|
+
|
|
928
1151
|
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
|
|
929
1152
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
|
|
930
1153
|
repo = Path(temp_dir) / "repo"
|
|
@@ -906,6 +906,15 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
|
|
|
906
906
|
"Target path hints",
|
|
907
907
|
_string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
|
|
908
908
|
)
|
|
909
|
+
_append_list_guidance(
|
|
910
|
+
lines,
|
|
911
|
+
"Repo hint preflight diagnostics",
|
|
912
|
+
_string_list(planning.get("repoHintDiagnostics"), limit=8),
|
|
913
|
+
)
|
|
914
|
+
if _string_list(planning.get("repoHintDiagnostics"), limit=1):
|
|
915
|
+
lines.append(
|
|
916
|
+
"- If a hinted path is absent, treat it as stale guidance unless the task explicitly asks to create that path; prefer an existing repo-native owner or nearby test."
|
|
917
|
+
)
|
|
909
918
|
|
|
910
919
|
discovery = planning.get("discovery")
|
|
911
920
|
if isinstance(discovery, dict):
|
|
@@ -1213,16 +1213,7 @@ export class DockerExecutor {
|
|
|
1213
1213
|
);
|
|
1214
1214
|
await this.ensureWorktreeDependencyArtifacts(containerWorktreePath, onLog);
|
|
1215
1215
|
|
|
1216
|
-
const args
|
|
1217
|
-
"exec",
|
|
1218
|
-
"-w",
|
|
1219
|
-
containerWorktreePath,
|
|
1220
|
-
this.warmContainerName,
|
|
1221
|
-
"bun",
|
|
1222
|
-
"run",
|
|
1223
|
-
"/workspace/apps/workerpals/src/job_runner.ts",
|
|
1224
|
-
"--spec-stdin",
|
|
1225
|
-
];
|
|
1216
|
+
const args = this.buildWarmContainerExecArgs(containerWorktreePath);
|
|
1226
1217
|
|
|
1227
1218
|
console.log(
|
|
1228
1219
|
`[DockerExecutor] Running job in warm container: ${this.warmContainerName} (${this.executionConfigSummary()})`,
|
|
@@ -1321,26 +1312,62 @@ export class DockerExecutor {
|
|
|
1321
1312
|
return result;
|
|
1322
1313
|
}
|
|
1323
1314
|
|
|
1315
|
+
private buildWarmContainerExecArgs(containerWorktreePath: string): string[] {
|
|
1316
|
+
return [
|
|
1317
|
+
"exec",
|
|
1318
|
+
"-i",
|
|
1319
|
+
"-w",
|
|
1320
|
+
containerWorktreePath,
|
|
1321
|
+
this.warmContainerName,
|
|
1322
|
+
"bun",
|
|
1323
|
+
"run",
|
|
1324
|
+
"/workspace/apps/workerpals/src/job_runner.ts",
|
|
1325
|
+
"--spec-stdin",
|
|
1326
|
+
];
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1324
1329
|
private async writeJobSpecToStdin(
|
|
1325
1330
|
proc: ReturnType<typeof Bun.spawn>,
|
|
1326
1331
|
base64Spec: string,
|
|
1327
1332
|
): Promise<void> {
|
|
1328
|
-
const stdin = proc.stdin as
|
|
1333
|
+
const stdin = proc.stdin as
|
|
1334
|
+
| WritableStream<Uint8Array>
|
|
1335
|
+
| {
|
|
1336
|
+
write?: (chunk: Uint8Array | string) => unknown;
|
|
1337
|
+
end?: () => unknown;
|
|
1338
|
+
flush?: () => unknown;
|
|
1339
|
+
}
|
|
1340
|
+
| undefined;
|
|
1329
1341
|
if (!stdin) {
|
|
1330
1342
|
throw new Error("docker exec stdin pipe was not available");
|
|
1331
1343
|
}
|
|
1332
|
-
const
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
await writer.close();
|
|
1336
|
-
} catch (err) {
|
|
1344
|
+
const bytes = new TextEncoder().encode(base64Spec);
|
|
1345
|
+
if ("getWriter" in stdin && typeof stdin.getWriter === "function") {
|
|
1346
|
+
const writer = stdin.getWriter();
|
|
1337
1347
|
try {
|
|
1338
|
-
await writer.
|
|
1339
|
-
|
|
1340
|
-
|
|
1348
|
+
await writer.write(bytes);
|
|
1349
|
+
await writer.close();
|
|
1350
|
+
} catch (err) {
|
|
1351
|
+
try {
|
|
1352
|
+
await writer.abort(err);
|
|
1353
|
+
} catch {
|
|
1354
|
+
// Ignore abort failures; the original write error is more useful.
|
|
1355
|
+
}
|
|
1356
|
+
throw err;
|
|
1341
1357
|
}
|
|
1342
|
-
|
|
1358
|
+
return;
|
|
1343
1359
|
}
|
|
1360
|
+
|
|
1361
|
+
if (typeof stdin.write === "function" && typeof stdin.end === "function") {
|
|
1362
|
+
await stdin.write(bytes);
|
|
1363
|
+
if (typeof stdin.flush === "function") {
|
|
1364
|
+
await stdin.flush();
|
|
1365
|
+
}
|
|
1366
|
+
await stdin.end();
|
|
1367
|
+
return;
|
|
1368
|
+
}
|
|
1369
|
+
|
|
1370
|
+
throw new Error("docker exec stdin pipe does not support write/end or getWriter");
|
|
1344
1371
|
}
|
|
1345
1372
|
|
|
1346
1373
|
private async ensureWorktreeDependencyArtifacts(
|