@pushpalsdev/cli 1.1.12 → 1.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +15 -5
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +176 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +276 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +64 -7
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +59 -2
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +93 -1
package/dist/pushpals-cli.js
CHANGED
|
@@ -5308,22 +5308,32 @@ function shouldSuppressCliSessionJobLogLine(line) {
|
|
|
5308
5308
|
return true;
|
|
5309
5309
|
if (/^(___RESULT___|__PUSHPALS_OH_RESULT__)\b/.test(text))
|
|
5310
5310
|
return true;
|
|
5311
|
-
if (/^\[DockerExecutor\]\s+Linked worktree dependency artifact/i.test(text))
|
|
5311
|
+
if (/^\[DockerExecutor\]\s+(?:Linked worktree dependency artifact|Capped job timeout|Extended job timeout)/i.test(text)) {
|
|
5312
5312
|
return true;
|
|
5313
|
-
|
|
5313
|
+
}
|
|
5314
|
+
if (/^\[JobRunner\]\s+Starting job\b/i.test(text))
|
|
5315
|
+
return true;
|
|
5316
|
+
if (/^\[QualityGate\]\s+(?:Policy:|Gates:)/i.test(text))
|
|
5317
|
+
return true;
|
|
5318
|
+
if (/^\[(?:Openai_codex|OpenHands|Miniswe)Executor\]\s+(?:Spawning\b|Timeout reached\b|Still running\b|Process did not exit after graceful timeout termination\b)/i.test(text)) {
|
|
5319
|
+
return true;
|
|
5320
|
+
}
|
|
5321
|
+
if (/^\[OpenAICodexExecutor\]\s+(?:Planner guidance|Codex auth mode|ChatGPT auth mode|Starting codex exec|codex exec finished|Codex JSON stream captured|Codex stdout captured|No reasoning-like|Reasoning-like event|Usage observed|Temporarily masked repo-local|Timeout reached after|Process did not exit after graceful timeout termination)/i.test(text)) {
|
|
5314
5322
|
return true;
|
|
5315
5323
|
}
|
|
5316
5324
|
if (/^\[OpenAICodexExecutor\]\s+codex exec still running\b/i.test(text))
|
|
5317
5325
|
return true;
|
|
5326
|
+
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:No reasoning-like|Reasoning-like|turn\.failed|turn\.completed|error\s+\|)/i.test(text)) {
|
|
5327
|
+
return true;
|
|
5328
|
+
}
|
|
5318
5329
|
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:thread|turn)\.started\b/i.test(text)) {
|
|
5319
5330
|
return true;
|
|
5320
5331
|
}
|
|
5321
5332
|
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.started\b/i.test(text))
|
|
5322
5333
|
return true;
|
|
5323
|
-
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.completed\
|
|
5324
|
-
return true;
|
|
5325
|
-
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.updated\s*$/i.test(text))
|
|
5334
|
+
if (/^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.(?:completed|updated)\b/i.test(text)) {
|
|
5326
5335
|
return true;
|
|
5336
|
+
}
|
|
5327
5337
|
if (/^\[OpenAICodexExecutor\]\s+\[stderr\].*codex_core::tools::router: error=exec_command failed/i.test(text)) {
|
|
5328
5338
|
return true;
|
|
5329
5339
|
}
|
package/package.json
CHANGED
|
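package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py
CHANGED
|
@@ -103,6 +103,9 @@ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}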
|
|
|
103
103
|
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
104
104
|
_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
105
105
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
106
|
+
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
107
|
+
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
108
|
+
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
|
|
106
109
|
|
|
107
110
|
|
|
108
111
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -559,12 +562,99 @@ def _resolve_reasoning_effort(config: OpenAICodexRuntimeConfig, model: str = DEF
|
|
|
559
562
|
return default_effort
|
|
560
563
|
|
|
561
564
|
|
|
565
|
+
def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
566
|
+
text = str(prompt or "").lower()
|
|
567
|
+
small_markers = (
|
|
568
|
+
"risk=low",
|
|
569
|
+
"small scoped",
|
|
570
|
+
"small or medium repo tasks",
|
|
571
|
+
"compact",
|
|
572
|
+
"low-risk",
|
|
573
|
+
"low risk",
|
|
574
|
+
"route-entry",
|
|
575
|
+
"first-entry",
|
|
576
|
+
"home shell",
|
|
577
|
+
"startup shell",
|
|
578
|
+
"shell polish",
|
|
579
|
+
"visual/affordance",
|
|
580
|
+
)
|
|
581
|
+
heavy_markers = (
|
|
582
|
+
"merge-conflict",
|
|
583
|
+
"merge conflict",
|
|
584
|
+
"rebase",
|
|
585
|
+
"broad refactor",
|
|
586
|
+
"migration",
|
|
587
|
+
"security",
|
|
588
|
+
"architecture",
|
|
589
|
+
"deep debug",
|
|
590
|
+
)
|
|
591
|
+
return any(marker in text for marker in small_markers) and not any(
|
|
592
|
+
marker in text for marker in heavy_markers
|
|
593
|
+
)
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def _resolve_task_reasoning_effort(
|
|
597
|
+
configured_effort: str,
|
|
598
|
+
prompt: str,
|
|
599
|
+
model: str = DEFAULT_CODEX_MODEL,
|
|
600
|
+
) -> str:
|
|
601
|
+
effort = configured_effort if configured_effort in _VALID_REASONING_EFFORTS else "high"
|
|
602
|
+
if not _looks_like_small_task_prompt(prompt):
|
|
603
|
+
return effort
|
|
604
|
+
if effort == "xhigh":
|
|
605
|
+
log.info(
|
|
606
|
+
f"Routing compact task on model {model!r} from reasoning_effort='xhigh' to 'high' for faster convergence."
|
|
607
|
+
)
|
|
608
|
+
return "high"
|
|
609
|
+
return effort
|
|
610
|
+
|
|
611
|
+
|
|
562
612
|
def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) -> int:
|
|
563
613
|
interval = to_int(config.progress_log_interval_s, 30)
|
|
564
614
|
# Avoid noisy logs (<30s) and stale logs (>120s).
|
|
565
615
|
return max(30, min(120, interval))
|
|
566
616
|
|
|
567
617
|
|
|
618
|
+
def _resolve_no_edit_watchdog_seconds(
|
|
619
|
+
prompt: str,
|
|
620
|
+
communicate_timeout_s: Optional[int],
|
|
621
|
+
) -> Optional[int]:
|
|
622
|
+
if not communicate_timeout_s:
|
|
623
|
+
return None
|
|
624
|
+
|
|
625
|
+
raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S", "").strip()
|
|
626
|
+
if raw:
|
|
627
|
+
if raw == "0":
|
|
628
|
+
return None
|
|
629
|
+
parsed = _to_positive_int(raw)
|
|
630
|
+
if parsed is None:
|
|
631
|
+
log.info(
|
|
632
|
+
f"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S={raw!r}; using default no-edit watchdog."
|
|
633
|
+
)
|
|
634
|
+
else:
|
|
635
|
+
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
636
|
+
|
|
637
|
+
if communicate_timeout_s < 600:
|
|
638
|
+
return None
|
|
639
|
+
|
|
640
|
+
default_s = _SMALL_TASK_NO_EDIT_WATCHDOG_S if _looks_like_small_task_prompt(prompt) else _DEFAULT_NO_EDIT_WATCHDOG_S
|
|
641
|
+
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def _build_no_edit_recovery_guidance(trace_excerpt: str) -> str:
|
|
645
|
+
lines = [
|
|
646
|
+
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
647
|
+
"Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
|
|
648
|
+
"Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
|
|
649
|
+
"Use existing tests or a narrow helper/style assertion; do not create broad React Native mocks or a new full render harness for a compact shell/visual polish task.",
|
|
650
|
+
"Run at most one focused fast validation check before final diff review; let PushPals ValidationGate own long required/browser validation.",
|
|
651
|
+
]
|
|
652
|
+
if trace_excerpt:
|
|
653
|
+
lines.append("Previous Codex event trace excerpt:")
|
|
654
|
+
lines.append(trace_excerpt)
|
|
655
|
+
return "\n".join(lines)
|
|
656
|
+
|
|
657
|
+
|
|
568
658
|
def _normalize_auth_mode(raw: str) -> str:
|
|
569
659
|
lowered = (raw or "").strip().lower()
|
|
570
660
|
aliases = {
|
|
@@ -1506,6 +1596,7 @@ def _run_codex_task(
|
|
|
1506
1596
|
*,
|
|
1507
1597
|
wrapper_recovery_attempt: int = 0,
|
|
1508
1598
|
model_compatibility_recovery_attempt: int = 0,
|
|
1599
|
+
no_edit_recovery_attempt: int = 0,
|
|
1509
1600
|
model_override: Optional[str] = None,
|
|
1510
1601
|
baseline_changes: Optional[List[str]] = None,
|
|
1511
1602
|
) -> Dict[str, Any]:
|
|
@@ -1567,10 +1658,14 @@ def _run_codex_task(
|
|
|
1567
1658
|
)
|
|
1568
1659
|
# JSON event output is noisy by default; prefer plain text + output-last-message.
|
|
1569
1660
|
use_json = runtime_config.json_output
|
|
1570
|
-
reasoning_effort = _resolve_reasoning_effort(runtime_config, model)
|
|
1571
1661
|
communicate_timeout_s = _resolve_communicate_timeout_seconds(runtime_config)
|
|
1572
1662
|
effective_supplemental_guidance = _augment_supplemental_guidance(supplemental_guidance)
|
|
1573
1663
|
prompt = _build_instruction(instruction, effective_supplemental_guidance)
|
|
1664
|
+
reasoning_effort = _resolve_task_reasoning_effort(
|
|
1665
|
+
_resolve_reasoning_effort(runtime_config, model),
|
|
1666
|
+
prompt,
|
|
1667
|
+
model,
|
|
1668
|
+
)
|
|
1574
1669
|
baseline_snapshot = list(baseline_changes) if baseline_changes is not None else summarize_git_changes(repo)
|
|
1575
1670
|
|
|
1576
1671
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
|
|
@@ -1793,7 +1888,18 @@ def _run_codex_task(
|
|
|
1793
1888
|
)
|
|
1794
1889
|
next_progress_at = started_at + float(progress_interval_s)
|
|
1795
1890
|
timed_out = False
|
|
1891
|
+
no_edit_watchdog_fired = False
|
|
1796
1892
|
command_policy_rejection_loop = False
|
|
1893
|
+
no_edit_watchdog_s = (
|
|
1894
|
+
_resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
|
|
1895
|
+
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
1896
|
+
else None
|
|
1897
|
+
)
|
|
1898
|
+
no_edit_deadline = (
|
|
1899
|
+
started_at + float(no_edit_watchdog_s)
|
|
1900
|
+
if no_edit_watchdog_s is not None
|
|
1901
|
+
else None
|
|
1902
|
+
)
|
|
1797
1903
|
|
|
1798
1904
|
while proc.poll() is None:
|
|
1799
1905
|
now = time.monotonic()
|
|
@@ -1802,6 +1908,17 @@ def _run_codex_task(
|
|
|
1802
1908
|
_terminate_active_child()
|
|
1803
1909
|
break
|
|
1804
1910
|
|
|
1911
|
+
if no_edit_deadline is not None and now >= no_edit_deadline:
|
|
1912
|
+
_, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
1913
|
+
if not effective_paths:
|
|
1914
|
+
no_edit_watchdog_fired = True
|
|
1915
|
+
log.info(
|
|
1916
|
+
f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes; retrying with patch-first guidance."
|
|
1917
|
+
)
|
|
1918
|
+
_terminate_active_child()
|
|
1919
|
+
break
|
|
1920
|
+
no_edit_deadline = None
|
|
1921
|
+
|
|
1805
1922
|
with trace_lock:
|
|
1806
1923
|
wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
|
|
1807
1924
|
if wrapper_rejections >= 3:
|
|
@@ -1869,6 +1986,34 @@ def _run_codex_task(
|
|
|
1869
1986
|
continue
|
|
1870
1987
|
rejected_shell_wrappers.append(text)
|
|
1871
1988
|
|
|
1989
|
+
if no_edit_watchdog_fired:
|
|
1990
|
+
if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
|
|
1991
|
+
retry_guidance = [
|
|
1992
|
+
*supplemental_guidance,
|
|
1993
|
+
_build_no_edit_recovery_guidance(trace_excerpt),
|
|
1994
|
+
]
|
|
1995
|
+
return _run_codex_task(
|
|
1996
|
+
repo,
|
|
1997
|
+
instruction,
|
|
1998
|
+
retry_guidance,
|
|
1999
|
+
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2000
|
+
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2001
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
|
|
2002
|
+
model_override=model_override,
|
|
2003
|
+
baseline_changes=baseline_snapshot,
|
|
2004
|
+
)
|
|
2005
|
+
detail = "Codex spent too much of the execution budget without producing publishable file changes."
|
|
2006
|
+
if trace_excerpt:
|
|
2007
|
+
detail = f"{detail}\n{trace_excerpt}"
|
|
2008
|
+
return {
|
|
2009
|
+
"ok": False,
|
|
2010
|
+
"summary": "openai_codex made no publishable changes before the no-edit watchdog",
|
|
2011
|
+
"stdout": _truncate(stdout),
|
|
2012
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2013
|
+
"exitCode": 124,
|
|
2014
|
+
"usage": usage,
|
|
2015
|
+
}
|
|
2016
|
+
|
|
1872
2017
|
if timed_out:
|
|
1873
2018
|
detail = (
|
|
1874
2019
|
f"codex exec timed out after {communicate_timeout_s}s"
|
|
@@ -1877,6 +2022,34 @@ def _run_codex_task(
|
|
|
1877
2022
|
)
|
|
1878
2023
|
if trace_excerpt:
|
|
1879
2024
|
detail = f"{detail}\n{trace_excerpt}"
|
|
2025
|
+
_, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2026
|
+
if effective_paths:
|
|
2027
|
+
last_message = _read_text_if_exists(last_message_path)
|
|
2028
|
+
log_git_status(repo, log)
|
|
2029
|
+
prefix = (
|
|
2030
|
+
"Codex reached the execution timeout after producing publishable file "
|
|
2031
|
+
"changes. Returning the partial patch to QualityGate/ValidationGate "
|
|
2032
|
+
"instead of discarding it; any incomplete edit will be caught by the "
|
|
2033
|
+
"normal gates or revision loop."
|
|
2034
|
+
)
|
|
2035
|
+
return {
|
|
2036
|
+
"ok": True,
|
|
2037
|
+
"summary": (
|
|
2038
|
+
f"openai_codex timed out after modifying {len(effective_paths)} "
|
|
2039
|
+
"publishable file(s)"
|
|
2040
|
+
),
|
|
2041
|
+
"stdout": _truncate(
|
|
2042
|
+
_build_success_stdout(
|
|
2043
|
+
effective_paths=effective_paths,
|
|
2044
|
+
last_message=last_message,
|
|
2045
|
+
trace_excerpt=trace_excerpt,
|
|
2046
|
+
prefix=prefix,
|
|
2047
|
+
)
|
|
2048
|
+
),
|
|
2049
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2050
|
+
"exitCode": 0,
|
|
2051
|
+
"usage": usage,
|
|
2052
|
+
}
|
|
1880
2053
|
return {
|
|
1881
2054
|
"ok": False,
|
|
1882
2055
|
"summary": "openai_codex execution timed out",
|
|
@@ -1975,6 +2148,7 @@ def _run_codex_task(
|
|
|
1975
2148
|
],
|
|
1976
2149
|
wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
|
|
1977
2150
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2151
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
1978
2152
|
model_override=model_override,
|
|
1979
2153
|
baseline_changes=baseline_snapshot,
|
|
1980
2154
|
)
|
|
@@ -2057,6 +2231,7 @@ def _run_codex_task(
|
|
|
2057
2231
|
effective_supplemental_guidance,
|
|
2058
2232
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2059
2233
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
|
|
2234
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2060
2235
|
model_override=LEGACY_CODEX_MODEL_FALLBACK,
|
|
2061
2236
|
baseline_changes=baseline_snapshot,
|
|
2062
2237
|
)
|
|
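package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py
CHANGED
|
@@ -30,6 +30,7 @@ from openai_codex_executor import (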
|
|
|
30
30
|
_build_wrapper_recovery_guidance,
|
|
31
31
|
_run_codex_task,
|
|
32
32
|
_resolve_reasoning_effort,
|
|
33
|
+
_resolve_task_reasoning_effort,
|
|
33
34
|
_build_instruction,
|
|
34
35
|
_collect_disallowed_shell_wrapper_rejections,
|
|
35
36
|
_codex_changed_paths,
|
|
@@ -202,6 +203,24 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
202
203
|
)
|
|
203
204
|
self.assertEqual(_resolve_reasoning_effort(cfg, model="gpt-6-preview"), "xhigh")
|
|
204
205
|
|
|
206
|
+
def test_task_reasoning_effort_routes_compact_shell_tasks_to_high(self) -> None:
|
|
207
|
+
prompt = (
|
|
208
|
+
"Task planning contract from PushPals:\n"
|
|
209
|
+
"- Planning summary: intent=code_change, risk=low, priority=normal\n"
|
|
210
|
+
"- Route-entry/shell task rule: inspect the hinted route wrapper, then patch the owner.\n"
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
self.assertEqual(_resolve_task_reasoning_effort("xhigh", prompt, "gpt-5.5"), "high")
|
|
214
|
+
self.assertEqual(_resolve_task_reasoning_effort("high", prompt, "gpt-5.5"), "high")
|
|
215
|
+
self.assertEqual(
|
|
216
|
+
_resolve_task_reasoning_effort(
|
|
217
|
+
"xhigh",
|
|
218
|
+
"Merge-conflict rebase task with risk=low wording in reviewer text.",
|
|
219
|
+
"gpt-5.5",
|
|
220
|
+
),
|
|
221
|
+
"xhigh",
|
|
222
|
+
)
|
|
223
|
+
|
|
205
224
|
def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
|
|
206
225
|
import executor_base
|
|
207
226
|
|
|
@@ -344,6 +363,43 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
344
363
|
self.assertIn("prefer pure helper/state/style-prop tests", guidance)
|
|
345
364
|
self.assertIn("full React Native/component render regression", guidance)
|
|
346
365
|
|
|
366
|
+
def test_parse_payload_adds_route_shell_convergence_guidance(self) -> None:
|
|
367
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-shell-guidance-") as temp_dir:
|
|
368
|
+
repo = Path(temp_dir) / "repo"
|
|
369
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
370
|
+
payload = {
|
|
371
|
+
"kind": "task.execute",
|
|
372
|
+
"repo": str(repo),
|
|
373
|
+
"params": {
|
|
374
|
+
"instruction": (
|
|
375
|
+
"Polish the first-entry shell. Start with app/_layout.tsx and "
|
|
376
|
+
"app/index.tsx, then tighten the home/settings route-entry affordance."
|
|
377
|
+
),
|
|
378
|
+
"schemaVersion": 2,
|
|
379
|
+
"planning": {
|
|
380
|
+
"intent": "code_change",
|
|
381
|
+
"riskLevel": "low",
|
|
382
|
+
"queuePriority": "normal",
|
|
383
|
+
"queueWaitBudgetMs": 90_000,
|
|
384
|
+
"executionBudgetMs": 1_200_000,
|
|
385
|
+
"finalizationBudgetMs": 120_000,
|
|
386
|
+
"scope": {"readAnywhere": True, "writeAllowed": True},
|
|
387
|
+
"targetPaths": ["app/_layout.tsx", "app/index.tsx"],
|
|
388
|
+
"acceptanceCriteria": ["Home shell feels coherent with the match UI"],
|
|
389
|
+
},
|
|
390
|
+
},
|
|
391
|
+
}
|
|
392
|
+
encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
|
|
393
|
+
|
|
394
|
+
task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
|
|
395
|
+
guidance = "\n".join(task.supplemental_guidance)
|
|
396
|
+
|
|
397
|
+
self.assertIn("Route-entry/shell task rule", guidance)
|
|
398
|
+
self.assertIn("route is thin", guidance)
|
|
399
|
+
self.assertIn("Do not keep re-reading navigation topology", guidance)
|
|
400
|
+
self.assertIn("missing test infrastructure", guidance)
|
|
401
|
+
self.assertIn("make one small visual/affordance patch", guidance)
|
|
402
|
+
|
|
347
403
|
def test_detects_codex_workaround_signals(self) -> None:
|
|
348
404
|
signal = _detect_codex_workaround_signal(
|
|
349
405
|
"Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
|
|
@@ -610,6 +666,226 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
610
666
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
611
667
|
self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
|
|
612
668
|
|
|
669
|
+
def test_run_codex_task_hands_changed_worktree_to_gates_after_timeout(self) -> None:
|
|
670
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-changed-") as temp_dir:
|
|
671
|
+
repo = Path(temp_dir) / "repo"
|
|
672
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
673
|
+
(repo / "README.md").write_text("# timeout changed repo\n", encoding="utf-8")
|
|
674
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
675
|
+
subprocess.run(
|
|
676
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
677
|
+
cwd=repo,
|
|
678
|
+
check=True,
|
|
679
|
+
capture_output=True,
|
|
680
|
+
text=True,
|
|
681
|
+
)
|
|
682
|
+
subprocess.run(
|
|
683
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
684
|
+
cwd=repo,
|
|
685
|
+
check=True,
|
|
686
|
+
capture_output=True,
|
|
687
|
+
text=True,
|
|
688
|
+
)
|
|
689
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
690
|
+
subprocess.run(
|
|
691
|
+
["git", "commit", "-m", "chore: seed timeout changed repo"],
|
|
692
|
+
cwd=repo,
|
|
693
|
+
check=True,
|
|
694
|
+
capture_output=True,
|
|
695
|
+
text=True,
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
stub_path = Path(temp_dir) / "fake_codex_timeout_changed.py"
|
|
699
|
+
stub_path.write_text(
|
|
700
|
+
"\n".join(
|
|
701
|
+
[
|
|
702
|
+
"from pathlib import Path",
|
|
703
|
+
"import sys",
|
|
704
|
+
"import time",
|
|
705
|
+
"",
|
|
706
|
+
"argv = sys.argv[1:]",
|
|
707
|
+
"last_message_path = None",
|
|
708
|
+
"for index, arg in enumerate(argv):",
|
|
709
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
710
|
+
" last_message_path = argv[index + 1]",
|
|
711
|
+
" break",
|
|
712
|
+
"",
|
|
713
|
+
"sys.stdin.read()",
|
|
714
|
+
"Path('src').mkdir(exist_ok=True)",
|
|
715
|
+
"Path('src/timeout-patch.txt').write_text('changed before timeout\\n', encoding='utf-8')",
|
|
716
|
+
"if last_message_path:",
|
|
717
|
+
" Path(last_message_path).write_text('Made a small patch before timeout.', encoding='utf-8')",
|
|
718
|
+
"print('item.completed | Made a small patch before timeout.', flush=True)",
|
|
719
|
+
"time.sleep(5)",
|
|
720
|
+
]
|
|
721
|
+
),
|
|
722
|
+
encoding="utf-8",
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
env_overrides = {
|
|
726
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
727
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
728
|
+
"OPENAI_API_KEY": "pushpals-timeout-changed-test-key",
|
|
729
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
|
|
730
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
731
|
+
}
|
|
732
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
733
|
+
result = _run_codex_task(
|
|
734
|
+
str(repo),
|
|
735
|
+
"Create a small file, then continue thinking too long.",
|
|
736
|
+
[],
|
|
737
|
+
)
|
|
738
|
+
|
|
739
|
+
self.assertTrue(result.get("ok"), result)
|
|
740
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
741
|
+
self.assertIn("timed out after modifying", str(result.get("summary") or ""))
|
|
742
|
+
self.assertIn("partial patch", str(result.get("stdout") or "").lower())
|
|
743
|
+
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
744
|
+
self.assertIn("Made a small patch before timeout", str(result.get("stdout") or ""))
|
|
745
|
+
|
|
746
|
+
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
747
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
748
|
+
repo = Path(temp_dir) / "repo"
|
|
749
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
750
|
+
(repo / "README.md").write_text("# no edit watchdog repo\n", encoding="utf-8")
|
|
751
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
752
|
+
subprocess.run(
|
|
753
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
754
|
+
cwd=repo,
|
|
755
|
+
check=True,
|
|
756
|
+
capture_output=True,
|
|
757
|
+
text=True,
|
|
758
|
+
)
|
|
759
|
+
subprocess.run(
|
|
760
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
761
|
+
cwd=repo,
|
|
762
|
+
check=True,
|
|
763
|
+
capture_output=True,
|
|
764
|
+
text=True,
|
|
765
|
+
)
|
|
766
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
767
|
+
subprocess.run(
|
|
768
|
+
["git", "commit", "-m", "chore: seed no-edit watchdog repo"],
|
|
769
|
+
cwd=repo,
|
|
770
|
+
check=True,
|
|
771
|
+
capture_output=True,
|
|
772
|
+
text=True,
|
|
773
|
+
)
|
|
774
|
+
|
|
775
|
+
stub_path = Path(temp_dir) / "fake_codex_no_edit_watchdog.py"
|
|
776
|
+
stub_path.write_text(
|
|
777
|
+
"\n".join(
|
|
778
|
+
[
|
|
779
|
+
"from pathlib import Path",
|
|
780
|
+
"import sys",
|
|
781
|
+
"import time",
|
|
782
|
+
"",
|
|
783
|
+
"argv = sys.argv[1:]",
|
|
784
|
+
"last_message_path = None",
|
|
785
|
+
"for index, arg in enumerate(argv):",
|
|
786
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
787
|
+
" last_message_path = argv[index + 1]",
|
|
788
|
+
" break",
|
|
789
|
+
"",
|
|
790
|
+
"prompt = sys.stdin.read()",
|
|
791
|
+
"if 'No-edit watchdog recovery' in prompt:",
|
|
792
|
+
" Path('src').mkdir(exist_ok=True)",
|
|
793
|
+
" Path('src/no-edit-retry.txt').write_text('patched on retry\\n', encoding='utf-8')",
|
|
794
|
+
" if last_message_path:",
|
|
795
|
+
" Path(last_message_path).write_text('Patched immediately after no-edit recovery.', encoding='utf-8')",
|
|
796
|
+
" print('item.completed | Patched immediately after no-edit recovery.', flush=True)",
|
|
797
|
+
" sys.exit(0)",
|
|
798
|
+
"",
|
|
799
|
+
"print('item.completed | Still inspecting route wrappers.', flush=True)",
|
|
800
|
+
"time.sleep(10)",
|
|
801
|
+
]
|
|
802
|
+
),
|
|
803
|
+
encoding="utf-8",
|
|
804
|
+
)
|
|
805
|
+
|
|
806
|
+
env_overrides = {
|
|
807
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
808
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
809
|
+
"OPENAI_API_KEY": "pushpals-no-edit-watchdog-test-key",
|
|
810
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
|
|
811
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
812
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
813
|
+
}
|
|
814
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
815
|
+
result = _run_codex_task(
|
|
816
|
+
str(repo),
|
|
817
|
+
"Polish the first-entry home shell with a compact visual patch.",
|
|
818
|
+
[],
|
|
819
|
+
)
|
|
820
|
+
|
|
821
|
+
self.assertTrue(result.get("ok"), result)
|
|
822
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
823
|
+
self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
|
|
824
|
+
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
825
|
+
|
|
826
|
+
def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
|
|
827
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
|
|
828
|
+
repo = Path(temp_dir) / "repo"
|
|
829
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
830
|
+
(repo / "README.md").write_text("# no edit watchdog failure repo\n", encoding="utf-8")
|
|
831
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
832
|
+
subprocess.run(
|
|
833
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
834
|
+
cwd=repo,
|
|
835
|
+
check=True,
|
|
836
|
+
capture_output=True,
|
|
837
|
+
text=True,
|
|
838
|
+
)
|
|
839
|
+
subprocess.run(
|
|
840
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
841
|
+
cwd=repo,
|
|
842
|
+
check=True,
|
|
843
|
+
capture_output=True,
|
|
844
|
+
text=True,
|
|
845
|
+
)
|
|
846
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
847
|
+
subprocess.run(
|
|
848
|
+
["git", "commit", "-m", "chore: seed no-edit watchdog failure repo"],
|
|
849
|
+
cwd=repo,
|
|
850
|
+
check=True,
|
|
851
|
+
capture_output=True,
|
|
852
|
+
text=True,
|
|
853
|
+
)
|
|
854
|
+
|
|
855
|
+
stub_path = Path(temp_dir) / "fake_codex_no_edit_watchdog_fail.py"
|
|
856
|
+
stub_path.write_text(
|
|
857
|
+
"\n".join(
|
|
858
|
+
[
|
|
859
|
+
"import sys",
|
|
860
|
+
"import time",
|
|
861
|
+
"",
|
|
862
|
+
"sys.stdin.read()",
|
|
863
|
+
"print('item.completed | Still inspecting, no patch yet.', flush=True)",
|
|
864
|
+
"time.sleep(10)",
|
|
865
|
+
]
|
|
866
|
+
),
|
|
867
|
+
encoding="utf-8",
|
|
868
|
+
)
|
|
869
|
+
|
|
870
|
+
env_overrides = {
|
|
871
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
872
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
873
|
+
"OPENAI_API_KEY": "pushpals-no-edit-watchdog-fail-test-key",
|
|
874
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
|
|
875
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
876
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
877
|
+
}
|
|
878
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
879
|
+
result = _run_codex_task(
|
|
880
|
+
str(repo),
|
|
881
|
+
"Polish the first-entry home shell with a compact visual patch.",
|
|
882
|
+
[],
|
|
883
|
+
)
|
|
884
|
+
|
|
885
|
+
self.assertFalse(result.get("ok"), result)
|
|
886
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
887
|
+
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
888
|
+
|
|
613
889
|
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
|
|
614
890
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
|
|
615
891
|
repo = Path(temp_dir) / "repo"
|
|
package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py
CHANGED
|
@@ -787,14 +787,58 @@ def _looks_like_visual_derivation_task(params: Dict[str, Any]) -> bool:
|
|
|
787
787
|
return any(marker in text for marker in visual_markers)
|
|
788
788
|
|
|
789
789
|
|
|
790
|
+
def _looks_like_route_shell_task(params: Dict[str, Any]) -> bool:
|
|
791
|
+
text = _joined_task_text(params)
|
|
792
|
+
shell_markers = (
|
|
793
|
+
"route-entry",
|
|
794
|
+
"route entry",
|
|
795
|
+
"first-entry",
|
|
796
|
+
"first entry",
|
|
797
|
+
"startup shell",
|
|
798
|
+
"home shell",
|
|
799
|
+
"entry route",
|
|
800
|
+
"shell/navigation",
|
|
801
|
+
"app/_layout",
|
|
802
|
+
"app/index",
|
|
803
|
+
"homescreen",
|
|
804
|
+
"home screen",
|
|
805
|
+
"settingsscreen",
|
|
806
|
+
"settings screen",
|
|
807
|
+
"shopscreen",
|
|
808
|
+
"shop screen",
|
|
809
|
+
"help",
|
|
810
|
+
"game-over",
|
|
811
|
+
"game over",
|
|
812
|
+
"match-start",
|
|
813
|
+
"match start",
|
|
814
|
+
"return affordance",
|
|
815
|
+
)
|
|
816
|
+
return any(marker in text for marker in shell_markers)
|
|
817
|
+
|
|
818
|
+
|
|
790
819
|
def _build_efficiency_guidance(params: Dict[str, Any]) -> str:
|
|
791
820
|
lines: List[str] = [
|
|
792
821
|
"Worker speed/convergence contract from PushPals:",
|
|
793
822
|
"- Target useful completion in roughly 20 minutes for small or medium repo tasks; optimize for the smallest coherent patch over exhaustive exploration.",
|
|
794
|
-
"- Phase soft budgets: discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
|
|
795
|
-
"-
|
|
823
|
+
"- Phase soft budgets: discovery <= 3m for small scoped tasks and <= 5m otherwise, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
|
|
824
|
+
"- No-edit checkpoint: if you have not made a patch after identifying the behavior-owning file, stop discovering and edit that file now. Do not spend the execution budget proving every adjacent assumption first.",
|
|
825
|
+
"- Discovery command budget: for compact tasks, use at most 5-8 targeted read/search commands before editing. If that is not enough, state the blocker and patch the best behavior owner rather than widening discovery.",
|
|
796
826
|
]
|
|
797
|
-
|
|
827
|
+
route_shell_task = _looks_like_route_shell_task(params)
|
|
828
|
+
visual_task = _looks_like_visual_derivation_task(params)
|
|
829
|
+
if route_shell_task or visual_task:
|
|
830
|
+
lines.append(
|
|
831
|
+
"- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state/style coverage.",
|
|
832
|
+
)
|
|
833
|
+
if route_shell_task:
|
|
834
|
+
lines.extend(
|
|
835
|
+
[
|
|
836
|
+
"- Route-entry/shell task rule: inspect the hinted route wrapper, then move immediately to the behavior-owning shell component when the route is thin. Do not keep re-reading navigation topology once the owner is found.",
|
|
837
|
+
"- Compact shell polish rule: make one small visual/affordance patch before chasing missing test infrastructure. If a referenced React Native mock or app/__tests__ path is absent, use existing nearby tests or a focused style/helper assertion instead of creating a broad render harness.",
|
|
838
|
+
"- Shell task deadline: by the first clear owner hypothesis, choose the home/settings/shop/help/game-over surface and patch it; ValidationGate can run long browser checks after your focused validation.",
|
|
839
|
+
]
|
|
840
|
+
)
|
|
841
|
+
if visual_task:
|
|
798
842
|
lines.extend(
|
|
799
843
|
[
|
|
800
844
|
"- Visual/rendering task rule: prefer pure helper/state/style-prop tests for derived visual cues. Use a full React Native/component render regression only if the repo already has a stable harness for that exact surface.",
|
|
@@ -809,6 +853,7 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
|
|
|
809
853
|
if not isinstance(planning, dict):
|
|
810
854
|
return ""
|
|
811
855
|
|
|
856
|
+
compact_task = _looks_like_route_shell_task(params) or _looks_like_visual_derivation_task(params)
|
|
812
857
|
lines: List[str] = ["Task planning contract from PushPals:"]
|
|
813
858
|
intent = to_single_line(planning.get("intent"), 80)
|
|
814
859
|
risk = to_single_line(planning.get("riskLevel"), 80)
|
|
@@ -856,17 +901,29 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
|
|
|
856
901
|
forbidden = _string_list(scope.get("forbiddenGlobs"), limit=8)
|
|
857
902
|
_append_list_guidance(lines, "Forbidden path hints", forbidden)
|
|
858
903
|
|
|
859
|
-
_append_list_guidance(
|
|
904
|
+
_append_list_guidance(
|
|
905
|
+
lines,
|
|
906
|
+
"Target path hints",
|
|
907
|
+
_string_list(planning.get("targetPaths"), limit=6 if compact_task else 12),
|
|
908
|
+
)
|
|
860
909
|
|
|
861
910
|
discovery = planning.get("discovery")
|
|
862
911
|
if isinstance(discovery, dict):
|
|
863
912
|
_append_list_guidance(
|
|
864
913
|
lines,
|
|
865
914
|
"Suggested discovery commands",
|
|
866
|
-
_string_list(discovery.get("ripgrepQueries"), limit=8),
|
|
915
|
+
_string_list(discovery.get("ripgrepQueries"), limit=4 if compact_task else 8),
|
|
916
|
+
)
|
|
917
|
+
_append_list_guidance(
|
|
918
|
+
lines,
|
|
919
|
+
"Likely directories",
|
|
920
|
+
_string_list(discovery.get("likelyDirs"), limit=4 if compact_task else 8),
|
|
921
|
+
)
|
|
922
|
+
_append_list_guidance(
|
|
923
|
+
lines,
|
|
924
|
+
"Search keywords",
|
|
925
|
+
_string_list(discovery.get("keywords"), limit=8 if compact_task else 12),
|
|
867
926
|
)
|
|
868
|
-
_append_list_guidance(lines, "Likely directories", _string_list(discovery.get("likelyDirs"), limit=8))
|
|
869
|
-
_append_list_guidance(lines, "Search keywords", _string_list(discovery.get("keywords"), limit=12))
|
|
870
927
|
|
|
871
928
|
_append_list_guidance(
|
|
872
929
|
lines,
|
|
@@ -28,6 +28,8 @@ interface GenericPythonExecutorConfig {
|
|
|
28
28
|
capTimeoutToExecutionBudget?: boolean;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
const BACKEND_TIMEOUT_RESULT_GRACE_MS = 30_000;
|
|
32
|
+
|
|
31
33
|
function estimateTokensFromText(text: string): number {
|
|
32
34
|
return Math.max(0, Math.ceil(String(text ?? "").length / 3));
|
|
33
35
|
}
|
|
@@ -123,6 +125,7 @@ function resolveRuntimeSettings(
|
|
|
123
125
|
export function resolveGenericPythonExecutorTimeoutMs(params: {
|
|
124
126
|
configuredTimeoutMs: number;
|
|
125
127
|
executionBudgetMs?: number | null;
|
|
128
|
+
finalizationBudgetMs?: number | null;
|
|
126
129
|
capTimeoutToExecutionBudget?: boolean;
|
|
127
130
|
}): number {
|
|
128
131
|
const configuredTimeoutMs = Math.max(10_000, Math.floor(params.configuredTimeoutMs));
|
|
@@ -130,12 +133,49 @@ export function resolveGenericPythonExecutorTimeoutMs(params: {
|
|
|
130
133
|
typeof params.executionBudgetMs === "number" && Number.isFinite(params.executionBudgetMs)
|
|
131
134
|
? Math.max(10_000, Math.floor(params.executionBudgetMs))
|
|
132
135
|
: null;
|
|
136
|
+
const finalizationBudgetMs =
|
|
137
|
+
typeof params.finalizationBudgetMs === "number" && Number.isFinite(params.finalizationBudgetMs)
|
|
138
|
+
? Math.max(0, Math.floor(params.finalizationBudgetMs))
|
|
139
|
+
: 0;
|
|
133
140
|
if (executionBudgetMs != null && params.capTimeoutToExecutionBudget !== false) {
|
|
134
|
-
return Math.min(configuredTimeoutMs, executionBudgetMs);
|
|
141
|
+
return Math.min(configuredTimeoutMs, executionBudgetMs + finalizationBudgetMs);
|
|
135
142
|
}
|
|
136
143
|
return configuredTimeoutMs;
|
|
137
144
|
}
|
|
138
145
|
|
|
146
|
+
export function resolveGenericPythonExecutorChildTimeoutMs(params: {
|
|
147
|
+
backendName: string;
|
|
148
|
+
hostTimeoutMs: number;
|
|
149
|
+
executionBudgetMs?: number | null;
|
|
150
|
+
}): number | null {
|
|
151
|
+
const hostTimeoutMs = Math.max(10_000, Math.floor(params.hostTimeoutMs));
|
|
152
|
+
if (params.backendName !== "openai_codex") return null;
|
|
153
|
+
const executionBudgetMs =
|
|
154
|
+
typeof params.executionBudgetMs === "number" && Number.isFinite(params.executionBudgetMs)
|
|
155
|
+
? Math.max(10_000, Math.floor(params.executionBudgetMs))
|
|
156
|
+
: null;
|
|
157
|
+
const childBudgetMs =
|
|
158
|
+
executionBudgetMs == null ? hostTimeoutMs : Math.min(hostTimeoutMs, executionBudgetMs);
|
|
159
|
+
const graceMs = Math.min(
|
|
160
|
+
BACKEND_TIMEOUT_RESULT_GRACE_MS,
|
|
161
|
+
Math.max(2_000, Math.floor(childBudgetMs / 10)),
|
|
162
|
+
);
|
|
163
|
+
return Math.max(1_000, childBudgetMs - graceMs);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
export function resolveGenericPythonExecutorChildTimeoutEnv(params: {
|
|
167
|
+
backendName: string;
|
|
168
|
+
hostTimeoutMs: number;
|
|
169
|
+
executionBudgetMs?: number | null;
|
|
170
|
+
}): Record<string, string> {
|
|
171
|
+
const childTimeoutMs = resolveGenericPythonExecutorChildTimeoutMs(params);
|
|
172
|
+
if (childTimeoutMs == null) return {};
|
|
173
|
+
return {
|
|
174
|
+
WORKERPALS_OPENAI_CODEX_TIMEOUT_MS: String(childTimeoutMs),
|
|
175
|
+
WORKERPALS_OPENAI_CODEX_TIMEOUT_S: String(Math.max(1, Math.floor(childTimeoutMs / 1000))),
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
|
|
139
179
|
function toSnakeConfigKey(key: string): string {
|
|
140
180
|
return key.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
141
181
|
}
|
|
@@ -144,6 +184,7 @@ function formatGenericPythonExecutorTimeoutDetail(
|
|
|
144
184
|
config: GenericPythonExecutorConfig,
|
|
145
185
|
configuredTimeoutMs: number,
|
|
146
186
|
executionBudgetMs: number | null,
|
|
187
|
+
finalizationBudgetMs: number | null,
|
|
147
188
|
timeoutMs: number,
|
|
148
189
|
): string {
|
|
149
190
|
const configPath = `workerpals.${toSnakeConfigKey(config.timeoutConfigKey)}`;
|
|
@@ -154,7 +195,11 @@ function formatGenericPythonExecutorTimeoutDetail(
|
|
|
154
195
|
return `${configPath}=${configuredTimeoutMs}ms; planning executionBudgetMs=${executionBudgetMs}ms ignored by backend opt-out`;
|
|
155
196
|
}
|
|
156
197
|
if (timeoutMs < configuredTimeoutMs) {
|
|
157
|
-
|
|
198
|
+
const finalizationDetail =
|
|
199
|
+
finalizationBudgetMs && finalizationBudgetMs > 0
|
|
200
|
+
? ` + finalizationBudgetMs=${finalizationBudgetMs}ms`
|
|
201
|
+
: "";
|
|
202
|
+
return `${configPath}=${configuredTimeoutMs}ms capped by planning executionBudgetMs=${executionBudgetMs}ms${finalizationDetail}`;
|
|
158
203
|
}
|
|
159
204
|
return `${configPath}=${configuredTimeoutMs}ms within planning executionBudgetMs=${executionBudgetMs}ms`;
|
|
160
205
|
}
|
|
@@ -190,15 +235,21 @@ export function createGenericPythonExecutor(
|
|
|
190
235
|
typeof budgets?.executionBudgetMs === "number" && Number.isFinite(budgets.executionBudgetMs)
|
|
191
236
|
? Math.max(10_000, Math.floor(budgets.executionBudgetMs))
|
|
192
237
|
: null;
|
|
238
|
+
const finalizationBudgetMs =
|
|
239
|
+
typeof budgets?.finalizationBudgetMs === "number" && Number.isFinite(budgets.finalizationBudgetMs)
|
|
240
|
+
? Math.max(0, Math.floor(budgets.finalizationBudgetMs))
|
|
241
|
+
: null;
|
|
193
242
|
const timeoutMs = resolveGenericPythonExecutorTimeoutMs({
|
|
194
243
|
configuredTimeoutMs,
|
|
195
244
|
executionBudgetMs,
|
|
245
|
+
finalizationBudgetMs,
|
|
196
246
|
capTimeoutToExecutionBudget: config.capTimeoutToExecutionBudget,
|
|
197
247
|
});
|
|
198
248
|
const timeoutDetail = formatGenericPythonExecutorTimeoutDetail(
|
|
199
249
|
config,
|
|
200
250
|
configuredTimeoutMs,
|
|
201
251
|
executionBudgetMs,
|
|
252
|
+
finalizationBudgetMs,
|
|
202
253
|
timeoutMs,
|
|
203
254
|
);
|
|
204
255
|
const payloadBase64 = Buffer.from(
|
|
@@ -210,6 +261,11 @@ export function createGenericPythonExecutor(
|
|
|
210
261
|
"utf-8",
|
|
211
262
|
).toString("base64");
|
|
212
263
|
const args = [pythonBin, scriptPath, payloadBase64];
|
|
264
|
+
const childTimeoutEnv = resolveGenericPythonExecutorChildTimeoutEnv({
|
|
265
|
+
backendName,
|
|
266
|
+
hostTimeoutMs: timeoutMs,
|
|
267
|
+
executionBudgetMs,
|
|
268
|
+
});
|
|
213
269
|
|
|
214
270
|
onLog?.(
|
|
215
271
|
"stdout",
|
|
@@ -229,6 +285,7 @@ export function createGenericPythonExecutor(
|
|
|
229
285
|
stderr: "pipe",
|
|
230
286
|
env: {
|
|
231
287
|
...buildWorkerSandboxWritableEnv(repo),
|
|
288
|
+
...childTimeoutEnv,
|
|
232
289
|
PUSHPALS_REPO_PATH: repo,
|
|
233
290
|
PUSHPALS_ASSIGNED_REPO_ROOT: repo,
|
|
234
291
|
PYTHONIOENCODING: "utf-8",
|
|
@@ -1145,6 +1145,21 @@ export function isLongRunningBrowserValidationCommand(command: string): boolean
|
|
|
1145
1145
|
);
|
|
1146
1146
|
}
|
|
1147
1147
|
|
|
1148
|
+
export function isParallelSafeFastValidationCommand(repo: string, command: string): boolean {
|
|
1149
|
+
if (isLongRunningBrowserValidationCommand(command)) return false;
|
|
1150
|
+
if (shouldEnsurePlaywrightBrowserRuntime(repo, command)) return false;
|
|
1151
|
+
const tokens = tokenizeValidationCommandArgv(command);
|
|
1152
|
+
if (!tokens || tokens.length === 0) return false;
|
|
1153
|
+
const lower = tokens.map((token) => token.toLowerCase());
|
|
1154
|
+
if (lower[0] !== "bun") return false;
|
|
1155
|
+
if (lower[1] === "test") return true;
|
|
1156
|
+
if (lower[1] === "x" && lower[2] === "tsc") return true;
|
|
1157
|
+
if (lower[1] === "run" && ["lint", "typecheck", "test", "test:unit"].includes(lower[2] ?? "")) {
|
|
1158
|
+
return true;
|
|
1159
|
+
}
|
|
1160
|
+
return false;
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1148
1163
|
function readPackageJson(repo: string): {
|
|
1149
1164
|
scripts?: Record<string, unknown>;
|
|
1150
1165
|
dependencies?: Record<string, unknown>;
|
|
@@ -3116,7 +3131,71 @@ async function runDeterministicQualityGate(
|
|
|
3116
3131
|
);
|
|
3117
3132
|
}
|
|
3118
3133
|
const playwrightBrowserRuntimeReadyTargets = new Set<string>();
|
|
3119
|
-
for (
|
|
3134
|
+
for (let commandIndex = 0; commandIndex < commandsToRun.length; ) {
|
|
3135
|
+
const parallelBatch: string[] = [];
|
|
3136
|
+
while (
|
|
3137
|
+
commandIndex + parallelBatch.length < commandsToRun.length &&
|
|
3138
|
+
parallelBatch.length < 3
|
|
3139
|
+
) {
|
|
3140
|
+
const candidate = commandsToRun[commandIndex + parallelBatch.length];
|
|
3141
|
+
if (!isParallelSafeFastValidationCommand(repo, candidate)) break;
|
|
3142
|
+
parallelBatch.push(candidate);
|
|
3143
|
+
}
|
|
3144
|
+
if (parallelBatch.length > 1) {
|
|
3145
|
+
onLog?.(
|
|
3146
|
+
"stdout",
|
|
3147
|
+
`[ValidationGate] Running fast validation batch in parallel: ${parallelBatch.join(" | ")}`,
|
|
3148
|
+
);
|
|
3149
|
+
const batchRuns = await Promise.all(
|
|
3150
|
+
parallelBatch.map(async (command) => {
|
|
3151
|
+
const commandMissingTools = requirementsForValidationCommand(
|
|
3152
|
+
toolchainPlan,
|
|
3153
|
+
command,
|
|
3154
|
+
).filter((requirement) =>
|
|
3155
|
+
missingToolRequirements.some((missing) => missing.tool === requirement.tool),
|
|
3156
|
+
);
|
|
3157
|
+
if (commandMissingTools.length > 0) {
|
|
3158
|
+
const stderr = `Validation skipped before execution because required tool(s) are missing: ${formatMissingToolRequirements(
|
|
3159
|
+
commandMissingTools,
|
|
3160
|
+
)}.`;
|
|
3161
|
+
return {
|
|
3162
|
+
run: {
|
|
3163
|
+
step: command,
|
|
3164
|
+
command,
|
|
3165
|
+
ok: false,
|
|
3166
|
+
exitCode: 127,
|
|
3167
|
+
stdout: "",
|
|
3168
|
+
stderr,
|
|
3169
|
+
elapsedMs: 1,
|
|
3170
|
+
} satisfies ValidationExecutionResult,
|
|
3171
|
+
stream: "stderr" as const,
|
|
3172
|
+
summary: `[ValidationGate] Validation skipped (missing toolchain): ${command}`,
|
|
3173
|
+
};
|
|
3174
|
+
}
|
|
3175
|
+
const run = await runValidationCommand(
|
|
3176
|
+
repo,
|
|
3177
|
+
command,
|
|
3178
|
+
resolveValidationCommandTimeoutMs(command, qualityValidationStepTimeoutMs),
|
|
3179
|
+
outputPolicy,
|
|
3180
|
+
);
|
|
3181
|
+
const digest = run.ok ? "" : extractValidationFailureDigest(run);
|
|
3182
|
+
return {
|
|
3183
|
+
run,
|
|
3184
|
+
stream: (run.ok ? "stdout" : "stderr") as "stdout" | "stderr",
|
|
3185
|
+
summary: `[ValidationGate] ${run.ok ? "Passed" : "Failed"} (${run.elapsedMs}ms, exit ${run.exitCode}): ${command}${digest ? ` - ${digest}` : ""}`,
|
|
3186
|
+
};
|
|
3187
|
+
}),
|
|
3188
|
+
);
|
|
3189
|
+
for (const { run, stream, summary } of batchRuns) {
|
|
3190
|
+
validationRuns.push(run);
|
|
3191
|
+
onLog?.(stream, summary);
|
|
3192
|
+
}
|
|
3193
|
+
commandIndex += parallelBatch.length;
|
|
3194
|
+
continue;
|
|
3195
|
+
}
|
|
3196
|
+
|
|
3197
|
+
const command = commandsToRun[commandIndex];
|
|
3198
|
+
commandIndex += 1;
|
|
3120
3199
|
const commandMissingTools = requirementsForValidationCommand(toolchainPlan, command).filter(
|
|
3121
3200
|
(requirement) =>
|
|
3122
3201
|
missingToolRequirements.some((missing) => missing.tool === requirement.tool),
|
|
@@ -6665,6 +6744,7 @@ export async function executeJob(
|
|
|
6665
6744
|
const previousValidationFailureDigests = new Map<string, string>();
|
|
6666
6745
|
const failureJobFamily = buildTaskFailureJobFamily(normalizedParams);
|
|
6667
6746
|
while (revisionAttempt <= qualityRevisionLoopMax) {
|
|
6747
|
+
const attemptStartedAt = Date.now();
|
|
6668
6748
|
const attemptParams: Record<string, unknown> = { ...normalizedParams };
|
|
6669
6749
|
if (revisionHint) {
|
|
6670
6750
|
attemptParams.qualityRevisionHint = revisionHint;
|
|
@@ -6683,6 +6763,7 @@ export async function executeJob(
|
|
|
6683
6763
|
}
|
|
6684
6764
|
let result: Awaited<ReturnType<typeof runExecutor>> | null = null;
|
|
6685
6765
|
let mergeConflictPass = 0;
|
|
6766
|
+
let executorElapsedMs = 0;
|
|
6686
6767
|
while (true) {
|
|
6687
6768
|
const currentResult = await runExecutor(
|
|
6688
6769
|
kind,
|
|
@@ -6751,6 +6832,7 @@ export async function executeJob(
|
|
|
6751
6832
|
exitCode: 4,
|
|
6752
6833
|
};
|
|
6753
6834
|
}
|
|
6835
|
+
executorElapsedMs = Date.now() - attemptStartedAt;
|
|
6754
6836
|
|
|
6755
6837
|
const preQualityStatus = await git(repo, ["status", "--porcelain"]);
|
|
6756
6838
|
const preQualityChangedPaths = preQualityStatus.ok
|
|
@@ -6799,6 +6881,7 @@ export async function executeJob(
|
|
|
6799
6881
|
};
|
|
6800
6882
|
}
|
|
6801
6883
|
|
|
6884
|
+
const qualityStartedAt = Date.now();
|
|
6802
6885
|
const quality = await runDeterministicQualityGate(
|
|
6803
6886
|
repo,
|
|
6804
6887
|
attemptParams,
|
|
@@ -6810,6 +6893,15 @@ export async function executeJob(
|
|
|
6810
6893
|
revisionAttempt,
|
|
6811
6894
|
},
|
|
6812
6895
|
);
|
|
6896
|
+
const qualityElapsedMs = Date.now() - qualityStartedAt;
|
|
6897
|
+
const validationCommandElapsedMs = quality.validationRuns.reduce(
|
|
6898
|
+
(total, run) => total + Math.max(0, Number(run.elapsedMs) || 0),
|
|
6899
|
+
0,
|
|
6900
|
+
);
|
|
6901
|
+
onLog?.(
|
|
6902
|
+
"stdout",
|
|
6903
|
+
`[JobRunner] Performance summary: attempt=${revisionAttempt}, executor=${executorElapsedMs}ms, quality=${qualityElapsedMs}ms, validation_commands=${quality.validationRuns.length}, validation_command_time=${validationCommandElapsedMs}ms, changed_files=${quality.changedPaths.length}`,
|
|
6904
|
+
);
|
|
6813
6905
|
let browserRepairPacket = buildBrowserValidationRepairPacket(
|
|
6814
6906
|
quality.validationRuns,
|
|
6815
6907
|
previousValidationFailureDigests,
|