@pushpalsdev/cli 1.1.22 → 1.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +10 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +181 -10
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +171 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +4 -3
package/package.json
CHANGED
|
@@ -8285,6 +8285,7 @@ function buildWorkerSpawnCommand(options) {
|
|
|
8285
8285
|
}
|
|
8286
8286
|
|
|
8287
8287
|
// apps/remotebuddy/src/remotebuddy_main.ts
|
|
8288
|
+
var AUTONOMY_TASK_DEDUPE_COOLDOWN_MS = 6 * 60 * 60 * 1000;
|
|
8288
8289
|
var CONFIG = loadPushPalsConfig();
|
|
8289
8290
|
function parseArgs() {
|
|
8290
8291
|
const args = process.argv.slice(2);
|
|
@@ -8464,6 +8465,11 @@ function buildTaskExecuteDedupeKey(sessionId, params) {
|
|
|
8464
8465
|
}
|
|
8465
8466
|
return `task.execute:${normalizedOrigin}:${normalizedSessionId}:${uniqueTargets.join("|")}`.toLowerCase();
|
|
8466
8467
|
}
|
|
8468
|
+
function resolveTaskExecuteDedupeCooldownMs(params, dedupeKey) {
|
|
8469
|
+
if (!dedupeKey)
|
|
8470
|
+
return 0;
|
|
8471
|
+
return params.origin === "autonomy" ? AUTONOMY_TASK_DEDUPE_COOLDOWN_MS : 0;
|
|
8472
|
+
}
|
|
8467
8473
|
function parseAutonomyRequestMetadata(value) {
|
|
8468
8474
|
let root = asObject2(value);
|
|
8469
8475
|
if (!root && typeof value === "string") {
|
|
@@ -9509,6 +9515,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9509
9515
|
const dedupeKey = buildTaskExecuteDedupeKey(sessionId, params);
|
|
9510
9516
|
if (dedupeKey)
|
|
9511
9517
|
payload.dedupeKey = dedupeKey;
|
|
9518
|
+
const dedupeCooldownMs = resolveTaskExecuteDedupeCooldownMs(params, dedupeKey);
|
|
9519
|
+
if (dedupeCooldownMs > 0)
|
|
9520
|
+
payload.dedupeCooldownMs = dedupeCooldownMs;
|
|
9512
9521
|
if (targetWorkerId)
|
|
9513
9522
|
payload.targetWorkerId = targetWorkerId;
|
|
9514
9523
|
const res = await this.fetchImpl(`${this.server}/jobs/enqueue`, {
|
|
@@ -10603,6 +10612,7 @@ if (import.meta.main) {
|
|
|
10603
10612
|
});
|
|
10604
10613
|
}
|
|
10605
10614
|
export {
|
|
10615
|
+
resolveTaskExecuteDedupeCooldownMs,
|
|
10606
10616
|
extractRequiredValidationStepsFromVisionMarkdown,
|
|
10607
10617
|
buildTaskExecuteDedupeKey,
|
|
10608
10618
|
RemoteBuddyOrchestrator
|
|
@@ -8,6 +8,7 @@ that the TypeScript host parses.
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import json
|
|
11
|
+
import hashlib
|
|
11
12
|
import os
|
|
12
13
|
import re
|
|
13
14
|
from shutil import rmtree, which
|
|
@@ -108,11 +109,13 @@ _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
|
108
109
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
109
110
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
110
111
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
111
|
-
_SMALL_TASK_NO_EDIT_WATCHDOG_S =
|
|
112
|
+
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
|
|
113
|
+
_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
|
|
112
114
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
113
115
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
114
116
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
115
117
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
118
|
+
_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
116
119
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
117
120
|
|
|
118
121
|
|
|
@@ -590,6 +593,21 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
590
593
|
"browser smoke",
|
|
591
594
|
"web delivery",
|
|
592
595
|
"navigation trustworthy",
|
|
596
|
+
"test-only",
|
|
597
|
+
"test only",
|
|
598
|
+
"contract test",
|
|
599
|
+
"contract coverage",
|
|
600
|
+
"ranking contract",
|
|
601
|
+
"focused scenario",
|
|
602
|
+
"targeted test",
|
|
603
|
+
"one-file",
|
|
604
|
+
"one file",
|
|
605
|
+
"single-file",
|
|
606
|
+
"single file",
|
|
607
|
+
"max_files_to_edit: 1",
|
|
608
|
+
"max_files_to_edit=1",
|
|
609
|
+
"maxfilestoedit: 1",
|
|
610
|
+
"maxfilestoedit=1",
|
|
593
611
|
)
|
|
594
612
|
heavy_markers = (
|
|
595
613
|
"merge-conflict",
|
|
@@ -606,6 +624,34 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
606
624
|
)
|
|
607
625
|
|
|
608
626
|
|
|
627
|
+
def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
628
|
+
text = str(prompt or "").lower()
|
|
629
|
+
if not text:
|
|
630
|
+
return False
|
|
631
|
+
narrow_markers = (
|
|
632
|
+
"contract test",
|
|
633
|
+
"contract coverage",
|
|
634
|
+
"ranking contract",
|
|
635
|
+
"test-only",
|
|
636
|
+
"test only",
|
|
637
|
+
"targeted test",
|
|
638
|
+
"focused scenario",
|
|
639
|
+
)
|
|
640
|
+
if not any(marker in text for marker in narrow_markers):
|
|
641
|
+
return False
|
|
642
|
+
broad_markers = (
|
|
643
|
+
"full render harness",
|
|
644
|
+
"full-surface",
|
|
645
|
+
"full surface",
|
|
646
|
+
"e2e",
|
|
647
|
+
"browser validation",
|
|
648
|
+
"browser smoke",
|
|
649
|
+
"migration",
|
|
650
|
+
"broad refactor",
|
|
651
|
+
)
|
|
652
|
+
return not any(marker in text for marker in broad_markers)
|
|
653
|
+
|
|
654
|
+
|
|
609
655
|
def _resolve_task_reasoning_effort(
|
|
610
656
|
configured_effort: str,
|
|
611
657
|
prompt: str,
|
|
@@ -651,7 +697,9 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
651
697
|
return None
|
|
652
698
|
|
|
653
699
|
prompt_text = str(prompt or "").lower()
|
|
654
|
-
if
|
|
700
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
701
|
+
default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
|
|
702
|
+
elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
655
703
|
default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
|
|
656
704
|
else:
|
|
657
705
|
default_s = (
|
|
@@ -702,7 +750,9 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
702
750
|
else:
|
|
703
751
|
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
704
752
|
|
|
705
|
-
if
|
|
753
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
754
|
+
default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
|
|
755
|
+
elif _looks_like_web_review_prompt(prompt):
|
|
706
756
|
default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
|
|
707
757
|
elif _looks_like_small_task_prompt(prompt):
|
|
708
758
|
default_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
|
|
@@ -713,9 +763,39 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
713
763
|
return max(90, min(default_s, max(90, communicate_timeout_s - 60)))
|
|
714
764
|
|
|
715
765
|
|
|
716
|
-
def
|
|
717
|
-
|
|
718
|
-
|
|
766
|
+
def _baseline_snapshot_paths(baseline_snapshot: Any) -> List[str]:
|
|
767
|
+
if isinstance(baseline_snapshot, dict):
|
|
768
|
+
return [str(path) for path in baseline_snapshot.keys()]
|
|
769
|
+
if isinstance(baseline_snapshot, list):
|
|
770
|
+
return [str(path) for path in baseline_snapshot]
|
|
771
|
+
return []
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _paths_changed_after_baseline(
|
|
775
|
+
repo: str,
|
|
776
|
+
changed_paths: List[str],
|
|
777
|
+
baseline_snapshot: Any,
|
|
778
|
+
) -> List[str]:
|
|
779
|
+
baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
|
|
780
|
+
if not baseline_paths:
|
|
781
|
+
return list(changed_paths)
|
|
782
|
+
|
|
783
|
+
delta: List[str] = []
|
|
784
|
+
baseline_fingerprints = baseline_snapshot if isinstance(baseline_snapshot, dict) else {}
|
|
785
|
+
for path in changed_paths:
|
|
786
|
+
if path not in baseline_paths:
|
|
787
|
+
delta.append(path)
|
|
788
|
+
continue
|
|
789
|
+
if baseline_fingerprints:
|
|
790
|
+
current_fingerprint = _changed_path_fingerprint(repo, path)
|
|
791
|
+
if current_fingerprint != str(baseline_fingerprints.get(path) or ""):
|
|
792
|
+
delta.append(path)
|
|
793
|
+
return delta
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: Any) -> str:
|
|
797
|
+
baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
|
|
798
|
+
inspected = [p for p in changed_paths if p not in baseline_paths] if baseline_paths else changed_paths
|
|
719
799
|
non_publishable = [p for p in inspected if not _is_publishable_changed_path(p)]
|
|
720
800
|
if not non_publishable:
|
|
721
801
|
return ""
|
|
@@ -735,6 +815,8 @@ def _describe_publishable_paths(paths: List[str]) -> str:
|
|
|
735
815
|
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
736
816
|
lines = [
|
|
737
817
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
818
|
+
"This recovery attempt has a patch-first contract: make one publishable edit before any further broad discovery. If you need one narrow read of the hinted file to place the edit, do that once, then patch immediately.",
|
|
819
|
+
"Do not repeat the same read/search sequence from the previous attempt. Re-reading the target without editing is a failed recovery.",
|
|
738
820
|
"Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
|
|
739
821
|
"Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
|
|
740
822
|
"Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
|
|
@@ -1686,10 +1768,99 @@ def _is_publishable_changed_path(path: str) -> bool:
|
|
|
1686
1768
|
return not re.search(r"(^|/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(/|$)", normalized)
|
|
1687
1769
|
|
|
1688
1770
|
|
|
1689
|
-
def
|
|
1771
|
+
def _filesystem_fingerprint(repo: str, raw_path: str) -> str:
|
|
1772
|
+
root = Path(repo)
|
|
1773
|
+
target = (root / raw_path).resolve()
|
|
1774
|
+
try:
|
|
1775
|
+
root_resolved = root.resolve()
|
|
1776
|
+
common = os.path.commonpath([str(root_resolved), str(target)])
|
|
1777
|
+
if common != str(root_resolved):
|
|
1778
|
+
return "outside-repo"
|
|
1779
|
+
except Exception:
|
|
1780
|
+
return "unresolved"
|
|
1781
|
+
digest = hashlib.sha256()
|
|
1782
|
+
if not target.exists():
|
|
1783
|
+
return "missing"
|
|
1784
|
+
if target.is_file():
|
|
1785
|
+
digest.update(b"file\0")
|
|
1786
|
+
try:
|
|
1787
|
+
digest.update(str(target.stat().st_size).encode("utf-8"))
|
|
1788
|
+
with target.open("rb") as handle:
|
|
1789
|
+
while True:
|
|
1790
|
+
chunk = handle.read(1024 * 1024)
|
|
1791
|
+
if not chunk:
|
|
1792
|
+
break
|
|
1793
|
+
digest.update(chunk)
|
|
1794
|
+
except Exception as exc:
|
|
1795
|
+
digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1796
|
+
return digest.hexdigest()
|
|
1797
|
+
if target.is_dir():
|
|
1798
|
+
digest.update(b"dir\0")
|
|
1799
|
+
files_seen = 0
|
|
1800
|
+
try:
|
|
1801
|
+
for dirpath, dirnames, filenames in os.walk(target):
|
|
1802
|
+
dirnames.sort()
|
|
1803
|
+
filenames.sort()
|
|
1804
|
+
for filename in filenames:
|
|
1805
|
+
if files_seen >= 128:
|
|
1806
|
+
digest.update(b"\0truncated")
|
|
1807
|
+
return digest.hexdigest()
|
|
1808
|
+
child = Path(dirpath) / filename
|
|
1809
|
+
try:
|
|
1810
|
+
rel = child.relative_to(root_resolved).as_posix()
|
|
1811
|
+
except Exception:
|
|
1812
|
+
rel = child.name
|
|
1813
|
+
digest.update(rel.encode("utf-8", errors="replace"))
|
|
1814
|
+
digest.update(b"\0")
|
|
1815
|
+
digest.update(str(child.stat().st_size).encode("utf-8"))
|
|
1816
|
+
digest.update(b"\0")
|
|
1817
|
+
try:
|
|
1818
|
+
with child.open("rb") as handle:
|
|
1819
|
+
digest.update(handle.read(64 * 1024))
|
|
1820
|
+
except Exception as exc:
|
|
1821
|
+
digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1822
|
+
files_seen += 1
|
|
1823
|
+
except Exception as exc:
|
|
1824
|
+
digest.update(f"walk-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1825
|
+
return digest.hexdigest()
|
|
1826
|
+
return "special"
|
|
1827
|
+
|
|
1828
|
+
|
|
1829
|
+
def _changed_path_fingerprint(repo: str, path: str) -> str:
|
|
1830
|
+
normalized = str(path or "").strip()
|
|
1831
|
+
if not normalized:
|
|
1832
|
+
return ""
|
|
1833
|
+
digest = hashlib.sha256()
|
|
1834
|
+
digest.update(normalized.replace("\\", "/").encode("utf-8", errors="replace"))
|
|
1835
|
+
digest.update(b"\0fs\0")
|
|
1836
|
+
digest.update(_filesystem_fingerprint(repo, normalized).encode("utf-8", errors="replace"))
|
|
1837
|
+
return digest.hexdigest()
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def _capture_git_change_snapshot(repo: str) -> Dict[str, str]:
|
|
1841
|
+
return {path: _changed_path_fingerprint(repo, path) for path in summarize_git_changes(repo)}
|
|
1842
|
+
|
|
1843
|
+
|
|
1844
|
+
def _normalize_baseline_snapshot(repo: str, baseline_changes: Any) -> Dict[str, str]:
|
|
1845
|
+
if isinstance(baseline_changes, dict):
|
|
1846
|
+
return {
|
|
1847
|
+
str(path): str(fingerprint)
|
|
1848
|
+
for path, fingerprint in baseline_changes.items()
|
|
1849
|
+
if str(path or "").strip()
|
|
1850
|
+
}
|
|
1851
|
+
if isinstance(baseline_changes, list):
|
|
1852
|
+
return {
|
|
1853
|
+
str(path): _changed_path_fingerprint(repo, str(path))
|
|
1854
|
+
for path in baseline_changes
|
|
1855
|
+
if str(path or "").strip()
|
|
1856
|
+
}
|
|
1857
|
+
return _capture_git_change_snapshot(repo)
|
|
1858
|
+
|
|
1859
|
+
|
|
1860
|
+
def _codex_changed_paths(repo: str, baseline_snapshot: Any) -> Tuple[List[str], List[str], List[str]]:
|
|
1690
1861
|
changed_paths = summarize_git_changes(repo)
|
|
1691
|
-
delta =
|
|
1692
|
-
effective = [p for p in
|
|
1862
|
+
delta = _paths_changed_after_baseline(repo, changed_paths, baseline_snapshot)
|
|
1863
|
+
effective = [p for p in delta if _is_publishable_changed_path(p)]
|
|
1693
1864
|
return changed_paths, delta, effective
|
|
1694
1865
|
|
|
1695
1866
|
|
|
@@ -1851,7 +2022,7 @@ def _run_codex_task(
|
|
|
1851
2022
|
prompt,
|
|
1852
2023
|
model,
|
|
1853
2024
|
)
|
|
1854
|
-
baseline_snapshot =
|
|
2025
|
+
baseline_snapshot = _normalize_baseline_snapshot(repo, baseline_changes)
|
|
1855
2026
|
|
|
1856
2027
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
|
|
1857
2028
|
last_message_path = Path(tmp_dir) / "codex-last-message.txt"
|
|
@@ -36,6 +36,7 @@ from openai_codex_executor import (
|
|
|
36
36
|
_build_rollout_recovery_guidance,
|
|
37
37
|
_collect_disallowed_shell_wrapper_rejections,
|
|
38
38
|
_codex_changed_paths,
|
|
39
|
+
_capture_git_change_snapshot,
|
|
39
40
|
_describe_non_publishable_paths,
|
|
40
41
|
_detect_offtrack_rollout,
|
|
41
42
|
_detect_codex_workaround_signal,
|
|
@@ -944,6 +945,75 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
944
945
|
self.assertIn("too broad/noisy", str(result.get("stderr") or ""))
|
|
945
946
|
self.assertIn("area0", str(result.get("stderr") or ""))
|
|
946
947
|
|
|
948
|
+
def test_run_codex_task_timeout_ignores_broad_dirty_baseline(self) -> None:
|
|
949
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-dirty-baseline-") as temp_dir:
|
|
950
|
+
repo = Path(temp_dir) / "repo"
|
|
951
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
952
|
+
(repo / "README.md").write_text("# timeout dirty baseline repo\n", encoding="utf-8")
|
|
953
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
954
|
+
subprocess.run(
|
|
955
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
956
|
+
cwd=repo,
|
|
957
|
+
check=True,
|
|
958
|
+
capture_output=True,
|
|
959
|
+
text=True,
|
|
960
|
+
)
|
|
961
|
+
subprocess.run(
|
|
962
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
963
|
+
cwd=repo,
|
|
964
|
+
check=True,
|
|
965
|
+
capture_output=True,
|
|
966
|
+
text=True,
|
|
967
|
+
)
|
|
968
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
969
|
+
subprocess.run(
|
|
970
|
+
["git", "commit", "-m", "chore: seed timeout dirty baseline repo"],
|
|
971
|
+
cwd=repo,
|
|
972
|
+
check=True,
|
|
973
|
+
capture_output=True,
|
|
974
|
+
text=True,
|
|
975
|
+
)
|
|
976
|
+
for index in range(5):
|
|
977
|
+
root = repo / f"area{index}"
|
|
978
|
+
root.mkdir(exist_ok=True)
|
|
979
|
+
(root / "changed.txt").write_text("pre-existing dirty change\n", encoding="utf-8")
|
|
980
|
+
|
|
981
|
+
stub_path = Path(temp_dir) / "fake_codex_timeout_dirty_baseline.py"
|
|
982
|
+
stub_path.write_text(
|
|
983
|
+
"\n".join(
|
|
984
|
+
[
|
|
985
|
+
"import sys",
|
|
986
|
+
"import time",
|
|
987
|
+
"",
|
|
988
|
+
"sys.stdin.read()",
|
|
989
|
+
"print('item.completed | Still thinking without changing baseline files.', flush=True)",
|
|
990
|
+
"time.sleep(5)",
|
|
991
|
+
]
|
|
992
|
+
),
|
|
993
|
+
encoding="utf-8",
|
|
994
|
+
)
|
|
995
|
+
|
|
996
|
+
env_overrides = {
|
|
997
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
998
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
999
|
+
"OPENAI_API_KEY": "pushpals-timeout-dirty-baseline-test-key",
|
|
1000
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
|
|
1001
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
|
|
1002
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1003
|
+
}
|
|
1004
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1005
|
+
result = _run_codex_task(
|
|
1006
|
+
str(repo),
|
|
1007
|
+
"Make a compact scoped patch, then continue thinking too long.",
|
|
1008
|
+
[],
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
self.assertFalse(result.get("ok"), result)
|
|
1012
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
1013
|
+
self.assertIn("execution timed out", str(result.get("summary") or ""))
|
|
1014
|
+
self.assertNotIn("broad/noisy", str(result.get("summary") or ""))
|
|
1015
|
+
self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
|
|
1016
|
+
|
|
947
1017
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
948
1018
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
949
1019
|
repo = Path(temp_dir) / "repo"
|
|
@@ -1215,6 +1285,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1215
1285
|
self.assertGreaterEqual(len(delta), 2)
|
|
1216
1286
|
self.assertEqual(effective, [])
|
|
1217
1287
|
|
|
1288
|
+
def test_codex_changed_paths_ignores_publishable_paths_dirty_at_baseline(self) -> None:
|
|
1289
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-") as temp_dir:
|
|
1290
|
+
repo = Path(temp_dir) / "repo"
|
|
1291
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1292
|
+
(repo / "README.md").write_text("# dirty baseline repo\n", encoding="utf-8")
|
|
1293
|
+
(repo / "src").mkdir()
|
|
1294
|
+
(repo / "src" / "existing.ts").write_text("export const value = 1;\n", encoding="utf-8")
|
|
1295
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1296
|
+
subprocess.run(
|
|
1297
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1298
|
+
cwd=repo,
|
|
1299
|
+
check=True,
|
|
1300
|
+
capture_output=True,
|
|
1301
|
+
text=True,
|
|
1302
|
+
)
|
|
1303
|
+
subprocess.run(
|
|
1304
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1305
|
+
cwd=repo,
|
|
1306
|
+
check=True,
|
|
1307
|
+
capture_output=True,
|
|
1308
|
+
text=True,
|
|
1309
|
+
)
|
|
1310
|
+
subprocess.run(["git", "add", "."], cwd=repo, check=True, capture_output=True, text=True)
|
|
1311
|
+
subprocess.run(
|
|
1312
|
+
["git", "commit", "-m", "chore: seed dirty baseline repo"],
|
|
1313
|
+
cwd=repo,
|
|
1314
|
+
check=True,
|
|
1315
|
+
capture_output=True,
|
|
1316
|
+
text=True,
|
|
1317
|
+
)
|
|
1318
|
+
(repo / "README.md").write_text("# dirty baseline repo\n\npre-existing edit\n", encoding="utf-8")
|
|
1319
|
+
(repo / "src" / "existing.ts").write_text("export const value = 2;\n", encoding="utf-8")
|
|
1320
|
+
baseline = _capture_git_change_snapshot(str(repo))
|
|
1321
|
+
|
|
1322
|
+
changed_paths, delta, effective = _codex_changed_paths(str(repo), baseline)
|
|
1323
|
+
|
|
1324
|
+
self.assertIn("README.md", changed_paths)
|
|
1325
|
+
self.assertEqual(delta, [])
|
|
1326
|
+
self.assertEqual(effective, [])
|
|
1327
|
+
|
|
1328
|
+
def test_codex_changed_paths_counts_worker_edits_to_dirty_baseline_paths(self) -> None:
|
|
1329
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-mutated-") as temp_dir:
|
|
1330
|
+
repo = Path(temp_dir) / "repo"
|
|
1331
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1332
|
+
(repo / "README.md").write_text("# dirty baseline mutation repo\n", encoding="utf-8")
|
|
1333
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1334
|
+
subprocess.run(
|
|
1335
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1336
|
+
cwd=repo,
|
|
1337
|
+
check=True,
|
|
1338
|
+
capture_output=True,
|
|
1339
|
+
text=True,
|
|
1340
|
+
)
|
|
1341
|
+
subprocess.run(
|
|
1342
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1343
|
+
cwd=repo,
|
|
1344
|
+
check=True,
|
|
1345
|
+
capture_output=True,
|
|
1346
|
+
text=True,
|
|
1347
|
+
)
|
|
1348
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1349
|
+
subprocess.run(
|
|
1350
|
+
["git", "commit", "-m", "chore: seed dirty baseline mutation repo"],
|
|
1351
|
+
cwd=repo,
|
|
1352
|
+
check=True,
|
|
1353
|
+
capture_output=True,
|
|
1354
|
+
text=True,
|
|
1355
|
+
)
|
|
1356
|
+
(repo / "README.md").write_text("# dirty baseline mutation repo\n\npre-existing edit\n", encoding="utf-8")
|
|
1357
|
+
baseline = _capture_git_change_snapshot(str(repo))
|
|
1358
|
+
(repo / "README.md").write_text(
|
|
1359
|
+
"# dirty baseline mutation repo\n\npre-existing edit\nworker edit\n",
|
|
1360
|
+
encoding="utf-8",
|
|
1361
|
+
)
|
|
1362
|
+
|
|
1363
|
+
_, delta, effective = _codex_changed_paths(str(repo), baseline)
|
|
1364
|
+
|
|
1365
|
+
self.assertEqual(delta, ["README.md"])
|
|
1366
|
+
self.assertEqual(effective, ["README.md"])
|
|
1367
|
+
|
|
1218
1368
|
def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
|
|
1219
1369
|
changed_paths = [
|
|
1220
1370
|
"node_modules/react/index.js",
|
|
@@ -1236,6 +1386,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1236
1386
|
|
|
1237
1387
|
self.assertEqual(watchdog_s, 240)
|
|
1238
1388
|
|
|
1389
|
+
def test_narrow_contract_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1390
|
+
prompt = (
|
|
1391
|
+
"Update app/__tests__/opportunity-graph.contract.test.ts to tighten the "
|
|
1392
|
+
"ranking contract test. Keep this test-only and preserve existing behavior."
|
|
1393
|
+
)
|
|
1394
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1395
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1396
|
+
|
|
1397
|
+
self.assertEqual(watchdog_s, 180)
|
|
1398
|
+
|
|
1239
1399
|
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
1240
1400
|
guidance = _build_no_edit_recovery_guidance(
|
|
1241
1401
|
"item.completed | still inspecting",
|
|
@@ -1243,6 +1403,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1243
1403
|
)
|
|
1244
1404
|
|
|
1245
1405
|
self.assertIn("node_modules", guidance)
|
|
1406
|
+
self.assertIn("patch-first contract", guidance)
|
|
1407
|
+
self.assertIn("Re-reading the target without editing is a failed recovery", guidance)
|
|
1246
1408
|
self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
|
|
1247
1409
|
self.assertIn("Previous Codex event trace excerpt", guidance)
|
|
1248
1410
|
|
|
@@ -1261,6 +1423,15 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1261
1423
|
self.assertEqual(no_edit_s, 240)
|
|
1262
1424
|
self.assertEqual(rollout_s, 180)
|
|
1263
1425
|
|
|
1426
|
+
def test_narrow_contract_rollout_watchdog_is_earlier_than_no_edit_watchdog(self) -> None:
|
|
1427
|
+
prompt = "Tighten the focused contract test for one ranking behavior."
|
|
1428
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
|
|
1429
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1430
|
+
rollout_s = _resolve_rollout_watchdog_seconds(prompt, 1200, no_edit_s)
|
|
1431
|
+
|
|
1432
|
+
self.assertEqual(no_edit_s, 180)
|
|
1433
|
+
self.assertEqual(rollout_s, 120)
|
|
1434
|
+
|
|
1264
1435
|
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
|
|
1265
1436
|
trace = {
|
|
1266
1437
|
"summaries": [
|
|
@@ -1785,9 +1785,10 @@ async function workerLoop(
|
|
|
1785
1785
|
terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
|
|
1786
1786
|
executorBackend: resolveExecutor(CONFIG),
|
|
1787
1787
|
summary: result.summary,
|
|
1788
|
-
watchdogFired:
|
|
1789
|
-
|
|
1790
|
-
|
|
1788
|
+
watchdogFired:
|
|
1789
|
+
/watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
|
|
1790
|
+
`${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
|
|
1791
|
+
),
|
|
1791
1792
|
metadata: {
|
|
1792
1793
|
workerId: opts.workerId,
|
|
1793
1794
|
docker: Boolean(dockerExecutor),
|