@pushpalsdev/cli 1.1.21 → 1.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +288 -31
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +505 -0
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +334 -19
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
package/dist/pushpals-cli.js
CHANGED
|
@@ -4299,6 +4299,25 @@ async function probeRemoteBuddySessionConsumer(serverUrl, sessionId) {
|
|
|
4299
4299
|
};
|
|
4300
4300
|
}
|
|
4301
4301
|
}
|
|
4302
|
+
/**
 * Poll for a connected RemoteBuddy session consumer until the probe reports
 * `ok` or the timeout elapses.
 *
 * The probe always runs at least once, even with a zero timeout.
 *
 * @param {object} opts
 * @param {string} opts.serverUrl  Passed through to the probe.
 * @param {string} opts.sessionId  Passed through to the probe and used in the
 *   default failure detail.
 * @param {number} opts.timeoutMs  Maximum time to keep polling; negative,
 *   missing, or non-numeric values are treated as 0 (single probe).
 * @param {number} [opts.pollMs]   Delay between probes; clamped to >= 50 ms and
 *   defaulting to DEFAULT_RUNTIME_BOOT_POLL_MS.
 * @param {() => number} [opts.nowFn]   Clock override (defaults to Date.now).
 * @param {Function} [opts.probeFn]     Probe override (defaults to
 *   probeRemoteBuddySessionConsumer).
 * @param {Function} [opts.sleepFn]     Sleep override (defaults to Bun.sleep).
 * @returns {Promise<{ok: boolean, detail?: string}>} The last probe result.
 */
async function waitForRemoteBuddySessionConsumer(opts) {
  // Math.max() returns NaN if any argument is NaN, so a missing timeout would
  // poison the deadline and make the loop below unbounded. Coerce first.
  const requestedTimeoutMs = Number(opts.timeoutMs);
  const timeoutMs = Number.isNaN(requestedTimeoutMs) ? 0 : Math.max(0, requestedTimeoutMs);
  const requestedPollMs = Number(opts.pollMs ?? DEFAULT_RUNTIME_BOOT_POLL_MS);
  const pollMs = Math.max(
    50,
    Number.isNaN(requestedPollMs) ? DEFAULT_RUNTIME_BOOT_POLL_MS : requestedPollMs
  );
  const nowFn = opts.nowFn ?? Date.now;
  const deadline = nowFn() + timeoutMs;
  let lastHealth = {
    ok: false,
    detail: `No connected RemoteBuddy session consumer found for session ${opts.sessionId}`
  };
  while (true) {
    lastHealth = await (opts.probeFn ?? probeRemoteBuddySessionConsumer)(opts.serverUrl, opts.sessionId);
    if (lastHealth.ok)
      return lastHealth;
    const remainingMs = deadline - nowFn();
    // Written as a negated ">" so a NaN clock reading also ends the wait
    // instead of looping forever.
    if (!(remainingMs > 0))
      return lastHealth;
    // Never sleep past the deadline.
    await (opts.sleepFn ?? Bun.sleep)(Math.min(pollMs, remainingMs));
  }
}
|
|
4302
4321
|
async function probeSourceControlManager(port) {
|
|
4303
4322
|
if (!Number.isFinite(port) || port <= 0)
|
|
4304
4323
|
return false;
|
|
@@ -5831,7 +5850,11 @@ async function main() {
|
|
|
5831
5850
|
process.exit(1);
|
|
5832
5851
|
}
|
|
5833
5852
|
}
|
|
5834
|
-
remoteBuddyConsumerHealth = await
|
|
5853
|
+
remoteBuddyConsumerHealth = autoStartedServiceManager ? await waitForRemoteBuddySessionConsumer({
|
|
5854
|
+
serverUrl,
|
|
5855
|
+
sessionId: activeSessionId,
|
|
5856
|
+
timeoutMs: DEFAULT_REMOTEBUDDY_CONSUMER_STARTUP_GRACE_MS
|
|
5857
|
+
}) : await probeRemoteBuddySessionConsumer(serverUrl, activeSessionId);
|
|
5835
5858
|
if (!serverHealthy) {
|
|
5836
5859
|
console.error(`[pushpals] Server is unavailable at ${serverUrl}.`);
|
|
5837
5860
|
process.exit(1);
|
|
@@ -6086,6 +6109,7 @@ if (import.meta.main) {
|
|
|
6086
6109
|
}
|
|
6087
6110
|
export {
|
|
6088
6111
|
waitForWorkerpalCapacity,
|
|
6112
|
+
waitForRemoteBuddySessionConsumer,
|
|
6089
6113
|
startEmbeddedMonitoringHub,
|
|
6090
6114
|
shutdownEmbeddedServiceManagerGracefully,
|
|
6091
6115
|
shouldUseRemoteBuddySilentStartupFallback,
|
package/package.json
CHANGED
|
@@ -8,6 +8,7 @@ that the TypeScript host parses.
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import json
|
|
11
|
+
import hashlib
|
|
11
12
|
import os
|
|
12
13
|
import re
|
|
13
14
|
from shutil import rmtree, which
|
|
@@ -103,11 +104,14 @@ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
|
|
|
103
104
|
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
104
105
|
_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
105
106
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
107
|
+
_MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS = 8
|
|
108
|
+
_MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
106
109
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
107
110
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
108
111
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
109
112
|
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
|
|
110
113
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
114
|
+
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
111
115
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
112
116
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
113
117
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
@@ -659,6 +663,21 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
659
663
|
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
660
664
|
|
|
661
665
|
|
|
666
|
+
def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> int:
    """Return the no-edit recheck interval in seconds (always >= 1).

    The interval comes from ``WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S`` when
    that variable holds a positive integer, otherwise from
    ``_DEFAULT_NO_EDIT_RECHECK_S``. When ``communicate_timeout_s`` is truthy the
    interval is capped at one second below it.
    """
    raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S", "").strip()
    interval = _DEFAULT_NO_EDIT_RECHECK_S
    if raw:
        override = _to_positive_int(raw)
        if override is not None:
            interval = override
        else:
            log.info(
                f"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S={raw!r}; using default no-edit recheck interval."
            )

    # Without a communicate timeout the interval is its own ceiling.
    ceiling = communicate_timeout_s - 1 if communicate_timeout_s else interval
    return max(1, min(interval, max(1, ceiling)))
|
|
679
|
+
|
|
680
|
+
|
|
662
681
|
def _looks_like_web_review_prompt(prompt: str) -> bool:
|
|
663
682
|
text = str(prompt or "").lower()
|
|
664
683
|
return "repo-native web review" in text or "web review path" in text
|
|
@@ -695,9 +714,39 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
695
714
|
return max(90, min(default_s, max(90, communicate_timeout_s - 60)))
|
|
696
715
|
|
|
697
716
|
|
|
698
|
-
def
|
|
699
|
-
|
|
700
|
-
|
|
717
|
+
def _baseline_snapshot_paths(baseline_snapshot: Any) -> List[str]:
|
|
718
|
+
if isinstance(baseline_snapshot, dict):
|
|
719
|
+
return [str(path) for path in baseline_snapshot.keys()]
|
|
720
|
+
if isinstance(baseline_snapshot, list):
|
|
721
|
+
return [str(path) for path in baseline_snapshot]
|
|
722
|
+
return []
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def _paths_changed_after_baseline(
    repo: str,
    changed_paths: List[str],
    baseline_snapshot: Any,
) -> List[str]:
    """Return the entries of ``changed_paths`` that differ from the baseline.

    With an empty baseline every changed path is returned. Otherwise a path is
    kept when it is absent from the baseline, or when the baseline is a dict
    and the path's current fingerprint no longer matches the recorded one.
    """
    known_paths = set(_baseline_snapshot_paths(baseline_snapshot))
    if not known_paths:
        return list(changed_paths)

    # Only dict snapshots carry fingerprints; list snapshots compare by name.
    recorded = baseline_snapshot if isinstance(baseline_snapshot, dict) else {}

    def _is_new_or_modified(candidate: str) -> bool:
        if candidate not in known_paths:
            return True
        if not recorded:
            return False
        return _changed_path_fingerprint(repo, candidate) != str(recorded.get(candidate) or "")

    return [candidate for candidate in changed_paths if _is_new_or_modified(candidate)]
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: Any) -> str:
|
|
748
|
+
baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
|
|
749
|
+
inspected = [p for p in changed_paths if p not in baseline_paths] if baseline_paths else changed_paths
|
|
701
750
|
non_publishable = [p for p in inspected if not _is_publishable_changed_path(p)]
|
|
702
751
|
if not non_publishable:
|
|
703
752
|
return ""
|
|
@@ -707,6 +756,13 @@ def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot:
|
|
|
707
756
|
return listed
|
|
708
757
|
|
|
709
758
|
|
|
759
|
+
def _describe_publishable_paths(paths: List[str]) -> str:
|
|
760
|
+
listed = ", ".join(paths[:8])
|
|
761
|
+
if len(paths) > 8:
|
|
762
|
+
listed = f"{listed}, ..."
|
|
763
|
+
return listed
|
|
764
|
+
|
|
765
|
+
|
|
710
766
|
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
711
767
|
lines = [
|
|
712
768
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
@@ -1661,13 +1717,127 @@ def _is_publishable_changed_path(path: str) -> bool:
|
|
|
1661
1717
|
return not re.search(r"(^|/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(/|$)", normalized)
|
|
1662
1718
|
|
|
1663
1719
|
|
|
1664
|
-
def
|
|
1720
|
+
def _filesystem_fingerprint(repo: str, raw_path: str) -> str:
|
|
1721
|
+
root = Path(repo)
|
|
1722
|
+
target = (root / raw_path).resolve()
|
|
1723
|
+
try:
|
|
1724
|
+
root_resolved = root.resolve()
|
|
1725
|
+
common = os.path.commonpath([str(root_resolved), str(target)])
|
|
1726
|
+
if common != str(root_resolved):
|
|
1727
|
+
return "outside-repo"
|
|
1728
|
+
except Exception:
|
|
1729
|
+
return "unresolved"
|
|
1730
|
+
digest = hashlib.sha256()
|
|
1731
|
+
if not target.exists():
|
|
1732
|
+
return "missing"
|
|
1733
|
+
if target.is_file():
|
|
1734
|
+
digest.update(b"file\0")
|
|
1735
|
+
try:
|
|
1736
|
+
digest.update(str(target.stat().st_size).encode("utf-8"))
|
|
1737
|
+
with target.open("rb") as handle:
|
|
1738
|
+
while True:
|
|
1739
|
+
chunk = handle.read(1024 * 1024)
|
|
1740
|
+
if not chunk:
|
|
1741
|
+
break
|
|
1742
|
+
digest.update(chunk)
|
|
1743
|
+
except Exception as exc:
|
|
1744
|
+
digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1745
|
+
return digest.hexdigest()
|
|
1746
|
+
if target.is_dir():
|
|
1747
|
+
digest.update(b"dir\0")
|
|
1748
|
+
files_seen = 0
|
|
1749
|
+
try:
|
|
1750
|
+
for dirpath, dirnames, filenames in os.walk(target):
|
|
1751
|
+
dirnames.sort()
|
|
1752
|
+
filenames.sort()
|
|
1753
|
+
for filename in filenames:
|
|
1754
|
+
if files_seen >= 128:
|
|
1755
|
+
digest.update(b"\0truncated")
|
|
1756
|
+
return digest.hexdigest()
|
|
1757
|
+
child = Path(dirpath) / filename
|
|
1758
|
+
try:
|
|
1759
|
+
rel = child.relative_to(root_resolved).as_posix()
|
|
1760
|
+
except Exception:
|
|
1761
|
+
rel = child.name
|
|
1762
|
+
digest.update(rel.encode("utf-8", errors="replace"))
|
|
1763
|
+
digest.update(b"\0")
|
|
1764
|
+
digest.update(str(child.stat().st_size).encode("utf-8"))
|
|
1765
|
+
digest.update(b"\0")
|
|
1766
|
+
try:
|
|
1767
|
+
with child.open("rb") as handle:
|
|
1768
|
+
digest.update(handle.read(64 * 1024))
|
|
1769
|
+
except Exception as exc:
|
|
1770
|
+
digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1771
|
+
files_seen += 1
|
|
1772
|
+
except Exception as exc:
|
|
1773
|
+
digest.update(f"walk-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
|
|
1774
|
+
return digest.hexdigest()
|
|
1775
|
+
return "special"
|
|
1776
|
+
|
|
1777
|
+
|
|
1778
|
+
def _changed_path_fingerprint(repo: str, path: str) -> str:
|
|
1779
|
+
normalized = str(path or "").strip()
|
|
1780
|
+
if not normalized:
|
|
1781
|
+
return ""
|
|
1782
|
+
digest = hashlib.sha256()
|
|
1783
|
+
digest.update(normalized.replace("\\", "/").encode("utf-8", errors="replace"))
|
|
1784
|
+
digest.update(b"\0fs\0")
|
|
1785
|
+
digest.update(_filesystem_fingerprint(repo, normalized).encode("utf-8", errors="replace"))
|
|
1786
|
+
return digest.hexdigest()
|
|
1787
|
+
|
|
1788
|
+
|
|
1789
|
+
def _capture_git_change_snapshot(repo: str) -> Dict[str, str]:
    """Map each path from ``summarize_git_changes(repo)`` to its fingerprint."""
    snapshot: Dict[str, str] = {}
    for dirty_path in summarize_git_changes(repo):
        snapshot[dirty_path] = _changed_path_fingerprint(repo, dirty_path)
    return snapshot
|
|
1791
|
+
|
|
1792
|
+
|
|
1793
|
+
def _normalize_baseline_snapshot(repo: str, baseline_changes: Any) -> Dict[str, str]:
|
|
1794
|
+
if isinstance(baseline_changes, dict):
|
|
1795
|
+
return {
|
|
1796
|
+
str(path): str(fingerprint)
|
|
1797
|
+
for path, fingerprint in baseline_changes.items()
|
|
1798
|
+
if str(path or "").strip()
|
|
1799
|
+
}
|
|
1800
|
+
if isinstance(baseline_changes, list):
|
|
1801
|
+
return {
|
|
1802
|
+
str(path): _changed_path_fingerprint(repo, str(path))
|
|
1803
|
+
for path in baseline_changes
|
|
1804
|
+
if str(path or "").strip()
|
|
1805
|
+
}
|
|
1806
|
+
return _capture_git_change_snapshot(repo)
|
|
1807
|
+
|
|
1808
|
+
|
|
1809
|
+
def _codex_changed_paths(repo: str, baseline_snapshot: Any) -> Tuple[List[str], List[str], List[str]]:
    """Return ``(changed, delta, effective)`` path lists for ``repo``.

    ``changed`` is everything ``summarize_git_changes`` reports, ``delta`` is
    the subset that differs from the baseline snapshot, and ``effective`` is
    the subset of ``delta`` accepted by ``_is_publishable_changed_path``.
    """
    dirty_now = summarize_git_changes(repo)
    since_baseline = _paths_changed_after_baseline(repo, dirty_now, baseline_snapshot)
    publishable = list(filter(_is_publishable_changed_path, since_baseline))
    return dirty_now, since_baseline, publishable
|
|
1669
1814
|
|
|
1670
1815
|
|
|
1816
|
+
def _changed_path_top_level(path: str) -> str:
|
|
1817
|
+
raw = str(path or "").replace("\\", "/").strip()
|
|
1818
|
+
is_top_level_directory = raw.endswith("/")
|
|
1819
|
+
normalized = raw.strip("/")
|
|
1820
|
+
if not normalized:
|
|
1821
|
+
return ""
|
|
1822
|
+
parts = [part for part in normalized.split("/") if part]
|
|
1823
|
+
if len(parts) > 1 or is_top_level_directory:
|
|
1824
|
+
return parts[0]
|
|
1825
|
+
return "<repo-root>"
|
|
1826
|
+
|
|
1827
|
+
|
|
1828
|
+
def _has_credible_shell_wrapper_progress(effective_paths: List[str]) -> bool:
|
|
1829
|
+
if not effective_paths:
|
|
1830
|
+
return False
|
|
1831
|
+
if len(effective_paths) > _MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS:
|
|
1832
|
+
return False
|
|
1833
|
+
top_levels = {
|
|
1834
|
+
top_level
|
|
1835
|
+
for top_level in (_changed_path_top_level(path) for path in effective_paths)
|
|
1836
|
+
if top_level
|
|
1837
|
+
}
|
|
1838
|
+
return len(top_levels) <= _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS
|
|
1839
|
+
|
|
1840
|
+
|
|
1671
1841
|
def _build_success_stdout(
|
|
1672
1842
|
*,
|
|
1673
1843
|
effective_paths: List[str],
|
|
@@ -1801,7 +1971,7 @@ def _run_codex_task(
|
|
|
1801
1971
|
prompt,
|
|
1802
1972
|
model,
|
|
1803
1973
|
)
|
|
1804
|
-
baseline_snapshot =
|
|
1974
|
+
baseline_snapshot = _normalize_baseline_snapshot(repo, baseline_changes)
|
|
1805
1975
|
|
|
1806
1976
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
|
|
1807
1977
|
last_message_path = Path(tmp_dir) / "codex-last-message.txt"
|
|
@@ -2028,12 +2198,14 @@ def _run_codex_task(
|
|
|
2028
2198
|
rollout_watchdog_fired = False
|
|
2029
2199
|
rollout_watchdog_reason = ""
|
|
2030
2200
|
rollout_artifact_only_paths = ""
|
|
2201
|
+
rollout_watchdog_retryable = True
|
|
2031
2202
|
command_policy_rejection_loop = False
|
|
2032
2203
|
no_edit_watchdog_s = (
|
|
2033
2204
|
_resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
|
|
2034
2205
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
2035
2206
|
else None
|
|
2036
2207
|
)
|
|
2208
|
+
no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
|
|
2037
2209
|
rollout_watchdog_s = (
|
|
2038
2210
|
_resolve_rollout_watchdog_seconds(
|
|
2039
2211
|
prompt,
|
|
@@ -2079,16 +2251,36 @@ def _run_codex_task(
|
|
|
2079
2251
|
)
|
|
2080
2252
|
_terminate_active_child()
|
|
2081
2253
|
break
|
|
2082
|
-
no_edit_deadline =
|
|
2254
|
+
no_edit_deadline = now + float(no_edit_recheck_s)
|
|
2255
|
+
log.info(
|
|
2256
|
+
"No-edit watchdog observed publishable-looking file changes "
|
|
2257
|
+
f"({_describe_publishable_paths(effective_paths)}); rechecking in "
|
|
2258
|
+
f"{int(no_edit_recheck_s)}s to ensure the worker keeps durable PR content."
|
|
2259
|
+
)
|
|
2083
2260
|
|
|
2084
2261
|
if rollout_deadline is not None and now >= rollout_deadline:
|
|
2085
2262
|
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2263
|
+
with trace_lock:
|
|
2264
|
+
live_trace = dict(stdout_trace_state)
|
|
2265
|
+
summaries = stdout_trace_state.get("summaries")
|
|
2266
|
+
if isinstance(summaries, list):
|
|
2267
|
+
live_trace["summaries"] = list(summaries)
|
|
2268
|
+
if effective_paths:
|
|
2269
|
+
small_or_web_task = (
|
|
2270
|
+
_looks_like_small_task_prompt(instruction)
|
|
2271
|
+
or _looks_like_web_review_prompt(instruction)
|
|
2272
|
+
or _looks_like_small_task_prompt(prompt)
|
|
2273
|
+
or _looks_like_web_review_prompt(prompt)
|
|
2274
|
+
)
|
|
2275
|
+
if small_or_web_task and not _has_credible_shell_wrapper_progress(effective_paths):
|
|
2276
|
+
rollout_watchdog_reason = (
|
|
2277
|
+
"publishable-looking changed paths are broad/noisy for a small task: "
|
|
2278
|
+
f"{_describe_publishable_paths(effective_paths)}"
|
|
2279
|
+
)
|
|
2280
|
+
rollout_watchdog_retryable = False
|
|
2281
|
+
else:
|
|
2282
|
+
rollout_deadline = None
|
|
2283
|
+
else:
|
|
2092
2284
|
rollout_artifact_only_paths = _describe_non_publishable_paths(
|
|
2093
2285
|
changed_paths,
|
|
2094
2286
|
baseline_snapshot,
|
|
@@ -2097,18 +2289,23 @@ def _run_codex_task(
|
|
|
2097
2289
|
live_trace,
|
|
2098
2290
|
rollout_artifact_only_paths,
|
|
2099
2291
|
)
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2292
|
+
if rollout_watchdog_reason:
|
|
2293
|
+
rollout_watchdog_fired = True
|
|
2294
|
+
artifact_detail = (
|
|
2295
|
+
f" Artifact-only dirty paths: {rollout_artifact_only_paths}."
|
|
2296
|
+
if rollout_artifact_only_paths
|
|
2297
|
+
else ""
|
|
2298
|
+
)
|
|
2299
|
+
action = (
|
|
2300
|
+
"Retrying with course-correction guidance."
|
|
2301
|
+
if rollout_watchdog_retryable
|
|
2302
|
+
else "Failing fast instead of retrying on top of a broad/noisy diff."
|
|
2303
|
+
)
|
|
2304
|
+
log.info(
|
|
2305
|
+
f"Rollout coach fired after {int(rollout_watchdog_s or 0)}s: {rollout_watchdog_reason}.{artifact_detail} {action}"
|
|
2306
|
+
)
|
|
2307
|
+
_terminate_active_child()
|
|
2308
|
+
break
|
|
2112
2309
|
|
|
2113
2310
|
with trace_lock:
|
|
2114
2311
|
wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
|
|
@@ -2178,7 +2375,7 @@ def _run_codex_task(
|
|
|
2178
2375
|
rejected_shell_wrappers.append(text)
|
|
2179
2376
|
|
|
2180
2377
|
if rollout_watchdog_fired:
|
|
2181
|
-
if rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
2378
|
+
if rollout_watchdog_retryable and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
2182
2379
|
retry_guidance = [
|
|
2183
2380
|
*supplemental_guidance,
|
|
2184
2381
|
_build_rollout_recovery_guidance(
|
|
@@ -2199,7 +2396,7 @@ def _run_codex_task(
|
|
|
2199
2396
|
baseline_changes=baseline_snapshot,
|
|
2200
2397
|
)
|
|
2201
2398
|
detail = (
|
|
2202
|
-
"Codex trajectory remained off-track
|
|
2399
|
+
"Codex trajectory remained off-track or too broad for safe recovery: "
|
|
2203
2400
|
f"{rollout_watchdog_reason or 'no publishable progress'}."
|
|
2204
2401
|
)
|
|
2205
2402
|
if trace_excerpt:
|
|
@@ -2253,8 +2450,9 @@ def _run_codex_task(
|
|
|
2253
2450
|
)
|
|
2254
2451
|
if trace_excerpt:
|
|
2255
2452
|
detail = f"{detail}\n{trace_excerpt}"
|
|
2256
|
-
|
|
2257
|
-
|
|
2453
|
+
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2454
|
+
credible_partial_patch = _has_credible_shell_wrapper_progress(effective_paths)
|
|
2455
|
+
if effective_paths and credible_partial_patch:
|
|
2258
2456
|
last_message = _read_text_if_exists(last_message_path)
|
|
2259
2457
|
log_git_status(repo, log)
|
|
2260
2458
|
prefix = (
|
|
@@ -2281,7 +2479,27 @@ def _run_codex_task(
|
|
|
2281
2479
|
"exitCode": 0,
|
|
2282
2480
|
"usage": usage,
|
|
2283
2481
|
}
|
|
2284
|
-
|
|
2482
|
+
if effective_paths:
|
|
2483
|
+
listed = _describe_publishable_paths(effective_paths)
|
|
2484
|
+
log.warning(
|
|
2485
|
+
"Codex reached the execution timeout with a broad/noisy changed-path set "
|
|
2486
|
+
f"({len(effective_paths)} publishable-looking path(s)); refusing to spend "
|
|
2487
|
+
"additional gate budget on a likely incomplete patch."
|
|
2488
|
+
)
|
|
2489
|
+
detail = (
|
|
2490
|
+
f"{detail}\nPublishable-looking changed paths at timeout were too broad/noisy "
|
|
2491
|
+
f"to preserve as a partial patch ({len(effective_paths)} path(s): {listed}). "
|
|
2492
|
+
"The executor is failing fast so the scheduler can replan instead of running "
|
|
2493
|
+
"expensive validation on a likely incomplete update."
|
|
2494
|
+
)
|
|
2495
|
+
return {
|
|
2496
|
+
"ok": False,
|
|
2497
|
+
"summary": "openai_codex timed out with broad/noisy publishable-looking changes",
|
|
2498
|
+
"stdout": _truncate(stdout),
|
|
2499
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2500
|
+
"exitCode": 124,
|
|
2501
|
+
"usage": usage,
|
|
2502
|
+
}
|
|
2285
2503
|
artifact_only_paths = _describe_non_publishable_paths(changed_paths, baseline_snapshot)
|
|
2286
2504
|
if artifact_only_paths:
|
|
2287
2505
|
detail = (
|
|
@@ -2306,6 +2524,7 @@ def _run_codex_task(
|
|
|
2306
2524
|
|
|
2307
2525
|
if command_policy_rejection_loop:
|
|
2308
2526
|
_, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2527
|
+
credible_progress = _has_credible_shell_wrapper_progress(effective_paths)
|
|
2309
2528
|
if effective_paths:
|
|
2310
2529
|
policy_signal = _detect_codex_workaround_signal(last_message)
|
|
2311
2530
|
if not policy_signal and not last_message.strip():
|
|
@@ -2329,6 +2548,7 @@ def _run_codex_task(
|
|
|
2329
2548
|
"usage": usage,
|
|
2330
2549
|
}
|
|
2331
2550
|
|
|
2551
|
+
if effective_paths and credible_progress:
|
|
2332
2552
|
command_lines = (
|
|
2333
2553
|
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2334
2554
|
if rejected_shell_wrappers
|
|
@@ -2359,6 +2579,13 @@ def _run_codex_task(
|
|
|
2359
2579
|
"usage": usage,
|
|
2360
2580
|
}
|
|
2361
2581
|
|
|
2582
|
+
if effective_paths:
|
|
2583
|
+
log.warning(
|
|
2584
|
+
"Codex hit a shell-wrapper rejection loop with a broad/noisy changed-path set "
|
|
2585
|
+
f"({len(effective_paths)} publishable-looking path(s)); retrying before handing "
|
|
2586
|
+
"the patch to QualityGate."
|
|
2587
|
+
)
|
|
2588
|
+
|
|
2362
2589
|
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
|
|
2363
2590
|
hard_recovery = wrapper_recovery_attempt >= 1
|
|
2364
2591
|
recovery_guidance = _build_wrapper_recovery_guidance(
|
|
@@ -2413,6 +2640,36 @@ def _run_codex_task(
|
|
|
2413
2640
|
).strip()
|
|
2414
2641
|
)
|
|
2415
2642
|
return retry_result
|
|
2643
|
+
if effective_paths:
|
|
2644
|
+
command_lines = (
|
|
2645
|
+
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2646
|
+
if rejected_shell_wrappers
|
|
2647
|
+
else "- (no command details captured)"
|
|
2648
|
+
)
|
|
2649
|
+
log.warning(
|
|
2650
|
+
"Codex exhausted shell-wrapper recovery attempts with file changes still present; "
|
|
2651
|
+
"returning the patch to QualityGate for final assessment."
|
|
2652
|
+
)
|
|
2653
|
+
return {
|
|
2654
|
+
"ok": True,
|
|
2655
|
+
"summary": (
|
|
2656
|
+
"Executed task and modified "
|
|
2657
|
+
f"{len(effective_paths)} file(s) before shell-wrapper command rejections"
|
|
2658
|
+
),
|
|
2659
|
+
"stdout": _build_success_stdout(
|
|
2660
|
+
effective_paths=effective_paths,
|
|
2661
|
+
last_message=last_message,
|
|
2662
|
+
trace_excerpt=trace_excerpt,
|
|
2663
|
+
prefix=(
|
|
2664
|
+
"Codex produced file changes but exhausted command-router shell-wrapper "
|
|
2665
|
+
"recovery attempts. The patch is being handed to ValidationGate/CriticGate for "
|
|
2666
|
+
f"normal assessment.\nRejected commands:\n{command_lines}"
|
|
2667
|
+
),
|
|
2668
|
+
),
|
|
2669
|
+
"stderr": "",
|
|
2670
|
+
"exitCode": 0,
|
|
2671
|
+
"usage": usage,
|
|
2672
|
+
}
|
|
2416
2673
|
command_lines = (
|
|
2417
2674
|
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2418
2675
|
if rejected_shell_wrappers
|