@pushpalsdev/cli 1.1.25 → 1.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +37 -5
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +125 -4
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +172 -2
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +1 -1
package/package.json
CHANGED
|
@@ -6583,6 +6583,8 @@ class RemoteBuddyAutonomousEngine {
|
|
|
6583
6583
|
lastOutcome = "none";
|
|
6584
6584
|
lastDetail = "not_started";
|
|
6585
6585
|
lastCompletedAtMs = 0;
|
|
6586
|
+
dispatchBackoffUntilMs = 0;
|
|
6587
|
+
dispatchBackoffReason = "";
|
|
6586
6588
|
pendingIdeationTimeoutRecovery = null;
|
|
6587
6589
|
constructor(opts) {
|
|
6588
6590
|
this.server = opts.server;
|
|
@@ -7133,10 +7135,32 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7133
7135
|
}
|
|
7134
7136
|
})
|
|
7135
7137
|
});
|
|
7136
|
-
if (!res.ok)
|
|
7138
|
+
if (!res.ok) {
|
|
7139
|
+
let errorPayload = {};
|
|
7140
|
+
try {
|
|
7141
|
+
const parsed = await res.json();
|
|
7142
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
7143
|
+
errorPayload = parsed;
|
|
7144
|
+
}
|
|
7145
|
+
} catch {
|
|
7146
|
+
errorPayload = {};
|
|
7147
|
+
}
|
|
7148
|
+
const code = String(errorPayload.code ?? "").trim();
|
|
7149
|
+
if (res.status === 429 && (code === "autonomy_worker_failure_circuit_open" || code === "autonomy_similar_no_publishable_suppressed" || code === "autonomy_queue_backpressure" || code === "autonomy_open_pr_limit")) {
|
|
7150
|
+
const retryAfterMsRaw = Number(errorPayload.retryAfterMs ?? 0);
|
|
7151
|
+
const retryAfterMs = Number.isFinite(retryAfterMsRaw) ? Math.max(60000, Math.min(60 * 60 * 1000, Math.floor(retryAfterMsRaw))) : 30 * 60 * 1000;
|
|
7152
|
+
this.dispatchBackoffUntilMs = Date.now() + retryAfterMs;
|
|
7153
|
+
this.dispatchBackoffReason = compactStatusDetail(code || String(errorPayload.message ?? "autonomy_enqueue_rejected")) || "autonomy_enqueue_rejected";
|
|
7154
|
+
}
|
|
7137
7155
|
return null;
|
|
7156
|
+
}
|
|
7138
7157
|
const data = await res.json();
|
|
7139
|
-
|
|
7158
|
+
if (data.ok && data.requestId) {
|
|
7159
|
+
this.dispatchBackoffUntilMs = 0;
|
|
7160
|
+
this.dispatchBackoffReason = "";
|
|
7161
|
+
return data.requestId;
|
|
7162
|
+
}
|
|
7163
|
+
return null;
|
|
7140
7164
|
}
|
|
7141
7165
|
isSnapshotExpired(snapshot) {
|
|
7142
7166
|
const createdAt = Date.parse(snapshot.snapshot_created_at);
|
|
@@ -7323,6 +7347,12 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7323
7347
|
let outcome = "skipped";
|
|
7324
7348
|
let outcomeDetail = "not_dispatched";
|
|
7325
7349
|
try {
|
|
7350
|
+
if (Date.now() < this.dispatchBackoffUntilMs) {
|
|
7351
|
+
this.setPhase("dispatch_backoff");
|
|
7352
|
+
const remainingMs = Math.max(0, this.dispatchBackoffUntilMs - Date.now());
|
|
7353
|
+
outcomeDetail = compactStatusDetail(`dispatch_backoff:${this.dispatchBackoffReason || "autonomy_enqueue_rejected"}:${remainingMs}ms`);
|
|
7354
|
+
return;
|
|
7355
|
+
}
|
|
7326
7356
|
this.setPhase("acquire_lock");
|
|
7327
7357
|
const lockResult = await this.acquireDispatchLock(runId);
|
|
7328
7358
|
lockAcquired = lockResult.ok;
|
|
@@ -8651,7 +8681,7 @@ function sanitizeRepoNativeTargetHints(params) {
|
|
|
8651
8681
|
const lower = step.replace(/\\/g, "/").toLowerCase();
|
|
8652
8682
|
return !staleLower.some((path) => lower.includes(path));
|
|
8653
8683
|
});
|
|
8654
|
-
params.plan.scope.write_globs = params.plan.scope.write_globs.filter((glob) => {
|
|
8684
|
+
params.plan.scope.write_globs = (params.plan.scope.write_globs ?? []).filter((glob) => {
|
|
8655
8685
|
const normalized = normalizeTargetPath(glob);
|
|
8656
8686
|
if (!normalized)
|
|
8657
8687
|
return false;
|
|
@@ -10367,8 +10397,10 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
10367
10397
|
});
|
|
10368
10398
|
}
|
|
10369
10399
|
} else {
|
|
10370
|
-
|
|
10371
|
-
|
|
10400
|
+
if (!autonomyMetadata) {
|
|
10401
|
+
await this.assistantMessage(requestSessionId, "I could not queue this WorkerPal task. No task was started.", { turnId, correlationId: requestId, from: eventFrom });
|
|
10402
|
+
}
|
|
10403
|
+
this.rememberPersistentMemory("job_enqueue_failed", `enqueue_failed lane=${lane} intent=${plan.intent} origin=${autonomyMetadata ? "autonomy" : "user"}`, requestId, requestSessionId);
|
|
10372
10404
|
}
|
|
10373
10405
|
await this.fetchImpl(`${this.server}/requests/${requestId}/complete`, {
|
|
10374
10406
|
method: "POST",
|
|
@@ -106,18 +106,23 @@ _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
|
106
106
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
107
107
|
_MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS = 8
|
|
108
108
|
_MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
109
|
+
_MAX_STARTUP_STALL_RECOVERY_ATTEMPTS = 1
|
|
109
110
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
110
111
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
111
112
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
112
113
|
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
|
|
113
114
|
_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
|
|
114
115
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
115
|
-
|
|
116
|
+
_BACKGROUND_NO_EDIT_WATCHDOG_S = 120
|
|
117
|
+
_NO_EDIT_RECOVERY_WATCHDOG_S = 90
|
|
116
118
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
117
119
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
118
120
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
119
121
|
_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
120
122
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
123
|
+
_BACKGROUND_ROLLOUT_WATCHDOG_S = 90
|
|
124
|
+
_NO_PUBLISHABLE_FAILURE_COOLDOWN_MS = 10 * 60 * 1000
|
|
125
|
+
_CODEX_STARTUP_ONLY_EVENT_TYPES = {"thread.started", "turn.started"}
|
|
121
126
|
|
|
122
127
|
|
|
123
128
|
def _model_supports_xhigh_reasoning(model: str) -> bool:
|
|
@@ -682,6 +687,16 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
|
|
|
682
687
|
return max(30, min(120, interval))
|
|
683
688
|
|
|
684
689
|
|
|
690
|
+
def _looks_like_background_autonomy_prompt(prompt: str) -> bool:
|
|
691
|
+
text = str(prompt or "").lower()
|
|
692
|
+
return (
|
|
693
|
+
"priority=background" in text
|
|
694
|
+
or "queuepriority=background" in text
|
|
695
|
+
or "origin=autonomy" in text
|
|
696
|
+
or "autonomy background" in text
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
|
|
685
700
|
def _resolve_no_edit_watchdog_seconds(
|
|
686
701
|
prompt: str,
|
|
687
702
|
communicate_timeout_s: Optional[int],
|
|
@@ -706,7 +721,10 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
706
721
|
return None
|
|
707
722
|
|
|
708
723
|
prompt_text = str(prompt or "").lower()
|
|
709
|
-
|
|
724
|
+
is_background = _looks_like_background_autonomy_prompt(prompt)
|
|
725
|
+
if is_background:
|
|
726
|
+
default_s = _BACKGROUND_NO_EDIT_WATCHDOG_S
|
|
727
|
+
elif _looks_like_narrow_test_task_prompt(prompt):
|
|
710
728
|
default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
|
|
711
729
|
elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
712
730
|
default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
|
|
@@ -718,7 +736,8 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
718
736
|
)
|
|
719
737
|
if recovery_attempt > 0:
|
|
720
738
|
default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
|
|
721
|
-
|
|
739
|
+
floor_s = 90 if is_background or recovery_attempt > 0 else 120
|
|
740
|
+
return max(floor_s, min(default_s, max(floor_s, communicate_timeout_s - 60)))
|
|
722
741
|
|
|
723
742
|
|
|
724
743
|
def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> int:
|
|
@@ -761,7 +780,9 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
761
780
|
else:
|
|
762
781
|
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
763
782
|
|
|
764
|
-
if
|
|
783
|
+
if _looks_like_background_autonomy_prompt(prompt):
|
|
784
|
+
default_s = _BACKGROUND_ROLLOUT_WATCHDOG_S
|
|
785
|
+
elif _looks_like_narrow_test_task_prompt(prompt):
|
|
765
786
|
default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
|
|
766
787
|
elif _looks_like_web_review_prompt(prompt):
|
|
767
788
|
default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
|
|
@@ -843,6 +864,19 @@ def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: st
|
|
|
843
864
|
return "\n".join(lines)
|
|
844
865
|
|
|
845
866
|
|
|
867
|
+
def _build_startup_stall_recovery_guidance(trace_excerpt: str) -> str:
|
|
868
|
+
lines = [
|
|
869
|
+
"Codex startup-stall recovery: the previous Codex subprocess started but emitted no assistant, tool, or reasoning progress before the watchdog.",
|
|
870
|
+
"Treat this as a fresh execution with a patch-first contract. After at most one narrow read of the hinted owner, make the smallest publishable edit.",
|
|
871
|
+
"Do not spend this recovery attempt re-reading broad repository topology or validating before an edit exists.",
|
|
872
|
+
"If the hinted path is absent, choose the nearest existing repo-native owner or test rather than creating unrelated scaffolding.",
|
|
873
|
+
]
|
|
874
|
+
if trace_excerpt:
|
|
875
|
+
lines.append("Previous Codex event trace excerpt:")
|
|
876
|
+
lines.append(trace_excerpt)
|
|
877
|
+
return "\n".join(lines)
|
|
878
|
+
|
|
879
|
+
|
|
846
880
|
def _trace_summaries_text(trace: Dict[str, Any]) -> str:
|
|
847
881
|
summaries = trace.get("summaries")
|
|
848
882
|
if not isinstance(summaries, list):
|
|
@@ -850,6 +884,36 @@ def _trace_summaries_text(trace: Dict[str, Any]) -> str:
|
|
|
850
884
|
return "\n".join(str(item or "") for item in summaries[-80:]).lower()
|
|
851
885
|
|
|
852
886
|
|
|
887
|
+
def _codex_trace_has_work_progress(trace: Dict[str, Any]) -> bool:
|
|
888
|
+
if to_int(trace.get("reasoning_events"), 0) > 0:
|
|
889
|
+
return True
|
|
890
|
+
|
|
891
|
+
event_counts = trace.get("event_type_counts")
|
|
892
|
+
if isinstance(event_counts, dict):
|
|
893
|
+
for key, value in event_counts.items():
|
|
894
|
+
event_type = str(key or "").strip()
|
|
895
|
+
if to_int(value, 0) > 0 and event_type not in _CODEX_STARTUP_ONLY_EVENT_TYPES:
|
|
896
|
+
return True
|
|
897
|
+
|
|
898
|
+
summaries = trace.get("summaries")
|
|
899
|
+
if isinstance(summaries, list):
|
|
900
|
+
for item in summaries:
|
|
901
|
+
summary = str(item or "").strip()
|
|
902
|
+
if not summary:
|
|
903
|
+
continue
|
|
904
|
+
event_type = summary.split("|", 1)[0].strip()
|
|
905
|
+
if event_type not in _CODEX_STARTUP_ONLY_EVENT_TYPES:
|
|
906
|
+
return True
|
|
907
|
+
|
|
908
|
+
return False
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def _codex_trace_is_startup_stall(trace: Dict[str, Any]) -> bool:
|
|
912
|
+
if to_int(trace.get("total_tokens"), 0) > 0:
|
|
913
|
+
return False
|
|
914
|
+
return not _codex_trace_has_work_progress(trace)
|
|
915
|
+
|
|
916
|
+
|
|
853
917
|
def _detect_offtrack_rollout(trace: Dict[str, Any], artifact_only_paths: str = "") -> str:
|
|
854
918
|
text = _trace_summaries_text(trace)
|
|
855
919
|
if artifact_only_paths:
|
|
@@ -1962,6 +2026,7 @@ def _run_codex_task(
|
|
|
1962
2026
|
*,
|
|
1963
2027
|
wrapper_recovery_attempt: int = 0,
|
|
1964
2028
|
model_compatibility_recovery_attempt: int = 0,
|
|
2029
|
+
startup_stall_recovery_attempt: int = 0,
|
|
1965
2030
|
no_edit_recovery_attempt: int = 0,
|
|
1966
2031
|
rollout_recovery_attempt: int = 0,
|
|
1967
2032
|
model_override: Optional[str] = None,
|
|
@@ -2456,6 +2521,7 @@ def _run_codex_task(
|
|
|
2456
2521
|
retry_guidance,
|
|
2457
2522
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2458
2523
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2524
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2459
2525
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2460
2526
|
rollout_recovery_attempt=rollout_recovery_attempt + 1,
|
|
2461
2527
|
model_override=model_override,
|
|
@@ -2474,9 +2540,58 @@ def _run_codex_task(
|
|
|
2474
2540
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2475
2541
|
"exitCode": 124,
|
|
2476
2542
|
"usage": usage,
|
|
2543
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2477
2544
|
}
|
|
2478
2545
|
|
|
2479
2546
|
if no_edit_watchdog_fired:
|
|
2547
|
+
startup_stall = _codex_trace_is_startup_stall(stdout_trace)
|
|
2548
|
+
if startup_stall and startup_stall_recovery_attempt < _MAX_STARTUP_STALL_RECOVERY_ATTEMPTS:
|
|
2549
|
+
retry_guidance = [
|
|
2550
|
+
*supplemental_guidance,
|
|
2551
|
+
_build_startup_stall_recovery_guidance(trace_excerpt),
|
|
2552
|
+
]
|
|
2553
|
+
log.warning(
|
|
2554
|
+
"Codex emitted only startup events before the no-edit watchdog; "
|
|
2555
|
+
"restarting Codex once before classifying the job terminally."
|
|
2556
|
+
)
|
|
2557
|
+
retry_result = _run_codex_task(
|
|
2558
|
+
repo,
|
|
2559
|
+
instruction,
|
|
2560
|
+
retry_guidance,
|
|
2561
|
+
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2562
|
+
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2563
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt + 1,
|
|
2564
|
+
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2565
|
+
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2566
|
+
model_override=model_override,
|
|
2567
|
+
baseline_changes=baseline_snapshot,
|
|
2568
|
+
)
|
|
2569
|
+
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
|
|
2570
|
+
if retry_result.get("ok"):
|
|
2571
|
+
recovered_stdout = str(retry_result.get("stdout") or "").strip()
|
|
2572
|
+
retry_result["stdout"] = _truncate(
|
|
2573
|
+
(
|
|
2574
|
+
"Recovered after the first Codex subprocess stalled before emitting "
|
|
2575
|
+
f"assistant/tool progress.\n\n{recovered_stdout}"
|
|
2576
|
+
).strip()
|
|
2577
|
+
)
|
|
2578
|
+
return retry_result
|
|
2579
|
+
if startup_stall:
|
|
2580
|
+
detail = (
|
|
2581
|
+
"Codex subprocess started but did not emit assistant, tool, reasoning, "
|
|
2582
|
+
"or usage progress before the startup watchdog."
|
|
2583
|
+
)
|
|
2584
|
+
if trace_excerpt:
|
|
2585
|
+
detail = f"{detail}\n{trace_excerpt}"
|
|
2586
|
+
return {
|
|
2587
|
+
"ok": False,
|
|
2588
|
+
"summary": "openai_codex stalled before first response",
|
|
2589
|
+
"stdout": _truncate(stdout),
|
|
2590
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2591
|
+
"exitCode": 124,
|
|
2592
|
+
"usage": usage,
|
|
2593
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2594
|
+
}
|
|
2480
2595
|
if no_edit_recovery_attempt < _MAX_NO_EDIT_RECOVERY_ATTEMPTS:
|
|
2481
2596
|
retry_guidance = [
|
|
2482
2597
|
*supplemental_guidance,
|
|
@@ -2491,6 +2606,7 @@ def _run_codex_task(
|
|
|
2491
2606
|
retry_guidance,
|
|
2492
2607
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2493
2608
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2609
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2494
2610
|
no_edit_recovery_attempt=no_edit_recovery_attempt + 1,
|
|
2495
2611
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2496
2612
|
model_override=model_override,
|
|
@@ -2506,6 +2622,7 @@ def _run_codex_task(
|
|
|
2506
2622
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2507
2623
|
"exitCode": 124,
|
|
2508
2624
|
"usage": usage,
|
|
2625
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2509
2626
|
}
|
|
2510
2627
|
|
|
2511
2628
|
if timed_out:
|
|
@@ -2565,6 +2682,7 @@ def _run_codex_task(
|
|
|
2565
2682
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2566
2683
|
"exitCode": 124,
|
|
2567
2684
|
"usage": usage,
|
|
2685
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2568
2686
|
}
|
|
2569
2687
|
artifact_only_paths = _describe_non_publishable_paths(changed_paths, baseline_snapshot)
|
|
2570
2688
|
if artifact_only_paths:
|
|
@@ -2583,6 +2701,7 @@ def _run_codex_task(
|
|
|
2583
2701
|
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2584
2702
|
"exitCode": 124,
|
|
2585
2703
|
"usage": usage,
|
|
2704
|
+
"cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
|
|
2586
2705
|
}
|
|
2587
2706
|
|
|
2588
2707
|
last_message = _read_text_if_exists(last_message_path)
|
|
@@ -2683,6 +2802,7 @@ def _run_codex_task(
|
|
|
2683
2802
|
],
|
|
2684
2803
|
wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
|
|
2685
2804
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt,
|
|
2805
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2686
2806
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2687
2807
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2688
2808
|
model_override=model_override,
|
|
@@ -2797,6 +2917,7 @@ def _run_codex_task(
|
|
|
2797
2917
|
effective_supplemental_guidance,
|
|
2798
2918
|
wrapper_recovery_attempt=wrapper_recovery_attempt,
|
|
2799
2919
|
model_compatibility_recovery_attempt=model_compatibility_recovery_attempt + 1,
|
|
2920
|
+
startup_stall_recovery_attempt=startup_stall_recovery_attempt,
|
|
2800
2921
|
no_edit_recovery_attempt=no_edit_recovery_attempt,
|
|
2801
2922
|
rollout_recovery_attempt=rollout_recovery_attempt,
|
|
2802
2923
|
model_override=LEGACY_CODEX_MODEL_FALLBACK,
|
|
@@ -229,6 +229,21 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
229
229
|
"xhigh",
|
|
230
230
|
)
|
|
231
231
|
|
|
232
|
+
def test_background_autonomy_uses_short_no_edit_and_rollout_watchdogs(self) -> None:
|
|
233
|
+
prompt = (
|
|
234
|
+
"Task planning contract from PushPals:\n"
|
|
235
|
+
"- Planning summary: intent=code_change, risk=low, priority=background\n"
|
|
236
|
+
"Make one narrow repo-native patch and avoid broad discovery.\n"
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
no_edit = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
240
|
+
self.assertEqual(no_edit, 120)
|
|
241
|
+
self.assertEqual(
|
|
242
|
+
_resolve_no_edit_watchdog_seconds(prompt, 1200, recovery_attempt=1),
|
|
243
|
+
90,
|
|
244
|
+
)
|
|
245
|
+
self.assertEqual(_resolve_rollout_watchdog_seconds(prompt, 1200, no_edit), 90)
|
|
246
|
+
|
|
232
247
|
def test_runtime_config_prefers_explicit_config_dir_override(self) -> None:
|
|
233
248
|
import executor_base
|
|
234
249
|
|
|
@@ -1014,6 +1029,159 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1014
1029
|
self.assertNotIn("broad/noisy", str(result.get("summary") or ""))
|
|
1015
1030
|
self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
|
|
1016
1031
|
|
|
1032
|
+
def test_run_codex_task_retries_once_when_codex_stalls_before_first_response(self) -> None:
|
|
1033
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-startup-stall-") as temp_dir:
|
|
1034
|
+
repo = Path(temp_dir) / "repo"
|
|
1035
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1036
|
+
(repo / "README.md").write_text("# startup stall repo\n", encoding="utf-8")
|
|
1037
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1038
|
+
subprocess.run(
|
|
1039
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1040
|
+
cwd=repo,
|
|
1041
|
+
check=True,
|
|
1042
|
+
capture_output=True,
|
|
1043
|
+
text=True,
|
|
1044
|
+
)
|
|
1045
|
+
subprocess.run(
|
|
1046
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1047
|
+
cwd=repo,
|
|
1048
|
+
check=True,
|
|
1049
|
+
capture_output=True,
|
|
1050
|
+
text=True,
|
|
1051
|
+
)
|
|
1052
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1053
|
+
subprocess.run(
|
|
1054
|
+
["git", "commit", "-m", "chore: seed startup stall repo"],
|
|
1055
|
+
cwd=repo,
|
|
1056
|
+
check=True,
|
|
1057
|
+
capture_output=True,
|
|
1058
|
+
text=True,
|
|
1059
|
+
)
|
|
1060
|
+
|
|
1061
|
+
stub_path = Path(temp_dir) / "fake_codex_startup_stall.py"
|
|
1062
|
+
stub_path.write_text(
|
|
1063
|
+
"\n".join(
|
|
1064
|
+
[
|
|
1065
|
+
"from pathlib import Path",
|
|
1066
|
+
"import json",
|
|
1067
|
+
"import sys",
|
|
1068
|
+
"import time",
|
|
1069
|
+
"",
|
|
1070
|
+
"argv = sys.argv[1:]",
|
|
1071
|
+
"last_message_path = None",
|
|
1072
|
+
"for index, arg in enumerate(argv):",
|
|
1073
|
+
" if arg == '--output-last-message' and index + 1 < len(argv):",
|
|
1074
|
+
" last_message_path = argv[index + 1]",
|
|
1075
|
+
" break",
|
|
1076
|
+
"",
|
|
1077
|
+
"prompt = sys.stdin.read()",
|
|
1078
|
+
"if 'Codex startup-stall recovery' in prompt:",
|
|
1079
|
+
" Path('src').mkdir(exist_ok=True)",
|
|
1080
|
+
" Path('src/startup-stall-recovered.txt').write_text('patched after restart\\n', encoding='utf-8')",
|
|
1081
|
+
" if last_message_path:",
|
|
1082
|
+
" Path(last_message_path).write_text('Patched after Codex startup-stall recovery.', encoding='utf-8')",
|
|
1083
|
+
" print(json.dumps({'type': 'item.completed', 'message': 'Patched after Codex startup-stall recovery.'}), flush=True)",
|
|
1084
|
+
" sys.exit(0)",
|
|
1085
|
+
"",
|
|
1086
|
+
"print(json.dumps({'type': 'thread.started'}), flush=True)",
|
|
1087
|
+
"print(json.dumps({'type': 'turn.started'}), flush=True)",
|
|
1088
|
+
"time.sleep(10)",
|
|
1089
|
+
]
|
|
1090
|
+
),
|
|
1091
|
+
encoding="utf-8",
|
|
1092
|
+
)
|
|
1093
|
+
|
|
1094
|
+
env_overrides = {
|
|
1095
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1096
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1097
|
+
"OPENAI_API_KEY": "pushpals-startup-stall-test-key",
|
|
1098
|
+
"WORKERPALS_OPENAI_CODEX_JSON": "true",
|
|
1099
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
|
|
1100
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
1101
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1102
|
+
}
|
|
1103
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1104
|
+
result = _run_codex_task(
|
|
1105
|
+
str(repo),
|
|
1106
|
+
"Rename one misleading test fixture constant and update the related assertions.",
|
|
1107
|
+
[],
|
|
1108
|
+
)
|
|
1109
|
+
|
|
1110
|
+
self.assertTrue(result.get("ok"), result)
|
|
1111
|
+
self.assertEqual(result.get("exitCode"), 0)
|
|
1112
|
+
stdout = str(result.get("stdout") or "")
|
|
1113
|
+
self.assertIn("Recovered after the first Codex subprocess stalled", stdout)
|
|
1114
|
+
self.assertIn("Patched after Codex startup-stall recovery", stdout)
|
|
1115
|
+
self.assertIn("src/", stdout)
|
|
1116
|
+
|
|
1117
|
+
def test_run_codex_task_reports_startup_stall_when_restart_also_never_responds(self) -> None:
|
|
1118
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-startup-stall-fail-") as temp_dir:
|
|
1119
|
+
repo = Path(temp_dir) / "repo"
|
|
1120
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1121
|
+
(repo / "README.md").write_text("# startup stall failure repo\n", encoding="utf-8")
|
|
1122
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1123
|
+
subprocess.run(
|
|
1124
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1125
|
+
cwd=repo,
|
|
1126
|
+
check=True,
|
|
1127
|
+
capture_output=True,
|
|
1128
|
+
text=True,
|
|
1129
|
+
)
|
|
1130
|
+
subprocess.run(
|
|
1131
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1132
|
+
cwd=repo,
|
|
1133
|
+
check=True,
|
|
1134
|
+
capture_output=True,
|
|
1135
|
+
text=True,
|
|
1136
|
+
)
|
|
1137
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1138
|
+
subprocess.run(
|
|
1139
|
+
["git", "commit", "-m", "chore: seed startup stall failure repo"],
|
|
1140
|
+
cwd=repo,
|
|
1141
|
+
check=True,
|
|
1142
|
+
capture_output=True,
|
|
1143
|
+
text=True,
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
stub_path = Path(temp_dir) / "fake_codex_startup_stall_fail.py"
|
|
1147
|
+
stub_path.write_text(
|
|
1148
|
+
"\n".join(
|
|
1149
|
+
[
|
|
1150
|
+
"import json",
|
|
1151
|
+
"import sys",
|
|
1152
|
+
"import time",
|
|
1153
|
+
"",
|
|
1154
|
+
"sys.stdin.read()",
|
|
1155
|
+
"print(json.dumps({'type': 'thread.started'}), flush=True)",
|
|
1156
|
+
"print(json.dumps({'type': 'turn.started'}), flush=True)",
|
|
1157
|
+
"time.sleep(10)",
|
|
1158
|
+
]
|
|
1159
|
+
),
|
|
1160
|
+
encoding="utf-8",
|
|
1161
|
+
)
|
|
1162
|
+
|
|
1163
|
+
env_overrides = {
|
|
1164
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1165
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1166
|
+
"OPENAI_API_KEY": "pushpals-startup-stall-fail-test-key",
|
|
1167
|
+
"WORKERPALS_OPENAI_CODEX_JSON": "true",
|
|
1168
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
|
|
1169
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
1170
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1171
|
+
}
|
|
1172
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1173
|
+
result = _run_codex_task(
|
|
1174
|
+
str(repo),
|
|
1175
|
+
"Rename one misleading test fixture constant and update the related assertions.",
|
|
1176
|
+
[],
|
|
1177
|
+
)
|
|
1178
|
+
|
|
1179
|
+
self.assertFalse(result.get("ok"), result)
|
|
1180
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
1181
|
+
self.assertEqual(result.get("summary"), "openai_codex stalled before first response")
|
|
1182
|
+
self.assertNotIn("no publishable", str(result.get("summary") or "").lower())
|
|
1183
|
+
self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1184
|
+
|
|
1017
1185
|
def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
|
|
1018
1186
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
|
|
1019
1187
|
repo = Path(temp_dir) / "repo"
|
|
@@ -1156,6 +1324,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1156
1324
|
self.assertFalse(result.get("ok"), result)
|
|
1157
1325
|
self.assertEqual(result.get("exitCode"), 124)
|
|
1158
1326
|
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
1327
|
+
self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1159
1328
|
|
|
1160
1329
|
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
|
|
1161
1330
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
|
|
@@ -1407,7 +1576,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1407
1576
|
|
|
1408
1577
|
self.assertEqual(watchdog_s, 180)
|
|
1409
1578
|
|
|
1410
|
-
def
|
|
1579
|
+
def test_no_edit_recovery_attempt_uses_short_patch_first_watchdog(self) -> None:
|
|
1411
1580
|
prompt = "Investigate a broad reliability issue and make the smallest safe fix."
|
|
1412
1581
|
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1413
1582
|
first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
@@ -1418,7 +1587,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1418
1587
|
)
|
|
1419
1588
|
|
|
1420
1589
|
self.assertEqual(first_attempt_s, 480)
|
|
1421
|
-
self.assertEqual(recovery_attempt_s,
|
|
1590
|
+
self.assertEqual(recovery_attempt_s, 90)
|
|
1422
1591
|
|
|
1423
1592
|
def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
|
|
1424
1593
|
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
|
|
@@ -1650,6 +1819,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1650
1819
|
self.assertIn("rollout coach", str(result.get("summary") or ""))
|
|
1651
1820
|
self.assertIn("broad/noisy", str(result.get("stderr") or ""))
|
|
1652
1821
|
self.assertIn("area0", str(result.get("stderr") or ""))
|
|
1822
|
+
self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1653
1823
|
|
|
1654
1824
|
def test_run_codex_task_timeout_reports_artifact_only_changes(self) -> None:
|
|
1655
1825
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-timeout-") as temp_dir:
|
|
@@ -450,7 +450,7 @@ function mergeWorkerDiagnostics(
|
|
|
450
450
|
function inferWorkerTerminalFailureClass(result: JobResult): string {
|
|
451
451
|
if (result.ok) return "success";
|
|
452
452
|
const text = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
|
|
453
|
-
if (/timed out|timeout|signal 15|terminated|exit 143|exit 137/.test(text)) return "timeout";
|
|
453
|
+
if (/timed out|timeout|signal 15|terminated|exit 143|exit 137|stalled before first response|startup stall/.test(text)) return "timeout";
|
|
454
454
|
if (/no publishable|non-publishable|node_modules/.test(text)) return "artifact_only_no_publishable_patch";
|
|
455
455
|
if (/validationgate|validation/.test(text)) return "validation";
|
|
456
456
|
if (/scopegate|scope/.test(text)) return "scope";
|