@pushpalsdev/cli 1.1.20 → 1.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +25 -1
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +161 -24
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +355 -0
- package/runtime/sandbox/apps/workerpals/src/common/generic_python_executor.ts +45 -3
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +69 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +75 -16
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +557 -57
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +3 -0
- package/runtime/sandbox/apps/workerpals/src/merge_conflict_job.ts +9 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +131 -3
package/dist/pushpals-cli.js
CHANGED
|
@@ -4299,6 +4299,25 @@ async function probeRemoteBuddySessionConsumer(serverUrl, sessionId) {
|
|
|
4299
4299
|
};
|
|
4300
4300
|
}
|
|
4301
4301
|
}
|
|
4302
|
+
/**
 * Poll for a connected RemoteBuddy session consumer until one is found or the
 * timeout elapses.
 *
 * @param {object} opts
 * @param {string} opts.serverUrl   Passed through to the probe function.
 * @param {string} opts.sessionId   Passed through to the probe function.
 * @param {number} opts.timeoutMs   Maximum time to keep polling. Negative
 *   values are clamped to 0; a missing/NaN value is treated as 0 so the
 *   function probes once and returns instead of looping forever.
 * @param {number} [opts.pollMs]    Delay between probes (minimum 50 ms).
 *   Defaults to DEFAULT_RUNTIME_BOOT_POLL_MS, also used when the value is NaN.
 * @param {() => number} [opts.nowFn]  Clock, defaults to Date.now.
 * @param {Function} [opts.probeFn] Probe, defaults to
 *   probeRemoteBuddySessionConsumer.
 * @param {Function} [opts.sleepFn] Sleep, defaults to Bun.sleep.
 * @returns {Promise<object>} The first probe result with `ok` truthy, or the
 *   last failing probe result once the deadline has passed.
 */
async function waitForRemoteBuddySessionConsumer(opts) {
  // Math.max(0, NaN) is NaN, and a NaN deadline never satisfies
  // `remainingMs <= 0`, so non-numeric timeouts must be normalised up front.
  const requestedTimeoutMs = Number(opts.timeoutMs);
  const timeoutMs = Number.isNaN(requestedTimeoutMs) ? 0 : Math.max(0, requestedTimeoutMs);
  const requestedPollMs = Number(opts.pollMs ?? DEFAULT_RUNTIME_BOOT_POLL_MS);
  const pollMs = Math.max(
    50,
    Number.isNaN(requestedPollMs) ? DEFAULT_RUNTIME_BOOT_POLL_MS : requestedPollMs
  );
  const nowFn = opts.nowFn ?? Date.now;
  const probe = opts.probeFn ?? probeRemoteBuddySessionConsumer;
  const deadline = nowFn() + timeoutMs;
  let lastHealth = {
    ok: false,
    detail: `No connected RemoteBuddy session consumer found for session ${opts.sessionId}`
  };
  while (true) {
    lastHealth = await probe(opts.serverUrl, opts.sessionId);
    if (lastHealth.ok)
      return lastHealth;
    const remainingMs = deadline - nowFn();
    if (remainingMs <= 0)
      return lastHealth;
    // Resolved lazily so Bun.sleep is only touched when a sleep is needed.
    await (opts.sleepFn ?? Bun.sleep)(Math.min(pollMs, remainingMs));
  }
}
|
|
4302
4321
|
async function probeSourceControlManager(port) {
|
|
4303
4322
|
if (!Number.isFinite(port) || port <= 0)
|
|
4304
4323
|
return false;
|
|
@@ -5831,7 +5850,11 @@ async function main() {
|
|
|
5831
5850
|
process.exit(1);
|
|
5832
5851
|
}
|
|
5833
5852
|
}
|
|
5834
|
-
remoteBuddyConsumerHealth = await probeRemoteBuddySessionConsumer(serverUrl, activeSessionId);
|
|
5853
|
+
remoteBuddyConsumerHealth = autoStartedServiceManager ? await waitForRemoteBuddySessionConsumer({
|
|
5854
|
+
serverUrl,
|
|
5855
|
+
sessionId: activeSessionId,
|
|
5856
|
+
timeoutMs: DEFAULT_REMOTEBUDDY_CONSUMER_STARTUP_GRACE_MS
|
|
5857
|
+
}) : await probeRemoteBuddySessionConsumer(serverUrl, activeSessionId);
|
|
5835
5858
|
if (!serverHealthy) {
|
|
5836
5859
|
console.error(`[pushpals] Server is unavailable at ${serverUrl}.`);
|
|
5837
5860
|
process.exit(1);
|
|
@@ -6086,6 +6109,7 @@ if (import.meta.main) {
|
|
|
6086
6109
|
}
|
|
6087
6110
|
export {
|
|
6088
6111
|
waitForWorkerpalCapacity,
|
|
6112
|
+
waitForRemoteBuddySessionConsumer,
|
|
6089
6113
|
startEmbeddedMonitoringHub,
|
|
6090
6114
|
shutdownEmbeddedServiceManagerGracefully,
|
|
6091
6115
|
shouldUseRemoteBuddySilentStartupFallback,
|
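package/package.json
CHANGED
|
[package.json diff body (+1 -1) was not captured in this extract]
package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py
CHANGED
|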
@@ -103,11 +103,14 @@ _VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
|
|
|
103
103
|
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
|
|
104
104
|
_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
|
|
105
105
|
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
|
|
106
|
+
_MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS = 8
|
|
107
|
+
_MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
106
108
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
107
109
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
108
110
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
109
111
|
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
|
|
110
112
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
113
|
+
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
111
114
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
112
115
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
113
116
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
@@ -659,6 +662,21 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
659
662
|
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
660
663
|
|
|
661
664
|
|
|
665
|
+
def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> int:
    """Return the delay, in seconds, before the no-edit check runs again.

    The interval is taken from ``WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S``
    when ``_to_positive_int`` accepts its value; an unparseable value is logged
    and ``_DEFAULT_NO_EDIT_RECHECK_S`` is used instead, as it is when the
    variable is unset or blank. The result is never below 1 and, when
    ``communicate_timeout_s`` is truthy, never above
    ``max(1, communicate_timeout_s - 1)``.
    """
    interval_s = _DEFAULT_NO_EDIT_RECHECK_S
    override = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S", "").strip()
    if override:
        candidate = _to_positive_int(override)
        if candidate is not None:
            interval_s = candidate
        else:
            log.info(
                f"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S={override!r}; using default no-edit recheck interval."
            )

    # Without a communicate timeout the interval is only bounded by itself.
    if communicate_timeout_s:
        ceiling_s = max(1, communicate_timeout_s - 1)
    else:
        ceiling_s = max(1, interval_s)
    return max(1, min(interval_s, ceiling_s))
|
|
678
|
+
|
|
679
|
+
|
|
662
680
|
def _looks_like_web_review_prompt(prompt: str) -> bool:
|
|
663
681
|
text = str(prompt or "").lower()
|
|
664
682
|
return "repo-native web review" in text or "web review path" in text
|
|
@@ -707,6 +725,13 @@ def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot:
|
|
|
707
725
|
return listed
|
|
708
726
|
|
|
709
727
|
|
|
728
|
+
def _describe_publishable_paths(paths: List[str]) -> str:
|
|
729
|
+
listed = ", ".join(paths[:8])
|
|
730
|
+
if len(paths) > 8:
|
|
731
|
+
listed = f"{listed}, ..."
|
|
732
|
+
return listed
|
|
733
|
+
|
|
734
|
+
|
|
710
735
|
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
711
736
|
lines = [
|
|
712
737
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
@@ -1668,6 +1693,31 @@ def _codex_changed_paths(repo: str, baseline_snapshot: List[str]) -> Tuple[List[
|
|
|
1668
1693
|
return changed_paths, delta, effective
|
|
1669
1694
|
|
|
1670
1695
|
|
|
1696
|
+
def _changed_path_top_level(path: str) -> str:
|
|
1697
|
+
raw = str(path or "").replace("\\", "/").strip()
|
|
1698
|
+
is_top_level_directory = raw.endswith("/")
|
|
1699
|
+
normalized = raw.strip("/")
|
|
1700
|
+
if not normalized:
|
|
1701
|
+
return ""
|
|
1702
|
+
parts = [part for part in normalized.split("/") if part]
|
|
1703
|
+
if len(parts) > 1 or is_top_level_directory:
|
|
1704
|
+
return parts[0]
|
|
1705
|
+
return "<repo-root>"
|
|
1706
|
+
|
|
1707
|
+
|
|
1708
|
+
def _has_credible_shell_wrapper_progress(effective_paths: List[str]) -> bool:
    """Tell whether the changed-path set is small and focused.

    Returns ``False`` for an empty path list or one with more than
    ``_MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS`` entries. Otherwise returns
    ``True`` only when the paths fall under at most
    ``_MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS`` distinct non-empty buckets, as
    reported by ``_changed_path_top_level``.
    """
    if not effective_paths or len(effective_paths) > _MAX_CREDIBLE_WRAPPER_LOOP_CHANGED_PATHS:
        return False
    # filter(None, ...) drops the empty bucket returned for blank paths.
    distinct_buckets = set(filter(None, map(_changed_path_top_level, effective_paths)))
    return len(distinct_buckets) <= _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS
|
|
1719
|
+
|
|
1720
|
+
|
|
1671
1721
|
def _build_success_stdout(
|
|
1672
1722
|
*,
|
|
1673
1723
|
effective_paths: List[str],
|
|
@@ -2028,12 +2078,14 @@ def _run_codex_task(
|
|
|
2028
2078
|
rollout_watchdog_fired = False
|
|
2029
2079
|
rollout_watchdog_reason = ""
|
|
2030
2080
|
rollout_artifact_only_paths = ""
|
|
2081
|
+
rollout_watchdog_retryable = True
|
|
2031
2082
|
command_policy_rejection_loop = False
|
|
2032
2083
|
no_edit_watchdog_s = (
|
|
2033
2084
|
_resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
|
|
2034
2085
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
2035
2086
|
else None
|
|
2036
2087
|
)
|
|
2088
|
+
no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
|
|
2037
2089
|
rollout_watchdog_s = (
|
|
2038
2090
|
_resolve_rollout_watchdog_seconds(
|
|
2039
2091
|
prompt,
|
|
@@ -2079,16 +2131,36 @@ def _run_codex_task(
|
|
|
2079
2131
|
)
|
|
2080
2132
|
_terminate_active_child()
|
|
2081
2133
|
break
|
|
2082
|
-
no_edit_deadline =
|
|
2134
|
+
no_edit_deadline = now + float(no_edit_recheck_s)
|
|
2135
|
+
log.info(
|
|
2136
|
+
"No-edit watchdog observed publishable-looking file changes "
|
|
2137
|
+
f"({_describe_publishable_paths(effective_paths)}); rechecking in "
|
|
2138
|
+
f"{int(no_edit_recheck_s)}s to ensure the worker keeps durable PR content."
|
|
2139
|
+
)
|
|
2083
2140
|
|
|
2084
2141
|
if rollout_deadline is not None and now >= rollout_deadline:
|
|
2085
2142
|
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2143
|
+
with trace_lock:
|
|
2144
|
+
live_trace = dict(stdout_trace_state)
|
|
2145
|
+
summaries = stdout_trace_state.get("summaries")
|
|
2146
|
+
if isinstance(summaries, list):
|
|
2147
|
+
live_trace["summaries"] = list(summaries)
|
|
2148
|
+
if effective_paths:
|
|
2149
|
+
small_or_web_task = (
|
|
2150
|
+
_looks_like_small_task_prompt(instruction)
|
|
2151
|
+
or _looks_like_web_review_prompt(instruction)
|
|
2152
|
+
or _looks_like_small_task_prompt(prompt)
|
|
2153
|
+
or _looks_like_web_review_prompt(prompt)
|
|
2154
|
+
)
|
|
2155
|
+
if small_or_web_task and not _has_credible_shell_wrapper_progress(effective_paths):
|
|
2156
|
+
rollout_watchdog_reason = (
|
|
2157
|
+
"publishable-looking changed paths are broad/noisy for a small task: "
|
|
2158
|
+
f"{_describe_publishable_paths(effective_paths)}"
|
|
2159
|
+
)
|
|
2160
|
+
rollout_watchdog_retryable = False
|
|
2161
|
+
else:
|
|
2162
|
+
rollout_deadline = None
|
|
2163
|
+
else:
|
|
2092
2164
|
rollout_artifact_only_paths = _describe_non_publishable_paths(
|
|
2093
2165
|
changed_paths,
|
|
2094
2166
|
baseline_snapshot,
|
|
@@ -2097,18 +2169,23 @@ def _run_codex_task(
|
|
|
2097
2169
|
live_trace,
|
|
2098
2170
|
rollout_artifact_only_paths,
|
|
2099
2171
|
)
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2172
|
+
if rollout_watchdog_reason:
|
|
2173
|
+
rollout_watchdog_fired = True
|
|
2174
|
+
artifact_detail = (
|
|
2175
|
+
f" Artifact-only dirty paths: {rollout_artifact_only_paths}."
|
|
2176
|
+
if rollout_artifact_only_paths
|
|
2177
|
+
else ""
|
|
2178
|
+
)
|
|
2179
|
+
action = (
|
|
2180
|
+
"Retrying with course-correction guidance."
|
|
2181
|
+
if rollout_watchdog_retryable
|
|
2182
|
+
else "Failing fast instead of retrying on top of a broad/noisy diff."
|
|
2183
|
+
)
|
|
2184
|
+
log.info(
|
|
2185
|
+
f"Rollout coach fired after {int(rollout_watchdog_s or 0)}s: {rollout_watchdog_reason}.{artifact_detail} {action}"
|
|
2186
|
+
)
|
|
2187
|
+
_terminate_active_child()
|
|
2188
|
+
break
|
|
2112
2189
|
|
|
2113
2190
|
with trace_lock:
|
|
2114
2191
|
wrapper_rejections = to_int(wrapper_rejection_state.get("count"), 0)
|
|
@@ -2178,7 +2255,7 @@ def _run_codex_task(
|
|
|
2178
2255
|
rejected_shell_wrappers.append(text)
|
|
2179
2256
|
|
|
2180
2257
|
if rollout_watchdog_fired:
|
|
2181
|
-
if rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
2258
|
+
if rollout_watchdog_retryable and rollout_recovery_attempt < _MAX_ROLLOUT_RECOVERY_ATTEMPTS:
|
|
2182
2259
|
retry_guidance = [
|
|
2183
2260
|
*supplemental_guidance,
|
|
2184
2261
|
_build_rollout_recovery_guidance(
|
|
@@ -2199,7 +2276,7 @@ def _run_codex_task(
|
|
|
2199
2276
|
baseline_changes=baseline_snapshot,
|
|
2200
2277
|
)
|
|
2201
2278
|
detail = (
|
|
2202
|
-
"Codex trajectory remained off-track
|
|
2279
|
+
"Codex trajectory remained off-track or too broad for safe recovery: "
|
|
2203
2280
|
f"{rollout_watchdog_reason or 'no publishable progress'}."
|
|
2204
2281
|
)
|
|
2205
2282
|
if trace_excerpt:
|
|
@@ -2253,8 +2330,9 @@ def _run_codex_task(
|
|
|
2253
2330
|
)
|
|
2254
2331
|
if trace_excerpt:
|
|
2255
2332
|
detail = f"{detail}\n{trace_excerpt}"
|
|
2256
|
-
|
|
2257
|
-
|
|
2333
|
+
changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2334
|
+
credible_partial_patch = _has_credible_shell_wrapper_progress(effective_paths)
|
|
2335
|
+
if effective_paths and credible_partial_patch:
|
|
2258
2336
|
last_message = _read_text_if_exists(last_message_path)
|
|
2259
2337
|
log_git_status(repo, log)
|
|
2260
2338
|
prefix = (
|
|
@@ -2281,7 +2359,27 @@ def _run_codex_task(
|
|
|
2281
2359
|
"exitCode": 0,
|
|
2282
2360
|
"usage": usage,
|
|
2283
2361
|
}
|
|
2284
|
-
|
|
2362
|
+
if effective_paths:
|
|
2363
|
+
listed = _describe_publishable_paths(effective_paths)
|
|
2364
|
+
log.warning(
|
|
2365
|
+
"Codex reached the execution timeout with a broad/noisy changed-path set "
|
|
2366
|
+
f"({len(effective_paths)} publishable-looking path(s)); refusing to spend "
|
|
2367
|
+
"additional gate budget on a likely incomplete patch."
|
|
2368
|
+
)
|
|
2369
|
+
detail = (
|
|
2370
|
+
f"{detail}\nPublishable-looking changed paths at timeout were too broad/noisy "
|
|
2371
|
+
f"to preserve as a partial patch ({len(effective_paths)} path(s): {listed}). "
|
|
2372
|
+
"The executor is failing fast so the scheduler can replan instead of running "
|
|
2373
|
+
"expensive validation on a likely incomplete update."
|
|
2374
|
+
)
|
|
2375
|
+
return {
|
|
2376
|
+
"ok": False,
|
|
2377
|
+
"summary": "openai_codex timed out with broad/noisy publishable-looking changes",
|
|
2378
|
+
"stdout": _truncate(stdout),
|
|
2379
|
+
"stderr": _truncate(f"{detail}\n{stderr}".strip()),
|
|
2380
|
+
"exitCode": 124,
|
|
2381
|
+
"usage": usage,
|
|
2382
|
+
}
|
|
2285
2383
|
artifact_only_paths = _describe_non_publishable_paths(changed_paths, baseline_snapshot)
|
|
2286
2384
|
if artifact_only_paths:
|
|
2287
2385
|
detail = (
|
|
@@ -2306,6 +2404,7 @@ def _run_codex_task(
|
|
|
2306
2404
|
|
|
2307
2405
|
if command_policy_rejection_loop:
|
|
2308
2406
|
_, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
|
|
2407
|
+
credible_progress = _has_credible_shell_wrapper_progress(effective_paths)
|
|
2309
2408
|
if effective_paths:
|
|
2310
2409
|
policy_signal = _detect_codex_workaround_signal(last_message)
|
|
2311
2410
|
if not policy_signal and not last_message.strip():
|
|
@@ -2329,6 +2428,7 @@ def _run_codex_task(
|
|
|
2329
2428
|
"usage": usage,
|
|
2330
2429
|
}
|
|
2331
2430
|
|
|
2431
|
+
if effective_paths and credible_progress:
|
|
2332
2432
|
command_lines = (
|
|
2333
2433
|
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2334
2434
|
if rejected_shell_wrappers
|
|
@@ -2359,6 +2459,13 @@ def _run_codex_task(
|
|
|
2359
2459
|
"usage": usage,
|
|
2360
2460
|
}
|
|
2361
2461
|
|
|
2462
|
+
if effective_paths:
|
|
2463
|
+
log.warning(
|
|
2464
|
+
"Codex hit a shell-wrapper rejection loop with a broad/noisy changed-path set "
|
|
2465
|
+
f"({len(effective_paths)} publishable-looking path(s)); retrying before handing "
|
|
2466
|
+
"the patch to QualityGate."
|
|
2467
|
+
)
|
|
2468
|
+
|
|
2362
2469
|
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
|
|
2363
2470
|
hard_recovery = wrapper_recovery_attempt >= 1
|
|
2364
2471
|
recovery_guidance = _build_wrapper_recovery_guidance(
|
|
@@ -2413,6 +2520,36 @@ def _run_codex_task(
|
|
|
2413
2520
|
).strip()
|
|
2414
2521
|
)
|
|
2415
2522
|
return retry_result
|
|
2523
|
+
if effective_paths:
|
|
2524
|
+
command_lines = (
|
|
2525
|
+
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2526
|
+
if rejected_shell_wrappers
|
|
2527
|
+
else "- (no command details captured)"
|
|
2528
|
+
)
|
|
2529
|
+
log.warning(
|
|
2530
|
+
"Codex exhausted shell-wrapper recovery attempts with file changes still present; "
|
|
2531
|
+
"returning the patch to QualityGate for final assessment."
|
|
2532
|
+
)
|
|
2533
|
+
return {
|
|
2534
|
+
"ok": True,
|
|
2535
|
+
"summary": (
|
|
2536
|
+
"Executed task and modified "
|
|
2537
|
+
f"{len(effective_paths)} file(s) before shell-wrapper command rejections"
|
|
2538
|
+
),
|
|
2539
|
+
"stdout": _build_success_stdout(
|
|
2540
|
+
effective_paths=effective_paths,
|
|
2541
|
+
last_message=last_message,
|
|
2542
|
+
trace_excerpt=trace_excerpt,
|
|
2543
|
+
prefix=(
|
|
2544
|
+
"Codex produced file changes but exhausted command-router shell-wrapper "
|
|
2545
|
+
"recovery attempts. The patch is being handed to ValidationGate/CriticGate for "
|
|
2546
|
+
f"normal assessment.\nRejected commands:\n{command_lines}"
|
|
2547
|
+
),
|
|
2548
|
+
),
|
|
2549
|
+
"stderr": "",
|
|
2550
|
+
"exitCode": 0,
|
|
2551
|
+
"usage": usage,
|
|
2552
|
+
}
|
|
2416
2553
|
command_lines = (
|
|
2417
2554
|
"\n".join(f"- {command}" for command in rejected_shell_wrappers[:6])
|
|
2418
2555
|
if rejected_shell_wrappers
|