@pushpalsdev/cli 1.1.27 → 1.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +24 -0
- package/runtime/sandbox/apps/workerpals/src/common/types.ts +1 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +29 -17
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +7 -1
- package/runtime/sandbox/apps/workerpals/src/job_runner.ts +29 -16
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +39 -18
package/package.json
CHANGED
|
package/runtime/sandbox/.pushpals-remotebuddy-fallback.js
CHANGED
|
@@ -8408,6 +8408,7 @@ ${detail}`.toLowerCase();
|
|
|
8408
8408
|
"codex cli is mandatory in this backend"
|
|
8409
8409
|
].some((needle) => text.includes(needle));
|
|
8410
8410
|
}
|
|
8411
|
+
var CODEX_STARTUP_STALL_WORKER_EXIT_CODE = 87;
|
|
8411
8412
|
function asAutonomyComponentArea2(value) {
|
|
8412
8413
|
return normalizeAutonomyComponentArea(value) ?? undefined;
|
|
8413
8414
|
}
|
|
@@ -8950,6 +8951,7 @@ class RemoteBuddyOrchestrator {
|
|
|
8950
8951
|
workerpalsEnvFile;
|
|
8951
8952
|
workerpalsEntrypoint;
|
|
8952
8953
|
workerpalsUnavailableReason;
|
|
8954
|
+
workerDockerFallbackActivated = false;
|
|
8953
8955
|
statusHeartbeatMs;
|
|
8954
8956
|
fetchFailureLogsOnJobFailure;
|
|
8955
8957
|
executionBudgetInteractiveMs;
|
|
@@ -9956,6 +9958,25 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9956
9958
|
entrypoint: this.workerpalsEntrypoint
|
|
9957
9959
|
});
|
|
9958
9960
|
}
|
|
9961
|
+
maybeFallbackFromDockerAfterWorkerExit(workerId, code) {
|
|
9962
|
+
if (code !== CODEX_STARTUP_STALL_WORKER_EXIT_CODE)
|
|
9963
|
+
return false;
|
|
9964
|
+
if (!this.spawnWorkerDocker)
|
|
9965
|
+
return false;
|
|
9966
|
+
if (this.workerDockerFallbackActivated)
|
|
9967
|
+
return false;
|
|
9968
|
+
if (parseEnabledFlag(process.env.REMOTEBUDDY_DISABLE_WORKERPAL_DIRECT_FALLBACK, false)) {
|
|
9969
|
+
console.warn(`[RemoteBuddy] WorkerPal ${workerId} exited after a Docker Codex startup stall, but direct WorkerPal fallback is disabled.`);
|
|
9970
|
+
return false;
|
|
9971
|
+
}
|
|
9972
|
+
this.workerDockerFallbackActivated = true;
|
|
9973
|
+
this.spawnWorkerDocker = false;
|
|
9974
|
+
this.spawnWorkerRequireDocker = false;
|
|
9975
|
+
this.workerSpawnCooldownUntil = 0;
|
|
9976
|
+
this.workerpalsUnavailableReason = "Docker-backed WorkerPal Codex startup stalled; falling back to direct isolated-worktree WorkerPal.";
|
|
9977
|
+
console.warn(`[RemoteBuddy] WorkerPal ${workerId} exited after a Docker Codex startup stall; falling back to direct isolated-worktree WorkerPal for future spawns.`);
|
|
9978
|
+
return true;
|
|
9979
|
+
}
|
|
9959
9980
|
async spawnWorker() {
|
|
9960
9981
|
if (this.workerSpawnInFlight) {
|
|
9961
9982
|
return await this.workerSpawnInFlight;
|
|
@@ -9983,6 +10004,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9983
10004
|
this.managedWorkers.set(workerId, child);
|
|
9984
10005
|
child.exited.then((code) => {
|
|
9985
10006
|
this.managedWorkers.delete(workerId);
|
|
10007
|
+
if (this.maybeFallbackFromDockerAfterWorkerExit(workerId, code)) {
|
|
10008
|
+
this.ensureAutoscaledWorkerCapacity("docker codex startup fallback");
|
|
10009
|
+
}
|
|
9986
10010
|
console.warn(`[RemoteBuddy] WorkerPal process ${workerId} exited with code ${code}`);
|
|
9987
10011
|
});
|
|
9988
10012
|
const ready = await this.waitForOnlineWorker(this.workerStartupTimeoutMs, workerId);
|
|
package/runtime/sandbox/apps/workerpals/src/docker_executor.ts
CHANGED
|
@@ -249,7 +249,9 @@ export interface Job {
|
|
|
249
249
|
}
|
|
250
250
|
|
|
251
251
|
function compactDockerDiagnosticText(value: unknown, maxChars = 1000): string | null {
|
|
252
|
-
const text = String(value ?? "")
|
|
252
|
+
const text = String(value ?? "")
|
|
253
|
+
.replace(/\s+$/g, "")
|
|
254
|
+
.trim();
|
|
253
255
|
if (!text) return null;
|
|
254
256
|
return text.length <= maxChars ? text : text.slice(0, maxChars);
|
|
255
257
|
}
|
|
@@ -482,11 +484,11 @@ export class DockerExecutor {
|
|
|
482
484
|
if (
|
|
483
485
|
retryableFailure &&
|
|
484
486
|
attempt >= this.jobRetryMaxAttempts &&
|
|
485
|
-
this.
|
|
487
|
+
this.retryExhaustionCooldownMs(result) > 0
|
|
486
488
|
) {
|
|
487
489
|
return {
|
|
488
490
|
...result,
|
|
489
|
-
cooldownMs: this.
|
|
491
|
+
cooldownMs: this.retryExhaustionCooldownMs(result),
|
|
490
492
|
};
|
|
491
493
|
}
|
|
492
494
|
return result;
|
|
@@ -1279,9 +1281,8 @@ export class DockerExecutor {
|
|
|
1279
1281
|
onLog?.("stdout", note);
|
|
1280
1282
|
}
|
|
1281
1283
|
|
|
1282
|
-
const { leadMs: warningLeadMs, delayMs: warningDelayMs } =
|
|
1283
|
-
timeoutMs
|
|
1284
|
-
);
|
|
1284
|
+
const { leadMs: warningLeadMs, delayMs: warningDelayMs } =
|
|
1285
|
+
computeTimeoutWarningWindow(timeoutMs);
|
|
1285
1286
|
const warningTimer = setTimeout(() => {
|
|
1286
1287
|
const warning = `[DockerExecutor] Job nearing timeout in warm container (${Math.round(
|
|
1287
1288
|
warningLeadMs / 1000,
|
|
@@ -1424,13 +1425,13 @@ export class DockerExecutor {
|
|
|
1424
1425
|
const worktreePrefix = shellSingleQuote(`${containerWorktreePath}/`);
|
|
1425
1426
|
const command = [
|
|
1426
1427
|
"set -eu",
|
|
1427
|
-
|
|
1428
|
+
'linked=""',
|
|
1428
1429
|
"for name in node_modules; do",
|
|
1429
|
-
|
|
1430
|
+
' src="/repo/$name"',
|
|
1430
1431
|
` dest=${worktreePrefix}$name`,
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1432
|
+
' if { [ -e "$src" ] || [ -L "$src" ]; } && [ ! -e "$dest" ] && [ ! -L "$dest" ]; then',
|
|
1433
|
+
' ln -s "$src" "$dest"',
|
|
1434
|
+
' linked="$linked $name"',
|
|
1434
1435
|
" fi",
|
|
1435
1436
|
"done",
|
|
1436
1437
|
"printf '%s' \"$linked\"",
|
|
@@ -1454,9 +1455,7 @@ export class DockerExecutor {
|
|
|
1454
1455
|
.filter(Boolean);
|
|
1455
1456
|
if (linked.length === 0) return;
|
|
1456
1457
|
|
|
1457
|
-
const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linked.join(
|
|
1458
|
-
", ",
|
|
1459
|
-
)}`;
|
|
1458
|
+
const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linked.join(", ")}`;
|
|
1460
1459
|
console.log(note);
|
|
1461
1460
|
onLog?.("stdout", note);
|
|
1462
1461
|
}
|
|
@@ -1701,9 +1700,15 @@ export class DockerExecutor {
|
|
|
1701
1700
|
stdout,
|
|
1702
1701
|
stderr: details.join("\n"),
|
|
1703
1702
|
exitCode,
|
|
1704
|
-
diagnostics: dockerFallbackDiagnostics(
|
|
1705
|
-
|
|
1706
|
-
|
|
1703
|
+
diagnostics: dockerFallbackDiagnostics(
|
|
1704
|
+
summary,
|
|
1705
|
+
context,
|
|
1706
|
+
exitCode,
|
|
1707
|
+
"malformed_structured_result",
|
|
1708
|
+
{
|
|
1709
|
+
sentinelParseError,
|
|
1710
|
+
},
|
|
1711
|
+
),
|
|
1707
1712
|
};
|
|
1708
1713
|
}
|
|
1709
1714
|
|
|
@@ -1906,8 +1911,15 @@ export class DockerExecutor {
|
|
|
1906
1911
|
return this.matchesRetryablePattern(text);
|
|
1907
1912
|
}
|
|
1908
1913
|
|
|
1914
|
+
private retryExhaustionCooldownMs(result: DockerJobResult): number {
|
|
1915
|
+
const resultCooldownMs = readPositiveNumber(result.cooldownMs) ?? 0;
|
|
1916
|
+
return Math.max(this.failureCooldownMs, resultCooldownMs);
|
|
1917
|
+
}
|
|
1918
|
+
|
|
1909
1919
|
private matchesRetryablePattern(text: string): boolean {
|
|
1910
1920
|
const transientPatterns: RegExp[] = [
|
|
1921
|
+
/\bstalled before first response\b/i,
|
|
1922
|
+
/\bstartup stall\b/i,
|
|
1911
1923
|
/warm .*runtime/i,
|
|
1912
1924
|
/failed to start warm container/i,
|
|
1913
1925
|
/docker execution error/i,
|
|
package/runtime/sandbox/apps/workerpals/src/execute_job.ts
CHANGED
|
@@ -688,6 +688,9 @@ function inferTerminalFailureClass(result: JobResult, changedPaths: string[]): s
|
|
|
688
688
|
if (result.ok) return "success";
|
|
689
689
|
const text = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
|
|
690
690
|
const publishableCount = publishableChangedPaths(changedPaths).length;
|
|
691
|
+
if (text.includes("stalled before first response") || text.includes("startup stall")) {
|
|
692
|
+
return "codex_startup_stall";
|
|
693
|
+
}
|
|
691
694
|
if (changedPaths.length > 0 && publishableCount === 0) return "artifact_only_no_publishable_patch";
|
|
692
695
|
if (result.exitCode === 124 || text.includes("timed out") || text.includes("timeout")) return "timeout";
|
|
693
696
|
if (text.includes("validationgate") || text.includes("validation")) return "validation";
|
|
@@ -700,6 +703,9 @@ function inferTerminalFailureClass(result: JobResult, changedPaths: string[]): s
|
|
|
700
703
|
|
|
701
704
|
function inferTerminalStage(result: JobResult, fallback: string): string {
|
|
702
705
|
const text = `${result.summary ?? ""}\n${result.stderr ?? ""}`.toLowerCase();
|
|
706
|
+
if (text.includes("stalled before first response") || text.includes("startup stall")) {
|
|
707
|
+
return "executor_startup";
|
|
708
|
+
}
|
|
703
709
|
if (text.includes("validationgate") || text.includes("validation")) return "validation";
|
|
704
710
|
if (text.includes("scopegate") || text.includes("scope")) return "scope";
|
|
705
711
|
if (text.includes("criticgate") || text.includes("critic")) return "critic";
|
|
@@ -748,7 +754,7 @@ function buildTerminalDiagnostics(args: {
|
|
|
748
754
|
terminalStage: inferTerminalStage(args.result, args.terminalStage),
|
|
749
755
|
executorBackend: args.executor,
|
|
750
756
|
summary: compactDiagnosticText(args.result.summary, 1_000),
|
|
751
|
-
watchdogFired: /watchdog|rollout coach/i.test(text),
|
|
757
|
+
watchdogFired: /watchdog|rollout coach|stalled before first response|startup stall/i.test(text),
|
|
752
758
|
timeoutMs: args.timeoutMs ?? null,
|
|
753
759
|
publishableFileCount: publishable.length,
|
|
754
760
|
artifactOnlyPathCount: artifactOnly.length,
|
|
package/runtime/sandbox/apps/workerpals/src/job_runner.ts
CHANGED
|
@@ -44,6 +44,7 @@ interface JobResult {
|
|
|
44
44
|
stdout?: string;
|
|
45
45
|
stderr?: string;
|
|
46
46
|
exitCode?: number;
|
|
47
|
+
cooldownMs?: number;
|
|
47
48
|
commit?: {
|
|
48
49
|
branch: string;
|
|
49
50
|
sha: string;
|
|
@@ -115,6 +116,23 @@ echo "password=${token}"
|
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
119
|
+
export function buildJobRunnerResult(
|
|
120
|
+
result: Pick<
|
|
121
|
+
Awaited<ReturnType<typeof executeJob>>,
|
|
122
|
+
"ok" | "summary" | "stdout" | "stderr" | "exitCode" | "cooldownMs" | "diagnostics"
|
|
123
|
+
>,
|
|
124
|
+
): JobResult {
|
|
125
|
+
return {
|
|
126
|
+
ok: result.ok,
|
|
127
|
+
summary: result.summary,
|
|
128
|
+
stdout: result.stdout,
|
|
129
|
+
stderr: result.stderr,
|
|
130
|
+
exitCode: result.exitCode,
|
|
131
|
+
cooldownMs: result.cooldownMs,
|
|
132
|
+
diagnostics: result.diagnostics,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
|
|
118
136
|
// ─── Main ───────────────────────────────────────────────────────────────────
|
|
119
137
|
|
|
120
138
|
async function main(): Promise<void> {
|
|
@@ -127,8 +145,7 @@ async function main(): Promise<void> {
|
|
|
127
145
|
process.exit(1);
|
|
128
146
|
}
|
|
129
147
|
|
|
130
|
-
const base64Spec =
|
|
131
|
-
rawSpecArg === "--spec-stdin" ? (await Bun.stdin.text()).trim() : rawSpecArg;
|
|
148
|
+
const base64Spec = rawSpecArg === "--spec-stdin" ? (await Bun.stdin.text()).trim() : rawSpecArg;
|
|
132
149
|
if (!base64Spec) {
|
|
133
150
|
// eslint-disable-next-line no-console
|
|
134
151
|
console.error("Job spec was empty");
|
|
@@ -179,14 +196,7 @@ async function main(): Promise<void> {
|
|
|
179
196
|
CONFIG,
|
|
180
197
|
);
|
|
181
198
|
// Build result object
|
|
182
|
-
const jobResult
|
|
183
|
-
ok: result.ok,
|
|
184
|
-
summary: result.summary,
|
|
185
|
-
stdout: result.stdout,
|
|
186
|
-
stderr: result.stderr,
|
|
187
|
-
exitCode: result.exitCode,
|
|
188
|
-
diagnostics: result.diagnostics,
|
|
189
|
-
};
|
|
199
|
+
const jobResult = buildJobRunnerResult(result);
|
|
190
200
|
// Create commit for file-modifying jobs
|
|
191
201
|
if (result.ok && shouldCommit(spec.kind, CONFIG)) {
|
|
192
202
|
log("stdout", `[JobRunner] Job modified files, creating commit...`);
|
|
@@ -224,7 +234,8 @@ async function main(): Promise<void> {
|
|
|
224
234
|
if (commitResult.publishBlocked) {
|
|
225
235
|
jobResult.publishBlocked = commitResult.publishBlocked;
|
|
226
236
|
}
|
|
227
|
-
jobResult.exitCode =
|
|
237
|
+
jobResult.exitCode =
|
|
238
|
+
jobResult.exitCode && jobResult.exitCode !== 0 ? jobResult.exitCode : 1;
|
|
228
239
|
log(
|
|
229
240
|
"stderr",
|
|
230
241
|
commitResult.publishBlocked
|
|
@@ -246,8 +257,10 @@ async function main(): Promise<void> {
|
|
|
246
257
|
}
|
|
247
258
|
}
|
|
248
259
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
260
|
+
if (import.meta.main) {
|
|
261
|
+
main().catch((err) => {
|
|
262
|
+
// eslint-disable-next-line no-console
|
|
263
|
+
console.error(`[JobRunner] Fatal error: ${err}`);
|
|
264
|
+
process.exit(1);
|
|
265
|
+
});
|
|
266
|
+
}
|
|
package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts
CHANGED
|
@@ -67,6 +67,7 @@ type WorkerJobResult = JobResult & {
|
|
|
67
67
|
|
|
68
68
|
const DEFAULT_LLM_MODEL = "local-model";
|
|
69
69
|
const CODEX_UNAVAILABLE_WORKER_EXIT_CODE = 86;
|
|
70
|
+
const CODEX_STARTUP_STALL_WORKER_EXIT_CODE = 87;
|
|
70
71
|
const CODEX_UNAVAILABLE_DOCKER_SHUTDOWN_GRACE_MS = 5_000;
|
|
71
72
|
const CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS = 4_000;
|
|
72
73
|
const DEFAULT_JOB_PROGRESS_LOG_EVERY_MS = 60_000;
|
|
@@ -391,7 +392,9 @@ function inferWorkerJobPhaseFromLogLine(line: string): WorkerJobPhase | null {
|
|
|
391
392
|
) {
|
|
392
393
|
return "full validation";
|
|
393
394
|
}
|
|
394
|
-
if (
|
|
395
|
+
if (
|
|
396
|
+
/creating commit|Publish blocked|publish-blocked|completion ref|enqueueCompletion/i.test(text)
|
|
397
|
+
) {
|
|
395
398
|
return "publishing";
|
|
396
399
|
}
|
|
397
400
|
if (
|
|
@@ -447,11 +450,20 @@ function mergeWorkerDiagnostics(
|
|
|
447
450
|
};
|
|
448
451
|
}
|
|
449
452
|
|
|
450
|
-
function
|
|
453
|
+
function isCodexStartupStallResult(result: JobResult): boolean {
|
|
454
|
+
const text =
|
|
455
|
+
`${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
|
|
456
|
+
return /stalled before first response|startup stall/.test(text);
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
export function inferWorkerTerminalFailureClass(result: JobResult): string {
|
|
451
460
|
if (result.ok) return "success";
|
|
452
|
-
const text =
|
|
453
|
-
|
|
454
|
-
if (
|
|
461
|
+
const text =
|
|
462
|
+
`${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
|
|
463
|
+
if (isCodexStartupStallResult(result)) return "codex_startup_stall";
|
|
464
|
+
if (/timed out|timeout|signal 15|terminated|exit 143|exit 137/.test(text)) return "timeout";
|
|
465
|
+
if (/no publishable|non-publishable|node_modules/.test(text))
|
|
466
|
+
return "artifact_only_no_publishable_patch";
|
|
455
467
|
if (/validationgate|validation/.test(text)) return "validation";
|
|
456
468
|
if (/scopegate|scope/.test(text)) return "scope";
|
|
457
469
|
if (/criticgate|critic/.test(text)) return "critic";
|
|
@@ -497,11 +509,12 @@ export function shouldRecycleWorkerForHeartbeatDegradation(options: {
|
|
|
497
509
|
return options.transportStale;
|
|
498
510
|
}
|
|
499
511
|
|
|
500
|
-
function shouldRecycleWorkerForCodexUnavailableFailure(
|
|
512
|
+
export function shouldRecycleWorkerForCodexUnavailableFailure(
|
|
501
513
|
summary: string,
|
|
502
514
|
stderr?: string | null,
|
|
503
515
|
): boolean {
|
|
504
516
|
const text = `${summary}\n${stderr ?? ""}`.toLowerCase();
|
|
517
|
+
if (/stalled before first response|startup stall/.test(text)) return true;
|
|
505
518
|
return [
|
|
506
519
|
"openai_codex cli is not installed",
|
|
507
520
|
"openai_codex chatgpt auth is not ready",
|
|
@@ -512,6 +525,12 @@ function shouldRecycleWorkerForCodexUnavailableFailure(
|
|
|
512
525
|
].some((needle) => text.includes(needle));
|
|
513
526
|
}
|
|
514
527
|
|
|
528
|
+
export function workerRecycleExitCodeForResult(result: WorkerJobResult): number {
|
|
529
|
+
return isCodexStartupStallResult(result)
|
|
530
|
+
? CODEX_STARTUP_STALL_WORKER_EXIT_CODE
|
|
531
|
+
: CODEX_UNAVAILABLE_WORKER_EXIT_CODE;
|
|
532
|
+
}
|
|
533
|
+
|
|
515
534
|
async function shutdownDockerExecutorBeforeCodexRecycle(
|
|
516
535
|
dockerExecutor: DockerExecutor | null,
|
|
517
536
|
): Promise<void> {
|
|
@@ -754,14 +773,9 @@ async function createIsolatedWorktree(
|
|
|
754
773
|
.toLowerCase()
|
|
755
774
|
.replace(/[^a-z0-9]+/g, "")
|
|
756
775
|
.slice(0, 8);
|
|
757
|
-
const nonce = `${Date.now().toString(36).slice(-6)}-${Math.random()
|
|
758
|
-
.toString(36)
|
|
759
|
-
.slice(2, 6)}`;
|
|
776
|
+
const nonce = `${Date.now().toString(36).slice(-6)}-${Math.random().toString(36).slice(2, 6)}`;
|
|
760
777
|
|
|
761
|
-
const worktreePath = resolve(
|
|
762
|
-
worktreeRoot,
|
|
763
|
-
`job-${safeJobId || "host"}-${nonce}`,
|
|
764
|
-
);
|
|
778
|
+
const worktreePath = resolve(worktreeRoot, `job-${safeJobId || "host"}-${nonce}`);
|
|
765
779
|
|
|
766
780
|
const addResult = await git(repo, ["worktree", "add", "--detach", worktreePath, baseRef]);
|
|
767
781
|
if (!addResult.ok) {
|
|
@@ -1752,6 +1766,7 @@ async function workerLoop(
|
|
|
1752
1766
|
const jobAttempt =
|
|
1753
1767
|
Number.isFinite(jobAttemptRaw) && jobAttemptRaw > 0 ? Math.floor(jobAttemptRaw) : 1;
|
|
1754
1768
|
const llm = workerLlmConfig(CONFIG);
|
|
1769
|
+
const terminalFailureClass = inferWorkerTerminalFailureClass(result);
|
|
1755
1770
|
result = {
|
|
1756
1771
|
...result,
|
|
1757
1772
|
diagnostics: mergeWorkerDiagnostics(result.diagnostics, {
|
|
@@ -1781,12 +1796,15 @@ async function workerLoop(
|
|
|
1781
1796
|
result.ok ? "completed" : result.publishBlocked ? "publish_blocked" : "failed",
|
|
1782
1797
|
),
|
|
1783
1798
|
terminal: {
|
|
1784
|
-
failureClass:
|
|
1785
|
-
terminalStage:
|
|
1799
|
+
failureClass: terminalFailureClass,
|
|
1800
|
+
terminalStage:
|
|
1801
|
+
terminalFailureClass === "codex_startup_stall"
|
|
1802
|
+
? "executor_startup"
|
|
1803
|
+
: (currentJobPhase ?? (result.ok ? "completed" : "worker")),
|
|
1786
1804
|
executorBackend: resolveExecutor(CONFIG),
|
|
1787
1805
|
summary: result.summary,
|
|
1788
1806
|
watchdogFired:
|
|
1789
|
-
/watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
|
|
1807
|
+
/watchdog|rollout coach|stalled before first response|startup stall|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
|
|
1790
1808
|
`${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
|
|
1791
1809
|
),
|
|
1792
1810
|
metadata: {
|
|
@@ -1961,12 +1979,15 @@ async function workerLoop(
|
|
|
1961
1979
|
clearInterval(busyHeartbeat);
|
|
1962
1980
|
if (jobProgressTimer) clearInterval(jobProgressTimer);
|
|
1963
1981
|
if (recycleWorkerAfterJob) {
|
|
1982
|
+
const recycleExitCode = result
|
|
1983
|
+
? workerRecycleExitCodeForResult(result)
|
|
1984
|
+
: CODEX_UNAVAILABLE_WORKER_EXIT_CODE;
|
|
1964
1985
|
runtimeState.shutdownRequested = true;
|
|
1965
1986
|
const forceExitTimer = setTimeout(() => {
|
|
1966
1987
|
console.warn(
|
|
1967
1988
|
`[WorkerPals] Forcing worker recycle ${CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS}ms after Codex backend failure.`,
|
|
1968
1989
|
);
|
|
1969
|
-
process.exit(
|
|
1990
|
+
process.exit(recycleExitCode);
|
|
1970
1991
|
}, CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS);
|
|
1971
1992
|
try {
|
|
1972
1993
|
await maybeHeartbeat("offline", null, true);
|
|
@@ -1983,7 +2004,7 @@ async function workerLoop(
|
|
|
1983
2004
|
await shutdownDockerExecutorBeforeCodexRecycle(dockerExecutor);
|
|
1984
2005
|
} finally {
|
|
1985
2006
|
clearTimeout(forceExitTimer);
|
|
1986
|
-
process.exit(
|
|
2007
|
+
process.exit(recycleExitCode);
|
|
1987
2008
|
}
|
|
1988
2009
|
}
|
|
1989
2010
|
if (job.sessionId && result?.cooldownMs && result.cooldownMs > 0) {
|