@pushpalsdev/cli 1.1.27 → 1.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.27",
3
+ "version": "1.1.28",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -8408,6 +8408,7 @@ ${detail}`.toLowerCase();
8408
8408
  "codex cli is mandatory in this backend"
8409
8409
  ].some((needle) => text.includes(needle));
8410
8410
  }
8411
+ var CODEX_STARTUP_STALL_WORKER_EXIT_CODE = 87;
8411
8412
  function asAutonomyComponentArea2(value) {
8412
8413
  return normalizeAutonomyComponentArea(value) ?? undefined;
8413
8414
  }
@@ -8950,6 +8951,7 @@ class RemoteBuddyOrchestrator {
8950
8951
  workerpalsEnvFile;
8951
8952
  workerpalsEntrypoint;
8952
8953
  workerpalsUnavailableReason;
8954
+ workerDockerFallbackActivated = false;
8953
8955
  statusHeartbeatMs;
8954
8956
  fetchFailureLogsOnJobFailure;
8955
8957
  executionBudgetInteractiveMs;
@@ -9956,6 +9958,25 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
9956
9958
  entrypoint: this.workerpalsEntrypoint
9957
9959
  });
9958
9960
  }
9961
+ maybeFallbackFromDockerAfterWorkerExit(workerId, code) {
9962
+ if (code !== CODEX_STARTUP_STALL_WORKER_EXIT_CODE)
9963
+ return false;
9964
+ if (!this.spawnWorkerDocker)
9965
+ return false;
9966
+ if (this.workerDockerFallbackActivated)
9967
+ return false;
9968
+ if (parseEnabledFlag(process.env.REMOTEBUDDY_DISABLE_WORKERPAL_DIRECT_FALLBACK, false)) {
9969
+ console.warn(`[RemoteBuddy] WorkerPal ${workerId} exited after a Docker Codex startup stall, but direct WorkerPal fallback is disabled.`);
9970
+ return false;
9971
+ }
9972
+ this.workerDockerFallbackActivated = true;
9973
+ this.spawnWorkerDocker = false;
9974
+ this.spawnWorkerRequireDocker = false;
9975
+ this.workerSpawnCooldownUntil = 0;
9976
+ this.workerpalsUnavailableReason = "Docker-backed WorkerPal Codex startup stalled; falling back to direct isolated-worktree WorkerPal.";
9977
+ console.warn(`[RemoteBuddy] WorkerPal ${workerId} exited after a Docker Codex startup stall; falling back to direct isolated-worktree WorkerPal for future spawns.`);
9978
+ return true;
9979
+ }
9959
9980
  async spawnWorker() {
9960
9981
  if (this.workerSpawnInFlight) {
9961
9982
  return await this.workerSpawnInFlight;
@@ -9983,6 +10004,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
9983
10004
  this.managedWorkers.set(workerId, child);
9984
10005
  child.exited.then((code) => {
9985
10006
  this.managedWorkers.delete(workerId);
10007
+ if (this.maybeFallbackFromDockerAfterWorkerExit(workerId, code)) {
10008
+ this.ensureAutoscaledWorkerCapacity("docker codex startup fallback");
10009
+ }
9986
10010
  console.warn(`[RemoteBuddy] WorkerPal process ${workerId} exited with code ${code}`);
9987
10011
  });
9988
10012
  const ready = await this.waitForOnlineWorker(this.workerStartupTimeoutMs, workerId);
@@ -92,6 +92,7 @@ export interface JobResult {
92
92
  stdout?: string;
93
93
  stderr?: string;
94
94
  exitCode?: number;
95
+ cooldownMs?: number;
95
96
  usage?: JobTokenUsage;
96
97
  publishBlocked?: JobPublishBlockedInfo;
97
98
  diagnostics?: JobDiagnostics;
@@ -249,7 +249,9 @@ export interface Job {
249
249
  }
250
250
 
251
251
  function compactDockerDiagnosticText(value: unknown, maxChars = 1000): string | null {
252
- const text = String(value ?? "").replace(/\s+$/g, "").trim();
252
+ const text = String(value ?? "")
253
+ .replace(/\s+$/g, "")
254
+ .trim();
253
255
  if (!text) return null;
254
256
  return text.length <= maxChars ? text : text.slice(0, maxChars);
255
257
  }
@@ -482,11 +484,11 @@ export class DockerExecutor {
482
484
  if (
483
485
  retryableFailure &&
484
486
  attempt >= this.jobRetryMaxAttempts &&
485
- this.failureCooldownMs > 0
487
+ this.retryExhaustionCooldownMs(result) > 0
486
488
  ) {
487
489
  return {
488
490
  ...result,
489
- cooldownMs: this.failureCooldownMs,
491
+ cooldownMs: this.retryExhaustionCooldownMs(result),
490
492
  };
491
493
  }
492
494
  return result;
@@ -1279,9 +1281,8 @@ export class DockerExecutor {
1279
1281
  onLog?.("stdout", note);
1280
1282
  }
1281
1283
 
1282
- const { leadMs: warningLeadMs, delayMs: warningDelayMs } = computeTimeoutWarningWindow(
1283
- timeoutMs,
1284
- );
1284
+ const { leadMs: warningLeadMs, delayMs: warningDelayMs } =
1285
+ computeTimeoutWarningWindow(timeoutMs);
1285
1286
  const warningTimer = setTimeout(() => {
1286
1287
  const warning = `[DockerExecutor] Job nearing timeout in warm container (${Math.round(
1287
1288
  warningLeadMs / 1000,
@@ -1424,13 +1425,13 @@ export class DockerExecutor {
1424
1425
  const worktreePrefix = shellSingleQuote(`${containerWorktreePath}/`);
1425
1426
  const command = [
1426
1427
  "set -eu",
1427
- "linked=\"\"",
1428
+ 'linked=""',
1428
1429
  "for name in node_modules; do",
1429
- " src=\"/repo/$name\"",
1430
+ ' src="/repo/$name"',
1430
1431
  ` dest=${worktreePrefix}$name`,
1431
- " if { [ -e \"$src\" ] || [ -L \"$src\" ]; } && [ ! -e \"$dest\" ] && [ ! -L \"$dest\" ]; then",
1432
- " ln -s \"$src\" \"$dest\"",
1433
- " linked=\"$linked $name\"",
1432
+ ' if { [ -e "$src" ] || [ -L "$src" ]; } && [ ! -e "$dest" ] && [ ! -L "$dest" ]; then',
1433
+ ' ln -s "$src" "$dest"',
1434
+ ' linked="$linked $name"',
1434
1435
  " fi",
1435
1436
  "done",
1436
1437
  "printf '%s' \"$linked\"",
@@ -1454,9 +1455,7 @@ export class DockerExecutor {
1454
1455
  .filter(Boolean);
1455
1456
  if (linked.length === 0) return;
1456
1457
 
1457
- const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linked.join(
1458
- ", ",
1459
- )}`;
1458
+ const note = `[DockerExecutor] Linked worktree dependency artifact(s): ${linked.join(", ")}`;
1460
1459
  console.log(note);
1461
1460
  onLog?.("stdout", note);
1462
1461
  }
@@ -1701,9 +1700,15 @@ export class DockerExecutor {
1701
1700
  stdout,
1702
1701
  stderr: details.join("\n"),
1703
1702
  exitCode,
1704
- diagnostics: dockerFallbackDiagnostics(summary, context, exitCode, "malformed_structured_result", {
1705
- sentinelParseError,
1706
- }),
1703
+ diagnostics: dockerFallbackDiagnostics(
1704
+ summary,
1705
+ context,
1706
+ exitCode,
1707
+ "malformed_structured_result",
1708
+ {
1709
+ sentinelParseError,
1710
+ },
1711
+ ),
1707
1712
  };
1708
1713
  }
1709
1714
 
@@ -1906,8 +1911,15 @@ export class DockerExecutor {
1906
1911
  return this.matchesRetryablePattern(text);
1907
1912
  }
1908
1913
 
1914
+ private retryExhaustionCooldownMs(result: DockerJobResult): number {
1915
+ const resultCooldownMs = readPositiveNumber(result.cooldownMs) ?? 0;
1916
+ return Math.max(this.failureCooldownMs, resultCooldownMs);
1917
+ }
1918
+
1909
1919
  private matchesRetryablePattern(text: string): boolean {
1910
1920
  const transientPatterns: RegExp[] = [
1921
+ /\bstalled before first response\b/i,
1922
+ /\bstartup stall\b/i,
1911
1923
  /warm .*runtime/i,
1912
1924
  /failed to start warm container/i,
1913
1925
  /docker execution error/i,
@@ -688,6 +688,9 @@ function inferTerminalFailureClass(result: JobResult, changedPaths: string[]): s
688
688
  if (result.ok) return "success";
689
689
  const text = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
690
690
  const publishableCount = publishableChangedPaths(changedPaths).length;
691
+ if (text.includes("stalled before first response") || text.includes("startup stall")) {
692
+ return "codex_startup_stall";
693
+ }
691
694
  if (changedPaths.length > 0 && publishableCount === 0) return "artifact_only_no_publishable_patch";
692
695
  if (result.exitCode === 124 || text.includes("timed out") || text.includes("timeout")) return "timeout";
693
696
  if (text.includes("validationgate") || text.includes("validation")) return "validation";
@@ -700,6 +703,9 @@ function inferTerminalFailureClass(result: JobResult, changedPaths: string[]): s
700
703
 
701
704
  function inferTerminalStage(result: JobResult, fallback: string): string {
702
705
  const text = `${result.summary ?? ""}\n${result.stderr ?? ""}`.toLowerCase();
706
+ if (text.includes("stalled before first response") || text.includes("startup stall")) {
707
+ return "executor_startup";
708
+ }
703
709
  if (text.includes("validationgate") || text.includes("validation")) return "validation";
704
710
  if (text.includes("scopegate") || text.includes("scope")) return "scope";
705
711
  if (text.includes("criticgate") || text.includes("critic")) return "critic";
@@ -748,7 +754,7 @@ function buildTerminalDiagnostics(args: {
748
754
  terminalStage: inferTerminalStage(args.result, args.terminalStage),
749
755
  executorBackend: args.executor,
750
756
  summary: compactDiagnosticText(args.result.summary, 1_000),
751
- watchdogFired: /watchdog|rollout coach/i.test(text),
757
+ watchdogFired: /watchdog|rollout coach|stalled before first response|startup stall/i.test(text),
752
758
  timeoutMs: args.timeoutMs ?? null,
753
759
  publishableFileCount: publishable.length,
754
760
  artifactOnlyPathCount: artifactOnly.length,
@@ -44,6 +44,7 @@ interface JobResult {
44
44
  stdout?: string;
45
45
  stderr?: string;
46
46
  exitCode?: number;
47
+ cooldownMs?: number;
47
48
  commit?: {
48
49
  branch: string;
49
50
  sha: string;
@@ -115,6 +116,23 @@ echo "password=${token}"
115
116
  }
116
117
  }
117
118
 
119
+ export function buildJobRunnerResult(
120
+ result: Pick<
121
+ Awaited<ReturnType<typeof executeJob>>,
122
+ "ok" | "summary" | "stdout" | "stderr" | "exitCode" | "cooldownMs" | "diagnostics"
123
+ >,
124
+ ): JobResult {
125
+ return {
126
+ ok: result.ok,
127
+ summary: result.summary,
128
+ stdout: result.stdout,
129
+ stderr: result.stderr,
130
+ exitCode: result.exitCode,
131
+ cooldownMs: result.cooldownMs,
132
+ diagnostics: result.diagnostics,
133
+ };
134
+ }
135
+
118
136
  // ─── Main ───────────────────────────────────────────────────────────────────
119
137
 
120
138
  async function main(): Promise<void> {
@@ -127,8 +145,7 @@ async function main(): Promise<void> {
127
145
  process.exit(1);
128
146
  }
129
147
 
130
- const base64Spec =
131
- rawSpecArg === "--spec-stdin" ? (await Bun.stdin.text()).trim() : rawSpecArg;
148
+ const base64Spec = rawSpecArg === "--spec-stdin" ? (await Bun.stdin.text()).trim() : rawSpecArg;
132
149
  if (!base64Spec) {
133
150
  // eslint-disable-next-line no-console
134
151
  console.error("Job spec was empty");
@@ -179,14 +196,7 @@ async function main(): Promise<void> {
179
196
  CONFIG,
180
197
  );
181
198
  // Build result object
182
- const jobResult: JobResult = {
183
- ok: result.ok,
184
- summary: result.summary,
185
- stdout: result.stdout,
186
- stderr: result.stderr,
187
- exitCode: result.exitCode,
188
- diagnostics: result.diagnostics,
189
- };
199
+ const jobResult = buildJobRunnerResult(result);
190
200
  // Create commit for file-modifying jobs
191
201
  if (result.ok && shouldCommit(spec.kind, CONFIG)) {
192
202
  log("stdout", `[JobRunner] Job modified files, creating commit...`);
@@ -224,7 +234,8 @@ async function main(): Promise<void> {
224
234
  if (commitResult.publishBlocked) {
225
235
  jobResult.publishBlocked = commitResult.publishBlocked;
226
236
  }
227
- jobResult.exitCode = jobResult.exitCode && jobResult.exitCode !== 0 ? jobResult.exitCode : 1;
237
+ jobResult.exitCode =
238
+ jobResult.exitCode && jobResult.exitCode !== 0 ? jobResult.exitCode : 1;
228
239
  log(
229
240
  "stderr",
230
241
  commitResult.publishBlocked
@@ -246,8 +257,10 @@ async function main(): Promise<void> {
246
257
  }
247
258
  }
248
259
 
249
- main().catch((err) => {
250
- // eslint-disable-next-line no-console
251
- console.error(`[JobRunner] Fatal error: ${err}`);
252
- process.exit(1);
253
- });
260
+ if (import.meta.main) {
261
+ main().catch((err) => {
262
+ // eslint-disable-next-line no-console
263
+ console.error(`[JobRunner] Fatal error: ${err}`);
264
+ process.exit(1);
265
+ });
266
+ }
@@ -67,6 +67,7 @@ type WorkerJobResult = JobResult & {
67
67
 
68
68
  const DEFAULT_LLM_MODEL = "local-model";
69
69
  const CODEX_UNAVAILABLE_WORKER_EXIT_CODE = 86;
70
+ const CODEX_STARTUP_STALL_WORKER_EXIT_CODE = 87;
70
71
  const CODEX_UNAVAILABLE_DOCKER_SHUTDOWN_GRACE_MS = 5_000;
71
72
  const CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS = 4_000;
72
73
  const DEFAULT_JOB_PROGRESS_LOG_EVERY_MS = 60_000;
@@ -391,7 +392,9 @@ function inferWorkerJobPhaseFromLogLine(line: string): WorkerJobPhase | null {
391
392
  ) {
392
393
  return "full validation";
393
394
  }
394
- if (/creating commit|Publish blocked|publish-blocked|completion ref|enqueueCompletion/i.test(text)) {
395
+ if (
396
+ /creating commit|Publish blocked|publish-blocked|completion ref|enqueueCompletion/i.test(text)
397
+ ) {
395
398
  return "publishing";
396
399
  }
397
400
  if (
@@ -447,11 +450,20 @@ function mergeWorkerDiagnostics(
447
450
  };
448
451
  }
449
452
 
450
- function inferWorkerTerminalFailureClass(result: JobResult): string {
453
+ function isCodexStartupStallResult(result: JobResult): boolean {
454
+ const text =
455
+ `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
456
+ return /stalled before first response|startup stall/.test(text);
457
+ }
458
+
459
+ export function inferWorkerTerminalFailureClass(result: JobResult): string {
451
460
  if (result.ok) return "success";
452
- const text = `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
453
- if (/timed out|timeout|signal 15|terminated|exit 143|exit 137|stalled before first response|startup stall/.test(text)) return "timeout";
454
- if (/no publishable|non-publishable|node_modules/.test(text)) return "artifact_only_no_publishable_patch";
461
+ const text =
462
+ `${result.summary ?? ""}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`.toLowerCase();
463
+ if (isCodexStartupStallResult(result)) return "codex_startup_stall";
464
+ if (/timed out|timeout|signal 15|terminated|exit 143|exit 137/.test(text)) return "timeout";
465
+ if (/no publishable|non-publishable|node_modules/.test(text))
466
+ return "artifact_only_no_publishable_patch";
455
467
  if (/validationgate|validation/.test(text)) return "validation";
456
468
  if (/scopegate|scope/.test(text)) return "scope";
457
469
  if (/criticgate|critic/.test(text)) return "critic";
@@ -497,11 +509,12 @@ export function shouldRecycleWorkerForHeartbeatDegradation(options: {
497
509
  return options.transportStale;
498
510
  }
499
511
 
500
- function shouldRecycleWorkerForCodexUnavailableFailure(
512
+ export function shouldRecycleWorkerForCodexUnavailableFailure(
501
513
  summary: string,
502
514
  stderr?: string | null,
503
515
  ): boolean {
504
516
  const text = `${summary}\n${stderr ?? ""}`.toLowerCase();
517
+ if (/stalled before first response|startup stall/.test(text)) return true;
505
518
  return [
506
519
  "openai_codex cli is not installed",
507
520
  "openai_codex chatgpt auth is not ready",
@@ -512,6 +525,12 @@ function shouldRecycleWorkerForCodexUnavailableFailure(
512
525
  ].some((needle) => text.includes(needle));
513
526
  }
514
527
 
528
+ export function workerRecycleExitCodeForResult(result: WorkerJobResult): number {
529
+ return isCodexStartupStallResult(result)
530
+ ? CODEX_STARTUP_STALL_WORKER_EXIT_CODE
531
+ : CODEX_UNAVAILABLE_WORKER_EXIT_CODE;
532
+ }
533
+
515
534
  async function shutdownDockerExecutorBeforeCodexRecycle(
516
535
  dockerExecutor: DockerExecutor | null,
517
536
  ): Promise<void> {
@@ -754,14 +773,9 @@ async function createIsolatedWorktree(
754
773
  .toLowerCase()
755
774
  .replace(/[^a-z0-9]+/g, "")
756
775
  .slice(0, 8);
757
- const nonce = `${Date.now().toString(36).slice(-6)}-${Math.random()
758
- .toString(36)
759
- .slice(2, 6)}`;
776
+ const nonce = `${Date.now().toString(36).slice(-6)}-${Math.random().toString(36).slice(2, 6)}`;
760
777
 
761
- const worktreePath = resolve(
762
- worktreeRoot,
763
- `job-${safeJobId || "host"}-${nonce}`,
764
- );
778
+ const worktreePath = resolve(worktreeRoot, `job-${safeJobId || "host"}-${nonce}`);
765
779
 
766
780
  const addResult = await git(repo, ["worktree", "add", "--detach", worktreePath, baseRef]);
767
781
  if (!addResult.ok) {
@@ -1752,6 +1766,7 @@ async function workerLoop(
1752
1766
  const jobAttempt =
1753
1767
  Number.isFinite(jobAttemptRaw) && jobAttemptRaw > 0 ? Math.floor(jobAttemptRaw) : 1;
1754
1768
  const llm = workerLlmConfig(CONFIG);
1769
+ const terminalFailureClass = inferWorkerTerminalFailureClass(result);
1755
1770
  result = {
1756
1771
  ...result,
1757
1772
  diagnostics: mergeWorkerDiagnostics(result.diagnostics, {
@@ -1781,12 +1796,15 @@ async function workerLoop(
1781
1796
  result.ok ? "completed" : result.publishBlocked ? "publish_blocked" : "failed",
1782
1797
  ),
1783
1798
  terminal: {
1784
- failureClass: inferWorkerTerminalFailureClass(result),
1785
- terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
1799
+ failureClass: terminalFailureClass,
1800
+ terminalStage:
1801
+ terminalFailureClass === "codex_startup_stall"
1802
+ ? "executor_startup"
1803
+ : (currentJobPhase ?? (result.ok ? "completed" : "worker")),
1786
1804
  executorBackend: resolveExecutor(CONFIG),
1787
1805
  summary: result.summary,
1788
1806
  watchdogFired:
1789
- /watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
1807
+ /watchdog|rollout coach|stalled before first response|startup stall|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
1790
1808
  `${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
1791
1809
  ),
1792
1810
  metadata: {
@@ -1961,12 +1979,15 @@ async function workerLoop(
1961
1979
  clearInterval(busyHeartbeat);
1962
1980
  if (jobProgressTimer) clearInterval(jobProgressTimer);
1963
1981
  if (recycleWorkerAfterJob) {
1982
+ const recycleExitCode = result
1983
+ ? workerRecycleExitCodeForResult(result)
1984
+ : CODEX_UNAVAILABLE_WORKER_EXIT_CODE;
1964
1985
  runtimeState.shutdownRequested = true;
1965
1986
  const forceExitTimer = setTimeout(() => {
1966
1987
  console.warn(
1967
1988
  `[WorkerPals] Forcing worker recycle ${CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS}ms after Codex backend failure.`,
1968
1989
  );
1969
- process.exit(CODEX_UNAVAILABLE_WORKER_EXIT_CODE);
1990
+ process.exit(recycleExitCode);
1970
1991
  }, CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS);
1971
1992
  try {
1972
1993
  await maybeHeartbeat("offline", null, true);
@@ -1983,7 +2004,7 @@ async function workerLoop(
1983
2004
  await shutdownDockerExecutorBeforeCodexRecycle(dockerExecutor);
1984
2005
  } finally {
1985
2006
  clearTimeout(forceExitTimer);
1986
- process.exit(CODEX_UNAVAILABLE_WORKER_EXIT_CODE);
2007
+ process.exit(recycleExitCode);
1987
2008
  }
1988
2009
  }
1989
2010
  if (job.sessionId && result?.cooldownMs && result.cooldownMs > 0) {