@pushpalsdev/cli 1.1.45 → 1.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -70,6 +70,7 @@ const CODEX_UNAVAILABLE_WORKER_EXIT_CODE = 86;
|
|
|
70
70
|
const CODEX_STARTUP_STALL_WORKER_EXIT_CODE = 87;
|
|
71
71
|
const CODEX_UNAVAILABLE_DOCKER_SHUTDOWN_GRACE_MS = 5_000;
|
|
72
72
|
const CODEX_UNAVAILABLE_WORKER_FORCE_EXIT_MS = 4_000;
|
|
73
|
+
const CODEX_STARTUP_STALL_DIRECT_RETRY_DEFER_MS = 5_000;
|
|
73
74
|
const DEFAULT_JOB_PROGRESS_LOG_EVERY_MS = 60_000;
|
|
74
75
|
const CONFIG = loadPushPalsConfig();
|
|
75
76
|
const LOG = new Logger("WorkerPals");
|
|
@@ -525,6 +526,14 @@ export function shouldRecycleWorkerForCodexUnavailableFailure(
|
|
|
525
526
|
].some((needle) => text.includes(needle));
|
|
526
527
|
}
|
|
527
528
|
|
|
529
|
+
export function shouldDeferDockerCodexStartupStallForDirectRetry(options: {
|
|
530
|
+
dockerEnabled: boolean;
|
|
531
|
+
result: JobResult;
|
|
532
|
+
}): boolean {
|
|
533
|
+
if (!options.dockerEnabled) return false;
|
|
534
|
+
return isCodexStartupStallResult(options.result);
|
|
535
|
+
}
|
|
536
|
+
|
|
528
537
|
export function workerRecycleExitCodeForResult(result: WorkerJobResult): number {
|
|
529
538
|
return isCodexStartupStallResult(result)
|
|
530
539
|
? CODEX_STARTUP_STALL_WORKER_EXIT_CODE
|
|
@@ -1282,12 +1291,20 @@ async function deferClaimedJobForMaintenance(
|
|
|
1282
1291
|
headers: Record<string, string>,
|
|
1283
1292
|
jobId: string,
|
|
1284
1293
|
deferMs: number,
|
|
1294
|
+
options: { targetWorkerId?: string | null; reason?: string } = {},
|
|
1285
1295
|
): Promise<{ ok: boolean; availableAt?: string; message?: string }> {
|
|
1286
1296
|
try {
|
|
1287
|
-
const
|
|
1297
|
+
const body: Record<string, unknown> = {
|
|
1288
1298
|
workerId: opts.workerId,
|
|
1289
1299
|
deferMs,
|
|
1290
|
-
}
|
|
1300
|
+
};
|
|
1301
|
+
if (Object.prototype.hasOwnProperty.call(options, "targetWorkerId")) {
|
|
1302
|
+
body.targetWorkerId = options.targetWorkerId;
|
|
1303
|
+
}
|
|
1304
|
+
if (options.reason) {
|
|
1305
|
+
body.reason = options.reason;
|
|
1306
|
+
}
|
|
1307
|
+
const response = await postJsonWithTimeout(`${opts.server}/jobs/${jobId}/defer`, headers, body);
|
|
1291
1308
|
const payload = (await response.json().catch(() => ({}))) as {
|
|
1292
1309
|
ok?: boolean;
|
|
1293
1310
|
availableAt?: string;
|
|
@@ -1818,6 +1835,7 @@ async function workerLoop(
|
|
|
1818
1835
|
};
|
|
1819
1836
|
|
|
1820
1837
|
let statusPersistedToServer = false;
|
|
1838
|
+
let deferredForDirectRetry = false;
|
|
1821
1839
|
if (result.publishBlocked) {
|
|
1822
1840
|
await reportToolRunForUnsuccessfulJob({
|
|
1823
1841
|
opts,
|
|
@@ -1872,40 +1890,86 @@ async function workerLoop(
|
|
|
1872
1890
|
`[WorkerPals] Job ${job.id} completed in ${formatDurationMs(jobDurationMs)}: ${result.summary}`,
|
|
1873
1891
|
);
|
|
1874
1892
|
} else {
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
`${opts.server}/jobs/${job.id}/fail`,
|
|
1885
|
-
headers,
|
|
1886
|
-
{
|
|
1887
|
-
message: result.summary,
|
|
1888
|
-
detail: redactSensitiveText(result.stderr ?? ""),
|
|
1893
|
+
let unsuccessfulToolRunReported = false;
|
|
1894
|
+
const reportUnsuccessfulToolRun = async (phase: string) => {
|
|
1895
|
+
if (unsuccessfulToolRunReported) return;
|
|
1896
|
+
unsuccessfulToolRunReported = true;
|
|
1897
|
+
await reportToolRunForUnsuccessfulJob({
|
|
1898
|
+
opts,
|
|
1899
|
+
headers,
|
|
1900
|
+
job,
|
|
1901
|
+
result,
|
|
1889
1902
|
durationMs: jobDurationMs,
|
|
1890
|
-
|
|
1891
|
-
}
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1903
|
+
phase,
|
|
1904
|
+
});
|
|
1905
|
+
};
|
|
1906
|
+
const failCurrentJob = async () => {
|
|
1907
|
+
await reportUnsuccessfulToolRun(job.kind);
|
|
1908
|
+
const response = await postJsonWithTimeout(
|
|
1909
|
+
`${opts.server}/jobs/${job.id}/fail`,
|
|
1910
|
+
headers,
|
|
1911
|
+
{
|
|
1912
|
+
message: result.summary,
|
|
1913
|
+
detail: redactSensitiveText(result.stderr ?? ""),
|
|
1914
|
+
durationMs: jobDurationMs,
|
|
1915
|
+
diagnostics: result.diagnostics,
|
|
1916
|
+
},
|
|
1917
|
+
);
|
|
1918
|
+
statusPersistedToServer = response.ok;
|
|
1919
|
+
console.log(
|
|
1920
|
+
`[WorkerPals] Job ${job.id} failed in ${formatDurationMs(jobDurationMs)}: ${result.summary}`,
|
|
1904
1921
|
);
|
|
1922
|
+
recycleWorkerAfterJob = shouldRecycleWorkerForCodexUnavailableFailure(
|
|
1923
|
+
result.summary,
|
|
1924
|
+
result.stderr,
|
|
1925
|
+
);
|
|
1926
|
+
if (recycleWorkerAfterJob) {
|
|
1927
|
+
console.error(
|
|
1928
|
+
`[WorkerPals] Codex backend unavailable for job ${job.id}; terminating this worker for replacement.`,
|
|
1929
|
+
);
|
|
1930
|
+
}
|
|
1931
|
+
};
|
|
1932
|
+
|
|
1933
|
+
if (
|
|
1934
|
+
shouldDeferDockerCodexStartupStallForDirectRetry({
|
|
1935
|
+
dockerEnabled: Boolean(dockerExecutor),
|
|
1936
|
+
result,
|
|
1937
|
+
})
|
|
1938
|
+
) {
|
|
1939
|
+
await reportUnsuccessfulToolRun("worker:docker-codex-startup-stall-defer");
|
|
1940
|
+
const deferred = await deferClaimedJobForMaintenance(
|
|
1941
|
+
opts,
|
|
1942
|
+
headers,
|
|
1943
|
+
job.id,
|
|
1944
|
+
CODEX_STARTUP_STALL_DIRECT_RETRY_DEFER_MS,
|
|
1945
|
+
{
|
|
1946
|
+
targetWorkerId: null,
|
|
1947
|
+
reason: "codex_startup_stall_direct_retry",
|
|
1948
|
+
},
|
|
1949
|
+
);
|
|
1950
|
+
if (deferred.ok) {
|
|
1951
|
+
deferredForDirectRetry = true;
|
|
1952
|
+
statusPersistedToServer = true;
|
|
1953
|
+
recycleWorkerAfterJob = true;
|
|
1954
|
+
console.warn(
|
|
1955
|
+
`[WorkerPals] Deferred job ${job.id} after Docker Codex startup stall until ${
|
|
1956
|
+
deferred.availableAt ?? "a direct WorkerPal retry"
|
|
1957
|
+
}; recycling this worker so RemoteBuddy can spawn a direct isolated-worktree WorkerPal.`,
|
|
1958
|
+
);
|
|
1959
|
+
} else {
|
|
1960
|
+
console.warn(
|
|
1961
|
+
`[WorkerPals] Failed to defer Docker Codex startup-stall job ${job.id}; marking failed: ${
|
|
1962
|
+
deferred.message || "unknown error"
|
|
1963
|
+
}`,
|
|
1964
|
+
);
|
|
1965
|
+
await failCurrentJob();
|
|
1966
|
+
}
|
|
1967
|
+
} else {
|
|
1968
|
+
await failCurrentJob();
|
|
1905
1969
|
}
|
|
1906
1970
|
}
|
|
1907
1971
|
|
|
1908
|
-
if (job.sessionId) {
|
|
1972
|
+
if (job.sessionId && !deferredForDirectRetry) {
|
|
1909
1973
|
const jobOrigin = taskExecuteOrigin(parsedParams);
|
|
1910
1974
|
const responseMode = String(parsedParams.responseMode ?? "")
|
|
1911
1975
|
.trim()
|