pi-oracle 0.1.12 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
/**
 * Cookie record as received from an import source.
 * Only `name` is mandatory; every other attribute may be absent.
 */
export interface ImportedAuthCookie {
  name: string;
  value?: string;
  domain?: string;
  path?: string;
  // NOTE(review): unit/epoch of `expires` is not visible in this declaration — confirm against the implementation.
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: "Lax" | "Strict" | "None";
}

/**
 * Cookie record after normalization: `value`, `domain`, `path`, `httpOnly`
 * and `secure` are always present; `expires` and `sameSite` stay optional.
 */
export interface NormalizedAuthCookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  expires?: number;
  httpOnly: boolean;
  secure: boolean;
  sameSite?: "Lax" | "Strict" | "None";
}

/**
 * Split imported cookies into the ones kept and the ones dropped.
 *
 * @param cookies - Raw imported cookies.
 * @param chatUrl - Chat URL passed to the filter (presumably used to decide which cookies apply — confirm).
 * @returns `cookies`: the normalized cookies that were kept; `dropped`: each rejected cookie with a textual `reason`.
 */
export function filterImportableAuthCookies(
  cookies: ImportedAuthCookie[],
  chatUrl: string,
): {
  cookies: NormalizedAuthCookie[];
  dropped: Array<{ cookie: NormalizedAuthCookie; reason: string }>;
};

/**
 * Return a cookie list together with a flag telling whether an account cookie was synthesized.
 *
 * @param cookies - Normalized cookies to inspect.
 * @param chatUrl - Chat URL passed to the helper.
 * @returns `cookies`: the resulting list; `synthesized`: whether a cookie was synthesized;
 *          `value`: optional string (presumably the account cookie value — confirm).
 */
export function ensureAccountCookie(
  cookies: NormalizedAuthCookie[],
  chatUrl: string,
): { cookies: NormalizedAuthCookie[]; synthesized: boolean; value?: string };
@@ -1,9 +1,11 @@
1
- import { createHash } from "node:crypto";
2
- import { existsSync } from "node:fs";
1
+ import { createHash, randomUUID } from "node:crypto";
2
+ import { existsSync, readdirSync, readFileSync } from "node:fs";
3
3
  import { appendFile, chmod, mkdir, readFile, rename, rm, stat, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, join } from "node:path";
5
- import { spawn } from "node:child_process";
5
+ import { fileURLToPath } from "node:url";
6
+ import { spawn, execFileSync } from "node:child_process";
6
7
  import { FILE_LABEL_PATTERN_SOURCE, filterStructuralArtifactCandidates, GENERIC_ARTIFACT_LABELS, parseSnapshotEntries } from "./artifact-heuristics.mjs";
8
+ import { createLease, listLeaseMetadata, readLeaseMetadata, releaseLease, withLock } from "./state-locks.mjs";
7
9
 
8
10
  const jobId = process.argv[2];
9
11
  if (!jobId) {
@@ -29,10 +31,9 @@ const MODEL_FAMILY_PREFIX = {
29
31
  pro: "Pro ",
30
32
  };
31
33
 
34
+ const WORKER_SCRIPT_PATH = fileURLToPath(import.meta.url);
32
35
  const DEFAULT_ORACLE_STATE_DIR = "/tmp/pi-oracle-state";
33
36
  const ORACLE_STATE_DIR = process.env.PI_ORACLE_STATE_DIR?.trim() || DEFAULT_ORACLE_STATE_DIR;
34
- const LOCKS_DIR = join(ORACLE_STATE_DIR, "locks");
35
- const LEASES_DIR = join(ORACLE_STATE_DIR, "leases");
36
37
  const SEED_GENERATION_FILE = ".oracle-seed-generation";
37
38
  const ARTIFACT_CANDIDATE_STABILITY_TIMEOUT_MS = 15_000;
38
39
  const ARTIFACT_CANDIDATE_STABILITY_POLL_MS = 1_500;
@@ -61,23 +62,6 @@ async function ensurePrivateDir(path) {
61
62
  await chmod(path, 0o700).catch(() => undefined);
62
63
  }
63
64
 
64
- function leaseKey(kind, key) {
65
- return `${kind}-${createHash("sha256").update(key).digest("hex").slice(0, 24)}`;
66
- }
67
-
68
- async function readLockProcessPid(path) {
69
- const metadataPath = join(path, "metadata.json");
70
- if (!existsSync(metadataPath)) return undefined;
71
- try {
72
- const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
73
- return typeof metadata?.processPid === "number" && Number.isInteger(metadata.processPid) && metadata.processPid > 0
74
- ? metadata.processPid
75
- : undefined;
76
- } catch {
77
- return undefined;
78
- }
79
- }
80
-
81
65
  function isProcessAlive(pid) {
82
66
  try {
83
67
  process.kill(pid, 0);
@@ -88,53 +72,69 @@ function isProcessAlive(pid) {
88
72
  }
89
73
  }
90
74
 
91
- async function maybeReclaimStaleLock(path) {
92
- const processPid = await readLockProcessPid(path);
93
- if (!processPid || isProcessAlive(processPid)) return false;
94
- await rm(path, { recursive: true, force: true }).catch(() => undefined);
95
- return true;
75
+ function readProcessStartedAt(pid) {
76
+ if (!pid || pid <= 0) return undefined;
77
+ try {
78
+ const startedAt = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], { encoding: "utf8" }).trim();
79
+ return startedAt || undefined;
80
+ } catch {
81
+ return undefined;
82
+ }
96
83
  }
97
84
 
98
- async function acquireLock(kind, key, metadata, timeoutMs = 30_000) {
99
- const path = join(LOCKS_DIR, leaseKey(kind, key));
85
+ async function waitForProcessStartedAt(pid, timeoutMs = 2_000) {
100
86
  const deadline = Date.now() + timeoutMs;
101
- await ensurePrivateDir(ORACLE_STATE_DIR);
102
- await ensurePrivateDir(LOCKS_DIR);
103
-
104
87
  while (Date.now() < deadline) {
105
- try {
106
- await mkdir(path, { recursive: false, mode: 0o700 });
107
- await secureWriteText(join(path, "metadata.json"), `${JSON.stringify(metadata, null, 2)}\n`);
108
- return path;
109
- } catch (error) {
110
- if (!(error && typeof error === "object" && "code" in error && error.code === "EEXIST")) throw error;
111
- if (await maybeReclaimStaleLock(path)) continue;
112
- }
113
- await sleep(200);
88
+ const startedAt = readProcessStartedAt(pid);
89
+ if (startedAt) return startedAt;
90
+ await sleep(100);
114
91
  }
115
-
116
- throw new Error(`Timed out waiting for oracle ${kind} lock: ${key}`);
92
+ return readProcessStartedAt(pid);
117
93
  }
118
94
 
119
- async function releaseLock(path) {
120
- if (!path) return;
121
- await rm(path, { recursive: true, force: true }).catch(() => undefined);
122
- }
95
+ async function terminateWorkerPid(pid, startedAt, options = {}) {
96
+ if (!pid || pid <= 0) return true;
97
+ const currentStartedAt = readProcessStartedAt(pid);
98
+ if (!currentStartedAt) return true;
99
+ if (startedAt && currentStartedAt !== startedAt) return false;
100
+
101
+ const termGraceMs = options.termGraceMs ?? 5_000;
102
+ const killGraceMs = options.killGraceMs ?? 2_000;
123
103
 
124
- async function withLock(kind, key, metadata, fn, timeoutMs) {
125
- const handle = await acquireLock(kind, key, metadata, timeoutMs);
126
104
  try {
127
- return await fn();
128
- } finally {
129
- await releaseLock(handle);
105
+ process.kill(pid, "SIGTERM");
106
+ } catch {
107
+ return !isProcessAlive(pid);
108
+ }
109
+
110
+ const termDeadline = Date.now() + termGraceMs;
111
+ while (Date.now() < termDeadline) {
112
+ const liveStartedAt = readProcessStartedAt(pid);
113
+ if (!liveStartedAt) return true;
114
+ if (startedAt && liveStartedAt !== startedAt) return true;
115
+ await sleep(250);
130
116
  }
131
- }
132
117
 
133
- async function releaseLease(kind, key) {
134
- if (!key) return;
135
- await rm(join(LEASES_DIR, leaseKey(kind, key)), { recursive: true, force: true }).catch(() => undefined);
118
+ try {
119
+ process.kill(pid, "SIGKILL");
120
+ } catch {
121
+ return !isProcessAlive(pid);
122
+ }
123
+
124
+ const killDeadline = Date.now() + killGraceMs;
125
+ while (Date.now() < killDeadline) {
126
+ const liveStartedAt = readProcessStartedAt(pid);
127
+ if (!liveStartedAt) return true;
128
+ if (startedAt && liveStartedAt !== startedAt) return true;
129
+ await sleep(250);
130
+ }
131
+
132
+ const finalStartedAt = readProcessStartedAt(pid);
133
+ if (!finalStartedAt) return true;
134
+ return startedAt ? finalStartedAt !== startedAt : false;
136
135
  }
137
136
 
137
+
138
138
  async function secureWriteText(path, content) {
139
139
  const tmpPath = `${path}.${process.pid}.${Date.now()}.tmp`;
140
140
  await writeFile(tmpPath, content, { encoding: "utf8", mode: 0o600 });
@@ -156,18 +156,79 @@ async function readJob() {
156
156
  return readJobUnlocked();
157
157
  }
158
158
 
159
/**
 * Build the directory path of an arbitrary job (not necessarily this worker's own job).
 *
 * @param targetJobId - Id of the job whose directory is wanted.
 * @returns `ORACLE_JOBS_DIR/oracle-<targetJobId>`.
 */
function getAnyJobDir(targetJobId) {
  const dirName = `oracle-${targetJobId}`;
  return join(ORACLE_JOBS_DIR, dirName);
}
162
+
163
/**
 * Build the path of the `job.json` file inside an arbitrary job's directory.
 *
 * @param targetJobId - Id of the job whose state file is wanted.
 * @returns Path to that job's `job.json`.
 */
function getAnyJobPath(targetJobId) {
  const jobDir = getAnyJobDir(targetJobId);
  return join(jobDir, "job.json");
}
166
+
167
/**
 * Synchronously load and parse an arbitrary job's `job.json`, without taking any lock.
 *
 * @param targetJobId - Id of the job to read.
 * @returns The parsed JSON value, or `undefined` when the file does not exist
 *          or cannot be read/parsed.
 */
function readAnyJob(targetJobId) {
  const jobFile = getAnyJobPath(targetJobId);
  if (existsSync(jobFile)) {
    try {
      return JSON.parse(readFileSync(jobFile, "utf8"));
    } catch {
      // Unreadable or malformed job file: report it as absent.
    }
  }
  return undefined;
}
176
+
177
/**
 * List every job under `ORACLE_JOBS_DIR` whose status is `"queued"`, oldest first.
 *
 * Ordering: by `queuedAt` (falling back to `createdAt`), then by `createdAt`,
 * then by `id`. Every key is coerced with `String()` before comparison so a
 * job file carrying a non-string timestamp cannot make the comparator throw
 * and abort the whole listing.
 *
 * @returns Parsed job objects in promotion order; empty when the jobs directory is missing.
 */
function listQueuedJobs() {
  if (!existsSync(ORACLE_JOBS_DIR)) return [];

  const dirPrefix = "oracle-";
  // Primary key is coerced like the tie-breakers below; job.json content is
  // parsed from disk and is not guaranteed to hold strings.
  const primaryKey = (job) => String(job?.queuedAt || job?.createdAt || "");
  const createdKey = (job) => String(job?.createdAt || "");
  const idKey = (job) => String(job?.id || "");

  return readdirSync(ORACLE_JOBS_DIR)
    .filter((name) => name.startsWith(dirPrefix))
    .map((name) => readAnyJob(name.slice(dirPrefix.length)))
    // Unreadable jobs come back as undefined and are dropped here too.
    .filter((job) => job?.status === "queued")
    .sort(
      (left, right) =>
        primaryKey(left).localeCompare(primaryKey(right)) ||
        createdKey(left).localeCompare(createdKey(right)) ||
        idKey(left).localeCompare(idKey(right)),
    );
}
189
+
190
/**
 * Tell whether a job status is one of the in-flight states.
 *
 * @param status - Status value; it is stringified first, and falsy values count as "".
 * @returns `true` for "preparing", "submitted" or "waiting"; `false` otherwise.
 */
function isActiveJobStatus(status) {
  switch (String(status || "")) {
    case "preparing":
    case "submitted":
    case "waiting":
      return true;
    default:
      return false;
  }
}
193
+
194
/**
 * Decide whether a job still counts against admission.
 *
 * A job blocks when its status is active, when `cleanupPending` is exactly
 * `true`, or when it carries a non-empty `cleanupWarnings` array.
 *
 * @param job - Parsed job object; may be `undefined`/`null`.
 * @returns `true` when the job blocks admission, otherwise `false`.
 */
function jobBlocksAdmission(job) {
  if (isActiveJobStatus(job?.status)) return true;
  if (job?.cleanupPending === true) return true;
  const warnings = job?.cleanupWarnings;
  return Array.isArray(warnings) && warnings.length > 0;
}
197
+
198
/**
 * Tell whether a job record already shows a worker handoff: the job exists,
 * is no longer "queued", and has a truthy `workerPid`.
 *
 * @param job - Parsed job object; may be `undefined`/`null`.
 * @returns `true` only when all three conditions hold.
 */
function hasDurableWorkerHandoff(job) {
  const leftQueue = Boolean(job) && job.status !== "queued";
  return leftQueue && Boolean(job.workerPid);
}
203
+
204
/**
 * Read-modify-write an arbitrary job's `job.json` while holding that job's "job" lock.
 *
 * @param targetJobId - Id of the job to update.
 * @param mutator - Receives the parsed current job and returns the job to persist;
 *                  anything it throws propagates to the caller and nothing is written.
 * @returns The job object returned by `mutator`, after it has been written.
 */
async function mutateAnyJob(targetJobId, mutator) {
  const lockMetadata = { processPid: process.pid, action: "mutateJob", targetJobId };

  const applyMutation = async () => {
    const jobFile = getAnyJobPath(targetJobId);
    const before = JSON.parse(await readFile(jobFile, "utf8"));
    const after = mutator(before);
    await secureWriteText(jobFile, `${JSON.stringify(after, null, 2)}\n`);
    return after;
  };

  return withLock(ORACLE_STATE_DIR, "job", targetJobId, lockMetadata, applyMutation);
}
213
+
214
/**
 * Overwrite an arbitrary job's `job.json` with the given object while holding that job's "job" lock.
 *
 * @param targetJobId - Id of the job to write.
 * @param job - Job object to serialize (pretty-printed JSON plus trailing newline).
 */
async function writeAnyJob(targetJobId, job) {
  const lockMetadata = { processPid: process.pid, action: "writeJob", targetJobId };

  const persist = async () => {
    const serialized = `${JSON.stringify(job, null, 2)}\n`;
    await secureWriteText(getAnyJobPath(targetJobId), serialized);
  };

  await withLock(ORACLE_STATE_DIR, "job", targetJobId, lockMetadata, persist);
}
219
+
159
220
  async function writeJobUnlocked(job) {
160
221
  await secureWriteText(jobPath, `${JSON.stringify(job, null, 2)}\n`);
161
222
  }
162
223
 
163
224
  async function writeJob(job) {
164
- await withLock("job", jobId, { processPid: process.pid, action: "writeJob" }, async () => {
225
+ await withLock(ORACLE_STATE_DIR, "job", jobId, { processPid: process.pid, action: "writeJob" }, async () => {
165
226
  await writeJobUnlocked(job);
166
227
  });
167
228
  }
168
229
 
169
230
  async function mutateJob(mutator) {
170
- return withLock("job", jobId, { processPid: process.pid, action: "mutateJob" }, async () => {
231
+ return withLock(ORACLE_STATE_DIR, "job", jobId, { processPid: process.pid, action: "mutateJob" }, async () => {
171
232
  const job = await readJobUnlocked();
172
233
  const next = mutator(job);
173
234
  await writeJobUnlocked(next);
@@ -271,7 +332,7 @@ async function cloneSeedProfileToRuntime(job) {
271
332
  const seedGenerationPath = join(seedDir, SEED_GENERATION_FILE);
272
333
  const seedGeneration = existsSync(seedGenerationPath) ? (await readFile(seedGenerationPath, "utf8")).trim() || undefined : undefined;
273
334
 
274
- await withLock("auth", "global", { jobId: job.id, processPid: process.pid, action: "cloneSeedProfile" }, async () => {
335
+ await withLock(ORACLE_STATE_DIR, "auth", "global", { jobId: job.id, processPid: process.pid, action: "cloneSeedProfile" }, async () => {
275
336
  await rm(job.runtimeProfileDir, { recursive: true, force: true }).catch(() => undefined);
276
337
  await ensurePrivateDir(dirname(job.runtimeProfileDir));
277
338
  const cloneArgs = job.config.browser.cloneStrategy === "apfs-clone" ? ["-cR", seedDir, job.runtimeProfileDir] : ["-R", seedDir, job.runtimeProfileDir];
@@ -282,7 +343,7 @@ async function cloneSeedProfileToRuntime(job) {
282
343
  }
283
344
 
284
345
  async function cleanupRuntime(job) {
285
- if (!job || cleaningUpRuntime) return;
346
+ if (!job || cleaningUpRuntime) return [];
286
347
  cleaningUpRuntime = true;
287
348
  const warnings = [];
288
349
  try {
@@ -291,31 +352,202 @@ async function cleanupRuntime(job) {
291
352
  warnings.push(message);
292
353
  await log(message).catch(() => undefined);
293
354
  });
294
- await releaseLease("conversation", job.conversationId).catch(async (error) => {
295
- const message = `Conversation lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
296
- warnings.push(message);
297
- await log(message).catch(() => undefined);
298
- });
299
- await releaseLease("runtime", job.runtimeId).catch(async (error) => {
300
- const message = `Runtime lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
301
- warnings.push(message);
302
- await log(message).catch(() => undefined);
303
- });
304
355
  await rm(job.runtimeProfileDir, { recursive: true, force: true }).catch(async (error) => {
305
356
  const message = `Runtime profile cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
306
357
  warnings.push(message);
307
358
  await log(message).catch(() => undefined);
308
359
  });
360
+ if (warnings.length === 0) {
361
+ await releaseLease(ORACLE_STATE_DIR, "conversation", job.conversationId).catch(async (error) => {
362
+ const message = `Conversation lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
363
+ warnings.push(message);
364
+ await log(message).catch(() => undefined);
365
+ });
366
+ await releaseLease(ORACLE_STATE_DIR, "runtime", job.runtimeId).catch(async (error) => {
367
+ const message = `Runtime lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
368
+ warnings.push(message);
369
+ await log(message).catch(() => undefined);
370
+ });
371
+ }
309
372
  if (warnings.length === 0) {
310
373
  await log(`Cleanup summary: runtime ${job.runtimeId} released with no warnings`).catch(() => undefined);
311
374
  } else {
312
375
  await log(`Cleanup summary: runtime ${job.runtimeId} released with ${warnings.length} warning(s)`).catch(() => undefined);
313
376
  }
377
+ return warnings;
314
378
  } finally {
315
379
  cleaningUpRuntime = false;
316
380
  }
317
381
  }
318
382
 
383
/**
 * Try to take a runtime lease for `job`, respecting `job.config.browser.maxConcurrentJobs`.
 *
 * Existing runtime leases whose owning job no longer blocks admission are
 * released (best effort) and do not count against the limit.
 *
 * @param job - Job that wants a runtime slot.
 * @param createdAt - Timestamp stored in the new lease's metadata.
 * @returns `false` when the limit is already reached; `true` once the lease has been created.
 */
async function tryAcquireRuntimeLeaseForJob(job, createdAt) {
  let blockingLeaseCount = 0;

  for (const lease of listLeaseMetadata(ORACLE_STATE_DIR, "runtime")) {
    const owner = lease?.jobId ? readAnyJob(lease.jobId) : undefined;
    if (jobBlocksAdmission(owner)) {
      blockingLeaseCount += 1;
    } else {
      // Owner is gone or finished: drop the stale lease, ignoring failures.
      await releaseLease(ORACLE_STATE_DIR, "runtime", lease?.runtimeId).catch(() => undefined);
    }
  }

  if (blockingLeaseCount >= job.config.browser.maxConcurrentJobs) return false;

  const leaseMetadata = {
    jobId: job.id,
    runtimeId: job.runtimeId,
    runtimeSessionName: job.runtimeSessionName,
    runtimeProfileDir: job.runtimeProfileDir,
    projectId: job.projectId,
    sessionId: job.sessionId,
    createdAt,
  };
  await createLease(ORACLE_STATE_DIR, "runtime", job.runtimeId, leaseMetadata);
  return true;
}
408
+
409
/**
 * Try to take the conversation lease for `job`.
 *
 * @param job - Job that wants exclusive use of its `conversationId`.
 * @param createdAt - Timestamp stored in the new lease's metadata.
 * @returns `true` when the job has no conversation, already holds the lease, or
 *          the lease was created; `false` when another job that still blocks
 *          admission holds it.
 */
async function tryAcquireConversationLeaseForJob(job, createdAt) {
  const conversationId = job.conversationId;
  if (!conversationId) return true;

  const holder = await readLeaseMetadata(ORACLE_STATE_DIR, "conversation", conversationId);
  if (holder?.jobId === job.id) return true;

  if (holder) {
    // Held by a different job: only a job that still blocks admission keeps it.
    if (jobBlocksAdmission(readAnyJob(holder.jobId))) return false;
    await releaseLease(ORACLE_STATE_DIR, "conversation", conversationId).catch(() => undefined);
  }

  const leaseMetadata = {
    jobId: job.id,
    conversationId,
    projectId: job.projectId,
    sessionId: job.sessionId,
    createdAt,
  };
  await createLease(ORACLE_STATE_DIR, "conversation", conversationId, leaseMetadata);
  return true;
}
429
+
430
/**
 * Launch a detached worker process (this same script) for another job.
 *
 * @param targetJobId - Id of the job the new worker should run; passed as argv[2].
 * @returns `pid` of the child, a fresh random `workerNonce`, and `workerStartedAt`
 *          as reported by `waitForProcessStartedAt` (may be `undefined`).
 * @throws Error when the child could not be spawned (no pid was assigned).
 */
async function spawnDetachedWorker(targetJobId) {
  const child = spawn(process.execPath, [WORKER_SCRIPT_PATH, targetJobId], {
    detached: true,
    stdio: "ignore",
  });

  // Without a listener, a spawn failure surfaces as an unhandled 'error'
  // event and takes down the promoting process.
  let spawnError;
  child.on("error", (error) => {
    spawnError = error;
  });
  child.unref();

  if (!child.pid) {
    // The 'error' event is delivered asynchronously; yield once so its cause
    // can be included in the message.
    await new Promise((resolve) => setImmediate(resolve));
    const detail = spawnError instanceof Error ? `: ${spawnError.message}` : "";
    // Fail loudly instead of handing back an undefined pid that would be
    // persisted as if a worker existed.
    throw new Error(`Failed to spawn oracle worker for job ${targetJobId}${detail}`);
  }

  return {
    pid: child.pid,
    workerNonce: randomUUID(),
    workerStartedAt: await waitForProcessStartedAt(child.pid),
  };
}
442
+
443
/**
 * Mark a job as failed during queued promotion, unless it is already in a
 * terminal state ("complete", "failed", "cancelled"). Best effort: any error
 * from the update is swallowed.
 *
 * @param targetJobId - Id of the job to fail.
 * @param message - Error text stored on the job.
 * @param at - Timestamp used for `completedAt`, `heartbeatAt` and the phase patch; defaults to now.
 */
async function failQueuedPromotion(targetJobId, message, at = new Date().toISOString()) {
  const markFailed = (latest) => {
    const alreadyTerminal = ["complete", "failed", "cancelled"].includes(String(latest.status || ""));
    if (alreadyTerminal) return latest;

    const failureFields = {
      status: "failed",
      completedAt: at,
      heartbeatAt: at,
      error: message,
    };
    return { ...latest, ...phasePatch("failed", failureFields, at) };
  };

  await mutateAnyJob(targetJobId, markFailed).catch(() => undefined);
}
457
+
458
/**
 * Under the global "admission" lock, walk the queued jobs in order and promote
 * each one that can get both a runtime lease and a conversation lease:
 * mark it "submitted", spawn a detached worker, and record the worker handoff.
 *
 * The walk stops when no runtime lease is available, or when tearing down a
 * failed promotion leaves cleanup warnings. Errors escaping the locked section
 * are logged, never thrown.
 */
async function promoteQueuedJobsAfterCleanup() {
  const lockMetadata = { processPid: process.pid, source: "worker_cleanup_promoter", jobId };
  const terminalStatuses = ["complete", "failed", "cancelled"];

  const promoteAll = async () => {
    for (const candidate of listQueuedJobs()) {
      // Re-read: the listing may be stale by the time this entry is reached.
      const current = readAnyJob(candidate.id);
      if (current?.status !== "queued") continue;

      let spawnedWorker;
      const promotedAt = new Date().toISOString();

      if (!existsSync(current.archivePath)) {
        await failQueuedPromotion(current.id, `Queued oracle archive is missing: ${current.archivePath}`, promotedAt);
        continue;
      }

      // No free runtime slot: later jobs cannot be admitted either.
      const runtimeLeaseAcquired = await tryAcquireRuntimeLeaseForJob(current, promotedAt);
      if (!runtimeLeaseAcquired) break;

      // Conversation busy: give the runtime slot back and try the next job.
      const conversationLeaseAcquired = await tryAcquireConversationLeaseForJob(current, promotedAt);
      if (!conversationLeaseAcquired) {
        await releaseLease(ORACLE_STATE_DIR, "runtime", current.runtimeId).catch(() => undefined);
        continue;
      }

      try {
        await mutateAnyJob(current.id, (latest) => {
          if (latest.status !== "queued") {
            throw new Error(`Queued job ${latest.id} changed state during cleanup promotion (${latest.status})`);
          }
          const submittedFields = {
            status: "submitted",
            submittedAt: latest.submittedAt || promotedAt,
          };
          return { ...latest, ...phasePatch("submitted", submittedFields, promotedAt) };
        });

        spawnedWorker = await spawnDetachedWorker(current.id);

        await mutateAnyJob(current.id, (latest) => {
          // When a handoff is already recorded, only fill in missing fields.
          const keepExisting = hasDurableWorkerHandoff(latest);
          return {
            ...latest,
            workerPid: keepExisting ? latest.workerPid || spawnedWorker.pid : spawnedWorker.pid,
            workerNonce: keepExisting ? latest.workerNonce || spawnedWorker.workerNonce : spawnedWorker.workerNonce,
            workerStartedAt: keepExisting
              ? latest.workerStartedAt || spawnedWorker.workerStartedAt
              : spawnedWorker.workerStartedAt,
          };
        });
      } catch (error) {
        const latest = readAnyJob(current.id);
        if (hasDurableWorkerHandoff(latest)) {
          await log(`Queued promotion handoff already durable for ${current.id}; leaving active job intact`).catch(() => undefined);
          continue;
        }

        if (spawnedWorker) {
          await terminateWorkerPid(spawnedWorker.pid, spawnedWorker.workerStartedAt).catch(() => undefined);
        }

        const failedAt = new Date().toISOString();
        if (latest && !terminalStatuses.includes(String(latest.status || ""))) {
          await failQueuedPromotion(current.id, error instanceof Error ? error.message : String(error), failedAt);
        }

        if (!spawnedWorker) {
          // Nothing was started: just hand both leases back.
          await releaseLease(ORACLE_STATE_DIR, "conversation", current.conversationId).catch(() => undefined);
          await releaseLease(ORACLE_STATE_DIR, "runtime", current.runtimeId).catch(() => undefined);
          continue;
        }

        // A worker was started: run the full runtime teardown for the job.
        let cleanupWarnings = [];
        try {
          cleanupWarnings = await cleanupRuntime(current);
        } catch (cleanupError) {
          const message = `Cleanup-driven promotion teardown warning for ${current.id}: ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}`;
          cleanupWarnings = [message];
          await log(message).catch(() => undefined);
        }

        if (cleanupWarnings.length > 0) {
          await mutateAnyJob(current.id, (stored) => ({
            ...stored,
            cleanupWarnings: [...(stored.cleanupWarnings || []), ...cleanupWarnings],
            lastCleanupAt: failedAt,
            error: [stored.error, ...cleanupWarnings].filter(Boolean).join("\n"),
          })).catch(() => undefined);
          await log(`Stopping queued cleanup promotion after ${current.id} because teardown left ${cleanupWarnings.length} warning(s)`).catch(() => undefined);
          break;
        }
      }
    }
  };

  await withLock(ORACLE_STATE_DIR, "admission", "global", lockMetadata, promoteAll).catch(async (error) => {
    await log(`Queued cleanup promotion warning: ${error instanceof Error ? error.message : String(error)}`).catch(() => undefined);
  });
}
550
+
319
551
  function browserBaseArgs(job, options = {}) {
320
552
  const args = ["--session", job.runtimeSessionName];
321
553
  if (options.withLaunchOptions) {
@@ -1535,6 +1767,7 @@ async function run() {
1535
1767
  responsePath: currentJob.responsePath,
1536
1768
  responseFormat: "text/plain",
1537
1769
  artifactFailureCount,
1770
+ cleanupPending: true,
1538
1771
  }),
1539
1772
  { force: true },
1540
1773
  );
@@ -1551,13 +1784,39 @@ async function run() {
1551
1784
  status: "failed",
1552
1785
  completedAt: new Date().toISOString(),
1553
1786
  error: message,
1787
+ cleanupPending: true,
1554
1788
  }),
1555
1789
  { force: true },
1556
1790
  );
1557
1791
  process.exitCode = 1;
1558
1792
  }
1559
1793
  } finally {
1560
- await cleanupRuntime(currentJob).catch(() => undefined);
1794
+ let cleanupWarnings = [];
1795
+ try {
1796
+ cleanupWarnings = await cleanupRuntime(currentJob);
1797
+ } catch (error) {
1798
+ cleanupWarnings = [`Runtime cleanup failed before queued promotion: ${error instanceof Error ? error.message : String(error)}`];
1799
+ await log(cleanupWarnings[0]).catch(() => undefined);
1800
+ }
1801
+ if (currentJob?.id) {
1802
+ const cleanupAt = new Date().toISOString();
1803
+ await mutateJob((job) => ({
1804
+ ...job,
1805
+ cleanupPending: false,
1806
+ ...(cleanupWarnings.length > 0
1807
+ ? {
1808
+ cleanupWarnings: [...(job.cleanupWarnings || []), ...cleanupWarnings],
1809
+ lastCleanupAt: cleanupAt,
1810
+ error: [job.error, ...cleanupWarnings].filter(Boolean).join("\n"),
1811
+ }
1812
+ : { lastCleanupAt: cleanupAt }),
1813
+ })).catch(() => undefined);
1814
+ }
1815
+ if (cleanupWarnings.length === 0) {
1816
+ await promoteQueuedJobsAfterCleanup().catch(() => undefined);
1817
+ } else {
1818
+ await log(`Skipping queued promotion because runtime cleanup left ${cleanupWarnings.length} warning(s)`).catch(() => undefined);
1819
+ }
1561
1820
  }
1562
1821
  }
1563
1822
 
@@ -0,0 +1,45 @@
1
/**
 * Grace period, in milliseconds judging by the name, related to metadata writes.
 * NOTE(review): its exact use lives in the implementation module, which is not visible here.
 */
export const ORACLE_METADATA_WRITE_GRACE_MS: number;

/**
 * Acquire the lock identified by `kind` + `key` under `stateDir`.
 *
 * @param metadata - Arbitrary metadata recorded with the lock.
 * @param timeoutMs - Optional wait limit.
 * @returns A string handle (presumably the lock path, to be passed to `releaseLock` — confirm).
 */
export function acquireLock(
  stateDir: string,
  kind: string,
  key: string,
  metadata: unknown,
  timeoutMs?: number,
): Promise<string>;

/** Release a lock handle; `undefined` is accepted. */
export function releaseLock(path: string | undefined): Promise<void>;

/**
 * Run `fn` with the lock identified by `kind` + `key` and resolve to `fn`'s result.
 *
 * @param metadata - Arbitrary metadata recorded with the lock.
 * @param timeoutMs - Optional wait limit for acquiring the lock.
 */
export function withLock<T>(
  stateDir: string,
  kind: string,
  key: string,
  metadata: unknown,
  fn: () => Promise<T>,
  timeoutMs?: number,
): Promise<T>;

/**
 * Create the lease identified by `kind` + `key` with the given metadata.
 *
 * @returns A string (presumably the lease path — confirm).
 */
export function createLease(
  stateDir: string,
  kind: string,
  key: string,
  metadata: unknown,
  timeoutMs?: number,
): Promise<string>;

/**
 * Write metadata for the lease identified by `kind` + `key`.
 *
 * @returns A string (presumably the path written — confirm).
 */
export function writeLeaseMetadata(
  stateDir: string,
  kind: string,
  key: string,
  metadata: unknown,
): Promise<string>;

/** Read the metadata of one lease; resolves to `undefined` when none is available. */
export function readLeaseMetadata<T = unknown>(
  stateDir: string,
  kind: string,
  key: string,
): Promise<T | undefined>;

/** Synchronously list the metadata of every lease of the given `kind`. */
export function listLeaseMetadata<T = unknown>(stateDir: string, kind: string): T[];

/** Release the lease identified by `kind` + `key`; an `undefined` key is accepted. */
export function releaseLease(stateDir: string, kind: string, key: string | undefined): Promise<void>;