pi-oracle 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +33 -0
  2. package/README.md +7 -0
  3. package/docs/ORACLE_DESIGN.md +1 -1
  4. package/docs/ORACLE_ISOLATED_PI_VALIDATION.md +249 -0
  5. package/docs/ORACLE_RECOVERY_DRILL.md +5 -4
  6. package/extensions/oracle/index.ts +8 -1
  7. package/extensions/oracle/lib/commands.ts +11 -24
  8. package/extensions/oracle/lib/config.ts +5 -0
  9. package/extensions/oracle/lib/jobs.ts +117 -217
  10. package/extensions/oracle/lib/locks.ts +41 -209
  11. package/extensions/oracle/lib/poller.ts +14 -51
  12. package/extensions/oracle/lib/queue.ts +75 -112
  13. package/extensions/oracle/lib/runtime.ts +60 -14
  14. package/extensions/oracle/lib/tools.ts +70 -67
  15. package/extensions/oracle/shared/job-coordination-helpers.d.mts +84 -0
  16. package/extensions/oracle/shared/job-coordination-helpers.mjs +168 -0
  17. package/extensions/oracle/shared/job-lifecycle-helpers.d.mts +130 -0
  18. package/extensions/oracle/shared/job-lifecycle-helpers.mjs +377 -0
  19. package/extensions/oracle/shared/job-observability-helpers.d.mts +59 -0
  20. package/extensions/oracle/shared/job-observability-helpers.mjs +143 -0
  21. package/extensions/oracle/shared/process-helpers.d.mts +20 -0
  22. package/extensions/oracle/shared/process-helpers.mjs +128 -0
  23. package/extensions/oracle/shared/state-coordination-helpers.d.mts +43 -0
  24. package/extensions/oracle/shared/state-coordination-helpers.mjs +381 -0
  25. package/extensions/oracle/worker/artifact-heuristics.mjs +5 -0
  26. package/extensions/oracle/worker/auth-bootstrap.mjs +100 -139
  27. package/extensions/oracle/worker/auth-cookie-policy.mjs +5 -0
  28. package/extensions/oracle/worker/auth-flow-helpers.d.mts +41 -0
  29. package/extensions/oracle/worker/auth-flow-helpers.mjs +165 -0
  30. package/extensions/oracle/worker/chatgpt-flow-helpers.d.mts +13 -0
  31. package/extensions/oracle/worker/chatgpt-flow-helpers.mjs +85 -0
  32. package/extensions/oracle/worker/chatgpt-ui-helpers.d.mts +33 -0
  33. package/extensions/oracle/worker/chatgpt-ui-helpers.mjs +292 -0
  34. package/extensions/oracle/worker/run-job.mjs +235 -380
  35. package/extensions/oracle/worker/state-locks.mjs +31 -216
  36. package/package.json +14 -5
  37. package/prompts/oracle.md +1 -1
@@ -1,19 +1,31 @@
1
- import { createHash } from "node:crypto";
2
- import { existsSync } from "node:fs";
3
- import { mkdirSync, readdirSync, readFileSync, statSync } from "node:fs";
4
- import { mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
5
- import { basename, join } from "node:path";
1
+ // Purpose: Provide typed oracle lock/lease wrappers bound to the configured shared oracle state directory.
2
+ // Responsibilities: Expose extension-facing lock helpers, sweep stale lock dirs, and preserve existing typed APIs over the shared state helper core.
3
+ // Scope: Extension-process wrappers only; atomic filesystem coordination semantics live in shared/state-coordination-helpers.mjs.
4
+ // Usage: Imported by oracle lib modules that need admission locks, job locks, reconcile locks, or lease metadata persistence.
5
+ // Invariants/Assumptions: All lock/lease paths live under the single configured oracle state directory for this machine.
6
+
7
+ import { mkdirSync } from "node:fs";
8
+ import {
9
+ acquireStateLock,
10
+ createStateLease,
11
+ getStateLeasesDir,
12
+ getStateLocksDir,
13
+ listStateLeaseMetadata,
14
+ ORACLE_METADATA_WRITE_GRACE_MS,
15
+ ORACLE_TMP_STATE_DIR_GRACE_MS,
16
+ readStateLeaseMetadata,
17
+ releaseStateLease,
18
+ releaseStatePath,
19
+ sweepStaleStateLocks,
20
+ withStateLock,
21
+ writeStateLeaseMetadata,
22
+ } from "../shared/state-coordination-helpers.mjs";
6
23
 
7
24
  export const DEFAULT_ORACLE_STATE_DIR = "/tmp/pi-oracle-state";
8
25
  export const ORACLE_STATE_DIR_ENV = "PI_ORACLE_STATE_DIR";
9
26
  const ORACLE_STATE_DIR = process.env[ORACLE_STATE_DIR_ENV]?.trim() || DEFAULT_ORACLE_STATE_DIR;
10
- const LOCKS_DIR = join(ORACLE_STATE_DIR, "locks");
11
- const LEASES_DIR = join(ORACLE_STATE_DIR, "leases");
12
- const DEFAULT_WAIT_MS = 30_000;
13
- const POLL_MS = 200;
14
- export const ORACLE_METADATA_WRITE_GRACE_MS = 1_000;
15
- /** Incomplete `.tmp-*` dirs are in-flight atomic creates; a 1s grace is too short under multi-process sweep + slow FS. */
16
- export const ORACLE_TMP_STATE_DIR_GRACE_MS = 60_000;
27
+
28
+ export { ORACLE_METADATA_WRITE_GRACE_MS, ORACLE_TMP_STATE_DIR_GRACE_MS };
17
29
 
18
30
  export interface OracleLockHandle {
19
31
  path: string;
@@ -23,136 +35,25 @@ function ensureDirSync(path: string): void {
23
35
  mkdirSync(path, { recursive: true, mode: 0o700 });
24
36
  }
25
37
 
26
- function leaseKey(kind: string, key: string): string {
27
- return `${kind}-${createHash("sha256").update(key).digest("hex").slice(0, 24)}`;
28
- }
29
-
30
38
  export function getOracleStateDir(): string {
31
39
  ensureDirSync(ORACLE_STATE_DIR);
32
40
  return ORACLE_STATE_DIR;
33
41
  }
34
42
 
35
43
  export function getLocksDir(): string {
36
- ensureDirSync(LOCKS_DIR);
37
- return LOCKS_DIR;
44
+ const dir = getStateLocksDir(getOracleStateDir());
45
+ ensureDirSync(dir);
46
+ return dir;
38
47
  }
39
48
 
40
49
  export function getLeasesDir(): string {
41
- ensureDirSync(LEASES_DIR);
42
- return LEASES_DIR;
43
- }
44
-
45
- function lockPath(kind: string, key: string): string {
46
- return join(getLocksDir(), leaseKey(kind, key));
47
- }
48
-
49
- function leasePath(kind: string, key: string): string {
50
- return join(getLeasesDir(), leaseKey(kind, key));
51
- }
52
-
53
- async function sleep(ms: number): Promise<void> {
54
- await new Promise((resolve) => setTimeout(resolve, ms));
55
- }
56
-
57
- async function writeMetadata(path: string, metadata: unknown): Promise<void> {
58
- const targetPath = join(path, "metadata.json");
59
- const tempPath = join(path, `metadata.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`);
60
- await writeFile(tempPath, `${JSON.stringify(metadata, null, 2)}\n`, { encoding: "utf8", mode: 0o600 });
61
- await rename(tempPath, targetPath);
62
- }
63
-
64
- async function createStateDirAtomically(parentDir: string, finalPath: string, metadata: unknown): Promise<void> {
65
- const tempPath = join(parentDir, `.tmp-${basename(finalPath)}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`);
66
- await mkdir(tempPath, { recursive: false, mode: 0o700 });
67
- try {
68
- await writeMetadata(tempPath, metadata);
69
- await rename(tempPath, finalPath);
70
- } catch (error) {
71
- await rm(tempPath, { recursive: true, force: true }).catch(() => undefined);
72
- throw error;
73
- }
74
- }
75
-
76
- function getMetadataPath(path: string): string {
77
- return join(path, "metadata.json");
78
- }
79
-
80
- function getMetadataState(path: string): "present" | "missing" | "invalid" {
81
- const metadataPath = getMetadataPath(path);
82
- if (!existsSync(metadataPath)) return "missing";
83
- try {
84
- JSON.parse(readFileSync(metadataPath, "utf8"));
85
- return "present";
86
- } catch {
87
- return "invalid";
88
- }
89
- }
90
-
91
- function isIncompleteStateDirStale(path: string, now = Date.now()): boolean {
92
- try {
93
- const stats = statSync(path);
94
- const baselineMs = Math.max(stats.mtimeMs, stats.ctimeMs);
95
- const graceMs = basename(path).startsWith(".tmp-") ? ORACLE_TMP_STATE_DIR_GRACE_MS : ORACLE_METADATA_WRITE_GRACE_MS;
96
- return now - baselineMs >= graceMs;
97
- } catch {
98
- return false;
99
- }
100
- }
101
-
102
- function readLockProcessPid(path: string): number | undefined {
103
- const metadataPath = getMetadataPath(path);
104
- if (!existsSync(metadataPath)) return undefined;
105
- try {
106
- const metadata = JSON.parse(readFileSync(metadataPath, "utf8")) as { processPid?: unknown };
107
- return typeof metadata.processPid === "number" && Number.isInteger(metadata.processPid) && metadata.processPid > 0
108
- ? metadata.processPid
109
- : undefined;
110
- } catch {
111
- return undefined;
112
- }
113
- }
114
-
115
- function isProcessAlive(pid: number): boolean {
116
- try {
117
- process.kill(pid, 0);
118
- return true;
119
- } catch (error) {
120
- if (error && typeof error === "object" && "code" in error && error.code === "ESRCH") return false;
121
- return true;
122
- }
123
- }
124
-
125
- function isStateDirExistsError(error: unknown): boolean {
126
- return Boolean(error && typeof error === "object" && "code" in error && (error.code === "EEXIST" || error.code === "ENOTEMPTY"));
127
- }
128
-
129
- async function maybeReclaimIncompleteStateDir(path: string, now = Date.now()): Promise<boolean> {
130
- if (getMetadataState(path) === "present") return false;
131
- if (!isIncompleteStateDirStale(path, now)) return false;
132
- await rm(path, { recursive: true, force: true }).catch(() => undefined);
133
- return true;
134
- }
135
-
136
- async function maybeReclaimStaleLock(path: string, now = Date.now()): Promise<boolean> {
137
- if (await maybeReclaimIncompleteStateDir(path, now)) return true;
138
- const processPid = readLockProcessPid(path);
139
- if (!processPid || isProcessAlive(processPid)) return false;
140
- await rm(path, { recursive: true, force: true }).catch(() => undefined);
141
- return true;
50
+ const dir = getStateLeasesDir(getOracleStateDir());
51
+ ensureDirSync(dir);
52
+ return dir;
142
53
  }
143
54
 
144
55
  export async function sweepStaleLocks(now = Date.now()): Promise<string[]> {
145
- const dir = getLocksDir();
146
- const removed: string[] = [];
147
-
148
- for (const name of readdirSync(dir)) {
149
- const path = join(dir, name);
150
- if (await maybeReclaimStaleLock(path, now)) {
151
- removed.push(path);
152
- }
153
- }
154
-
155
- return removed;
56
+ return sweepStaleStateLocks(getOracleStateDir(), now);
156
57
  }
157
58
 
158
59
  export async function acquireLock(
@@ -161,28 +62,13 @@ export async function acquireLock(
161
62
  metadata: unknown,
162
63
  options?: { timeoutMs?: number },
163
64
  ): Promise<OracleLockHandle> {
164
- const parentDir = getLocksDir();
165
- const path = join(parentDir, leaseKey(kind, key));
166
- const timeoutMs = options?.timeoutMs ?? DEFAULT_WAIT_MS;
167
- const deadline = Date.now() + timeoutMs;
168
-
169
- while (Date.now() < deadline) {
170
- try {
171
- await createStateDirAtomically(parentDir, path, metadata);
172
- return { path };
173
- } catch (error) {
174
- if (!isStateDirExistsError(error)) throw error;
175
- if (await maybeReclaimStaleLock(path)) continue;
176
- }
177
- await sleep(POLL_MS);
178
- }
179
-
180
- throw new Error(`Timed out waiting for oracle ${kind} lock: ${key}`);
65
+ return {
66
+ path: await acquireStateLock(getOracleStateDir(), kind, key, metadata, options?.timeoutMs),
67
+ };
181
68
  }
182
69
 
183
70
  export async function releaseLock(handle: OracleLockHandle | undefined): Promise<void> {
184
- if (!handle) return;
185
- await rm(handle.path, { recursive: true, force: true }).catch(() => undefined);
71
+ await releaseStatePath(handle?.path);
186
72
  }
187
73
 
188
74
  export async function withLock<T>(
@@ -192,12 +78,7 @@ export async function withLock<T>(
192
78
  fn: () => Promise<T>,
193
79
  options?: { timeoutMs?: number },
194
80
  ): Promise<T> {
195
- const handle = await acquireLock(kind, key, metadata, options);
196
- try {
197
- return await fn();
198
- } finally {
199
- await releaseLock(handle);
200
- }
81
+ return withStateLock(getOracleStateDir(), kind, key, metadata, fn, options?.timeoutMs);
201
82
  }
202
83
 
203
84
  export function isLockTimeoutError(error: unknown, kind?: string, key?: string): boolean {
@@ -232,70 +113,21 @@ export async function withJobLock<T>(jobId: string, metadata: unknown, fn: () =>
232
113
  }
233
114
 
234
115
  export async function createLease(kind: string, key: string, metadata: unknown): Promise<string> {
235
- const parentDir = getLeasesDir();
236
- const path = join(parentDir, leaseKey(kind, key));
237
- const deadline = Date.now() + DEFAULT_WAIT_MS;
238
-
239
- while (Date.now() < deadline) {
240
- try {
241
- await createStateDirAtomically(parentDir, path, metadata);
242
- return path;
243
- } catch (error) {
244
- if (!isStateDirExistsError(error)) throw error;
245
- if (await maybeReclaimIncompleteStateDir(path)) continue;
246
- if (getMetadataState(path) === "present") throw error;
247
- }
248
- await sleep(POLL_MS);
249
- }
250
-
251
- throw new Error(`Timed out waiting for oracle ${kind} lease: ${key}`);
116
+ return createStateLease(getOracleStateDir(), kind, key, metadata);
252
117
  }
253
118
 
254
119
  export async function writeLeaseMetadata(kind: string, key: string, metadata: unknown): Promise<string> {
255
- const parentDir = getLeasesDir();
256
- const path = join(parentDir, leaseKey(kind, key));
257
- if (existsSync(path)) {
258
- await writeMetadata(path, metadata);
259
- return path;
260
- }
261
- try {
262
- await createStateDirAtomically(parentDir, path, metadata);
263
- } catch (error) {
264
- if (!isStateDirExistsError(error)) throw error;
265
- if (await maybeReclaimIncompleteStateDir(path)) {
266
- await createStateDirAtomically(parentDir, path, metadata);
267
- } else {
268
- await writeMetadata(path, metadata);
269
- }
270
- }
271
- return path;
120
+ return writeStateLeaseMetadata(getOracleStateDir(), kind, key, metadata);
272
121
  }
273
122
 
274
123
  export async function readLeaseMetadata<T = unknown>(kind: string, key: string): Promise<T | undefined> {
275
- const path = join(leasePath(kind, key), "metadata.json");
276
- if (!existsSync(path)) return undefined;
277
- try {
278
- return JSON.parse(await readFile(path, "utf8")) as T;
279
- } catch {
280
- return undefined;
281
- }
124
+ return readStateLeaseMetadata<T>(getOracleStateDir(), kind, key);
282
125
  }
283
126
 
284
127
  export async function releaseLease(kind: string, key: string): Promise<void> {
285
- await rm(leasePath(kind, key), { recursive: true, force: true }).catch(() => undefined);
128
+ await releaseStateLease(getOracleStateDir(), kind, key);
286
129
  }
287
130
 
288
131
  export function listLeaseMetadata<T = unknown>(kind: string): T[] {
289
- const dir = getLeasesDir();
290
- return readdirSync(dir)
291
- .filter((name) => name.startsWith(`${kind}-`))
292
- .map((name) => join(dir, name, "metadata.json"))
293
- .filter((path) => existsSync(path))
294
- .flatMap((path) => {
295
- try {
296
- return [JSON.parse(readFileSync(path, "utf8")) as T];
297
- } catch {
298
- return [];
299
- }
300
- });
132
+ return listStateLeaseMetadata<T>(getOracleStateDir(), kind);
301
133
  }
@@ -1,5 +1,11 @@
1
- import { execFileSync } from "node:child_process";
1
+ // Purpose: Poll oracle jobs in the background, reconcile stale state, and deliver best-effort wake-up reminders to eligible sessions.
2
+ // Responsibilities: Track live wake-up targets, promote queued jobs, scan terminal jobs for delivery, and keep session status text current.
3
+ // Scope: Poller/orchestration only; durable lifecycle mutations live in jobs.ts and shared observability formatting lives in extensions/oracle/shared.
4
+ // Usage: Imported by the oracle extension entrypoint to start or stop per-session oracle polling.
5
+ // Invariants/Assumptions: Poller scans are serialized per session key, wake-up delivery is best-effort, and terminal-job notifications always re-read durable job state before send.
2
6
  import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
7
+ import { buildOracleStatusText, buildOracleWakeupNotificationContent } from "../shared/job-observability-helpers.mjs";
8
+ import { isProcessAlive, readProcessStartedAt } from "../shared/process-helpers.mjs";
3
9
  import { isLockTimeoutError, listLeaseMetadata, releaseLease, withGlobalReconcileLock, writeLeaseMetadata } from "./locks.js";
4
10
  import {
5
11
  getJobDir,
@@ -64,26 +70,6 @@ function jobMatchesContext(job: { projectId: string; sessionId: string }, sessio
64
70
  return job.projectId === projectId && job.sessionId === sessionId;
65
71
  }
66
72
 
67
- function readProcessStartedAt(pid: number | undefined): string | undefined {
68
- if (!pid || pid <= 0) return undefined;
69
- try {
70
- const startedAt = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], { encoding: "utf8" }).trim();
71
- return startedAt || undefined;
72
- } catch {
73
- return undefined;
74
- }
75
- }
76
-
77
- function isProcessAlive(pid: number): boolean {
78
- try {
79
- process.kill(pid, 0);
80
- return true;
81
- } catch (error) {
82
- if (error && typeof error === "object" && "code" in error && error.code === "ESRCH") return false;
83
- return true;
84
- }
85
- }
86
-
87
73
  function parseTimestamp(value: string | undefined): number | undefined {
88
74
  if (!value) return undefined;
89
75
  const parsed = Date.parse(value);
@@ -157,43 +143,20 @@ export function refreshOracleStatus(ctx: ExtensionContext): void {
157
143
  return;
158
144
  }
159
145
  const counts = getJobCounts(ctx);
160
- if (counts.active > 0 && counts.queued > 0) {
161
- ctx.ui.setStatus("oracle", ctx.ui.theme.fg("success", `oracle: running (${counts.active}), queued (${counts.queued})`));
162
- return;
163
- }
164
- if (counts.active > 0) {
165
- const suffix = counts.active > 1 ? ` (${counts.active})` : "";
166
- ctx.ui.setStatus("oracle", ctx.ui.theme.fg("success", `oracle: running${suffix}`));
167
- return;
168
- }
169
- if (counts.queued > 0) {
170
- const suffix = counts.queued > 1 ? ` (${counts.queued})` : "";
171
- ctx.ui.setStatus("oracle", ctx.ui.theme.fg("accent", `oracle: queued${suffix}`));
172
- return;
173
- }
174
-
175
- ctx.ui.setStatus("oracle", ctx.ui.theme.fg("accent", "oracle: ready"));
176
- }
177
-
178
- function buildNotificationContent(job: OraclePollerJob): string {
179
- const responsePath = job.responsePath || `${getJobDir(job.id)}/response.md`;
180
- const artifactsPath = `${getJobDir(job.id)}/artifacts`;
181
- return [
182
- `Oracle job ${job.id} is ${job.status}.`,
183
- `Use oracle_read with jobId ${job.id} to open the response and settle wake-up retries.`,
184
- `Response file: ${responsePath}`,
185
- `Artifacts: ${artifactsPath}`,
186
- job.error ? `Error: ${job.error}` : "After oracle_read, continue from the oracle output.",
187
- ].join("\n");
146
+ const statusText = buildOracleStatusText(counts);
147
+ const tone = counts.active > 0 ? "success" : "accent";
148
+ ctx.ui.setStatus("oracle", ctx.ui.theme.fg(tone, statusText));
188
149
  }
189
150
 
190
-
191
151
  function requestWakeupTurn(pi: ExtensionAPI, job: OraclePollerJob): void {
192
152
  pi.sendMessage(
193
153
  {
194
154
  customType: ORACLE_WAKEUP_REMINDER_CUSTOM_TYPE,
195
155
  display: false,
196
- content: buildNotificationContent(job),
156
+ content: buildOracleWakeupNotificationContent(job, {
157
+ responsePath: job.responsePath || `${getJobDir(job.id)}/response.md`,
158
+ artifactsPath: `${getJobDir(job.id)}/artifacts`,
159
+ }),
197
160
  details: { jobId: job.id, status: job.status },
198
161
  },
199
162
  { triggerTurn: true, deliverAs: "followUp" },
@@ -1,15 +1,21 @@
1
- import { existsSync } from "node:fs";
1
+ // Purpose: Coordinate queued oracle job ordering and promotion into active worker execution.
2
+ // Responsibilities: List queued jobs, compute queue position, and promote queued work under admission control using shared coordination helpers.
3
+ // Scope: Extension-side queue orchestration only; shared promotion primitives live in extensions/oracle/shared and worker-side autonomous promotion stays in run-job.mjs.
4
+ // Usage: Imported by oracle tools/commands when queued jobs may advance after submission or cancellation.
5
+ // Invariants/Assumptions: Queue promotion runs under the global admission lock and only promotes jobs with durable archives and acquired runtime/conversation leases.
6
+ import {
7
+ buildConversationLeaseMetadata,
8
+ buildRuntimeLeaseMetadata,
9
+ compareQueuedOracleJobs,
10
+ hasDurableWorkerHandoff,
11
+ isQueuedOracleJob,
12
+ runQueuedJobPromotionPass,
13
+ } from "../shared/job-coordination-helpers.mjs";
14
+ import { transitionOracleJobPhase } from "../shared/job-lifecycle-helpers.mjs";
2
15
  import { loadOracleConfig } from "./config.js";
3
16
  import { withLock } from "./locks.js";
4
- import { appendCleanupWarnings, createJob, hasDurableWorkerHandoff, isTerminalOracleJob, listOracleJobDirs, readJob, spawnWorker, terminateWorkerPid, updateJob, withJobPhase, type OracleJob } from "./jobs.js";
5
- import {
6
- cleanupRuntimeArtifacts,
7
- releaseRuntimeLease,
8
- tryAcquireConversationLease,
9
- tryAcquireRuntimeLease,
10
- type OracleConversationLeaseMetadata,
11
- type OracleRuntimeLeaseMetadata,
12
- } from "./runtime.js";
17
+ import { appendCleanupWarnings, createJob, isTerminalOracleJob, listOracleJobDirs, readJob, spawnWorker, terminateWorkerPid, updateJob, type OracleJob } from "./jobs.js";
18
+ import { cleanupRuntimeArtifacts, releaseRuntimeLease, tryAcquireConversationLease, tryAcquireRuntimeLease } from "./runtime.js";
13
19
 
14
20
  export interface OracleQueuePosition {
15
21
  position: number;
@@ -24,13 +30,11 @@ export interface PromoteQueuedJobsOptions {
24
30
  }
25
31
 
26
32
  function isQueuedJob(job: OracleJob | undefined): job is OracleJob {
27
- return Boolean(job && job.status === "queued");
33
+ return isQueuedOracleJob(job);
28
34
  }
29
35
 
30
36
  export function compareQueuedJobs(left: OracleJob, right: OracleJob): number {
31
- const leftKey = left.queuedAt ?? left.createdAt;
32
- const rightKey = right.queuedAt ?? right.createdAt;
33
- return leftKey.localeCompare(rightKey) || left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id);
37
+ return compareQueuedOracleJobs(left, right);
34
38
  }
35
39
 
36
40
  export function listQueuedJobs(): OracleJob[] {
@@ -50,129 +54,88 @@ export function getQueuePosition(jobId: string): OracleQueuePosition | undefined
50
54
  };
51
55
  }
52
56
 
53
- function runtimeLeaseMetadata(job: OracleJob, createdAt: string): OracleRuntimeLeaseMetadata {
54
- return {
55
- jobId: job.id,
56
- runtimeId: job.runtimeId,
57
- runtimeSessionName: job.runtimeSessionName,
58
- runtimeProfileDir: job.runtimeProfileDir,
59
- projectId: job.projectId,
60
- sessionId: job.sessionId,
61
- createdAt,
62
- };
63
- }
64
-
65
- function conversationLeaseMetadata(job: OracleJob, createdAt: string): OracleConversationLeaseMetadata | undefined {
66
- if (!job.conversationId) return undefined;
67
- return {
68
- jobId: job.id,
69
- conversationId: job.conversationId,
70
- projectId: job.projectId,
71
- sessionId: job.sessionId,
72
- createdAt,
73
- };
74
- }
75
-
76
57
  async function failQueuedPromotion(job: OracleJob, message: string, at: string): Promise<void> {
77
- await updateJob(job.id, (current) => ({
78
- ...current,
79
- ...withJobPhase("failed", {
80
- status: "failed",
81
- completedAt: at,
58
+ await updateJob(job.id, (current) => transitionOracleJobPhase(current, "failed", {
59
+ at,
60
+ source: "oracle:queue",
61
+ message: `Queued promotion failed: ${message}`,
62
+ clearNotificationClaim: true,
63
+ patch: {
82
64
  heartbeatAt: at,
83
- notifyClaimedAt: undefined,
84
- notifyClaimedBy: undefined,
85
65
  error: message,
86
- }, at),
66
+ },
87
67
  })).catch(() => undefined);
88
68
  }
89
69
 
90
70
  export async function promoteQueuedJobsWithinAdmissionLock(options: PromoteQueuedJobsOptions): Promise<{ promotedJobIds: string[] }> {
91
71
  const spawnWorkerFn = options.spawnWorkerFn ?? spawnWorker;
92
72
  const loadConfigFn = options.loadConfigFn ?? loadOracleConfig;
93
- const promotedJobIds: string[] = [];
94
-
95
- for (const queuedJob of listQueuedJobs()) {
96
- const now = new Date().toISOString();
97
- let runtimeLeaseAcquired = false;
98
- let conversationLeaseAcquired = false;
99
- let workerSpawned = false;
100
- let spawnedWorker: Awaited<ReturnType<typeof spawnWorker>> | undefined;
101
73
 
102
- try {
103
- const current = readJob(queuedJob.id);
104
- if (!isQueuedJob(current)) continue;
105
- if (!existsSync(current.archivePath)) {
106
- await failQueuedPromotion(current, `Queued oracle archive is missing: ${current.archivePath}`, now);
107
- continue;
108
- }
109
-
110
- const config = current.config ?? loadConfigFn(current.cwd);
111
- const runtimeAttempt = await tryAcquireRuntimeLease(config, runtimeLeaseMetadata(current, now));
112
- if (!runtimeAttempt.acquired) break;
113
- runtimeLeaseAcquired = true;
114
-
115
- const conversationMetadata = conversationLeaseMetadata(current, now);
116
- if (conversationMetadata) {
117
- const conversationAttempt = await tryAcquireConversationLease(conversationMetadata);
118
- if (!conversationAttempt.acquired) {
119
- await releaseRuntimeLease(current.runtimeId).catch(() => undefined);
120
- runtimeLeaseAcquired = false;
121
- continue;
122
- }
123
- conversationLeaseAcquired = true;
124
- }
125
-
126
- await updateJob(current.id, (latest) => {
74
+ return runQueuedJobPromotionPass<OracleJob, Awaited<ReturnType<typeof spawnWorkerFn>>>({
75
+ listQueuedJobs,
76
+ refreshJob: (jobId) => readJob(jobId),
77
+ readLatestJob: (jobId) => readJob(jobId),
78
+ isQueuedJob,
79
+ acquireRuntimeLease: async (job, at) => {
80
+ const config = job.config ?? loadConfigFn(job.cwd);
81
+ const attempt = await tryAcquireRuntimeLease(config, buildRuntimeLeaseMetadata(job, at));
82
+ return attempt.acquired;
83
+ },
84
+ acquireConversationLease: async (job, at) => {
85
+ const metadata = buildConversationLeaseMetadata(job, at);
86
+ if (!metadata) return true;
87
+ const attempt = await tryAcquireConversationLease(metadata);
88
+ return attempt.acquired;
89
+ },
90
+ releaseRuntimeLease: async (job) => {
91
+ await releaseRuntimeLease(job.runtimeId);
92
+ },
93
+ markSubmitted: async (job, at) => {
94
+ const config = job.config ?? loadConfigFn(job.cwd);
95
+ await updateJob(job.id, (latest) => {
127
96
  if (latest.status !== "queued") {
128
97
  throw new Error(`Queued job ${latest.id} changed state during promotion (${latest.status})`);
129
98
  }
130
- return {
99
+ return transitionOracleJobPhase({
131
100
  ...latest,
132
101
  config,
133
- ...withJobPhase("submitted", {
134
- status: "submitted",
135
- submittedAt: latest.submittedAt || now,
136
- }, now),
137
- };
102
+ }, "submitted", {
103
+ at,
104
+ source: "oracle:queue",
105
+ message: "Queued job admitted for worker launch.",
106
+ patch: {
107
+ submittedAt: latest.submittedAt || at,
108
+ },
109
+ });
138
110
  });
139
-
140
- spawnedWorker = await spawnWorkerFn(options.workerPath, current.id);
141
- workerSpawned = true;
142
- const worker = spawnedWorker;
143
- await updateJob(current.id, (latest) => ({
111
+ },
112
+ spawnWorker: async (job) => spawnWorkerFn(options.workerPath, job.id),
113
+ persistWorker: async (job, worker) => {
114
+ await updateJob(job.id, (latest) => ({
144
115
  ...latest,
145
116
  workerPid: worker.pid,
146
117
  workerNonce: worker.nonce,
147
118
  workerStartedAt: worker.startedAt,
148
119
  }));
149
- promotedJobIds.push(current.id);
150
- } catch (error) {
151
- const message = error instanceof Error ? error.message : String(error);
152
- const latest = readJob(queuedJob.id);
153
- if (workerSpawned && latest && hasDurableWorkerHandoff(latest)) {
154
- promotedJobIds.push(queuedJob.id);
155
- continue;
156
- }
157
- if (spawnedWorker) {
158
- await terminateWorkerPid(spawnedWorker.pid, spawnedWorker.startedAt).catch(() => undefined);
159
- }
160
- if (latest && !isTerminalOracleJob(latest)) {
161
- await failQueuedPromotion(latest, message, now);
162
- }
120
+ },
121
+ hasDurableWorkerHandoff,
122
+ isTerminalJob: isTerminalOracleJob,
123
+ failQueuedPromotion,
124
+ terminateSpawnedWorker: async (worker) => {
125
+ await terminateWorkerPid(worker.pid, worker.startedAt);
126
+ },
127
+ cleanupAfterFailure: async ({ job, at, spawnedWorker, runtimeLeaseAcquired, conversationLeaseAcquired }) => {
163
128
  const cleanupReport = await cleanupRuntimeArtifacts({
164
- runtimeId: runtimeLeaseAcquired ? queuedJob.runtimeId : undefined,
165
- runtimeProfileDir: runtimeLeaseAcquired ? queuedJob.runtimeProfileDir : undefined,
166
- runtimeSessionName: workerSpawned ? queuedJob.runtimeSessionName : undefined,
167
- conversationId: conversationLeaseAcquired ? queuedJob.conversationId : undefined,
129
+ runtimeId: runtimeLeaseAcquired ? job.runtimeId : undefined,
130
+ runtimeProfileDir: runtimeLeaseAcquired ? job.runtimeProfileDir : undefined,
131
+ runtimeSessionName: spawnedWorker ? job.runtimeSessionName : undefined,
132
+ conversationId: conversationLeaseAcquired ? job.conversationId : undefined,
168
133
  }).catch(() => ({ attempted: [], warnings: [] }));
169
134
  if (cleanupReport.warnings.length > 0) {
170
- await appendCleanupWarnings(queuedJob.id, cleanupReport.warnings, now).catch(() => undefined);
135
+ await appendCleanupWarnings(job.id, cleanupReport.warnings, at).catch(() => undefined);
171
136
  }
172
- }
173
- }
174
-
175
- return { promotedJobIds };
137
+ },
138
+ });
176
139
  }
177
140
 
178
141
  export async function promoteQueuedJobs(options: PromoteQueuedJobsOptions): Promise<{ promotedJobIds: string[] }> {