pi-oracle 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +33 -0
  2. package/README.md +7 -0
  3. package/docs/ORACLE_DESIGN.md +1 -1
  4. package/docs/ORACLE_ISOLATED_PI_VALIDATION.md +249 -0
  5. package/docs/ORACLE_RECOVERY_DRILL.md +5 -4
  6. package/extensions/oracle/index.ts +8 -1
  7. package/extensions/oracle/lib/commands.ts +11 -24
  8. package/extensions/oracle/lib/config.ts +5 -0
  9. package/extensions/oracle/lib/jobs.ts +117 -217
  10. package/extensions/oracle/lib/locks.ts +41 -209
  11. package/extensions/oracle/lib/poller.ts +14 -51
  12. package/extensions/oracle/lib/queue.ts +75 -112
  13. package/extensions/oracle/lib/runtime.ts +60 -14
  14. package/extensions/oracle/lib/tools.ts +70 -67
  15. package/extensions/oracle/shared/job-coordination-helpers.d.mts +84 -0
  16. package/extensions/oracle/shared/job-coordination-helpers.mjs +168 -0
  17. package/extensions/oracle/shared/job-lifecycle-helpers.d.mts +130 -0
  18. package/extensions/oracle/shared/job-lifecycle-helpers.mjs +377 -0
  19. package/extensions/oracle/shared/job-observability-helpers.d.mts +59 -0
  20. package/extensions/oracle/shared/job-observability-helpers.mjs +143 -0
  21. package/extensions/oracle/shared/process-helpers.d.mts +20 -0
  22. package/extensions/oracle/shared/process-helpers.mjs +128 -0
  23. package/extensions/oracle/shared/state-coordination-helpers.d.mts +43 -0
  24. package/extensions/oracle/shared/state-coordination-helpers.mjs +381 -0
  25. package/extensions/oracle/worker/artifact-heuristics.mjs +5 -0
  26. package/extensions/oracle/worker/auth-bootstrap.mjs +100 -139
  27. package/extensions/oracle/worker/auth-cookie-policy.mjs +5 -0
  28. package/extensions/oracle/worker/auth-flow-helpers.d.mts +41 -0
  29. package/extensions/oracle/worker/auth-flow-helpers.mjs +165 -0
  30. package/extensions/oracle/worker/chatgpt-flow-helpers.d.mts +13 -0
  31. package/extensions/oracle/worker/chatgpt-flow-helpers.mjs +85 -0
  32. package/extensions/oracle/worker/chatgpt-ui-helpers.d.mts +33 -0
  33. package/extensions/oracle/worker/chatgpt-ui-helpers.mjs +292 -0
  34. package/extensions/oracle/worker/run-job.mjs +235 -380
  35. package/extensions/oracle/worker/state-locks.mjs +31 -216
  36. package/package.json +14 -5
  37. package/prompts/oracle.md +1 -1
@@ -1,8 +1,15 @@
1
- import { randomUUID, createHash } from "node:crypto";
1
+ // Purpose: Manage oracle browser runtime allocation, lease admission, seed/runtime profile handling, and runtime cleanup for the extension side.
2
+ // Responsibilities: Allocate runtimes, enforce persisted-session requirements, acquire/release runtime and conversation leases, and clean up runtime artifacts safely.
3
+ // Scope: Extension-side runtime coordination only; shared concurrency/process primitives live in extensions/oracle/shared.
4
+ // Usage: Imported by jobs, tools, and queue logic to provision or tear down isolated oracle browser runtimes.
5
+ // Invariants/Assumptions: Lease metadata is the admission source of truth, tracked worker identity checks defend against PID reuse, and runtime cleanup always attempts lease release.
6
+ import { randomUUID } from "node:crypto";
2
7
  import { spawn } from "node:child_process";
3
8
  import { existsSync, realpathSync, readFileSync } from "node:fs";
4
9
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
5
- import { basename, dirname, join } from "node:path";
10
+ import { dirname, join } from "node:path";
11
+ import { jobBlocksAdmission } from "../shared/job-coordination-helpers.mjs";
12
+ import { isTrackedProcessAlive } from "../shared/process-helpers.mjs";
6
13
  import type { OracleConfig } from "./config.js";
7
14
  import { createLease, listLeaseMetadata, readLeaseMetadata, releaseLease, withAuthLock } from "./locks.js";
8
15
 
@@ -12,6 +19,8 @@ const ORACLE_JOBS_DIR = process.env.PI_ORACLE_JOBS_DIR?.trim() || DEFAULT_ORACLE
12
19
  const AGENT_BROWSER_BIN = [process.env.AGENT_BROWSER_PATH, "/opt/homebrew/bin/agent-browser", "/usr/local/bin/agent-browser"].find(
13
20
  (candidate) => typeof candidate === "string" && candidate && existsSync(candidate),
14
21
  ) || "agent-browser";
22
+ const PROFILE_CLONE_TIMEOUT_MS = 120_000;
23
+ const ORACLE_SUBPROCESS_KILL_GRACE_MS = 2_000;
15
24
 
16
25
  export interface OracleRuntimeLeaseMetadata {
17
26
  jobId: string;
@@ -110,8 +119,18 @@ function activeJobExists(jobId: string): boolean {
110
119
  const path = join(ORACLE_JOBS_DIR, `oracle-${jobId}`, "job.json");
111
120
  if (!existsSync(path)) return false;
112
121
  try {
113
- const job = JSON.parse(readFileSync(path, "utf8")) as { status?: string; cleanupWarnings?: unknown; cleanupPending?: unknown };
114
- return ["preparing", "submitted", "waiting"].includes(job.status || "") || job.cleanupPending === true || (Array.isArray(job.cleanupWarnings) && job.cleanupWarnings.length > 0);
122
+ const job = JSON.parse(readFileSync(path, "utf8")) as {
123
+ status?: string;
124
+ cleanupPending?: unknown;
125
+ workerPid?: unknown;
126
+ workerStartedAt?: unknown;
127
+ };
128
+ return jobBlocksAdmission({
129
+ status: typeof job.status === "string" ? job.status : undefined,
130
+ cleanupPending: job.cleanupPending === true,
131
+ workerPid: typeof job.workerPid === "number" ? job.workerPid : undefined,
132
+ workerStartedAt: typeof job.workerStartedAt === "string" ? job.workerStartedAt : undefined,
133
+ }, isTrackedProcessAlive);
115
134
  } catch {
116
135
  return false;
117
136
  }
@@ -198,22 +217,55 @@ function profileCloneArgs(config: OracleConfig, sourceDir: string, destinationDi
198
217
  return ["-R", sourceDir, destinationDir];
199
218
  }
200
219
 
201
- async function spawnCp(args: string[]): Promise<void> {
220
+ async function spawnCp(args: string[], options?: { timeoutMs?: number }): Promise<void> {
202
221
  await new Promise<void>((resolve, reject) => {
203
222
  const child = spawn("cp", args, { stdio: ["ignore", "pipe", "pipe"] });
204
223
  let stderr = "";
224
+ let timedOut = false;
225
+ let killTimer: NodeJS.Timeout | undefined;
226
+ let killGraceTimer: NodeJS.Timeout | undefined;
227
+
228
+ const clearTimers = () => {
229
+ if (killTimer) clearTimeout(killTimer);
230
+ if (killGraceTimer) clearTimeout(killGraceTimer);
231
+ };
232
+
233
+ if ((options?.timeoutMs ?? 0) > 0) {
234
+ killTimer = setTimeout(() => {
235
+ timedOut = true;
236
+ child.kill("SIGTERM");
237
+ killGraceTimer = setTimeout(() => {
238
+ child.kill("SIGKILL");
239
+ }, ORACLE_SUBPROCESS_KILL_GRACE_MS);
240
+ killGraceTimer.unref?.();
241
+ }, options?.timeoutMs);
242
+ killTimer.unref?.();
243
+ }
244
+
205
245
  child.stderr.on("data", (data) => {
206
246
  stderr += String(data);
207
247
  });
208
- child.on("error", reject);
248
+ child.on("error", (error) => {
249
+ clearTimers();
250
+ reject(error);
251
+ });
209
252
  child.on("close", (code) => {
253
+ clearTimers();
254
+ if (timedOut) {
255
+ reject(new Error(stderr || `cp timed out after ${options?.timeoutMs}ms`));
256
+ return;
257
+ }
210
258
  if (code === 0) resolve();
211
259
  else reject(new Error(stderr || `cp exited with code ${code}`));
212
260
  });
213
261
  });
214
262
  }
215
263
 
216
- export async function cloneSeedProfileToRuntime(config: OracleConfig, runtimeProfileDir: string): Promise<string | undefined> {
264
+ export async function cloneSeedProfileToRuntime(
265
+ config: OracleConfig,
266
+ runtimeProfileDir: string,
267
+ options?: { cpTimeoutMs?: number },
268
+ ): Promise<string | undefined> {
217
269
  const seedDir = config.browser.authSeedProfileDir;
218
270
  if (!existsSync(seedDir)) {
219
271
  throw new Error(`Oracle auth seed profile not found: ${seedDir}. Run /oracle-auth first.`);
@@ -222,7 +274,7 @@ export async function cloneSeedProfileToRuntime(config: OracleConfig, runtimePro
222
274
  await withAuthLock({ runtimeProfileDir, seedDir }, async () => {
223
275
  await rm(runtimeProfileDir, { recursive: true, force: true }).catch(() => undefined);
224
276
  await mkdir(dirname(runtimeProfileDir), { recursive: true, mode: 0o700 }).catch(() => undefined);
225
- await spawnCp(profileCloneArgs(config, seedDir, runtimeProfileDir));
277
+ await spawnCp(profileCloneArgs(config, seedDir, runtimeProfileDir), { timeoutMs: options?.cpTimeoutMs ?? PROFILE_CLONE_TIMEOUT_MS });
226
278
  });
227
279
 
228
280
  return getSeedGeneration(config);
@@ -286,9 +338,6 @@ export async function cleanupRuntimeArtifacts(runtime: {
286
338
  report.warnings.push(`Failed to remove runtime profile ${runtime.runtimeProfileDir}: ${error.message}`);
287
339
  });
288
340
  }
289
- if (report.warnings.length > 0) {
290
- return report;
291
- }
292
341
  if (runtime.conversationId) {
293
342
  report.attempted.push("conversationLease");
294
343
  }
@@ -305,6 +354,3 @@ export async function cleanupRuntimeArtifacts(runtime: {
305
354
  return report;
306
355
  }
307
356
 
308
- export function stableProjectLabel(projectId: string): string {
309
- return basename(projectId) || createHash("sha256").update(projectId).digest("hex").slice(0, 8);
310
- }
@@ -9,6 +9,8 @@ import { tmpdir } from "node:os";
9
9
  import { basename, join, posix } from "node:path";
10
10
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
11
  import { Type } from "@sinclair/typebox";
12
+ import { formatOracleJobSummary, formatOracleSubmitResponse } from "../shared/job-observability-helpers.mjs";
13
+ import { transitionOracleJobPhase } from "../shared/job-lifecycle-helpers.mjs";
12
14
  import { isLockTimeoutError, withGlobalReconcileLock, withLock } from "./locks.js";
13
15
  import {
14
16
  coerceOracleSubmitPresetId,
@@ -36,7 +38,6 @@ import {
36
38
  spawnWorker,
37
39
  terminateWorkerPid,
38
40
  updateJob,
39
- withJobPhase,
40
41
  type OracleJob,
41
42
  } from "./jobs.js";
42
43
  import { getQueuePosition, promoteQueuedJobs, promoteQueuedJobsWithinAdmissionLock } from "./queue.js";
@@ -78,11 +79,16 @@ const ORACLE_CANCEL_PARAMS = Type.Object({
78
79
  const MAX_ARCHIVE_BYTES = 250 * 1024 * 1024;
79
80
  const MAX_QUEUED_JOBS_PER_ACTIVE_RUNTIME = 1;
80
81
  const MAX_QUEUED_ARCHIVE_BYTES_PER_ACTIVE_RUNTIME = MAX_ARCHIVE_BYTES;
82
+ const ARCHIVE_COMMAND_TIMEOUT_MS = 120_000;
83
+ const ARCHIVE_COMMAND_KILL_GRACE_MS = 2_000;
81
84
 
82
85
  const DEFAULT_ARCHIVE_EXCLUDED_DIR_NAMES_ANYWHERE = new Set([
83
86
  ".git",
84
87
  ".hg",
85
88
  ".svn",
89
+ ".pi",
90
+ ".oracle-context",
91
+ ".cursor",
86
92
  "node_modules",
87
93
  "target",
88
94
  ".venv",
@@ -107,8 +113,10 @@ const DEFAULT_ARCHIVE_EXCLUDED_DIR_NAMES_ANYWHERE = new Set([
107
113
  ".pnpm-store",
108
114
  ".serverless",
109
115
  ".aws-sam",
116
+ "secrets",
117
+ ".secrets",
110
118
  ]);
111
- const DEFAULT_ARCHIVE_EXCLUDED_DIR_NAMES_AT_REPO_ROOT = new Set(["coverage", "htmlcov", "tmp", "temp", ".tmp", "dist", "build", "out", "secrets", ".secrets"]);
119
+ const DEFAULT_ARCHIVE_EXCLUDED_DIR_NAMES_AT_REPO_ROOT = new Set(["coverage", "htmlcov", "tmp", "temp", ".tmp", "dist", "build", "out"]);
112
120
  const DEFAULT_ARCHIVE_EXCLUDED_FILES = new Set([
113
121
  ".coverage",
114
122
  ".DS_Store",
@@ -116,6 +124,7 @@ const DEFAULT_ARCHIVE_EXCLUDED_FILES = new Set([
116
124
  ".netrc",
117
125
  ".npmrc",
118
126
  ".pypirc",
127
+ ".scratchpad.md",
119
128
  "Thumbs.db",
120
129
  "id_dsa",
121
130
  "id_ecdsa",
@@ -319,7 +328,13 @@ function formatArchiveOversizeError(args: {
319
328
  .join("\n");
320
329
  }
321
330
 
322
- async function writeArchiveFile(cwd: string, entries: string[], archivePath: string, listPath: string): Promise<number> {
331
+ async function writeArchiveFile(
332
+ cwd: string,
333
+ entries: string[],
334
+ archivePath: string,
335
+ listPath: string,
336
+ options?: { commandTimeoutMs?: number },
337
+ ): Promise<number> {
323
338
  await writeFile(listPath, Buffer.from(`${entries.join("\0")}\0`), { mode: 0o600 });
324
339
  await rm(archivePath, { force: true }).catch(() => undefined);
325
340
 
@@ -335,24 +350,57 @@ async function writeArchiveFile(cwd: string, entries: string[], archivePath: str
335
350
 
336
351
  let stderr = "";
337
352
  let settled = false;
353
+ let timedOut = false;
354
+ let timeout: NodeJS.Timeout | undefined;
355
+ let killGraceTimer: NodeJS.Timeout | undefined;
338
356
  let tarCode: number | null | undefined;
339
357
  let zstdCode: number | null | undefined;
340
358
 
359
+ const clearTimers = () => {
360
+ if (timeout) clearTimeout(timeout);
361
+ if (killGraceTimer) clearTimeout(killGraceTimer);
362
+ };
363
+
364
+ const terminateChildren = () => {
365
+ tar.kill("SIGTERM");
366
+ zstd.kill("SIGTERM");
367
+ killGraceTimer = setTimeout(() => {
368
+ tar.kill("SIGKILL");
369
+ zstd.kill("SIGKILL");
370
+ }, ARCHIVE_COMMAND_KILL_GRACE_MS);
371
+ killGraceTimer.unref?.();
372
+ };
373
+
341
374
  const finish = (error?: Error) => {
342
375
  if (settled) return;
343
376
  if (error) {
344
377
  settled = true;
345
- tar.kill("SIGTERM");
346
- zstd.kill("SIGTERM");
378
+ clearTimers();
379
+ terminateChildren();
347
380
  rejectPromise(error);
348
381
  return;
349
382
  }
350
383
  if (tarCode === undefined || zstdCode === undefined) return;
351
384
  settled = true;
385
+ clearTimers();
386
+ if (timedOut) {
387
+ rejectPromise(new Error(stderr || `Oracle archive subprocess timed out after ${options?.commandTimeoutMs ?? ARCHIVE_COMMAND_TIMEOUT_MS}ms`));
388
+ return;
389
+ }
352
390
  if (tarCode === 0 && zstdCode === 0) resolvePromise();
353
391
  else rejectPromise(new Error(stderr || `archive command failed (tar=${tarCode}, zstd=${zstdCode})`));
354
392
  };
355
393
 
394
+ const commandTimeoutMs = options?.commandTimeoutMs ?? ARCHIVE_COMMAND_TIMEOUT_MS;
395
+ if (commandTimeoutMs > 0) {
396
+ timeout = setTimeout(() => {
397
+ timedOut = true;
398
+ stderr = `${stderr}${stderr ? "\n" : ""}Oracle archive subprocess timed out after ${commandTimeoutMs}ms`;
399
+ terminateChildren();
400
+ }, commandTimeoutMs);
401
+ timeout.unref?.();
402
+ }
403
+
356
404
  tar.stderr.on("data", (data) => {
357
405
  stderr += String(data);
358
406
  });
@@ -379,7 +427,7 @@ export async function createArchiveForTesting(
379
427
  cwd: string,
380
428
  files: string[],
381
429
  archivePath: string,
382
- options?: { maxBytes?: number; adaptivePruneMinBytes?: number },
430
+ options?: { maxBytes?: number; adaptivePruneMinBytes?: number; commandTimeoutMs?: number },
383
431
  ): Promise<ArchiveCreationResult> {
384
432
  const archiveInputs = resolveArchiveInputs(cwd, files);
385
433
  const wholeRepoSelection = isWholeRepoArchiveSelection(archiveInputs);
@@ -401,7 +449,7 @@ export async function createArchiveForTesting(
401
449
  throw new Error("Oracle archive inputs are empty after default exclusions and automatic size pruning");
402
450
  }
403
451
 
404
- const archiveBytes = await writeArchiveFile(cwd, expandedEntries, archivePath, listPath);
452
+ const archiveBytes = await writeArchiveFile(cwd, expandedEntries, archivePath, listPath, { commandTimeoutMs: options?.commandTimeoutMs });
405
453
  if (archiveBytes < maxBytes) {
406
454
  return {
407
455
  sha256: await sha256File(archivePath),
@@ -541,38 +589,10 @@ function redactJobDetails(job: NonNullable<ReturnType<typeof readJob>>) {
541
589
  cleanupWarnings: job.cleanupWarnings,
542
590
  lastCleanupAt: job.lastCleanupAt,
543
591
  error: job.error,
592
+ lifecycleEvents: job.lifecycleEvents,
544
593
  };
545
594
  }
546
595
 
547
- function formatAutoPrunedArchiveMessage(autoPrunedPrefixes: ArchiveCreationResult["autoPrunedPrefixes"]): string | undefined {
548
- if (autoPrunedPrefixes.length === 0) return undefined;
549
- return `Archive auto-pruned generic generated-output-name dirs to fit size limit: ${autoPrunedPrefixes.map((entry) => `${entry.relativePath}/ (${formatBytes(entry.bytes)})`).join(", ")}`;
550
- }
551
-
552
- function formatSubmitResponse(
553
- job: NonNullable<ReturnType<typeof readJob>>,
554
- options: {
555
- autoPrunedPrefixes: ArchiveCreationResult["autoPrunedPrefixes"];
556
- queued: boolean;
557
- queuePosition?: number;
558
- queueDepth?: number;
559
- },
560
- ): string {
561
- return [
562
- `${options.queued ? "Oracle job queued" : "Oracle job dispatched"}: ${job.id}`,
563
- options.queued && options.queuePosition && options.queueDepth ? `Queue position: ${options.queuePosition} of ${options.queueDepth}` : undefined,
564
- job.followUpToJobId ? `Follow-up to: ${job.followUpToJobId}` : undefined,
565
- `Prompt: ${job.promptPath}`,
566
- `Archive: ${job.archivePath}`,
567
- formatAutoPrunedArchiveMessage(options.autoPrunedPrefixes),
568
- `Response will be written to: ${job.responsePath}`,
569
- options.queued ? "The job will start automatically when capacity is available." : undefined,
570
- "Stop now and wait for the oracle completion wake-up.",
571
- ]
572
- .filter(Boolean)
573
- .join("\n");
574
- }
575
-
576
596
  export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void {
577
597
  pi.registerTool({
578
598
  name: "oracle_submit",
@@ -583,7 +603,7 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
583
603
  promptSnippet: "Dispatch a background ChatGPT web oracle job after gathering repo context.",
584
604
  promptGuidelines: [
585
605
  "Gather context before calling oracle_submit.",
586
- "By default, archive the whole repo by passing '.'; default archive exclusions apply automatically, including common bulky outputs and obvious credentials/private data like .env files, key material, credential dotfiles, local database files, and root secrets directories.",
606
+ "By default, archive the whole repo by passing '.'; default archive exclusions apply automatically, including common bulky outputs and obvious credentials/private data like .env files, key material, credential dotfiles, local database files, and nested secrets directories anywhere in the repo.",
587
607
  "Only narrow file selection when the user explicitly asks, the task is clearly scoped smaller, or privacy/sensitivity requires it.",
588
608
  "For very targeted asks like a single function or stack trace, a smaller archive is preferable.",
589
609
  "When files='.' and the post-exclusion archive is still too large, submit automatically prunes the largest nested directories matching generic generated-output names like build/, dist/, out/, coverage/, and tmp/ outside obvious source roots like src/ and lib/ until the archive fits or no candidate remains; successful submissions report what was pruned.",
@@ -736,7 +756,7 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
736
756
  content: [
737
757
  {
738
758
  type: "text",
739
- text: formatSubmitResponse(job, {
759
+ text: formatOracleSubmitResponse(job, {
740
760
  autoPrunedPrefixes: currentArchive.autoPrunedPrefixes,
741
761
  queued,
742
762
  queuePosition: queuePosition?.position,
@@ -767,7 +787,7 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
767
787
  content: [
768
788
  {
769
789
  type: "text",
770
- text: formatSubmitResponse(latest, {
790
+ text: formatOracleSubmitResponse(latest, {
771
791
  autoPrunedPrefixes: archive?.autoPrunedPrefixes ?? [],
772
792
  queued: true,
773
793
  queuePosition: queuePosition?.position,
@@ -795,7 +815,7 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
795
815
  content: [
796
816
  {
797
817
  type: "text",
798
- text: formatSubmitResponse(latest, {
818
+ text: formatOracleSubmitResponse(latest, {
799
819
  autoPrunedPrefixes: archive?.autoPrunedPrefixes ?? [],
800
820
  queued: false,
801
821
  }),
@@ -818,13 +838,13 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
818
838
  }
819
839
  if (job && (!latest || !isTerminalOracleJob(latest))) {
820
840
  const failedAt = new Date().toISOString();
821
- await updateJob(job.id, (current) => ({
822
- ...current,
823
- ...withJobPhase("failed", {
824
- status: "failed",
825
- completedAt: failedAt,
841
+ await updateJob(job.id, (current) => transitionOracleJobPhase(current, "failed", {
842
+ at: failedAt,
843
+ source: "oracle:submit",
844
+ message: `Submission failed before durable worker handoff: ${message}`,
845
+ patch: {
826
846
  error: message,
827
- }, failedAt),
847
+ },
828
848
  })).catch(() => undefined);
829
849
  }
830
850
  const cleanupReport = await cleanupRuntimeArtifacts({
@@ -875,28 +895,11 @@ export function registerOracleTools(pi: ExtensionAPI, workerPath: string): void
875
895
  content: [
876
896
  {
877
897
  type: "text",
878
- text: [
879
- `job: ${current.id}`,
880
- `status: ${current.status}`,
881
- current.queuedAt ? `queued: ${current.queuedAt}` : undefined,
882
- current.submittedAt ? `submitted: ${current.submittedAt}` : undefined,
883
- ...(current.status === "queued"
884
- ? (() => {
885
- const queuePosition = getQueuePosition(current.id);
886
- return queuePosition ? [`queue-position: ${queuePosition.position} of ${queuePosition.depth}`] : [];
887
- })()
888
- : []),
889
- current.followUpToJobId ? `follow-up-to: ${current.followUpToJobId}` : undefined,
890
- current.chatUrl ? `chat: ${current.chatUrl}` : undefined,
891
- current.responsePath ? `response: ${current.responsePath}` : undefined,
892
- current.responseFormat ? `response-format: ${current.responseFormat}` : undefined,
893
- `artifacts: ${getJobDir(current.id)}/artifacts`,
894
- current.error ? `error: ${current.error}` : undefined,
895
- "",
898
+ text: formatOracleJobSummary(current, {
899
+ queuePosition: current.status === "queued" ? getQueuePosition(current.id) : undefined,
900
+ artifactsPath: `${getJobDir(current.id)}/artifacts`,
896
901
  responsePreview,
897
- ]
898
- .filter(Boolean)
899
- .join("\n"),
902
+ }),
900
903
  },
901
904
  ],
902
905
  details: { job: redactJobDetails(current) },
@@ -0,0 +1,84 @@
1
+ export interface OracleDurableWorkerHandoffJobLike {
2
+ status?: string;
3
+ workerPid?: number;
4
+ }
5
+
6
+ export interface OracleAdmissionBlockingJobLike extends OracleDurableWorkerHandoffJobLike {
7
+ cleanupPending?: boolean;
8
+ workerStartedAt?: string;
9
+ }
10
+
11
+ export interface OracleRuntimeLeaseMetadataLike {
12
+ jobId: string;
13
+ runtimeId: string;
14
+ runtimeSessionName: string;
15
+ runtimeProfileDir: string;
16
+ projectId: string;
17
+ sessionId: string;
18
+ createdAt: string;
19
+ }
20
+
21
+ export interface OracleConversationLeaseMetadataLike {
22
+ jobId: string;
23
+ conversationId: string;
24
+ projectId: string;
25
+ sessionId: string;
26
+ createdAt: string;
27
+ }
28
+
29
+ export interface OracleQueuedPromotionFailureContext<TJob, TWorker> {
30
+ job: TJob;
31
+ latest?: TJob;
32
+ error: unknown;
33
+ at: string;
34
+ spawnedWorker?: TWorker;
35
+ runtimeLeaseAcquired: boolean;
36
+ conversationLeaseAcquired: boolean;
37
+ }
38
+
39
+ export type OracleQueuedPromotionFailureOutcome = void | "break";
40
+
41
+ export interface OracleQueuedPromotionOptions<TJob extends { id: string; archivePath: string }, TWorker> {
42
+ listQueuedJobs: () => TJob[];
43
+ refreshJob: (jobId: string) => TJob | undefined;
44
+ readLatestJob: (jobId: string) => TJob | undefined;
45
+ isQueuedJob?: (job: TJob | undefined) => boolean;
46
+ acquireRuntimeLease: (job: TJob, at: string) => Promise<boolean>;
47
+ acquireConversationLease: (job: TJob, at: string) => Promise<boolean>;
48
+ releaseRuntimeLease: (job: TJob) => Promise<void>;
49
+ markSubmitted: (job: TJob, at: string) => Promise<void>;
50
+ spawnWorker: (job: TJob) => Promise<TWorker>;
51
+ persistWorker: (job: TJob, worker: TWorker) => Promise<void>;
52
+ hasDurableWorkerHandoff?: (job: TJob | undefined) => boolean;
53
+ isTerminalJob: (job: TJob) => boolean;
54
+ failQueuedPromotion: (job: TJob, message: string, at: string) => Promise<void>;
55
+ terminateSpawnedWorker: (worker: TWorker) => Promise<void>;
56
+ cleanupAfterFailure: (context: OracleQueuedPromotionFailureContext<TJob, TWorker>) => Promise<OracleQueuedPromotionFailureOutcome>;
57
+ onDurableHandoff?: (job: TJob, latest?: TJob) => Promise<void> | void;
58
+ }
59
+
60
+ export declare function isQueuedOracleJob(job: OracleDurableWorkerHandoffJobLike | undefined): boolean;
61
+ export declare function compareQueuedOracleJobs(
62
+ left: { createdAt: string; queuedAt?: string; id: string },
63
+ right: { createdAt: string; queuedAt?: string; id: string },
64
+ ): number;
65
+ export declare function hasDurableWorkerHandoff(job: OracleDurableWorkerHandoffJobLike | undefined): boolean;
66
+ export declare function hasAdmissionBlockingWorker(
67
+ job: OracleAdmissionBlockingJobLike | undefined,
68
+ isTrackedProcessAliveFn?: (pid: number | undefined, startedAt?: string) => boolean,
69
+ ): boolean;
70
+ export declare function jobBlocksAdmission(
71
+ job: OracleAdmissionBlockingJobLike | undefined,
72
+ isTrackedProcessAliveFn?: (pid: number | undefined, startedAt?: string) => boolean,
73
+ ): boolean;
74
+ export declare function buildRuntimeLeaseMetadata(
75
+ job: { id: string; runtimeId: string; runtimeSessionName: string; runtimeProfileDir: string; projectId: string; sessionId: string },
76
+ createdAt: string,
77
+ ): OracleRuntimeLeaseMetadataLike;
78
+ export declare function buildConversationLeaseMetadata(
79
+ job: { id: string; conversationId?: string; projectId: string; sessionId: string },
80
+ createdAt: string,
81
+ ): OracleConversationLeaseMetadataLike | undefined;
82
+ export declare function runQueuedJobPromotionPass<TJob extends { id: string; archivePath: string }, TWorker>(
83
+ options: OracleQueuedPromotionOptions<TJob, TWorker>,
84
+ ): Promise<{ promotedJobIds: string[] }>;
@@ -0,0 +1,168 @@
1
+ // Purpose: Provide shared oracle job coordination helpers for admission control, lease metadata, and queued promotion orchestration.
2
+ // Responsibilities: Normalize queue ordering, derive lease metadata, detect durable handoff/admission blockers, and run a single queued-promotion pass.
3
+ // Scope: Coordination/state-machine logic; job persistence and lease side effects remain in injected callbacks, but the promotion pass itself checks queued archive paths with existsSync.
4
+ // Usage: Imported by lib/queue.ts, lib/runtime.ts, lib/jobs.ts, and worker/run-job.mjs to keep concurrency semantics aligned.
5
+ // Invariants/Assumptions: Queued jobs have durable ids/archive paths, and callers provide side-effect callbacks that preserve atomic job updates.
6
+
7
+ import { existsSync } from "node:fs";
8
+ import { isTrackedProcessAlive } from "./process-helpers.mjs";
9
+
10
+ /** @typedef {import("./job-coordination-helpers.d.mts").OracleAdmissionBlockingJobLike} OracleAdmissionBlockingJobLike */
11
+ /** @typedef {import("./job-coordination-helpers.d.mts").OracleConversationLeaseMetadataLike} OracleConversationLeaseMetadataLike */
12
+ /** @typedef {import("./job-coordination-helpers.d.mts").OracleDurableWorkerHandoffJobLike} OracleDurableWorkerHandoffJobLike */
13
+ /** @typedef {import("./job-coordination-helpers.d.mts").OracleRuntimeLeaseMetadataLike} OracleRuntimeLeaseMetadataLike */
14
+
15
/**
 * Reports whether a job record is still sitting in the queue.
 *
 * @param {OracleDurableWorkerHandoffJobLike | undefined} job Job record to inspect; may be absent.
 * @returns {boolean} `true` only when a record is present and its status is exactly "queued".
 */
export function isQueuedOracleJob(job) {
  if (job === undefined || job === null) return false;
  return job.status === "queued";
}
22
+
23
/**
 * Sort comparator for queued jobs.
 *
 * Ordering keys, in priority order: the queue timestamp (`queuedAt`, falling back to `createdAt`
 * when `queuedAt` is null/undefined), then `createdAt`, then `id`. Each key is compared with
 * `String.prototype.localeCompare`.
 *
 * @param {{ createdAt: string; queuedAt?: string; id: string }} left
 * @param {{ createdAt: string; queuedAt?: string; id: string }} right
 * @returns {number} Negative when `left` sorts first, positive when `right` sorts first, 0 when all keys tie.
 */
export function compareQueuedOracleJobs(left, right) {
  const byQueueTime = (left.queuedAt ?? left.createdAt).localeCompare(right.queuedAt ?? right.createdAt);
  if (byQueueTime !== 0) return byQueueTime;

  const byCreationTime = left.createdAt.localeCompare(right.createdAt);
  if (byCreationTime !== 0) return byCreationTime;

  return left.id.localeCompare(right.id);
}
33
+
34
/**
 * Reports whether a job record shows that a worker has been recorded for it after leaving the queue.
 *
 * @param {OracleDurableWorkerHandoffJobLike | undefined} job Job record to inspect; may be absent.
 * @returns {boolean} `true` when the record exists, its status is not "queued", and `workerPid` is truthy.
 */
export function hasDurableWorkerHandoff(job) {
  const leftQueue = Boolean(job) && job.status !== "queued";
  return leftQueue && Boolean(job.workerPid);
}
43
+
44
/**
 * Reports whether the worker recorded on a job is still considered alive.
 *
 * A record without a truthy `workerPid` never blocks. Otherwise the decision is delegated to the
 * liveness callback, which receives the pid together with the recorded `workerStartedAt`.
 *
 * @param {OracleAdmissionBlockingJobLike | undefined} job Job record to inspect; may be absent.
 * @param {(pid: number | undefined, startedAt?: string) => boolean} [isTrackedProcessAliveFn]
 *   Liveness probe; defaults to the shared `isTrackedProcessAlive` helper.
 * @returns {boolean} The probe's verdict, or `false` when no worker pid is recorded.
 */
export function hasAdmissionBlockingWorker(job, isTrackedProcessAliveFn = isTrackedProcessAlive) {
  const pid = job?.workerPid;
  return pid ? isTrackedProcessAliveFn(pid, job.workerStartedAt) : false;
}
53
+
54
/**
 * Decides whether a job record should keep blocking admission.
 *
 * A job blocks when any of the following holds, checked in this order:
 * its status is "preparing", "submitted" or "waiting"; `cleanupPending` is exactly `true`;
 * or its recorded worker is still reported alive by the liveness callback.
 *
 * @param {OracleAdmissionBlockingJobLike | undefined} job Job record to inspect; may be absent.
 * @param {(pid: number | undefined, startedAt?: string) => boolean} [isTrackedProcessAliveFn]
 *   Liveness probe forwarded to `hasAdmissionBlockingWorker`; defaults to `isTrackedProcessAlive`.
 * @returns {boolean}
 */
export function jobBlocksAdmission(job, isTrackedProcessAliveFn = isTrackedProcessAlive) {
  const status = String(job?.status || "");
  if (status === "preparing" || status === "submitted" || status === "waiting") return true;
  if (job?.cleanupPending === true) return true;
  return hasAdmissionBlockingWorker(job, isTrackedProcessAliveFn);
}
64
+
65
/**
 * Builds the runtime-lease metadata record for a job.
 *
 * Copies the job's id (as `jobId`) and its runtime/project/session identifiers, and stamps the
 * record with the supplied creation time. No other job fields are carried over.
 *
 * @param {{ id: string; runtimeId: string; runtimeSessionName: string; runtimeProfileDir: string; projectId: string; sessionId: string }} job
 * @param {string} createdAt Timestamp to store on the lease record.
 * @returns {OracleRuntimeLeaseMetadataLike}
 */
export function buildRuntimeLeaseMetadata(job, createdAt) {
  const { id: jobId, runtimeId, runtimeSessionName, runtimeProfileDir, projectId, sessionId } = job;
  return { jobId, runtimeId, runtimeSessionName, runtimeProfileDir, projectId, sessionId, createdAt };
}
81
+
82
/**
 * Builds the conversation-lease metadata record for a job, if the job targets a conversation.
 *
 * @param {{ id: string; conversationId?: string; projectId: string; sessionId: string }} job
 * @param {string} createdAt Timestamp to store on the lease record.
 * @returns {OracleConversationLeaseMetadataLike | undefined}
 *   The lease record, or `undefined` when the job has no (truthy) `conversationId`.
 */
export function buildConversationLeaseMetadata(job, createdAt) {
  const { id: jobId, conversationId, projectId, sessionId } = job;
  return conversationId ? { jobId, conversationId, projectId, sessionId, createdAt } : undefined;
}
97
+
98
+ /**
99
+ * @template {{ id: string; archivePath: string }} TJob
100
+ * @template TWorker
101
+ * @param {import("./job-coordination-helpers.d.mts").OracleQueuedPromotionOptions<TJob, TWorker>} options
102
+ * @returns {Promise<{ promotedJobIds: string[] }>}
103
+ */
104
+ export async function runQueuedJobPromotionPass(options) {
105
+ const promotedJobIds = [];
106
+ const isQueuedJob = options.isQueuedJob ?? isQueuedOracleJob;
107
+ const durableHandoff = options.hasDurableWorkerHandoff ?? hasDurableWorkerHandoff;
108
+
109
+ for (const queuedJob of options.listQueuedJobs()) {
110
+ const promotedAt = new Date().toISOString();
111
+ let runtimeLeaseAcquired = false;
112
+ let conversationLeaseAcquired = false;
113
+ /** @type {TWorker | undefined} */
114
+ let spawnedWorker;
115
+
116
+ try {
117
+ const current = options.refreshJob(queuedJob.id);
118
+ if (!isQueuedJob(current)) continue;
119
+ if (!existsSync(current.archivePath)) {
120
+ await options.failQueuedPromotion(current, `Queued oracle archive is missing: ${current.archivePath}`, promotedAt);
121
+ continue;
122
+ }
123
+
124
+ const runtimeAttempt = await options.acquireRuntimeLease(current, promotedAt);
125
+ if (!runtimeAttempt) break;
126
+ runtimeLeaseAcquired = true;
127
+
128
+ const conversationAttempt = await options.acquireConversationLease(current, promotedAt);
129
+ if (!conversationAttempt) {
130
+ await options.releaseRuntimeLease(current).catch(() => undefined);
131
+ runtimeLeaseAcquired = false;
132
+ continue;
133
+ }
134
+ conversationLeaseAcquired = true;
135
+
136
+ await options.markSubmitted(current, promotedAt);
137
+ spawnedWorker = await options.spawnWorker(current);
138
+ await options.persistWorker(current, spawnedWorker);
139
+ promotedJobIds.push(current.id);
140
+ } catch (error) {
141
+ const message = error instanceof Error ? error.message : String(error);
142
+ const latest = options.readLatestJob(queuedJob.id);
143
+ if (spawnedWorker && durableHandoff(latest)) {
144
+ promotedJobIds.push(queuedJob.id);
145
+ await options.onDurableHandoff?.(queuedJob, latest);
146
+ continue;
147
+ }
148
+ if (spawnedWorker) {
149
+ await options.terminateSpawnedWorker(spawnedWorker).catch(() => undefined);
150
+ }
151
+ if (latest && !options.isTerminalJob(latest)) {
152
+ await options.failQueuedPromotion(latest, message, promotedAt);
153
+ }
154
+ const failureOutcome = await options.cleanupAfterFailure({
155
+ job: queuedJob,
156
+ latest,
157
+ error,
158
+ at: promotedAt,
159
+ spawnedWorker,
160
+ runtimeLeaseAcquired,
161
+ conversationLeaseAcquired,
162
+ });
163
+ if (failureOutcome === "break") break;
164
+ }
165
+ }
166
+
167
+ return { promotedJobIds };
168
+ }