pi-oracle 0.1.11 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +27 -11
- package/docs/ORACLE_DESIGN.md +583 -0
- package/docs/ORACLE_RECOVERY_DRILL.md +127 -0
- package/extensions/oracle/index.ts +15 -4
- package/extensions/oracle/lib/commands.ts +35 -12
- package/extensions/oracle/lib/config.ts +2 -2
- package/extensions/oracle/lib/jobs.ts +438 -72
- package/extensions/oracle/lib/locks.ts +99 -13
- package/extensions/oracle/lib/poller.ts +223 -38
- package/extensions/oracle/lib/queue.ts +193 -0
- package/extensions/oracle/lib/runtime.ts +69 -15
- package/extensions/oracle/lib/tools.ts +514 -123
- package/extensions/oracle/worker/artifact-heuristics.d.mts +29 -0
- package/extensions/oracle/worker/auth-bootstrap.mjs +2 -72
- package/extensions/oracle/worker/auth-cookie-policy.d.mts +31 -0
- package/extensions/oracle/worker/run-job.mjs +333 -71
- package/extensions/oracle/worker/state-locks.d.mts +45 -0
- package/extensions/oracle/worker/state-locks.mjs +235 -0
- package/package.json +13 -4
- package/prompts/oracle.md +9 -3
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export interface SnapshotEntry {
|
|
2
|
+
line: string;
|
|
3
|
+
lineIndex: number;
|
|
4
|
+
ref: string;
|
|
5
|
+
kind?: string;
|
|
6
|
+
label?: string;
|
|
7
|
+
value?: string;
|
|
8
|
+
disabled: boolean;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export interface StructuralArtifactCandidateInput {
|
|
12
|
+
label?: string;
|
|
13
|
+
paragraphText?: string;
|
|
14
|
+
listItemText?: string;
|
|
15
|
+
paragraphFileButtonCount?: number;
|
|
16
|
+
paragraphOtherTextLength?: number;
|
|
17
|
+
listItemFileButtonCount?: number;
|
|
18
|
+
focusableFileButtonCount?: number;
|
|
19
|
+
focusableOtherTextLength?: number;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export interface StructuralArtifactCandidate {
|
|
23
|
+
label: string;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export function parseSnapshotEntries(snapshot: string): SnapshotEntry[];
|
|
27
|
+
export function filterStructuralArtifactCandidates(
|
|
28
|
+
candidates: StructuralArtifactCandidateInput[],
|
|
29
|
+
): StructuralArtifactCandidate[];
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { withLock } from "./state-locks.mjs";
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
3
|
import { existsSync } from "node:fs";
|
|
4
4
|
import { appendFile, chmod, lstat, mkdir, readdir, readFile, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
@@ -35,7 +35,6 @@ const SCREENSHOT_PATH = "/tmp/oracle-auth.png";
|
|
|
35
35
|
const REAL_CHROME_USER_DATA_DIR = resolve(homedir(), "Library", "Application Support", "Google", "Chrome");
|
|
36
36
|
const DEFAULT_ORACLE_STATE_DIR = "/tmp/pi-oracle-state";
|
|
37
37
|
const ORACLE_STATE_DIR = process.env.PI_ORACLE_STATE_DIR?.trim() || DEFAULT_ORACLE_STATE_DIR;
|
|
38
|
-
const LOCKS_DIR = join(ORACLE_STATE_DIR, "locks");
|
|
39
38
|
const STALE_STAGING_PROFILE_MAX_AGE_MS = 24 * 60 * 60 * 1000;
|
|
40
39
|
const AGENT_BROWSER_BIN = [process.env.AGENT_BROWSER_PATH, "/opt/homebrew/bin/agent-browser", "/usr/local/bin/agent-browser"].find(
|
|
41
40
|
(candidate) => typeof candidate === "string" && candidate && existsSync(candidate),
|
|
@@ -51,75 +50,6 @@ function sleep(ms) {
|
|
|
51
50
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
52
51
|
}
|
|
53
52
|
|
|
54
|
-
function leaseKey(kind, key) {
|
|
55
|
-
return `${kind}-${createHash("sha256").update(key).digest("hex").slice(0, 24)}`;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
async function readLockProcessPid(path) {
|
|
59
|
-
const metadataPath = join(path, "metadata.json");
|
|
60
|
-
if (!existsSync(metadataPath)) return undefined;
|
|
61
|
-
try {
|
|
62
|
-
const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
|
|
63
|
-
return typeof metadata?.processPid === "number" && Number.isInteger(metadata.processPid) && metadata.processPid > 0
|
|
64
|
-
? metadata.processPid
|
|
65
|
-
: undefined;
|
|
66
|
-
} catch {
|
|
67
|
-
return undefined;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
function isProcessAlive(pid) {
|
|
72
|
-
try {
|
|
73
|
-
process.kill(pid, 0);
|
|
74
|
-
return true;
|
|
75
|
-
} catch (error) {
|
|
76
|
-
if (error && typeof error === "object" && "code" in error && error.code === "ESRCH") return false;
|
|
77
|
-
return true;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
async function maybeReclaimStaleLock(path) {
|
|
82
|
-
const processPid = await readLockProcessPid(path);
|
|
83
|
-
if (!processPid || isProcessAlive(processPid)) return false;
|
|
84
|
-
await rm(path, { recursive: true, force: true }).catch(() => undefined);
|
|
85
|
-
return true;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
async function acquireLock(kind, key, metadata, timeoutMs = 30_000) {
|
|
89
|
-
const path = join(LOCKS_DIR, leaseKey(kind, key));
|
|
90
|
-
const deadline = Date.now() + timeoutMs;
|
|
91
|
-
await mkdir(ORACLE_STATE_DIR, { recursive: true, mode: 0o700 });
|
|
92
|
-
await mkdir(LOCKS_DIR, { recursive: true, mode: 0o700 });
|
|
93
|
-
|
|
94
|
-
while (Date.now() < deadline) {
|
|
95
|
-
try {
|
|
96
|
-
await mkdir(path, { recursive: false, mode: 0o700 });
|
|
97
|
-
await writeFile(join(path, "metadata.json"), `${JSON.stringify(metadata, null, 2)}\n`, { encoding: "utf8", mode: 0o600 });
|
|
98
|
-
return path;
|
|
99
|
-
} catch (error) {
|
|
100
|
-
if (!(error && typeof error === "object" && "code" in error && error.code === "EEXIST")) throw error;
|
|
101
|
-
if (await maybeReclaimStaleLock(path)) continue;
|
|
102
|
-
}
|
|
103
|
-
await sleep(200);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
throw new Error(`Timed out waiting for oracle ${kind} lock: ${key}`);
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
async function releaseLock(path) {
|
|
110
|
-
if (!path) return;
|
|
111
|
-
await rm(path, { recursive: true, force: true }).catch(() => undefined);
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
async function withLock(kind, key, metadata, fn, timeoutMs) {
|
|
115
|
-
const handle = await acquireLock(kind, key, metadata, timeoutMs);
|
|
116
|
-
try {
|
|
117
|
-
return await fn();
|
|
118
|
-
} finally {
|
|
119
|
-
await releaseLock(handle);
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
|
|
123
53
|
async function initLog() {
|
|
124
54
|
await writeFile(LOG_PATH, "", { mode: 0o600 });
|
|
125
55
|
await chmod(LOG_PATH, 0o600).catch(() => undefined);
|
|
@@ -850,7 +780,7 @@ async function waitForImportedAuthReady() {
|
|
|
850
780
|
|
|
851
781
|
async function run() {
|
|
852
782
|
await initLog();
|
|
853
|
-
await withLock("auth", "global", { processPid: process.pid, action: "oracle-auth" }, async () => {
|
|
783
|
+
await withLock(ORACLE_STATE_DIR, "auth", "global", { processPid: process.pid, action: "oracle-auth" }, async () => {
|
|
854
784
|
let shouldPreserveBrowser = false;
|
|
855
785
|
let committedProfile = false;
|
|
856
786
|
let profilePlan;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
export interface ImportedAuthCookie {
|
|
2
|
+
name: string;
|
|
3
|
+
value?: string;
|
|
4
|
+
domain?: string;
|
|
5
|
+
path?: string;
|
|
6
|
+
expires?: number;
|
|
7
|
+
httpOnly?: boolean;
|
|
8
|
+
secure?: boolean;
|
|
9
|
+
sameSite?: "Lax" | "Strict" | "None";
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export interface NormalizedAuthCookie {
|
|
13
|
+
name: string;
|
|
14
|
+
value: string;
|
|
15
|
+
domain: string;
|
|
16
|
+
path: string;
|
|
17
|
+
expires?: number;
|
|
18
|
+
httpOnly: boolean;
|
|
19
|
+
secure: boolean;
|
|
20
|
+
sameSite?: "Lax" | "Strict" | "None";
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export function filterImportableAuthCookies(
|
|
24
|
+
cookies: ImportedAuthCookie[],
|
|
25
|
+
chatUrl: string,
|
|
26
|
+
): { cookies: NormalizedAuthCookie[]; dropped: Array<{ cookie: NormalizedAuthCookie; reason: string }> };
|
|
27
|
+
|
|
28
|
+
export function ensureAccountCookie(
|
|
29
|
+
cookies: NormalizedAuthCookie[],
|
|
30
|
+
chatUrl: string,
|
|
31
|
+
): { cookies: NormalizedAuthCookie[]; synthesized: boolean; value?: string };
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { createHash } from "node:crypto";
|
|
2
|
-
import { existsSync } from "node:fs";
|
|
1
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
3
3
|
import { appendFile, chmod, mkdir, readFile, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, join } from "node:path";
|
|
5
|
-
import {
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { spawn, execFileSync } from "node:child_process";
|
|
6
7
|
import { FILE_LABEL_PATTERN_SOURCE, filterStructuralArtifactCandidates, GENERIC_ARTIFACT_LABELS, parseSnapshotEntries } from "./artifact-heuristics.mjs";
|
|
8
|
+
import { createLease, listLeaseMetadata, readLeaseMetadata, releaseLease, withLock } from "./state-locks.mjs";
|
|
7
9
|
|
|
8
10
|
const jobId = process.argv[2];
|
|
9
11
|
if (!jobId) {
|
|
@@ -29,10 +31,9 @@ const MODEL_FAMILY_PREFIX = {
|
|
|
29
31
|
pro: "Pro ",
|
|
30
32
|
};
|
|
31
33
|
|
|
34
|
+
const WORKER_SCRIPT_PATH = fileURLToPath(import.meta.url);
|
|
32
35
|
const DEFAULT_ORACLE_STATE_DIR = "/tmp/pi-oracle-state";
|
|
33
36
|
const ORACLE_STATE_DIR = process.env.PI_ORACLE_STATE_DIR?.trim() || DEFAULT_ORACLE_STATE_DIR;
|
|
34
|
-
const LOCKS_DIR = join(ORACLE_STATE_DIR, "locks");
|
|
35
|
-
const LEASES_DIR = join(ORACLE_STATE_DIR, "leases");
|
|
36
37
|
const SEED_GENERATION_FILE = ".oracle-seed-generation";
|
|
37
38
|
const ARTIFACT_CANDIDATE_STABILITY_TIMEOUT_MS = 15_000;
|
|
38
39
|
const ARTIFACT_CANDIDATE_STABILITY_POLL_MS = 1_500;
|
|
@@ -44,6 +45,7 @@ const AGENT_BROWSER_CLOSE_TIMEOUT_MS = 10_000;
|
|
|
44
45
|
const MODEL_CONFIGURATION_SETTLE_TIMEOUT_MS = 20_000;
|
|
45
46
|
const MODEL_CONFIGURATION_SETTLE_POLL_MS = 250;
|
|
46
47
|
const MODEL_CONFIGURATION_CLOSE_RETRY_MS = 1_000;
|
|
48
|
+
const POST_SEND_SETTLE_MS = 15_000;
|
|
47
49
|
const AGENT_BROWSER_BIN = [process.env.AGENT_BROWSER_PATH, "/opt/homebrew/bin/agent-browser", "/usr/local/bin/agent-browser"].find(
|
|
48
50
|
(candidate) => typeof candidate === "string" && candidate && existsSync(candidate),
|
|
49
51
|
) || "agent-browser";
|
|
@@ -60,23 +62,6 @@ async function ensurePrivateDir(path) {
|
|
|
60
62
|
await chmod(path, 0o700).catch(() => undefined);
|
|
61
63
|
}
|
|
62
64
|
|
|
63
|
-
function leaseKey(kind, key) {
|
|
64
|
-
return `${kind}-${createHash("sha256").update(key).digest("hex").slice(0, 24)}`;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
async function readLockProcessPid(path) {
|
|
68
|
-
const metadataPath = join(path, "metadata.json");
|
|
69
|
-
if (!existsSync(metadataPath)) return undefined;
|
|
70
|
-
try {
|
|
71
|
-
const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
|
|
72
|
-
return typeof metadata?.processPid === "number" && Number.isInteger(metadata.processPid) && metadata.processPid > 0
|
|
73
|
-
? metadata.processPid
|
|
74
|
-
: undefined;
|
|
75
|
-
} catch {
|
|
76
|
-
return undefined;
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
65
|
function isProcessAlive(pid) {
|
|
81
66
|
try {
|
|
82
67
|
process.kill(pid, 0);
|
|
@@ -87,53 +72,69 @@ function isProcessAlive(pid) {
|
|
|
87
72
|
}
|
|
88
73
|
}
|
|
89
74
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
75
|
+
function readProcessStartedAt(pid) {
|
|
76
|
+
if (!pid || pid <= 0) return undefined;
|
|
77
|
+
try {
|
|
78
|
+
const startedAt = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], { encoding: "utf8" }).trim();
|
|
79
|
+
return startedAt || undefined;
|
|
80
|
+
} catch {
|
|
81
|
+
return undefined;
|
|
82
|
+
}
|
|
95
83
|
}
|
|
96
84
|
|
|
97
|
-
async function
|
|
98
|
-
const path = join(LOCKS_DIR, leaseKey(kind, key));
|
|
85
|
+
async function waitForProcessStartedAt(pid, timeoutMs = 2_000) {
|
|
99
86
|
const deadline = Date.now() + timeoutMs;
|
|
100
|
-
await ensurePrivateDir(ORACLE_STATE_DIR);
|
|
101
|
-
await ensurePrivateDir(LOCKS_DIR);
|
|
102
|
-
|
|
103
87
|
while (Date.now() < deadline) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
return path;
|
|
108
|
-
} catch (error) {
|
|
109
|
-
if (!(error && typeof error === "object" && "code" in error && error.code === "EEXIST")) throw error;
|
|
110
|
-
if (await maybeReclaimStaleLock(path)) continue;
|
|
111
|
-
}
|
|
112
|
-
await sleep(200);
|
|
88
|
+
const startedAt = readProcessStartedAt(pid);
|
|
89
|
+
if (startedAt) return startedAt;
|
|
90
|
+
await sleep(100);
|
|
113
91
|
}
|
|
114
|
-
|
|
115
|
-
throw new Error(`Timed out waiting for oracle ${kind} lock: ${key}`);
|
|
92
|
+
return readProcessStartedAt(pid);
|
|
116
93
|
}
|
|
117
94
|
|
|
118
|
-
async function
|
|
119
|
-
if (!
|
|
120
|
-
|
|
121
|
-
|
|
95
|
+
async function terminateWorkerPid(pid, startedAt, options = {}) {
|
|
96
|
+
if (!pid || pid <= 0) return true;
|
|
97
|
+
const currentStartedAt = readProcessStartedAt(pid);
|
|
98
|
+
if (!currentStartedAt) return true;
|
|
99
|
+
if (startedAt && currentStartedAt !== startedAt) return false;
|
|
100
|
+
|
|
101
|
+
const termGraceMs = options.termGraceMs ?? 5_000;
|
|
102
|
+
const killGraceMs = options.killGraceMs ?? 2_000;
|
|
122
103
|
|
|
123
|
-
async function withLock(kind, key, metadata, fn, timeoutMs) {
|
|
124
|
-
const handle = await acquireLock(kind, key, metadata, timeoutMs);
|
|
125
104
|
try {
|
|
126
|
-
|
|
127
|
-
}
|
|
128
|
-
|
|
105
|
+
process.kill(pid, "SIGTERM");
|
|
106
|
+
} catch {
|
|
107
|
+
return !isProcessAlive(pid);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const termDeadline = Date.now() + termGraceMs;
|
|
111
|
+
while (Date.now() < termDeadline) {
|
|
112
|
+
const liveStartedAt = readProcessStartedAt(pid);
|
|
113
|
+
if (!liveStartedAt) return true;
|
|
114
|
+
if (startedAt && liveStartedAt !== startedAt) return true;
|
|
115
|
+
await sleep(250);
|
|
129
116
|
}
|
|
130
|
-
}
|
|
131
117
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
118
|
+
try {
|
|
119
|
+
process.kill(pid, "SIGKILL");
|
|
120
|
+
} catch {
|
|
121
|
+
return !isProcessAlive(pid);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const killDeadline = Date.now() + killGraceMs;
|
|
125
|
+
while (Date.now() < killDeadline) {
|
|
126
|
+
const liveStartedAt = readProcessStartedAt(pid);
|
|
127
|
+
if (!liveStartedAt) return true;
|
|
128
|
+
if (startedAt && liveStartedAt !== startedAt) return true;
|
|
129
|
+
await sleep(250);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const finalStartedAt = readProcessStartedAt(pid);
|
|
133
|
+
if (!finalStartedAt) return true;
|
|
134
|
+
return startedAt ? finalStartedAt !== startedAt : false;
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
+
|
|
137
138
|
async function secureWriteText(path, content) {
|
|
138
139
|
const tmpPath = `${path}.${process.pid}.${Date.now()}.tmp`;
|
|
139
140
|
await writeFile(tmpPath, content, { encoding: "utf8", mode: 0o600 });
|
|
@@ -155,18 +156,79 @@ async function readJob() {
|
|
|
155
156
|
return readJobUnlocked();
|
|
156
157
|
}
|
|
157
158
|
|
|
159
|
+
function getAnyJobDir(targetJobId) {
|
|
160
|
+
return join(ORACLE_JOBS_DIR, `oracle-${targetJobId}`);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
function getAnyJobPath(targetJobId) {
|
|
164
|
+
return join(getAnyJobDir(targetJobId), "job.json");
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
function readAnyJob(targetJobId) {
|
|
168
|
+
const path = getAnyJobPath(targetJobId);
|
|
169
|
+
if (!existsSync(path)) return undefined;
|
|
170
|
+
try {
|
|
171
|
+
return JSON.parse(readFileSync(path, "utf8"));
|
|
172
|
+
} catch {
|
|
173
|
+
return undefined;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
function listQueuedJobs() {
|
|
178
|
+
if (!existsSync(ORACLE_JOBS_DIR)) return [];
|
|
179
|
+
return readdirSync(ORACLE_JOBS_DIR)
|
|
180
|
+
.filter((name) => name.startsWith("oracle-"))
|
|
181
|
+
.map((name) => readAnyJob(name.slice("oracle-".length)))
|
|
182
|
+
.filter((job) => job?.status === "queued")
|
|
183
|
+
.sort((left, right) => {
|
|
184
|
+
const leftKey = left?.queuedAt || left?.createdAt || "";
|
|
185
|
+
const rightKey = right?.queuedAt || right?.createdAt || "";
|
|
186
|
+
return leftKey.localeCompare(rightKey) || String(left?.createdAt || "").localeCompare(String(right?.createdAt || "")) || String(left?.id || "").localeCompare(String(right?.id || ""));
|
|
187
|
+
});
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
function isActiveJobStatus(status) {
|
|
191
|
+
return ["preparing", "submitted", "waiting"].includes(String(status || ""));
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
function jobBlocksAdmission(job) {
|
|
195
|
+
return isActiveJobStatus(job?.status) || job?.cleanupPending === true || (Array.isArray(job?.cleanupWarnings) && job.cleanupWarnings.length > 0);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
function hasDurableWorkerHandoff(job) {
|
|
199
|
+
if (!job || job.status === "queued") return false;
|
|
200
|
+
if (job.workerPid) return true;
|
|
201
|
+
return false;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
async function mutateAnyJob(targetJobId, mutator) {
|
|
205
|
+
return withLock(ORACLE_STATE_DIR, "job", targetJobId, { processPid: process.pid, action: "mutateJob", targetJobId }, async () => {
|
|
206
|
+
const path = getAnyJobPath(targetJobId);
|
|
207
|
+
const current = JSON.parse(await readFile(path, "utf8"));
|
|
208
|
+
const next = mutator(current);
|
|
209
|
+
await secureWriteText(path, `${JSON.stringify(next, null, 2)}\n`);
|
|
210
|
+
return next;
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
async function writeAnyJob(targetJobId, job) {
|
|
215
|
+
await withLock(ORACLE_STATE_DIR, "job", targetJobId, { processPid: process.pid, action: "writeJob", targetJobId }, async () => {
|
|
216
|
+
await secureWriteText(getAnyJobPath(targetJobId), `${JSON.stringify(job, null, 2)}\n`);
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
|
|
158
220
|
async function writeJobUnlocked(job) {
|
|
159
221
|
await secureWriteText(jobPath, `${JSON.stringify(job, null, 2)}\n`);
|
|
160
222
|
}
|
|
161
223
|
|
|
162
224
|
async function writeJob(job) {
|
|
163
|
-
await withLock("job", jobId, { processPid: process.pid, action: "writeJob" }, async () => {
|
|
225
|
+
await withLock(ORACLE_STATE_DIR, "job", jobId, { processPid: process.pid, action: "writeJob" }, async () => {
|
|
164
226
|
await writeJobUnlocked(job);
|
|
165
227
|
});
|
|
166
228
|
}
|
|
167
229
|
|
|
168
230
|
async function mutateJob(mutator) {
|
|
169
|
-
return withLock("job", jobId, { processPid: process.pid, action: "mutateJob" }, async () => {
|
|
231
|
+
return withLock(ORACLE_STATE_DIR, "job", jobId, { processPid: process.pid, action: "mutateJob" }, async () => {
|
|
170
232
|
const job = await readJobUnlocked();
|
|
171
233
|
const next = mutator(job);
|
|
172
234
|
await writeJobUnlocked(next);
|
|
@@ -270,7 +332,7 @@ async function cloneSeedProfileToRuntime(job) {
|
|
|
270
332
|
const seedGenerationPath = join(seedDir, SEED_GENERATION_FILE);
|
|
271
333
|
const seedGeneration = existsSync(seedGenerationPath) ? (await readFile(seedGenerationPath, "utf8")).trim() || undefined : undefined;
|
|
272
334
|
|
|
273
|
-
await withLock("auth", "global", { jobId: job.id, processPid: process.pid, action: "cloneSeedProfile" }, async () => {
|
|
335
|
+
await withLock(ORACLE_STATE_DIR, "auth", "global", { jobId: job.id, processPid: process.pid, action: "cloneSeedProfile" }, async () => {
|
|
274
336
|
await rm(job.runtimeProfileDir, { recursive: true, force: true }).catch(() => undefined);
|
|
275
337
|
await ensurePrivateDir(dirname(job.runtimeProfileDir));
|
|
276
338
|
const cloneArgs = job.config.browser.cloneStrategy === "apfs-clone" ? ["-cR", seedDir, job.runtimeProfileDir] : ["-R", seedDir, job.runtimeProfileDir];
|
|
@@ -281,7 +343,7 @@ async function cloneSeedProfileToRuntime(job) {
|
|
|
281
343
|
}
|
|
282
344
|
|
|
283
345
|
async function cleanupRuntime(job) {
|
|
284
|
-
if (!job || cleaningUpRuntime) return;
|
|
346
|
+
if (!job || cleaningUpRuntime) return [];
|
|
285
347
|
cleaningUpRuntime = true;
|
|
286
348
|
const warnings = [];
|
|
287
349
|
try {
|
|
@@ -290,31 +352,202 @@ async function cleanupRuntime(job) {
|
|
|
290
352
|
warnings.push(message);
|
|
291
353
|
await log(message).catch(() => undefined);
|
|
292
354
|
});
|
|
293
|
-
await releaseLease("conversation", job.conversationId).catch(async (error) => {
|
|
294
|
-
const message = `Conversation lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
|
|
295
|
-
warnings.push(message);
|
|
296
|
-
await log(message).catch(() => undefined);
|
|
297
|
-
});
|
|
298
|
-
await releaseLease("runtime", job.runtimeId).catch(async (error) => {
|
|
299
|
-
const message = `Runtime lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
|
|
300
|
-
warnings.push(message);
|
|
301
|
-
await log(message).catch(() => undefined);
|
|
302
|
-
});
|
|
303
355
|
await rm(job.runtimeProfileDir, { recursive: true, force: true }).catch(async (error) => {
|
|
304
356
|
const message = `Runtime profile cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
|
|
305
357
|
warnings.push(message);
|
|
306
358
|
await log(message).catch(() => undefined);
|
|
307
359
|
});
|
|
360
|
+
if (warnings.length === 0) {
|
|
361
|
+
await releaseLease(ORACLE_STATE_DIR, "conversation", job.conversationId).catch(async (error) => {
|
|
362
|
+
const message = `Conversation lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
|
|
363
|
+
warnings.push(message);
|
|
364
|
+
await log(message).catch(() => undefined);
|
|
365
|
+
});
|
|
366
|
+
await releaseLease(ORACLE_STATE_DIR, "runtime", job.runtimeId).catch(async (error) => {
|
|
367
|
+
const message = `Runtime lease cleanup warning: ${error instanceof Error ? error.message : String(error)}`;
|
|
368
|
+
warnings.push(message);
|
|
369
|
+
await log(message).catch(() => undefined);
|
|
370
|
+
});
|
|
371
|
+
}
|
|
308
372
|
if (warnings.length === 0) {
|
|
309
373
|
await log(`Cleanup summary: runtime ${job.runtimeId} released with no warnings`).catch(() => undefined);
|
|
310
374
|
} else {
|
|
311
375
|
await log(`Cleanup summary: runtime ${job.runtimeId} released with ${warnings.length} warning(s)`).catch(() => undefined);
|
|
312
376
|
}
|
|
377
|
+
return warnings;
|
|
313
378
|
} finally {
|
|
314
379
|
cleaningUpRuntime = false;
|
|
315
380
|
}
|
|
316
381
|
}
|
|
317
382
|
|
|
383
|
+
async function tryAcquireRuntimeLeaseForJob(job, createdAt) {
|
|
384
|
+
const existing = listLeaseMetadata(ORACLE_STATE_DIR, "runtime");
|
|
385
|
+
const liveLeases = [];
|
|
386
|
+
for (const lease of existing) {
|
|
387
|
+
const owner = lease?.jobId ? readAnyJob(lease.jobId) : undefined;
|
|
388
|
+
if (!jobBlocksAdmission(owner)) {
|
|
389
|
+
await releaseLease(ORACLE_STATE_DIR, "runtime", lease?.runtimeId).catch(() => undefined);
|
|
390
|
+
continue;
|
|
391
|
+
}
|
|
392
|
+
liveLeases.push(lease);
|
|
393
|
+
}
|
|
394
|
+
if (liveLeases.length >= job.config.browser.maxConcurrentJobs) {
|
|
395
|
+
return false;
|
|
396
|
+
}
|
|
397
|
+
await createLease(ORACLE_STATE_DIR, "runtime", job.runtimeId, {
|
|
398
|
+
jobId: job.id,
|
|
399
|
+
runtimeId: job.runtimeId,
|
|
400
|
+
runtimeSessionName: job.runtimeSessionName,
|
|
401
|
+
runtimeProfileDir: job.runtimeProfileDir,
|
|
402
|
+
projectId: job.projectId,
|
|
403
|
+
sessionId: job.sessionId,
|
|
404
|
+
createdAt,
|
|
405
|
+
});
|
|
406
|
+
return true;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
async function tryAcquireConversationLeaseForJob(job, createdAt) {
|
|
410
|
+
if (!job.conversationId) return true;
|
|
411
|
+
const existing = await readLeaseMetadata(ORACLE_STATE_DIR, "conversation", job.conversationId);
|
|
412
|
+
if (existing?.jobId === job.id) return true;
|
|
413
|
+
if (existing && existing.jobId !== job.id) {
|
|
414
|
+
if (!jobBlocksAdmission(readAnyJob(existing.jobId))) {
|
|
415
|
+
await releaseLease(ORACLE_STATE_DIR, "conversation", job.conversationId).catch(() => undefined);
|
|
416
|
+
} else {
|
|
417
|
+
return false;
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
await createLease(ORACLE_STATE_DIR, "conversation", job.conversationId, {
|
|
421
|
+
jobId: job.id,
|
|
422
|
+
conversationId: job.conversationId,
|
|
423
|
+
projectId: job.projectId,
|
|
424
|
+
sessionId: job.sessionId,
|
|
425
|
+
createdAt,
|
|
426
|
+
});
|
|
427
|
+
return true;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
async function spawnDetachedWorker(targetJobId) {
|
|
431
|
+
const child = spawn(process.execPath, [WORKER_SCRIPT_PATH, targetJobId], {
|
|
432
|
+
detached: true,
|
|
433
|
+
stdio: "ignore",
|
|
434
|
+
});
|
|
435
|
+
child.unref();
|
|
436
|
+
return {
|
|
437
|
+
pid: child.pid,
|
|
438
|
+
workerNonce: randomUUID(),
|
|
439
|
+
workerStartedAt: await waitForProcessStartedAt(child.pid),
|
|
440
|
+
};
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
async function failQueuedPromotion(targetJobId, message, at = new Date().toISOString()) {
|
|
444
|
+
await mutateAnyJob(targetJobId, (latest) => {
|
|
445
|
+
if (["complete", "failed", "cancelled"].includes(String(latest.status || ""))) return latest;
|
|
446
|
+
return {
|
|
447
|
+
...latest,
|
|
448
|
+
...phasePatch("failed", {
|
|
449
|
+
status: "failed",
|
|
450
|
+
completedAt: at,
|
|
451
|
+
heartbeatAt: at,
|
|
452
|
+
error: message,
|
|
453
|
+
}, at),
|
|
454
|
+
};
|
|
455
|
+
}).catch(() => undefined);
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
async function promoteQueuedJobsAfterCleanup() {
|
|
459
|
+
await withLock(ORACLE_STATE_DIR, "admission", "global", { processPid: process.pid, source: "worker_cleanup_promoter", jobId }, async () => {
|
|
460
|
+
for (const queuedJob of listQueuedJobs()) {
|
|
461
|
+
const current = readAnyJob(queuedJob.id);
|
|
462
|
+
if (!current || current.status !== "queued") continue;
|
|
463
|
+
|
|
464
|
+
let spawnedWorker;
|
|
465
|
+
const promotedAt = new Date().toISOString();
|
|
466
|
+
if (!existsSync(current.archivePath)) {
|
|
467
|
+
await failQueuedPromotion(current.id, `Queued oracle archive is missing: ${current.archivePath}`, promotedAt);
|
|
468
|
+
continue;
|
|
469
|
+
}
|
|
470
|
+
const runtimeLeaseAcquired = await tryAcquireRuntimeLeaseForJob(current, promotedAt);
|
|
471
|
+
if (!runtimeLeaseAcquired) break;
|
|
472
|
+
|
|
473
|
+
const conversationLeaseAcquired = await tryAcquireConversationLeaseForJob(current, promotedAt);
|
|
474
|
+
if (!conversationLeaseAcquired) {
|
|
475
|
+
await releaseLease(ORACLE_STATE_DIR, "runtime", current.runtimeId).catch(() => undefined);
|
|
476
|
+
continue;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
try {
|
|
480
|
+
await mutateAnyJob(current.id, (latest) => {
|
|
481
|
+
if (latest.status !== "queued") throw new Error(`Queued job ${latest.id} changed state during cleanup promotion (${latest.status})`);
|
|
482
|
+
return {
|
|
483
|
+
...latest,
|
|
484
|
+
...phasePatch("submitted", {
|
|
485
|
+
status: "submitted",
|
|
486
|
+
submittedAt: latest.submittedAt || promotedAt,
|
|
487
|
+
}, promotedAt),
|
|
488
|
+
};
|
|
489
|
+
});
|
|
490
|
+
|
|
491
|
+
spawnedWorker = await spawnDetachedWorker(current.id);
|
|
492
|
+
await mutateAnyJob(current.id, (latest) => {
|
|
493
|
+
if (hasDurableWorkerHandoff(latest)) {
|
|
494
|
+
return {
|
|
495
|
+
...latest,
|
|
496
|
+
workerPid: latest.workerPid || spawnedWorker.pid,
|
|
497
|
+
workerNonce: latest.workerNonce || spawnedWorker.workerNonce,
|
|
498
|
+
workerStartedAt: latest.workerStartedAt || spawnedWorker.workerStartedAt,
|
|
499
|
+
};
|
|
500
|
+
}
|
|
501
|
+
return {
|
|
502
|
+
...latest,
|
|
503
|
+
workerPid: spawnedWorker.pid,
|
|
504
|
+
workerNonce: spawnedWorker.workerNonce,
|
|
505
|
+
workerStartedAt: spawnedWorker.workerStartedAt,
|
|
506
|
+
};
|
|
507
|
+
});
|
|
508
|
+
} catch (error) {
|
|
509
|
+
const latest = readAnyJob(current.id);
|
|
510
|
+
if (hasDurableWorkerHandoff(latest)) {
|
|
511
|
+
await log(`Queued promotion handoff already durable for ${current.id}; leaving active job intact`).catch(() => undefined);
|
|
512
|
+
continue;
|
|
513
|
+
}
|
|
514
|
+
if (spawnedWorker) {
|
|
515
|
+
await terminateWorkerPid(spawnedWorker.pid, spawnedWorker.workerStartedAt).catch(() => undefined);
|
|
516
|
+
}
|
|
517
|
+
const failedAt = new Date().toISOString();
|
|
518
|
+
if (latest && !["complete", "failed", "cancelled"].includes(String(latest.status || ""))) {
|
|
519
|
+
await failQueuedPromotion(current.id, error instanceof Error ? error.message : String(error), failedAt);
|
|
520
|
+
}
|
|
521
|
+
if (spawnedWorker) {
|
|
522
|
+
let cleanupWarnings = [];
|
|
523
|
+
try {
|
|
524
|
+
cleanupWarnings = await cleanupRuntime(current);
|
|
525
|
+
} catch (cleanupError) {
|
|
526
|
+
const message = `Cleanup-driven promotion teardown warning for ${current.id}: ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}`;
|
|
527
|
+
cleanupWarnings = [message];
|
|
528
|
+
await log(message).catch(() => undefined);
|
|
529
|
+
}
|
|
530
|
+
if (cleanupWarnings.length > 0) {
|
|
531
|
+
await mutateAnyJob(current.id, (job) => ({
|
|
532
|
+
...job,
|
|
533
|
+
cleanupWarnings: [...(job.cleanupWarnings || []), ...cleanupWarnings],
|
|
534
|
+
lastCleanupAt: failedAt,
|
|
535
|
+
error: [job.error, ...cleanupWarnings].filter(Boolean).join("\n"),
|
|
536
|
+
})).catch(() => undefined);
|
|
537
|
+
await log(`Stopping queued cleanup promotion after ${current.id} because teardown left ${cleanupWarnings.length} warning(s)`).catch(() => undefined);
|
|
538
|
+
break;
|
|
539
|
+
}
|
|
540
|
+
} else {
|
|
541
|
+
await releaseLease(ORACLE_STATE_DIR, "conversation", current.conversationId).catch(() => undefined);
|
|
542
|
+
await releaseLease(ORACLE_STATE_DIR, "runtime", current.runtimeId).catch(() => undefined);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}).catch(async (error) => {
|
|
547
|
+
await log(`Queued cleanup promotion warning: ${error instanceof Error ? error.message : String(error)}`).catch(() => undefined);
|
|
548
|
+
});
|
|
549
|
+
}
|
|
550
|
+
|
|
318
551
|
function browserBaseArgs(job, options = {}) {
|
|
319
552
|
const args = ["--session", job.runtimeSessionName];
|
|
320
553
|
if (options.withLaunchOptions) {
|
|
@@ -1510,6 +1743,8 @@ async function run() {
|
|
|
1510
1743
|
const baselineAssistantCount = (await assistantMessages(currentJob)).length;
|
|
1511
1744
|
await log(`Assistant response count before send: ${baselineAssistantCount}`);
|
|
1512
1745
|
await clickSend(currentJob);
|
|
1746
|
+
await log(`Waiting ${POST_SEND_SETTLE_MS}ms after send to avoid streaming interruption`);
|
|
1747
|
+
await sleep(POST_SEND_SETTLE_MS);
|
|
1513
1748
|
|
|
1514
1749
|
const chatUrl = await waitForStableChatUrl(currentJob, currentJob.chatUrl);
|
|
1515
1750
|
const conversationId = parseConversationId(chatUrl) || currentJob.conversationId;
|
|
@@ -1532,6 +1767,7 @@ async function run() {
|
|
|
1532
1767
|
responsePath: currentJob.responsePath,
|
|
1533
1768
|
responseFormat: "text/plain",
|
|
1534
1769
|
artifactFailureCount,
|
|
1770
|
+
cleanupPending: true,
|
|
1535
1771
|
}),
|
|
1536
1772
|
{ force: true },
|
|
1537
1773
|
);
|
|
@@ -1548,13 +1784,39 @@ async function run() {
|
|
|
1548
1784
|
status: "failed",
|
|
1549
1785
|
completedAt: new Date().toISOString(),
|
|
1550
1786
|
error: message,
|
|
1787
|
+
cleanupPending: true,
|
|
1551
1788
|
}),
|
|
1552
1789
|
{ force: true },
|
|
1553
1790
|
);
|
|
1554
1791
|
process.exitCode = 1;
|
|
1555
1792
|
}
|
|
1556
1793
|
} finally {
|
|
1557
|
-
|
|
1794
|
+
let cleanupWarnings = [];
|
|
1795
|
+
try {
|
|
1796
|
+
cleanupWarnings = await cleanupRuntime(currentJob);
|
|
1797
|
+
} catch (error) {
|
|
1798
|
+
cleanupWarnings = [`Runtime cleanup failed before queued promotion: ${error instanceof Error ? error.message : String(error)}`];
|
|
1799
|
+
await log(cleanupWarnings[0]).catch(() => undefined);
|
|
1800
|
+
}
|
|
1801
|
+
if (currentJob?.id) {
|
|
1802
|
+
const cleanupAt = new Date().toISOString();
|
|
1803
|
+
await mutateJob((job) => ({
|
|
1804
|
+
...job,
|
|
1805
|
+
cleanupPending: false,
|
|
1806
|
+
...(cleanupWarnings.length > 0
|
|
1807
|
+
? {
|
|
1808
|
+
cleanupWarnings: [...(job.cleanupWarnings || []), ...cleanupWarnings],
|
|
1809
|
+
lastCleanupAt: cleanupAt,
|
|
1810
|
+
error: [job.error, ...cleanupWarnings].filter(Boolean).join("\n"),
|
|
1811
|
+
}
|
|
1812
|
+
: { lastCleanupAt: cleanupAt }),
|
|
1813
|
+
})).catch(() => undefined);
|
|
1814
|
+
}
|
|
1815
|
+
if (cleanupWarnings.length === 0) {
|
|
1816
|
+
await promoteQueuedJobsAfterCleanup().catch(() => undefined);
|
|
1817
|
+
} else {
|
|
1818
|
+
await log(`Skipping queued promotion because runtime cleanup left ${cleanupWarnings.length} warning(s)`).catch(() => undefined);
|
|
1819
|
+
}
|
|
1558
1820
|
}
|
|
1559
1821
|
}
|
|
1560
1822
|
|