doer-agent 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +390 -265
- package/package.json +1 -1
package/dist/agent.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { spawn, spawnSync } from "node:child_process";
|
|
2
|
-
import { existsSync, statSync } from "node:fs";
|
|
2
|
+
import { createWriteStream, existsSync, statSync } from "node:fs";
|
|
3
3
|
import { chmod, mkdir, open, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
@@ -13,10 +13,16 @@ const activeTaskCancelRequests = new Map();
|
|
|
13
13
|
let workspaceRootOverride = null;
|
|
14
14
|
const fsRpcCodec = StringCodec();
|
|
15
15
|
const shellRpcCodec = StringCodec();
|
|
16
|
+
const runRpcCodec = StringCodec();
|
|
17
|
+
const activeRuns = new Map();
|
|
18
|
+
const retainedRuns = new Map();
|
|
16
19
|
/**
 * Normalize a user id into a restricted character set.
 *
 * Surrounding whitespace is trimmed and every character outside
 * `[a-zA-Z0-9_-]` is replaced with `_`.
 *
 * @param {string} userId - Raw user id.
 * @returns {string} The normalized id, or "anonymous" when nothing is left after trimming.
 */
function sanitizeUserId(userId) {
    const safeId = userId.trim().replace(/[^a-zA-Z0-9_-]/g, "_");
    if (safeId.length === 0) {
        return "anonymous";
    }
    return safeId;
}
|
|
23
|
+
/**
 * Build the subject on which run RPC requests for one agent are received.
 *
 * The user id goes through `sanitizeUserId`; the agent id is only trimmed.
 *
 * @param {string} userId - Owner of the agent.
 * @param {string} agentId - Agent identifier.
 * @returns {string} Subject of the form `doer.agent.run.rpc.<user>.<agent>`.
 */
function buildAgentRunRpcSubject(userId, agentId) {
    const userSegment = sanitizeUserId(userId);
    const agentSegment = agentId.trim();
    return `doer.agent.run.rpc.${userSegment}.${agentSegment}`;
}
|
|
20
26
|
function normalizeNatsServers(value) {
|
|
21
27
|
if (!Array.isArray(value)) {
|
|
22
28
|
return [];
|
|
@@ -92,12 +98,6 @@ async function initJetStreamContext(args) {
|
|
|
92
98
|
const nc = await connect(args.token ? { servers: args.servers, token: args.token } : { servers: args.servers });
|
|
93
99
|
const jsm = await nc.jetstreamManager();
|
|
94
100
|
await ensureJetStreamInfra({ jsm, stream, subject, durable });
|
|
95
|
-
await ensureJetStreamInfra({
|
|
96
|
-
jsm,
|
|
97
|
-
stream: args.taskStream,
|
|
98
|
-
subject: args.taskSubject,
|
|
99
|
-
durable: args.taskDurable,
|
|
100
|
-
});
|
|
101
101
|
void nc.closed().then((error) => {
|
|
102
102
|
if (error) {
|
|
103
103
|
writeAgentInfraError(`nats connection closed with error: ${error.message}`);
|
|
@@ -126,14 +126,10 @@ async function initJetStreamContext(args) {
|
|
|
126
126
|
js: nc.jetstream(),
|
|
127
127
|
jsm,
|
|
128
128
|
codec: JSONCodec(),
|
|
129
|
-
taskCodec: JSONCodec(),
|
|
130
129
|
subject,
|
|
131
130
|
stream,
|
|
132
131
|
durable,
|
|
133
132
|
servers: args.servers,
|
|
134
|
-
taskStream: args.taskStream,
|
|
135
|
-
taskSubject: args.taskSubject,
|
|
136
|
-
taskDurable: args.taskDurable,
|
|
137
133
|
};
|
|
138
134
|
}
|
|
139
135
|
function resolveCodexHomePath() {
|
|
@@ -371,6 +367,275 @@ function writeRpcStream(requestId, stream, chunk) {
|
|
|
371
367
|
/**
 * Print one status line for an RPC request to stdout.
 *
 * @param {string} requestId - Id of the RPC request the line belongs to.
 * @param {string} message - Status text.
 */
function writeRpcStatus(requestId, message) {
    const prefix = `[doer-agent][rpc=${requestId}][status]`;
    process.stdout.write(`${prefix} ${message}\n`);
}
|
|
370
|
+
/**
 * Print one status line for a managed run to stdout.
 *
 * @param {string} runId - Id of the run the line belongs to.
 * @param {string} message - Status text.
 */
function writeRunStatus(runId, message) {
    const prefix = `[doer-agent][run=${runId}][status]`;
    process.stdout.write(`${prefix} ${message}\n`);
}
|
|
373
|
+
/**
 * Mirror a chunk of run output to the agent's own stdout/stderr, one prefixed
 * line per line of the chunk.
 *
 * @param {string} runId - Id of the run that produced the output.
 * @param {string} stream - "stdout" selects process.stdout; anything else goes to process.stderr.
 * @param {string} chunk - Text to mirror; split on LF or CRLF.
 */
function writeRunStream(runId, stream, chunk) {
    const sink = stream === "stdout" ? process.stdout : process.stderr;
    const prefix = `[doer-agent][run=${runId}][${stream}]`;
    const pieces = chunk.split(/\r?\n/);
    pieces.forEach((piece, position) => {
        // A chunk ending in a newline yields one trailing empty piece; skip only that one
        // so blank lines inside the chunk are still mirrored.
        const isTrailingEmpty = !piece && position === pieces.length - 1;
        if (!isTrailingEmpty) {
            sink.write(`${prefix} ${piece}\n`);
        }
    });
}
|
|
384
|
+
/**
 * Read an optional numeric field from an RPC payload.
 *
 * Only real numbers and non-blank strings count as "supplied". `null`, `""`,
 * booleans and other values yield NaN so callers fall back to their default
 * (plain `Number(null)` / `Number("")` would silently produce 0).
 */
function parseOptionalRunRpcNumber(value) {
    if (typeof value === "number") {
        return value;
    }
    if (typeof value === "string" && value.trim() !== "") {
        return Number(value);
    }
    return Number.NaN;
}
/**
 * Validate a decoded run RPC payload and reduce it to a normalized request.
 *
 * @param {{ request: object, agentId: string }} args - `request` is the decoded
 *   payload; `agentId` is the id of this agent, which the payload must match.
 * @returns {object} Normalized request with `requestId`, `action`
 *   ("start" | "cancel" | "get" | "list"), `runId`, `command`, `cwd`, `chatId`,
 *   `responseSubject`, `sinceSeq` (non-negative integer or null), `limit`
 *   (1..200, default 50), `runtimeEnvPatch` and `codexAuthBundle`.
 * @throws {Error} When the payload is not an object, or when requestId,
 *   responseSubject, a matching agentId, the command (for "start") or the runId
 *   (for "get"/"cancel") is missing.
 */
function normalizeRunRpcRequest(args) {
    const request = args.request;
    if (request === null || typeof request !== "object") {
        throw new Error("invalid request payload");
    }
    const readTrimmed = (value) => (typeof value === "string" ? value.trim() : "");
    const requestId = readTrimmed(request.requestId);
    if (!requestId) {
        throw new Error("missing requestId");
    }
    const requestAgentId = readTrimmed(request.agentId);
    if (!requestAgentId || requestAgentId !== args.agentId) {
        throw new Error("agent id mismatch");
    }
    const actionRaw = readTrimmed(request.action);
    // Any action other than cancel/get/list (including an absent one) is treated as "start".
    const action = actionRaw === "cancel" || actionRaw === "get" || actionRaw === "list" ? actionRaw : "start";
    const responseSubject = readTrimmed(request.responseSubject);
    if (!responseSubject) {
        throw new Error("missing responseSubject");
    }
    const runId = readTrimmed(request.runId) || null;
    const command = readTrimmed(request.command) || null;
    if (action === "start" && !command) {
        throw new Error("missing command");
    }
    if ((action === "get" || action === "cancel") && !runId) {
        throw new Error("missing runId");
    }
    const cwd = readTrimmed(request.cwd) || null;
    const chatId = readTrimmed(request.chatId) || null;
    const sinceSeqRaw = parseOptionalRunRpcNumber(request.sinceSeq);
    const sinceSeq = Number.isInteger(sinceSeqRaw) && sinceSeqRaw >= 0 ? sinceSeqRaw : null;
    const limitRaw = parseOptionalRunRpcNumber(request.limit);
    const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(Math.floor(limitRaw), 200)) : 50;
    return {
        requestId,
        action,
        runId,
        command,
        cwd,
        chatId,
        responseSubject,
        sinceSeq,
        limit,
        runtimeEnvPatch: normalizeEnvPatch(request.runtimeEnvPatch),
        codexAuthBundle: normalizeShellRpcCodexAuthBundle(request.codexAuth),
    };
}
|
|
427
|
+
/**
 * Publish a run RPC response: the payload is JSON-serialized, encoded with
 * `runRpcCodec` and published on the given subject.
 *
 * @param {{ nc: object, responseSubject: string, payload: object }} args -
 *   `nc` must expose `publish(subject, data)`.
 */
function publishRunRpcResponse(args) {
    const { nc, responseSubject, payload } = args;
    const body = JSON.stringify(payload);
    nc.publish(responseSubject, runRpcCodec.encode(body));
}
|
|
430
|
+
/**
 * Return the directory that holds per-run log files, creating it if needed.
 *
 * The base directory is `workspaceRootOverride` when set, otherwise the
 * trimmed WORKSPACE environment variable, otherwise the current working
 * directory. Logs live under `<base>/.doer-agent/runs`.
 *
 * @returns {Promise<string>} Path of the (now existing) runs directory.
 */
async function resolveRunLogsDir() {
    let baseDir = workspaceRootOverride;
    if (baseDir === null || baseDir === undefined) {
        baseDir = process.env.WORKSPACE?.trim() || process.cwd();
    }
    const runsDir = path.join(baseDir, ".doer-agent", "runs");
    await mkdir(runsDir, { recursive: true });
    return runsDir;
}
|
|
436
|
+
/**
 * Copy a run task so callers can hand it out without exposing the live object.
 *
 * Top-level fields are copied shallowly; each event and its payload is copied
 * one level deep.
 *
 * @param {object} task - Task with an `events` array.
 * @param {number|null} [sinceSeq] - When a number, only events whose `seq` is
 *   greater than it are included; any other value keeps every event.
 * @returns {object} The copied task.
 */
function cloneRunTask(task, sinceSeq) {
    const hasCursor = typeof sinceSeq === "number";
    const events = [];
    for (const event of task.events) {
        if (hasCursor && !(event.seq > sinceSeq)) {
            continue;
        }
        events.push({ ...event, payload: { ...event.payload } });
    }
    return { ...task, events };
}
|
|
444
|
+
/**
 * Append an event to a run task and advance its sequence counter.
 *
 * The new event gets the next sequence number (`task.agentEventAckSeq + 1`);
 * the task's `agentEventAckSeq` and `updatedAt` are updated in place.
 *
 * @param {object} task - Task to mutate.
 * @param {string} type - Event type stored on the event.
 * @param {object} payload - Event payload, stored by reference.
 */
function appendRunEvent(task, type, payload) {
    const stampedAt = formatLocalTimestamp();
    task.agentEventAckSeq += 1;
    task.updatedAt = stampedAt;
    task.events.push({
        seq: task.agentEventAckSeq,
        type,
        timestamp: stampedAt,
        payload,
    });
}
|
|
451
|
+
/**
 * Upper bound on snapshots kept in `retainedRuns`. 200 is also the largest
 * page a `list` request can ask for (the request limit is clamped to 200).
 */
const MAX_RETAINED_RUNS = 200;
/**
 * Store a snapshot of a run so it can still be queried after it leaves
 * `activeRuns`.
 *
 * The agent is a long-lived process and every snapshot carries the run's full
 * event history, so the map is capped: once it exceeds MAX_RETAINED_RUNS the
 * least recently persisted snapshots are evicted. Snapshots of runs that are
 * still active, and the one just stored, are never evicted.
 *
 * @param {object} task - Live task to snapshot (copied via `cloneRunTask`).
 */
function persistRetainedRun(task) {
    // Delete first so re-persisting a run moves it to the end of the Map's
    // insertion order, which is what the eviction loop treats as "newest".
    retainedRuns.delete(task.id);
    retainedRuns.set(task.id, cloneRunTask(task));
    if (retainedRuns.size <= MAX_RETAINED_RUNS) {
        return;
    }
    for (const runId of retainedRuns.keys()) {
        if (retainedRuns.size <= MAX_RETAINED_RUNS) {
            break;
        }
        if (runId === task.id || activeRuns.has(runId)) {
            continue;
        }
        retainedRuns.delete(runId);
    }
}
|
|
454
|
+
/**
 * Look up a run by id, preferring the live task over a retained snapshot.
 *
 * @param {string} runId - Run identifier.
 * @returns {object|null} The live task from `activeRuns`, else the snapshot
 *   from `retainedRuns`, else null.
 */
function getStoredRun(runId) {
    const liveEntry = activeRuns.get(runId);
    return liveEntry ? liveEntry.task : (retainedRuns.get(runId) ?? null);
}
|
|
461
|
+
/**
 * Spawn a shell command as a managed run, register it in `activeRuns`, and
 * return a snapshot of the new task.
 *
 * Output chunks are recorded as task events, appended to `<runId>.log` in the
 * run logs directory as JSON lines, and mirrored to the agent's own output.
 * When the child reports `'error'` or `'close'`, the task is finalized exactly
 * once: a terminal status event is appended, the task is persisted to the
 * retained store and removed from `activeRuns`, the log stream is ended and
 * the codex auth cleanup is invoked.
 *
 * @param {object} args - `requestId`, `runId`, `userId`, `agentId`, `command`,
 *   `cwd`, `chatId`, `runtimeEnvPatch`, `codexAuthBundle`, `agentToken`.
 * @returns {Promise<object>} Copy of the task in its initial "running" state.
 * @throws {Error} When a run with the same id is still active, or when
 *   preparing or spawning the command fails.
 */
async function startManagedRun(args) {
    // Do all awaiting BEFORE the child exists. A child emits 'error' (e.g. a
    // failed spawn) asynchronously; if an await sat between spawn and listener
    // registration, that event could fire with no listener and crash the agent.
    const logsDir = await resolveRunLogsDir();
    const logPath = path.join(logsDir, `${args.runId}.log`);
    const prepared = await prepareCommandExecution({
        cwd: args.cwd,
        runtimeEnvPatch: args.runtimeEnvPatch,
        codexAuthBundle: args.codexAuthBundle,
    });
    const releaseCodexAuth = () => {
        void prepared.codexAuthCleanup().catch(() => undefined);
    };
    let child;
    try {
        // Checked here (no await until registration below) so two concurrent
        // starts cannot both pass. Overwriting a live entry would orphan the
        // first process: it could no longer be cancelled, and its 'close'
        // handler would remove the second run from the registry.
        if (activeRuns.has(args.runId)) {
            throw new Error(`Run already active: ${args.runId}`);
        }
        child = spawnPreparedCommand({
            kind: "shell",
            command: args.command,
            patch: null,
            shellPath: prepared.shellPath,
            taskWorkspace: prepared.taskWorkspace,
            env: prepared.env,
            agentToken: args.agentToken,
        });
    }
    catch (error) {
        // Nothing was started, so nobody else will release the prepared auth material.
        releaseCodexAuth();
        throw error;
    }
    const logStream = createWriteStream(logPath, { flags: "a", encoding: "utf8" });
    // A stream 'error' without a listener is thrown as an uncaught exception;
    // a broken log file must not take the whole agent down.
    logStream.on("error", (error) => {
        const message = error instanceof Error ? error.message : String(error);
        writeRunStatus(args.runId, `log write failed error=${message}`);
    });
    const now = formatLocalTimestamp();
    const task = {
        id: args.runId,
        userId: args.userId,
        agentId: args.agentId,
        command: args.command,
        cwd: args.cwd,
        chatId: args.chatId,
        status: "running",
        cancelRequested: false,
        resultExitCode: null,
        resultSignal: null,
        error: null,
        createdAt: now,
        updatedAt: now,
        startedAt: now,
        finishedAt: null,
        agentEventAckSeq: 0,
        events: [],
    };
    appendRunEvent(task, "meta", {
        host: process.platform,
        pid: child.pid ?? null,
        startedAt: now,
        command: args.command,
        cwd: prepared.taskWorkspace,
        requestedCwd: args.cwd,
        shell: prepared.shellPath,
        logPath,
        ...prepared.taskGitMeta,
        ...prepared.codexAuthMeta,
    });
    appendRunEvent(task, "status", { status: "running" });
    const cancellation = createManagedCancellation(child);
    const requestCancel = () => {
        if (task.status === "completed" || task.status === "failed" || task.status === "canceled") {
            return;
        }
        task.cancelRequested = true;
        task.updatedAt = formatLocalTimestamp();
        writeRunStatus(task.id, "cancel requested");
        cancellation.requestCancel();
    };
    const recordChunk = (stream, chunk) => {
        appendRunEvent(task, stream, { chunk, at: formatLocalTimestamp() });
        logStream.write(JSON.stringify({ at: formatLocalTimestamp(), stream, chunk }) + "\n");
        writeRunStream(task.id, stream, chunk);
    };
    // 'error' and 'close' can both fire for the same child (a failed spawn
    // reports both). The first terminal event wins; the second is ignored so
    // the recorded error is not overwritten and cleanup does not run twice.
    let finalized = false;
    const finalizeRun = (statusLine) => {
        cancellation.clear();
        persistRetainedRun(task);
        activeRuns.delete(task.id);
        logStream.end();
        releaseCodexAuth();
        writeRunStatus(task.id, statusLine);
    };
    child.stdout.on("data", (chunk) => recordChunk("stdout", chunk));
    child.stderr.on("data", (chunk) => recordChunk("stderr", chunk));
    child.once("error", (error) => {
        if (finalized) {
            return;
        }
        finalized = true;
        const message = error instanceof Error ? error.message : String(error);
        task.status = "failed";
        task.error = message;
        task.finishedAt = formatLocalTimestamp();
        appendRunEvent(task, "status", { status: "failed", error: message, finishedAt: task.finishedAt });
        finalizeRun(`failed error=${message}`);
    });
    child.once("close", (code, signal) => {
        if (finalized) {
            return;
        }
        finalized = true;
        task.resultExitCode = typeof code === "number" ? code : null;
        task.resultSignal = signal;
        task.finishedAt = formatLocalTimestamp();
        // A requested cancel takes precedence over the exit code; a missing exit code counts as failure.
        task.status = task.cancelRequested ? "canceled" : (task.resultExitCode ?? 1) === 0 ? "completed" : "failed";
        task.error = task.status === "failed" ? `Command exited with code ${task.resultExitCode ?? "null"}` : null;
        appendRunEvent(task, "status", {
            status: task.status,
            exitCode: task.resultExitCode,
            signal: task.resultSignal,
            error: task.error,
            finishedAt: task.finishedAt,
        });
        finalizeRun(`completed status=${task.status} exitCode=${task.resultExitCode ?? "null"} signal=${task.resultSignal ?? "null"}`);
    });
    activeRuns.set(task.id, { task, child, logPath, logStream, requestCancel });
    persistRetainedRun(task);
    writeRunStatus(task.id, `started requestId=${args.requestId} cwd=${prepared.taskWorkspace}`);
    return cloneRunTask(task);
}
|
|
566
|
+
/**
 * Handle one run RPC message: decode it, dispatch on the requested action
 * ("start", "list", "cancel", otherwise "get") and publish the response on the
 * subject named in the request.
 *
 * Failures are answered with `{ requestId, ok: false, error }` when the
 * response subject is already known, and are always logged. This function
 * does not reject: the subscription callback calls it with `void`, so an
 * escaping rejection would be unhandled.
 *
 * @param {object} args - `msg` (message with encoded `data`), `jetstream`
 *   (exposes the connection as `nc`), `userId`, `agentId`, `agentToken`.
 * @returns {Promise<void>}
 */
async function handleRunRpcMessage(args) {
    let requestId = "unknown";
    let responseSubject = "";
    try {
        const payload = JSON.parse(runRpcCodec.decode(args.msg.data));
        const request = normalizeRunRpcRequest({ request: payload, agentId: args.agentId });
        requestId = request.requestId;
        responseSubject = request.responseSubject;
        const reply = (body) => {
            publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, ...body } });
        };
        if (request.action === "start") {
            const task = await startManagedRun({
                requestId,
                runId: request.runId ?? requestId,
                userId: args.userId,
                agentId: args.agentId,
                command: request.command ?? "",
                cwd: request.cwd,
                chatId: request.chatId,
                runtimeEnvPatch: request.runtimeEnvPatch,
                codexAuthBundle: request.codexAuthBundle,
                agentToken: args.agentToken,
            });
            reply({ task });
            return;
        }
        if (request.action === "list") {
            const liveTasks = [...activeRuns.values()].map((entry) => entry.task);
            // A run that is still active also has a retained snapshot; list the live task only.
            const finishedTasks = [...retainedRuns.values()].filter((task) => !activeRuns.has(task.id));
            // Sort and cut to the requested page first, then copy: cloning
            // copies every event, so only the returned tasks should pay for it.
            const tasks = [...liveTasks, ...finishedTasks]
                .sort((a, b) => Date.parse(b.updatedAt) - Date.parse(a.updatedAt))
                .slice(0, request.limit)
                .map((task) => cloneRunTask(task));
            reply({ tasks });
            return;
        }
        const stored = request.runId ? getStoredRun(request.runId) : null;
        if (!stored || stored.agentId !== args.agentId || stored.userId !== args.userId) {
            throw new Error("Run not found");
        }
        if (request.action === "cancel") {
            const active = activeRuns.get(stored.id);
            active?.requestCancel();
            reply({ task: cloneRunTask(active?.task ?? stored) });
            return;
        }
        reply({ task: cloneRunTask(stored, request.sinceSeq) });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        if (responseSubject) {
            try {
                publishRunRpcResponse({
                    nc: args.jetstream.nc,
                    responseSubject,
                    payload: { requestId, ok: false, error: message },
                });
            }
            catch (publishError) {
                // The failure may itself be a publish failure (e.g. the connection is gone);
                // log it rather than letting it escape as an unhandled rejection.
                const publishMessage = publishError instanceof Error ? publishError.message : String(publishError);
                writeAgentError(`run rpc error response failed requestId=${requestId} error=${publishMessage}`);
            }
        }
        writeAgentError(`run rpc failed requestId=${requestId} error=${message}`);
    }
}
|
|
625
|
+
/**
 * Subscribe to this agent's run RPC subject and route every message to
 * `handleRunRpcMessage`.
 *
 * Subscription errors delivered to the callback are logged and otherwise
 * ignored; the handler is invoked without awaiting its promise.
 *
 * @param {object} args - `jetstream` (exposes the connection as `nc`),
 *   `userId`, `agentId`, `agentToken`.
 */
function subscribeToRunRpc(args) {
    const { jetstream, userId, agentId, agentToken } = args;
    const subject = buildAgentRunRpcSubject(userId, agentId);
    const onMessage = (error, msg) => {
        if (!error) {
            void handleRunRpcMessage({ msg, jetstream, userId, agentId, agentToken });
            return;
        }
        const message = error instanceof Error ? error.message : String(error);
        writeAgentError(`run rpc subscription error: ${message}`);
    };
    jetstream.nc.subscribe(subject, { callback: onMessage });
    writeAgentInfo(`run rpc subscribed subject=${subject}`);
}
|
|
374
639
|
function isLikelyNatsAuthError(error) {
|
|
375
640
|
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
|
376
641
|
return (message.includes("auth")
|
|
@@ -827,53 +1092,21 @@ async function handleShellRpcMessage(args) {
|
|
|
827
1092
|
requestId = request.requestId;
|
|
828
1093
|
responseSubject = request.responseSubject;
|
|
829
1094
|
const startedAtMs = Date.now();
|
|
830
|
-
const
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
...request.runtimeEnvPatch,
|
|
835
|
-
...(codexAuth?.envPatch ?? {}),
|
|
836
|
-
WORKSPACE: taskWorkspace,
|
|
837
|
-
};
|
|
838
|
-
const taskGitEnv = await prepareTaskGitEnv({
|
|
839
|
-
cwd: taskWorkspace,
|
|
840
|
-
baseEnvPatch: baseTaskEnvPatch,
|
|
1095
|
+
const prepared = await prepareCommandExecution({
|
|
1096
|
+
cwd: request.cwd,
|
|
1097
|
+
runtimeEnvPatch: request.runtimeEnvPatch,
|
|
1098
|
+
codexAuthBundle: request.codexAuthBundle,
|
|
841
1099
|
});
|
|
842
|
-
const
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
PATH: taskPath,
|
|
853
|
-
DOER_AGENT_TOKEN: args.agentToken,
|
|
854
|
-
},
|
|
855
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
856
|
-
})
|
|
857
|
-
: spawn(request.command ?? "", {
|
|
858
|
-
cwd: taskWorkspace,
|
|
859
|
-
shell: shellPath,
|
|
860
|
-
detached: process.platform !== "win32",
|
|
861
|
-
env: {
|
|
862
|
-
...process.env,
|
|
863
|
-
...baseTaskEnvPatch,
|
|
864
|
-
...taskGitEnv.envPatch,
|
|
865
|
-
PATH: taskPath,
|
|
866
|
-
DOER_AGENT_TOKEN: args.agentToken,
|
|
867
|
-
},
|
|
868
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
869
|
-
});
|
|
870
|
-
if (request.kind === "apply_patch") {
|
|
871
|
-
child.stdin?.write(request.patch ?? "");
|
|
872
|
-
child.stdin?.end();
|
|
873
|
-
}
|
|
874
|
-
writeRpcStatus(requestId, `started kind=${request.kind} cwd=${taskWorkspace} shell=${request.kind === "shell" ? shellPath : "apply_patch"}`);
|
|
875
|
-
child.stdout.setEncoding("utf8");
|
|
876
|
-
child.stderr.setEncoding("utf8");
|
|
1100
|
+
const child = spawnPreparedCommand({
|
|
1101
|
+
kind: request.kind,
|
|
1102
|
+
command: request.command,
|
|
1103
|
+
patch: request.patch,
|
|
1104
|
+
shellPath: prepared.shellPath,
|
|
1105
|
+
taskWorkspace: prepared.taskWorkspace,
|
|
1106
|
+
env: prepared.env,
|
|
1107
|
+
agentToken: args.agentToken,
|
|
1108
|
+
});
|
|
1109
|
+
writeRpcStatus(requestId, `started kind=${request.kind} cwd=${prepared.taskWorkspace} shell=${request.kind === "shell" ? prepared.shellPath : "apply_patch"}`);
|
|
877
1110
|
child.stdout.on("data", (chunk) => {
|
|
878
1111
|
stdout += chunk;
|
|
879
1112
|
writeRpcStream(requestId, "stdout", chunk);
|
|
@@ -899,6 +1132,7 @@ async function handleShellRpcMessage(args) {
|
|
|
899
1132
|
}).finally(() => {
|
|
900
1133
|
clearTimeout(timeout);
|
|
901
1134
|
});
|
|
1135
|
+
await prepared.codexAuthCleanup().catch(() => undefined);
|
|
902
1136
|
publishShellRpcResponse({
|
|
903
1137
|
nc: args.jetstream.nc,
|
|
904
1138
|
responseSubject,
|
|
@@ -1154,6 +1388,93 @@ async function prepareCodexAuthBundle(bundle) {
|
|
|
1154
1388
|
},
|
|
1155
1389
|
};
|
|
1156
1390
|
}
|
|
1391
|
+
/**
 * Resolve everything needed to spawn a command: shell path, workspace
 * directory, environment, and the metadata/cleanup produced while preparing
 * codex auth and the task git environment.
 *
 * Environment precedence, lowest to highest: `process.env`, the runtime env
 * patch, the codex auth env patch, `WORKSPACE`, the task git env patch, and
 * finally `PATH` (the agent's `runtime/bin` prepended to the current PATH).
 *
 * @param {{ cwd: (string|null), runtimeEnvPatch: object, codexAuthBundle: * }} args
 * @returns {Promise<object>} `shellPath`, `taskWorkspace`, `taskPath`, `env`,
 *   `taskGitMeta`, `codexAuthMeta` and `codexAuthCleanup` (always callable;
 *   the caller invokes it when the command has finished).
 * @throws Whatever the preparation helpers throw. If the git environment
 *   cannot be prepared, the codex auth cleanup is run before rethrowing.
 */
async function prepareCommandExecution(args) {
    const shellPath = resolveShellPath();
    const taskWorkspace = resolveTaskWorkspace(args.cwd);
    const codexAuth = await prepareCodexAuthBundle(args.codexAuthBundle);
    const codexAuthCleanup = codexAuth?.cleanup ?? (async () => { });
    const baseTaskEnvPatch = {
        ...args.runtimeEnvPatch,
        ...(codexAuth?.envPatch ?? {}),
        WORKSPACE: taskWorkspace,
    };
    let taskGitEnv;
    try {
        taskGitEnv = await prepareTaskGitEnv({
            cwd: taskWorkspace,
            baseEnvPatch: baseTaskEnvPatch,
        });
    }
    catch (error) {
        // The caller never receives `codexAuthCleanup` on this path, so whatever
        // prepareCodexAuthBundle set up must be released here. Best effort: the
        // original failure is the one worth reporting.
        try {
            await codexAuthCleanup();
        }
        catch {
            // ignore cleanup failure and surface the original error below
        }
        throw error;
    }
    const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
    const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
    return {
        shellPath,
        taskWorkspace,
        taskPath,
        env: {
            ...process.env,
            ...baseTaskEnvPatch,
            ...taskGitEnv.envPatch,
            PATH: taskPath,
        },
        taskGitMeta: taskGitEnv.meta ?? {},
        codexAuthMeta: codexAuth?.meta ?? { codexAuthSynced: false },
        codexAuthCleanup,
    };
}
|
|
1421
|
+
/**
 * Spawn the child process for a prepared command.
 *
 * `kind === "apply_patch"` runs the `apply_patch` executable and feeds
 * `args.patch` to its stdin; any other kind runs `args.command` through the
 * shell given by `args.shellPath` with stdin ignored. On non-Windows
 * platforms the child is spawned detached. stdout and stderr are switched to
 * UTF-8 so `'data'` listeners receive strings.
 *
 * @param {object} args - `kind`, `command`, `patch`, `shellPath`,
 *   `taskWorkspace` (used as cwd), `env`, `agentToken` (exported to the child
 *   as DOER_AGENT_TOKEN, overriding any value in `env`).
 * @returns {import("node:child_process").ChildProcess} The spawned child.
 */
function spawnPreparedCommand(args) {
    const env = {
        ...args.env,
        DOER_AGENT_TOKEN: args.agentToken,
    };
    const isPatch = args.kind === "apply_patch";
    const sharedOptions = {
        cwd: args.taskWorkspace,
        detached: process.platform !== "win32",
        env,
    };
    const child = isPatch
        ? spawn("apply_patch", { ...sharedOptions, stdio: ["pipe", "pipe", "pipe"] })
        : spawn(args.command ?? "", { ...sharedOptions, shell: args.shellPath, stdio: ["ignore", "pipe", "pipe"] });
    if (isPatch) {
        // If the executable is missing or exits before reading its input, the
        // write below fails on the stdin stream. Without a listener that stream
        // 'error' is thrown as an uncaught exception; the failure itself still
        // reaches callers through the child's 'error'/'close' events.
        child.stdin?.on("error", (error) => {
            const message = error instanceof Error ? error.message : String(error);
            writeAgentError(`apply_patch stdin write failed: ${message}`);
        });
        child.stdin?.end(args.patch ?? "");
    }
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    return child;
}
|
|
1448
|
+
/**
 * Create an escalating cancellation controller for a child process.
 *
 * `requestCancel()` sends SIGINT immediately and schedules SIGTERM after
 * 1200 ms and SIGKILL after 3500 ms; repeated calls are no-ops. `clear()`
 * cancels any scheduled escalation. Signals are delivered through
 * `sendSignalToTaskProcess`, and the timers are unref'd where supported.
 *
 * @param {object} child - Child process handle passed to `sendSignalToTaskProcess`.
 * @returns {{ requestCancel: () => void, clear: () => void }}
 */
function createManagedCancellation(child) {
    let termTimer = null;
    let killTimer = null;
    let signaled = false;
    const scheduleSignal = (signal, delayMs) => {
        const timer = setTimeout(() => {
            sendSignalToTaskProcess(child, signal);
        }, delayMs);
        timer.unref?.();
        return timer;
    };
    const requestCancel = () => {
        if (signaled) {
            return;
        }
        signaled = true;
        sendSignalToTaskProcess(child, "SIGINT");
        termTimer = scheduleSignal("SIGTERM", 1200);
        killTimer = scheduleSignal("SIGKILL", 3500);
    };
    const clear = () => {
        for (const timer of [termTimer, killTimer]) {
            if (timer) {
                clearTimeout(timer);
            }
        }
    };
    return { requestCancel, clear };
}
|
|
1157
1478
|
async function runTask(args) {
|
|
1158
1479
|
activeTaskLogContext = {
|
|
1159
1480
|
jetstream: args.jetstream,
|
|
@@ -1355,22 +1676,14 @@ async function connectBootstrapWithRetry(args) {
|
|
|
1355
1676
|
if (natsServers.length === 0) {
|
|
1356
1677
|
throw new Error("No NATS servers configured by server");
|
|
1357
1678
|
}
|
|
1358
|
-
const taskConfig = parseBootstrapTaskConfig(natsBootstrap.tasks);
|
|
1359
|
-
if (!taskConfig) {
|
|
1360
|
-
throw new Error("Invalid task dispatch config from server");
|
|
1361
|
-
}
|
|
1362
1679
|
const natsToken = normalizeNatsToken(natsBootstrap.auth);
|
|
1363
|
-
const pendingTaskIds = normalizeTaskIds(natsBootstrap.pendingTaskIds);
|
|
1364
1680
|
const jetstream = await initJetStreamContext({
|
|
1365
1681
|
userId: args.userId,
|
|
1366
1682
|
servers: natsServers,
|
|
1367
1683
|
token: natsToken,
|
|
1368
|
-
taskStream: taskConfig.stream,
|
|
1369
|
-
taskSubject: taskConfig.subject,
|
|
1370
|
-
taskDurable: taskConfig.durable,
|
|
1371
1684
|
});
|
|
1372
|
-
writeAgentInfraError(`bootstrap ok servers=${natsServers.length}
|
|
1373
|
-
return { natsBootstrap,
|
|
1685
|
+
writeAgentInfraError(`bootstrap ok servers=${natsServers.length} eventStream=${jetstream.stream} eventSubject=${jetstream.subject}`);
|
|
1686
|
+
return { natsBootstrap, jetstream };
|
|
1374
1687
|
}
|
|
1375
1688
|
catch (error) {
|
|
1376
1689
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -1396,12 +1709,11 @@ async function main() {
|
|
|
1396
1709
|
throw new Error("user-id and agent-secret are required");
|
|
1397
1710
|
}
|
|
1398
1711
|
const agentToken = agentSecret;
|
|
1399
|
-
|
|
1712
|
+
const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
|
|
1400
1713
|
serverBaseUrl,
|
|
1401
1714
|
userId,
|
|
1402
1715
|
agentToken,
|
|
1403
1716
|
});
|
|
1404
|
-
const maxConcurrency = Math.max(1, parseEnvInteger(process.env.DOER_AGENT_MAX_CONCURRENCY, 5));
|
|
1405
1717
|
const agentVersion = await resolveAgentVersion();
|
|
1406
1718
|
const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
|
|
1407
1719
|
if (!initialAgentId) {
|
|
@@ -1418,11 +1730,6 @@ async function main() {
|
|
|
1418
1730
|
process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
|
|
1419
1731
|
process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
|
|
1420
1732
|
process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
|
|
1421
|
-
process.stdout.write(`- taskStream: ${jetstream.taskStream}\n`);
|
|
1422
|
-
process.stdout.write(`- taskSubject: ${jetstream.taskSubject}\n`);
|
|
1423
|
-
process.stdout.write(`- taskDurable: ${jetstream.taskDurable}\n`);
|
|
1424
|
-
process.stdout.write(`- pendingTasks: ${pendingTaskIds.length}\n`);
|
|
1425
|
-
process.stdout.write(`- maxConcurrency: ${maxConcurrency}\n\n`);
|
|
1426
1733
|
process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
|
|
1427
1734
|
if (requestedServerBaseUrl !== serverBaseUrl) {
|
|
1428
1735
|
writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
|
|
@@ -1444,27 +1751,6 @@ async function main() {
|
|
|
1444
1751
|
heartbeatHealthy = false;
|
|
1445
1752
|
});
|
|
1446
1753
|
}, 10_000);
|
|
1447
|
-
const inFlightTasks = new Set();
|
|
1448
|
-
async function waitForAvailableSlot() {
|
|
1449
|
-
while (inFlightTasks.size >= maxConcurrency) {
|
|
1450
|
-
try {
|
|
1451
|
-
await Promise.race(inFlightTasks);
|
|
1452
|
-
}
|
|
1453
|
-
catch {
|
|
1454
|
-
// keep draining slots even when a task fails.
|
|
1455
|
-
}
|
|
1456
|
-
}
|
|
1457
|
-
}
|
|
1458
|
-
function trackInFlight(taskPromise) {
|
|
1459
|
-
inFlightTasks.add(taskPromise);
|
|
1460
|
-
void taskPromise.finally(() => {
|
|
1461
|
-
inFlightTasks.delete(taskPromise);
|
|
1462
|
-
});
|
|
1463
|
-
}
|
|
1464
|
-
function scheduleTask(taskPromiseFactory) {
|
|
1465
|
-
const taskPromise = taskPromiseFactory();
|
|
1466
|
-
trackInFlight(taskPromise);
|
|
1467
|
-
}
|
|
1468
1754
|
subscribeToFsRpc({
|
|
1469
1755
|
jetstream,
|
|
1470
1756
|
serverBaseUrl,
|
|
@@ -1478,176 +1764,15 @@ async function main() {
|
|
|
1478
1764
|
agentId: initialAgentId,
|
|
1479
1765
|
agentToken,
|
|
1480
1766
|
});
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
});
|
|
1491
|
-
if (task) {
|
|
1492
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1493
|
-
}
|
|
1494
|
-
}
|
|
1495
|
-
catch (error) {
|
|
1496
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1497
|
-
writeAgentError(`pending task bootstrap failed taskId=${pendingTaskId}: ${message}`);
|
|
1498
|
-
}
|
|
1499
|
-
});
|
|
1500
|
-
}
|
|
1501
|
-
let connected = false;
|
|
1502
|
-
while (true) {
|
|
1503
|
-
try {
|
|
1504
|
-
const consumer = await jetstream.js.consumers.get(jetstream.taskStream, jetstream.taskDurable);
|
|
1505
|
-
if (!connected) {
|
|
1506
|
-
writeAgentInfo(`connected to task stream (NATS ok) at=${formatLocalTimestamp()} userId=${userId}`);
|
|
1507
|
-
connected = true;
|
|
1508
|
-
}
|
|
1509
|
-
const messages = await consumer.fetch({ max_messages: 200, expires: 5_000 });
|
|
1510
|
-
for await (const msg of messages) {
|
|
1511
|
-
await waitForAvailableSlot();
|
|
1512
|
-
scheduleTask(async () => {
|
|
1513
|
-
let dispatch;
|
|
1514
|
-
try {
|
|
1515
|
-
dispatch = jetstream.taskCodec.decode(msg.data);
|
|
1516
|
-
}
|
|
1517
|
-
catch (error) {
|
|
1518
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1519
|
-
writeAgentError(`task dispatch decode failed: ${message}`);
|
|
1520
|
-
msg.term();
|
|
1521
|
-
return;
|
|
1522
|
-
}
|
|
1523
|
-
writeAgentInfo(`task dispatch received taskId=${dispatch.taskId} createdAt=${dispatch.createdAt} subject=${jetstream.taskSubject} durable=${jetstream.taskDurable}`);
|
|
1524
|
-
const ackKeepAliveIntervalMs = 10_000;
|
|
1525
|
-
let ackKeepAliveTimer = null;
|
|
1526
|
-
const stopAckKeepAlive = () => {
|
|
1527
|
-
if (ackKeepAliveTimer) {
|
|
1528
|
-
clearInterval(ackKeepAliveTimer);
|
|
1529
|
-
ackKeepAliveTimer = null;
|
|
1530
|
-
}
|
|
1531
|
-
};
|
|
1532
|
-
try {
|
|
1533
|
-
ackKeepAliveTimer = setInterval(() => {
|
|
1534
|
-
try {
|
|
1535
|
-
msg.working();
|
|
1536
|
-
}
|
|
1537
|
-
catch (error) {
|
|
1538
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1539
|
-
writeAgentError(`task dispatch keepalive failed taskId=${dispatch.taskId}: ${message}`);
|
|
1540
|
-
}
|
|
1541
|
-
}, ackKeepAliveIntervalMs);
|
|
1542
|
-
ackKeepAliveTimer.unref?.();
|
|
1543
|
-
if (dispatch.type === "cancel") {
|
|
1544
|
-
stopAckKeepAlive();
|
|
1545
|
-
const canceled = requestTaskCancellation(dispatch.taskId, "nats_dispatch");
|
|
1546
|
-
writeAgentInfo(`task cancel dispatch handled taskId=${dispatch.taskId} result=${canceled ? "signaled" : "not-running"}`);
|
|
1547
|
-
msg.ack();
|
|
1548
|
-
return;
|
|
1549
|
-
}
|
|
1550
|
-
const task = await claimTaskById({
|
|
1551
|
-
serverBaseUrl,
|
|
1552
|
-
userId,
|
|
1553
|
-
agentToken,
|
|
1554
|
-
taskId: dispatch.taskId,
|
|
1555
|
-
});
|
|
1556
|
-
if (!task) {
|
|
1557
|
-
stopAckKeepAlive();
|
|
1558
|
-
writeAgentInfo(`task dispatch acked without run taskId=${dispatch.taskId} reason=already-claimed`);
|
|
1559
|
-
msg.ack();
|
|
1560
|
-
return;
|
|
1561
|
-
}
|
|
1562
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1563
|
-
stopAckKeepAlive();
|
|
1564
|
-
msg.ack();
|
|
1565
|
-
writeAgentInfo(`task dispatch acked taskId=${dispatch.taskId}`);
|
|
1566
|
-
}
|
|
1567
|
-
catch (error) {
|
|
1568
|
-
stopAckKeepAlive();
|
|
1569
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1570
|
-
writeAgentError(`task dispatch handle failed taskId=${dispatch.taskId}: ${message}`);
|
|
1571
|
-
writeAgentError(`task dispatch sending nak taskId=${dispatch.taskId}`);
|
|
1572
|
-
msg.nak();
|
|
1573
|
-
}
|
|
1574
|
-
});
|
|
1575
|
-
}
|
|
1576
|
-
}
|
|
1577
|
-
catch (error) {
|
|
1578
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1579
|
-
if (connected) {
|
|
1580
|
-
writeAgentError(`task stream disconnected at=${formatLocalTimestamp()} reason=${message}`);
|
|
1581
|
-
}
|
|
1582
|
-
connected = false;
|
|
1583
|
-
if (isLikelyNatsAuthError(error)) {
|
|
1584
|
-
writeAgentError(`nats auth error detected. refreshing bootstrap credentials...`);
|
|
1585
|
-
}
|
|
1586
|
-
else if (isLikelyNatsReconnectError(error)) {
|
|
1587
|
-
writeAgentError(`nats connection lost. refreshing bootstrap/session...`);
|
|
1588
|
-
}
|
|
1589
|
-
else {
|
|
1590
|
-
writeAgentError(`task stream error detected. forcing bootstrap/session refresh... reason=${message}`);
|
|
1591
|
-
}
|
|
1592
|
-
if (inFlightTasks.size > 0) {
|
|
1593
|
-
writeAgentInfo(`waiting for in-flight tasks before reconnect count=${inFlightTasks.size}`);
|
|
1594
|
-
await Promise.allSettled(Array.from(inFlightTasks));
|
|
1595
|
-
}
|
|
1596
|
-
try {
|
|
1597
|
-
await jetstream.nc.close();
|
|
1598
|
-
}
|
|
1599
|
-
catch {
|
|
1600
|
-
// noop
|
|
1601
|
-
}
|
|
1602
|
-
const refreshed = await connectBootstrapWithRetry({
|
|
1603
|
-
serverBaseUrl,
|
|
1604
|
-
userId,
|
|
1605
|
-
agentToken,
|
|
1606
|
-
});
|
|
1607
|
-
natsBootstrap = refreshed.natsBootstrap;
|
|
1608
|
-
pendingTaskIds = refreshed.pendingTaskIds;
|
|
1609
|
-
jetstream = refreshed.jetstream;
|
|
1610
|
-
const refreshedAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
|
|
1611
|
-
if (!refreshedAgentId) {
|
|
1612
|
-
throw new Error("agent id missing from refreshed bootstrap");
|
|
1613
|
-
}
|
|
1614
|
-
subscribeToFsRpc({
|
|
1615
|
-
jetstream,
|
|
1616
|
-
serverBaseUrl,
|
|
1617
|
-
userId,
|
|
1618
|
-
agentId: refreshedAgentId,
|
|
1619
|
-
agentToken,
|
|
1620
|
-
});
|
|
1621
|
-
subscribeToShellRpc({
|
|
1622
|
-
jetstream,
|
|
1623
|
-
userId,
|
|
1624
|
-
agentId: refreshedAgentId,
|
|
1625
|
-
agentToken,
|
|
1626
|
-
});
|
|
1627
|
-
for (const pendingTaskId of pendingTaskIds) {
|
|
1628
|
-
await waitForAvailableSlot();
|
|
1629
|
-
scheduleTask(async () => {
|
|
1630
|
-
try {
|
|
1631
|
-
const task = await claimTaskById({
|
|
1632
|
-
serverBaseUrl,
|
|
1633
|
-
userId,
|
|
1634
|
-
agentToken,
|
|
1635
|
-
taskId: pendingTaskId,
|
|
1636
|
-
});
|
|
1637
|
-
if (task) {
|
|
1638
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1639
|
-
}
|
|
1640
|
-
}
|
|
1641
|
-
catch (pendingError) {
|
|
1642
|
-
const pendingMessage = pendingError instanceof Error ? pendingError.message : String(pendingError);
|
|
1643
|
-
writeAgentError(`pending task refresh failed taskId=${pendingTaskId}: ${pendingMessage}`);
|
|
1644
|
-
}
|
|
1645
|
-
});
|
|
1646
|
-
}
|
|
1647
|
-
writeAgentInfo(`nats credentials refreshed at=${formatLocalTimestamp()} agentId=${typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "unknown"}`);
|
|
1648
|
-
continue;
|
|
1649
|
-
}
|
|
1650
|
-
}
|
|
1767
|
+
subscribeToRunRpc({
|
|
1768
|
+
jetstream,
|
|
1769
|
+
userId,
|
|
1770
|
+
agentId: initialAgentId,
|
|
1771
|
+
agentToken,
|
|
1772
|
+
});
|
|
1773
|
+
await new Promise(() => {
|
|
1774
|
+
// Keep the long-lived agent process alive for RPC subscriptions and heartbeat.
|
|
1775
|
+
});
|
|
1651
1776
|
}
|
|
1652
1777
|
main().catch((error) => {
|
|
1653
1778
|
const message = error instanceof Error ? error.message : String(error);
|