doer-agent 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +427 -265
- package/package.json +1 -1
package/dist/agent.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { spawn, spawnSync } from "node:child_process";
|
|
2
|
-
import { existsSync, statSync } from "node:fs";
|
|
2
|
+
import { createWriteStream, existsSync, statSync } from "node:fs";
|
|
3
3
|
import { chmod, mkdir, open, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
@@ -13,10 +13,16 @@ const activeTaskCancelRequests = new Map();
|
|
|
13
13
|
let workspaceRootOverride = null;
|
|
14
14
|
const fsRpcCodec = StringCodec();
|
|
15
15
|
const shellRpcCodec = StringCodec();
|
|
16
|
+
const runRpcCodec = StringCodec();
|
|
17
|
+
const activeRuns = new Map();
|
|
18
|
+
const retainedRuns = new Map();
|
|
16
19
|
/**
 * Normalise a user id into a token made only of letters, digits, `_` and `-`.
 *
 * The id is trimmed, every other character is replaced with `_`, and an id
 * that is empty after trimming becomes `"anonymous"`.
 *
 * @param {string} userId - Raw user id (must be a string; `.trim()` is called on it).
 * @returns {string} The sanitised, never-empty token.
 */
function sanitizeUserId(userId) {
  const cleaned = userId.trim().replace(/[^a-zA-Z0-9_-]/g, "_");
  if (cleaned.length === 0) {
    return "anonymous";
  }
  return cleaned;
}
|
|
23
|
+
/**
 * Build the subject name on which this agent receives run RPC requests.
 *
 * @param {string} userId - Owner of the agent; passed through `sanitizeUserId`.
 * @param {string} agentId - Agent id; only trimmed, not otherwise rewritten.
 * @returns {string} `doer.agent.run.rpc.<user>.<agent>`.
 */
function buildAgentRunRpcSubject(userId, agentId) {
  const userToken = sanitizeUserId(userId);
  const agentToken = agentId.trim();
  return ["doer.agent.run.rpc", userToken, agentToken].join(".");
}
|
|
20
26
|
function normalizeNatsServers(value) {
|
|
21
27
|
if (!Array.isArray(value)) {
|
|
22
28
|
return [];
|
|
@@ -92,12 +98,6 @@ async function initJetStreamContext(args) {
|
|
|
92
98
|
const nc = await connect(args.token ? { servers: args.servers, token: args.token } : { servers: args.servers });
|
|
93
99
|
const jsm = await nc.jetstreamManager();
|
|
94
100
|
await ensureJetStreamInfra({ jsm, stream, subject, durable });
|
|
95
|
-
await ensureJetStreamInfra({
|
|
96
|
-
jsm,
|
|
97
|
-
stream: args.taskStream,
|
|
98
|
-
subject: args.taskSubject,
|
|
99
|
-
durable: args.taskDurable,
|
|
100
|
-
});
|
|
101
101
|
void nc.closed().then((error) => {
|
|
102
102
|
if (error) {
|
|
103
103
|
writeAgentInfraError(`nats connection closed with error: ${error.message}`);
|
|
@@ -126,14 +126,10 @@ async function initJetStreamContext(args) {
|
|
|
126
126
|
js: nc.jetstream(),
|
|
127
127
|
jsm,
|
|
128
128
|
codec: JSONCodec(),
|
|
129
|
-
taskCodec: JSONCodec(),
|
|
130
129
|
subject,
|
|
131
130
|
stream,
|
|
132
131
|
durable,
|
|
133
132
|
servers: args.servers,
|
|
134
|
-
taskStream: args.taskStream,
|
|
135
|
-
taskSubject: args.taskSubject,
|
|
136
|
-
taskDurable: args.taskDurable,
|
|
137
133
|
};
|
|
138
134
|
}
|
|
139
135
|
function resolveCodexHomePath() {
|
|
@@ -371,6 +367,311 @@ function writeRpcStream(requestId, stream, chunk) {
|
|
|
371
367
|
function writeRpcStatus(requestId, message) {
|
|
372
368
|
process.stdout.write(`[doer-agent][rpc=${requestId}][status] ${message}\n`);
|
|
373
369
|
}
|
|
370
|
+
/**
 * Print a one-line status message for a managed run to stdout.
 *
 * @param {string} runId - Run the message belongs to.
 * @param {string} message - Status text appended after the run prefix.
 */
function writeRunStatus(runId, message) {
  const prefix = `[doer-agent][run=${runId}][status]`;
  process.stdout.write(`${prefix} ${message}\n`);
}
|
|
373
|
+
/**
 * Mirror a chunk of child-process output to this process, one prefixed line
 * per output line.
 *
 * @param {string} runId - Run that produced the output.
 * @param {string} stream - `"stdout"` routes to stdout; anything else to stderr.
 * @param {string} chunk - Raw text; split on `\n` or `\r\n`.
 */
function writeRunStream(runId, stream, chunk) {
  const sink = stream === "stdout" ? process.stdout : process.stderr;
  const pieces = chunk.split(/\r?\n/);
  const lastPosition = pieces.length - 1;
  pieces.forEach((piece, position) => {
    // Only the empty remainder after a trailing newline is dropped; blank
    // lines in the middle of the chunk are still echoed.
    if (position === lastPosition && !piece) {
      return;
    }
    sink.write(`[doer-agent][run=${runId}][${stream}] ${piece}\n`);
  });
}
|
|
384
|
+
/**
 * Validate and normalise a decoded run RPC payload.
 *
 * String fields are trimmed. `action` is one of `cancel`, `get`, `list`; any
 * other value (including a missing one) is treated as `start`.
 * NOTE(review): an unrecognised action therefore starts a command when one is
 * supplied — confirm that this fallback is intended.
 *
 * Numeric fields accept a number or a non-empty numeric string. Absent, null,
 * empty or non-numeric values fall back to the defaults (`sinceSeq: null`,
 * `limit: 50`) instead of being coerced to 0 by `Number()`.
 *
 * @param {{ request: object, agentId: string }} args - Decoded payload and the
 *   id of the agent handling it.
 * @returns {object} Normalised request: requestId, action, runId, command,
 *   cwd, chatId, responseSubject, sinceSeq, limit, runtimeEnvPatch,
 *   codexAuthBundle.
 * @throws {Error} `missing requestId`, `agent id mismatch`,
 *   `missing responseSubject`, `missing command` (start) or `missing runId`
 *   (get / cancel).
 */
function normalizeRunRpcRequest(args) {
  const { request } = args;
  // Trimmed string, or "" when the field is not a string.
  const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
  // Trimmed string, or null when the field is absent / blank / not a string.
  const trimmedOrNull = (value) => trimmedOrEmpty(value) || null;
  // Number() maps null, "" and false to 0, which would silently turn an
  // absent field into a real value; only numbers and numeric text count.
  const numberOrNaN = (value) => {
    if (typeof value === "number") {
      return value;
    }
    if (typeof value === "string" && value.trim() !== "") {
      return Number(value);
    }
    return Number.NaN;
  };

  const requestId = trimmedOrEmpty(request.requestId);
  if (!requestId) {
    throw new Error("missing requestId");
  }
  const requestAgentId = trimmedOrEmpty(request.agentId);
  if (!requestAgentId || requestAgentId !== args.agentId) {
    throw new Error("agent id mismatch");
  }
  const actionRaw = trimmedOrEmpty(request.action);
  const action = ["cancel", "get", "list"].includes(actionRaw) ? actionRaw : "start";
  const responseSubject = trimmedOrEmpty(request.responseSubject);
  if (!responseSubject) {
    throw new Error("missing responseSubject");
  }
  const runId = trimmedOrNull(request.runId);
  const command = trimmedOrNull(request.command);
  if (action === "start" && !command) {
    throw new Error("missing command");
  }
  if ((action === "get" || action === "cancel") && !runId) {
    throw new Error("missing runId");
  }
  const sinceSeqRaw = numberOrNaN(request.sinceSeq);
  const sinceSeq = Number.isInteger(sinceSeqRaw) && sinceSeqRaw >= 0 ? sinceSeqRaw : null;
  const limitRaw = numberOrNaN(request.limit);
  // Clamp to 1..200; default to 50 when no usable limit was sent.
  const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(Math.floor(limitRaw), 200)) : 50;
  return {
    requestId,
    action,
    runId,
    command,
    cwd: trimmedOrNull(request.cwd),
    chatId: trimmedOrNull(request.chatId),
    responseSubject,
    sinceSeq,
    limit,
    runtimeEnvPatch: normalizeEnvPatch(request.runtimeEnvPatch),
    codexAuthBundle: normalizeShellRpcCodexAuthBundle(request.codexAuth),
  };
}
|
|
427
|
+
/**
 * Publish a run RPC reply: the payload is JSON-serialised, encoded with the
 * run RPC codec and sent on the caller-supplied response subject.
 *
 * @param {{ nc: object, responseSubject: string, payload: object }} args
 */
function publishRunRpcResponse(args) {
  const body = JSON.stringify(args.payload);
  const encoded = runRpcCodec.encode(body);
  args.nc.publish(args.responseSubject, encoded);
}
|
|
430
|
+
/**
 * Return the directory that holds per-run log files, creating it if needed.
 *
 * The workspace root is the module-level override when set, otherwise the
 * trimmed `WORKSPACE` environment variable, otherwise the current working
 * directory. Logs live under `<root>/.doer-agent/runs`.
 *
 * @returns {Promise<string>} Path of the (now existing) logs directory.
 */
async function resolveRunLogsDir() {
  const envWorkspace = process.env.WORKSPACE?.trim();
  const root = workspaceRootOverride ?? (envWorkspace || process.cwd());
  const logsDir = path.join(root, ".doer-agent", "runs");
  await mkdir(logsDir, { recursive: true });
  return logsDir;
}
|
|
436
|
+
/**
 * Copy a run task so the caller can hand it out without exposing live state.
 *
 * The task and each event are shallow-copied, and each event payload is
 * shallow-copied too. When `sinceSeq` is a number, only events whose `seq` is
 * greater than it are included.
 *
 * @param {object} task - Run task with an `events` array.
 * @param {number|null} [sinceSeq] - Optional exclusive lower bound on `seq`.
 * @returns {object} The copied task.
 */
function cloneRunTask(task, sinceSeq) {
  const hasCursor = typeof sinceSeq === "number";
  const events = task.events.flatMap((event) => {
    if (hasCursor && !(event.seq > sinceSeq)) {
      return [];
    }
    return [{ ...event, payload: { ...event.payload } }];
  });
  return { ...task, events };
}
|
|
444
|
+
/**
 * Append an event to a run task, assigning it the next sequence number.
 *
 * Mutates the task: bumps `agentEventAckSeq`, refreshes `updatedAt` and
 * pushes `{ seq, type, timestamp, payload }` onto `events`.
 *
 * @param {object} task - Run task to mutate.
 * @param {string} type - Event type label.
 * @param {object} payload - Event payload, stored by reference.
 */
function appendRunEvent(task, type, payload) {
  const stamp = formatLocalTimestamp();
  const nextSeq = task.agentEventAckSeq + 1;
  Object.assign(task, { agentEventAckSeq: nextSeq, updatedAt: stamp });
  task.events.push({ seq: nextSeq, type, timestamp: stamp, payload });
}
|
|
451
|
+
// Upper bound on retained run snapshots. The `list` action never returns more
// than 200 runs, so keeping more than that only grows memory in a long-lived
// agent process.
const MAX_RETAINED_RUN_SNAPSHOTS = 200;

/**
 * Store a snapshot (copy) of a run so it can still be queried after the run
 * leaves the active set.
 *
 * The snapshot is moved to the newest position of the retained map, and the
 * oldest snapshots are evicted once the map exceeds
 * `MAX_RETAINED_RUN_SNAPSHOTS`. Without eviction the map would keep every
 * finished run, with all its captured output, for the life of the process.
 *
 * @param {object} task - Run task to snapshot; it is copied, not stored by reference.
 */
function persistRetainedRun(task) {
  // Delete first so a refreshed run is re-inserted as the most recent entry
  // (Map iteration order is insertion order).
  retainedRuns.delete(task.id);
  retainedRuns.set(task.id, cloneRunTask(task));
  for (const oldestId of retainedRuns.keys()) {
    if (retainedRuns.size <= MAX_RETAINED_RUN_SNAPSHOTS) {
      break;
    }
    retainedRuns.delete(oldestId);
  }
}
|
|
454
|
+
/**
 * Look up a run by id, preferring the live task of an active run over the
 * retained snapshot.
 *
 * @param {string} runId - Run id to look up.
 * @returns {object|null} The live task, the retained snapshot, or null.
 */
function getStoredRun(runId) {
  const liveEntry = activeRuns.get(runId);
  return liveEntry ? liveEntry.task : (retainedRuns.get(runId) ?? null);
}
|
|
461
|
+
/**
 * Start a shell command as a managed run: spawn it, record its output as run
 * events and as a JSON-lines log file, register it in the active set and
 * return a snapshot of the new task.
 *
 * Ordering matters here. Every awaited setup step (environment preparation,
 * log directory creation) happens BEFORE the child is spawned, and all
 * listeners are attached in the same tick as the spawn, so an early `error`
 * or `close` from the child cannot be emitted while no listener exists.
 *
 * @param {object} args - requestId, runId, serverBaseUrl, userId, agentId,
 *   command, cwd, chatId, runtimeEnvPatch, codexAuthBundle, agentToken.
 * @returns {Promise<object>} Copy of the task in its initial `running` state.
 * @throws {Error} When a run with the same id is still active, or when
 *   preparing / spawning fails (prepared auth material is cleaned up first).
 */
async function startManagedRun(args) {
  // Re-using an active id would overwrite the registry entry and leave the
  // first child impossible to cancel.
  const assertRunIdIsFree = () => {
    if (activeRuns.has(args.runId)) {
      throw new Error(`Run already active: ${args.runId}`);
    }
  };
  assertRunIdIsFree();
  const prepared = await prepareCommandExecution({
    cwd: args.cwd,
    runtimeEnvPatch: args.runtimeEnvPatch,
    codexAuthBundle: args.codexAuthBundle,
  });
  let child;
  let logPath;
  let logStream;
  let logWritable = true;
  try {
    const logsDir = await resolveRunLogsDir();
    // Check again: another start for the same id may have registered while
    // the steps above were awaited.
    assertRunIdIsFree();
    // The run id comes from the RPC request; strip path separators and other
    // unusual characters so the log file cannot escape the logs directory.
    const logFileName = `${String(args.runId).replace(/[^a-zA-Z0-9_.-]/g, "_")}.log`;
    logPath = path.join(logsDir, logFileName);
    logStream = createWriteStream(logPath, { flags: "a", encoding: "utf8" });
    // A stream 'error' without a listener would crash the whole agent; a
    // broken log file should only stop logging for this run.
    logStream.on("error", (error) => {
      logWritable = false;
      const message = error instanceof Error ? error.message : String(error);
      writeAgentInfraError(`run log write failed runId=${args.runId}: ${message}`);
    });
    child = spawnPreparedCommand({
      kind: "shell",
      command: args.command,
      patch: null,
      shellPath: prepared.shellPath,
      taskWorkspace: prepared.taskWorkspace,
      env: prepared.env,
      agentToken: args.agentToken,
    });
  }
  catch (error) {
    logStream?.destroy();
    await prepared.codexAuthCleanup().catch(() => undefined);
    throw error;
  }
  const now = formatLocalTimestamp();
  const task = {
    id: args.runId,
    userId: args.userId,
    agentId: args.agentId,
    command: args.command,
    cwd: args.cwd,
    chatId: args.chatId,
    status: "running",
    cancelRequested: false,
    resultExitCode: null,
    resultSignal: null,
    error: null,
    createdAt: now,
    updatedAt: now,
    startedAt: now,
    finishedAt: null,
    agentEventAckSeq: 0,
    events: [],
  };
  appendRunEvent(task, "meta", {
    host: process.platform,
    pid: child.pid ?? null,
    startedAt: now,
    command: args.command,
    cwd: prepared.taskWorkspace,
    requestedCwd: args.cwd,
    shell: prepared.shellPath,
    logPath,
    ...prepared.taskGitMeta,
    ...prepared.codexAuthMeta,
  });
  appendRunEvent(task, "status", { status: "running" });
  const cancellation = createManagedCancellation(child);
  let finalized = false;
  // Move the run to its terminal state exactly once. A failed spawn emits
  // 'error' and may also emit 'close'; only the first of them may finalise,
  // otherwise the real error message is overwritten and cleanup runs twice.
  const finalizeRun = (outcome) => {
    if (finalized) {
      return false;
    }
    finalized = true;
    cancellation.clear();
    task.resultExitCode = outcome.exitCode;
    task.resultSignal = outcome.signal;
    task.status = outcome.status;
    task.error = outcome.error;
    task.finishedAt = formatLocalTimestamp();
    appendRunEvent(task, "status", {
      status: task.status,
      exitCode: task.resultExitCode,
      signal: task.resultSignal,
      error: task.error,
      finishedAt: task.finishedAt,
    });
    persistRetainedRun(task);
    activeRuns.delete(task.id);
    logStream.end();
    // Best-effort cleanup: a failure here must not mask the run result.
    void prepared.codexAuthCleanup().catch(() => undefined);
    if ((task.status === "completed" || task.status === "failed") && task.chatId) {
      void notifyServerRunFinished({
        serverBaseUrl: args.serverBaseUrl,
        userId: args.userId,
        agentToken: args.agentToken,
        task,
      }).catch((error) => {
        const message = error instanceof Error ? error.message : String(error);
        writeAgentInfraError(`run completion notify failed runId=${task.id}: ${message}`);
      });
    }
    return true;
  };
  const requestCancel = () => {
    if (task.status === "completed" || task.status === "failed" || task.status === "canceled") {
      return;
    }
    task.cancelRequested = true;
    task.updatedAt = formatLocalTimestamp();
    writeRunStatus(task.id, "cancel requested");
    cancellation.requestCancel();
  };
  const recordChunk = (stream, chunk) => {
    if (finalized) {
      // Output arriving after the run was finalised is not part of the
      // retained snapshot, and the log stream has already been ended.
      return;
    }
    appendRunEvent(task, stream, { chunk, at: formatLocalTimestamp() });
    if (logWritable) {
      logStream.write(JSON.stringify({ at: formatLocalTimestamp(), stream, chunk }) + "\n");
    }
    writeRunStream(task.id, stream, chunk);
  };
  child.stdout.on("data", (chunk) => recordChunk("stdout", chunk));
  child.stderr.on("data", (chunk) => recordChunk("stderr", chunk));
  child.once("error", (error) => {
    const message = error instanceof Error ? error.message : String(error);
    if (finalizeRun({ status: "failed", exitCode: null, signal: null, error: message })) {
      writeRunStatus(task.id, `failed error=${message}`);
    }
  });
  child.once("close", (code, signal) => {
    const exitCode = typeof code === "number" ? code : null;
    const status = task.cancelRequested ? "canceled" : (exitCode ?? 1) === 0 ? "completed" : "failed";
    const error = status === "failed" ? `Command exited with code ${exitCode ?? "null"}` : null;
    if (finalizeRun({ status, exitCode, signal, error })) {
      writeRunStatus(task.id, `completed status=${task.status} exitCode=${task.resultExitCode ?? "null"} signal=${task.resultSignal ?? "null"}`);
    }
  });
  activeRuns.set(task.id, { task, child, logPath, logStream, requestCancel });
  persistRetainedRun(task);
  writeRunStatus(task.id, `started requestId=${args.requestId} cwd=${prepared.taskWorkspace}`);
  return cloneRunTask(task);
}
|
|
577
|
+
/**
 * Tell the server that a run tied to a chat has finished.
 *
 * Nothing is sent unless the task has a `chatId` and its status is
 * `completed` or `failed`.
 *
 * @param {{ serverBaseUrl: string, userId: string, agentToken: string, task: object }} args
 * @returns {Promise<void>}
 */
async function notifyServerRunFinished(args) {
  const { task } = args;
  const isReportable = task.status === "completed" || task.status === "failed";
  if (!task.chatId || !isReportable) {
    return;
  }
  const body = {
    userId: args.userId,
    agentToken: args.agentToken,
    chatId: task.chatId,
    runId: task.id,
    command: task.command,
    status: task.status,
    exitCode: task.resultExitCode,
    signal: task.resultSignal,
    finishedAt: task.finishedAt,
    error: task.error,
  };
  await postJson(`${args.serverBaseUrl}/api/agent/run-finished`, body);
}
|
|
594
|
+
/**
 * Handle one run RPC message: decode it, dispatch on the action
 * (`start`, `list`, `cancel`, `get`) and publish a reply on the request's
 * response subject.
 *
 * Any failure is reported as `{ ok: false, error }` when a response subject
 * is known, and is always written to the agent error log. This function never
 * rejects: callers invoke it with `void`, so a rejection — including one from
 * publishing the error reply itself — would surface as an unhandled rejection.
 *
 * @param {object} args - msg, jetstream, serverBaseUrl, userId, agentId, agentToken.
 * @returns {Promise<void>}
 */
async function handleRunRpcMessage(args) {
  let requestId = "unknown";
  let responseSubject = "";
  // Reads `responseSubject` at call time, so it also works inside `catch`.
  const reply = (payload) => publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload });
  try {
    const payload = JSON.parse(runRpcCodec.decode(args.msg.data));
    const request = normalizeRunRpcRequest({ request: payload, agentId: args.agentId });
    requestId = request.requestId;
    responseSubject = request.responseSubject;
    if (request.action === "start") {
      const task = await startManagedRun({
        requestId,
        // A start request without an explicit run id uses the request id.
        runId: request.runId ?? requestId,
        serverBaseUrl: args.serverBaseUrl,
        userId: args.userId,
        agentId: args.agentId,
        command: request.command ?? "",
        cwd: request.cwd,
        chatId: request.chatId,
        runtimeEnvPatch: request.runtimeEnvPatch,
        codexAuthBundle: request.codexAuthBundle,
        agentToken: args.agentToken,
      });
      reply({ requestId, ok: true, task });
      return;
    }
    if (request.action === "list") {
      const tasks = [...activeRuns.values()].map((entry) => cloneRunTask(entry.task));
      // Active runs also have a retained snapshot; list each run only once.
      const retained = [...retainedRuns.values()]
        .filter((task) => !activeRuns.has(task.id))
        .map((task) => cloneRunTask(task));
      const merged = [...tasks, ...retained]
        .sort((a, b) => Date.parse(b.updatedAt) - Date.parse(a.updatedAt))
        .slice(0, request.limit);
      reply({ requestId, ok: true, tasks: merged });
      return;
    }
    const stored = request.runId ? getStoredRun(request.runId) : null;
    if (!stored || stored.agentId !== args.agentId || stored.userId !== args.userId) {
      throw new Error("Run not found");
    }
    if (request.action === "cancel") {
      const active = activeRuns.get(stored.id);
      // Cancelling a run that already finished is a no-op that still
      // reports the stored state.
      active?.requestCancel();
      const task = cloneRunTask(active?.task ?? stored);
      reply({ requestId, ok: true, task });
      return;
    }
    const task = cloneRunTask(stored, request.sinceSeq);
    reply({ requestId, ok: true, task });
  }
  catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    if (responseSubject) {
      try {
        reply({ requestId, ok: false, error: message });
      }
      catch (publishError) {
        // The original failure may itself be a publish failure (for example
        // a closed connection); a second throw here must not escape.
        const publishMessage = publishError instanceof Error ? publishError.message : String(publishError);
        writeAgentError(`run rpc error response publish failed requestId=${requestId} error=${publishMessage}`);
      }
    }
    writeAgentError(`run rpc failed requestId=${requestId} error=${message}`);
  }
}
|
|
654
|
+
/**
 * Subscribe this agent to its run RPC subject and route every message to
 * `handleRunRpcMessage`.
 *
 * Subscription errors are logged and the message is dropped. The handler is
 * started without being awaited, so messages are processed concurrently.
 *
 * @param {object} args - jetstream, serverBaseUrl, userId, agentId, agentToken.
 */
function subscribeToRunRpc(args) {
  const subject = buildAgentRunRpcSubject(args.userId, args.agentId);
  const onMessage = (error, msg) => {
    if (error) {
      const reason = error instanceof Error ? error.message : String(error);
      writeAgentError(`run rpc subscription error: ${reason}`);
      return;
    }
    const handlerArgs = {
      msg,
      jetstream: args.jetstream,
      serverBaseUrl: args.serverBaseUrl,
      userId: args.userId,
      agentId: args.agentId,
      agentToken: args.agentToken,
    };
    void handleRunRpcMessage(handlerArgs);
  };
  args.jetstream.nc.subscribe(subject, { callback: onMessage });
  writeAgentInfo(`run rpc subscribed subject=${subject}`);
}
|
|
374
675
|
function isLikelyNatsAuthError(error) {
|
|
375
676
|
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
|
376
677
|
return (message.includes("auth")
|
|
@@ -827,53 +1128,21 @@ async function handleShellRpcMessage(args) {
|
|
|
827
1128
|
requestId = request.requestId;
|
|
828
1129
|
responseSubject = request.responseSubject;
|
|
829
1130
|
const startedAtMs = Date.now();
|
|
830
|
-
const
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
...request.runtimeEnvPatch,
|
|
835
|
-
...(codexAuth?.envPatch ?? {}),
|
|
836
|
-
WORKSPACE: taskWorkspace,
|
|
837
|
-
};
|
|
838
|
-
const taskGitEnv = await prepareTaskGitEnv({
|
|
839
|
-
cwd: taskWorkspace,
|
|
840
|
-
baseEnvPatch: baseTaskEnvPatch,
|
|
1131
|
+
const prepared = await prepareCommandExecution({
|
|
1132
|
+
cwd: request.cwd,
|
|
1133
|
+
runtimeEnvPatch: request.runtimeEnvPatch,
|
|
1134
|
+
codexAuthBundle: request.codexAuthBundle,
|
|
841
1135
|
});
|
|
842
|
-
const
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
PATH: taskPath,
|
|
853
|
-
DOER_AGENT_TOKEN: args.agentToken,
|
|
854
|
-
},
|
|
855
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
856
|
-
})
|
|
857
|
-
: spawn(request.command ?? "", {
|
|
858
|
-
cwd: taskWorkspace,
|
|
859
|
-
shell: shellPath,
|
|
860
|
-
detached: process.platform !== "win32",
|
|
861
|
-
env: {
|
|
862
|
-
...process.env,
|
|
863
|
-
...baseTaskEnvPatch,
|
|
864
|
-
...taskGitEnv.envPatch,
|
|
865
|
-
PATH: taskPath,
|
|
866
|
-
DOER_AGENT_TOKEN: args.agentToken,
|
|
867
|
-
},
|
|
868
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
869
|
-
});
|
|
870
|
-
if (request.kind === "apply_patch") {
|
|
871
|
-
child.stdin?.write(request.patch ?? "");
|
|
872
|
-
child.stdin?.end();
|
|
873
|
-
}
|
|
874
|
-
writeRpcStatus(requestId, `started kind=${request.kind} cwd=${taskWorkspace} shell=${request.kind === "shell" ? shellPath : "apply_patch"}`);
|
|
875
|
-
child.stdout.setEncoding("utf8");
|
|
876
|
-
child.stderr.setEncoding("utf8");
|
|
1136
|
+
const child = spawnPreparedCommand({
|
|
1137
|
+
kind: request.kind,
|
|
1138
|
+
command: request.command,
|
|
1139
|
+
patch: request.patch,
|
|
1140
|
+
shellPath: prepared.shellPath,
|
|
1141
|
+
taskWorkspace: prepared.taskWorkspace,
|
|
1142
|
+
env: prepared.env,
|
|
1143
|
+
agentToken: args.agentToken,
|
|
1144
|
+
});
|
|
1145
|
+
writeRpcStatus(requestId, `started kind=${request.kind} cwd=${prepared.taskWorkspace} shell=${request.kind === "shell" ? prepared.shellPath : "apply_patch"}`);
|
|
877
1146
|
child.stdout.on("data", (chunk) => {
|
|
878
1147
|
stdout += chunk;
|
|
879
1148
|
writeRpcStream(requestId, "stdout", chunk);
|
|
@@ -899,6 +1168,7 @@ async function handleShellRpcMessage(args) {
|
|
|
899
1168
|
}).finally(() => {
|
|
900
1169
|
clearTimeout(timeout);
|
|
901
1170
|
});
|
|
1171
|
+
await prepared.codexAuthCleanup().catch(() => undefined);
|
|
902
1172
|
publishShellRpcResponse({
|
|
903
1173
|
nc: args.jetstream.nc,
|
|
904
1174
|
responseSubject,
|
|
@@ -1154,6 +1424,93 @@ async function prepareCodexAuthBundle(bundle) {
|
|
|
1154
1424
|
},
|
|
1155
1425
|
};
|
|
1156
1426
|
}
|
|
1427
|
+
/**
 * Resolve everything needed to spawn a command: shell, workspace directory,
 * environment and the metadata / cleanup hooks that go with it.
 *
 * Environment precedence, lowest to highest: `process.env`, the caller's
 * runtime patch, the Codex auth patch, `WORKSPACE`, the task git patch, and
 * finally `PATH` (the agent's `runtime/bin` prepended to the current `PATH`).
 *
 * If a later step fails after the Codex auth bundle was prepared, the
 * bundle's cleanup is run before the error is re-thrown, so the prepared auth
 * material is not left behind.
 *
 * @param {{ cwd: string|null, runtimeEnvPatch: object, codexAuthBundle: object|null }} args
 * @returns {Promise<object>} shellPath, taskWorkspace, taskPath, env,
 *   taskGitMeta, codexAuthMeta, codexAuthCleanup.
 */
async function prepareCommandExecution(args) {
  const shellPath = resolveShellPath();
  const taskWorkspace = resolveTaskWorkspace(args.cwd);
  const codexAuth = await prepareCodexAuthBundle(args.codexAuthBundle);
  const codexAuthCleanup = codexAuth?.cleanup ?? (async () => { });
  try {
    const baseTaskEnvPatch = {
      ...args.runtimeEnvPatch,
      ...(codexAuth?.envPatch ?? {}),
      WORKSPACE: taskWorkspace,
    };
    const taskGitEnv = await prepareTaskGitEnv({
      cwd: taskWorkspace,
      baseEnvPatch: baseTaskEnvPatch,
    });
    const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
    // filter(Boolean) avoids a trailing delimiter when PATH is unset or empty.
    const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
    return {
      shellPath,
      taskWorkspace,
      taskPath,
      env: {
        ...process.env,
        ...baseTaskEnvPatch,
        ...taskGitEnv.envPatch,
        PATH: taskPath,
      },
      taskGitMeta: taskGitEnv.meta ?? {},
      codexAuthMeta: codexAuth?.meta ?? { codexAuthSynced: false },
      codexAuthCleanup,
    };
  }
  catch (error) {
    // Best-effort: a cleanup failure must not hide the original error.
    await Promise.resolve()
      .then(() => codexAuthCleanup())
      .catch(() => undefined);
    throw error;
  }
}
|
|
1457
|
+
/**
 * Spawn the child process for a prepared command.
 *
 * - `kind === "apply_patch"` runs the `apply_patch` executable and feeds
 *   `args.patch` to its stdin.
 * - any other kind runs `args.command` through the shell at `args.shellPath`
 *   with stdin ignored.
 *
 * The child is detached on non-Windows platforms, receives the prepared
 * environment plus `DOER_AGENT_TOKEN`, and has stdout / stderr decoded as
 * UTF-8 text.
 *
 * @param {object} args - kind, command, patch, shellPath, taskWorkspace, env, agentToken.
 * @returns {import("node:child_process").ChildProcess} The spawned child.
 */
function spawnPreparedCommand(args) {
  const env = {
    ...args.env,
    DOER_AGENT_TOKEN: args.agentToken,
  };
  const isPatch = args.kind === "apply_patch";
  const sharedOptions = {
    cwd: args.taskWorkspace,
    detached: process.platform !== "win32",
    env,
  };
  const child = isPatch
    ? spawn("apply_patch", { ...sharedOptions, stdio: ["pipe", "pipe", "pipe"] })
    : spawn(args.command ?? "", { ...sharedOptions, shell: args.shellPath, stdio: ["ignore", "pipe", "pipe"] });
  if (isPatch) {
    // If the executable cannot be started or exits before reading its input,
    // the write below fails on the stdin stream. Without a listener that
    // stream 'error' would be an uncaught exception; the failure is already
    // reported through the child's own 'error' / 'close' events.
    child.stdin?.on("error", () => { });
    child.stdin?.write(args.patch ?? "");
    child.stdin?.end();
  }
  child.stdout.setEncoding("utf8");
  child.stderr.setEncoding("utf8");
  return child;
}
|
|
1484
|
+
/**
 * Create an escalating cancellation controller for a child process.
 *
 * `requestCancel()` sends SIGINT immediately, then schedules SIGTERM after
 * 1200 ms and SIGKILL after 3500 ms; repeated calls are ignored.
 * `clear()` cancels any escalation timers that are still pending.
 *
 * @param {object} child - Child process handed to `sendSignalToTaskProcess`.
 * @returns {{ requestCancel: () => void, clear: () => void }}
 */
function createManagedCancellation(child) {
  const escalationTimers = [];
  let alreadySignaled = false;
  const escalateAfter = (delayMs, signal) => {
    const timer = setTimeout(() => {
      sendSignalToTaskProcess(child, signal);
    }, delayMs);
    // Pending escalation must not keep the agent process alive on its own.
    timer.unref?.();
    escalationTimers.push(timer);
  };
  const requestCancel = () => {
    if (alreadySignaled) {
      return;
    }
    alreadySignaled = true;
    sendSignalToTaskProcess(child, "SIGINT");
    escalateAfter(1200, "SIGTERM");
    escalateAfter(3500, "SIGKILL");
  };
  const clear = () => {
    for (const timer of escalationTimers) {
      clearTimeout(timer);
    }
  };
  return { requestCancel, clear };
}
|
|
1157
1514
|
async function runTask(args) {
|
|
1158
1515
|
activeTaskLogContext = {
|
|
1159
1516
|
jetstream: args.jetstream,
|
|
@@ -1355,22 +1712,14 @@ async function connectBootstrapWithRetry(args) {
|
|
|
1355
1712
|
if (natsServers.length === 0) {
|
|
1356
1713
|
throw new Error("No NATS servers configured by server");
|
|
1357
1714
|
}
|
|
1358
|
-
const taskConfig = parseBootstrapTaskConfig(natsBootstrap.tasks);
|
|
1359
|
-
if (!taskConfig) {
|
|
1360
|
-
throw new Error("Invalid task dispatch config from server");
|
|
1361
|
-
}
|
|
1362
1715
|
const natsToken = normalizeNatsToken(natsBootstrap.auth);
|
|
1363
|
-
const pendingTaskIds = normalizeTaskIds(natsBootstrap.pendingTaskIds);
|
|
1364
1716
|
const jetstream = await initJetStreamContext({
|
|
1365
1717
|
userId: args.userId,
|
|
1366
1718
|
servers: natsServers,
|
|
1367
1719
|
token: natsToken,
|
|
1368
|
-
taskStream: taskConfig.stream,
|
|
1369
|
-
taskSubject: taskConfig.subject,
|
|
1370
|
-
taskDurable: taskConfig.durable,
|
|
1371
1720
|
});
|
|
1372
|
-
writeAgentInfraError(`bootstrap ok servers=${natsServers.length}
|
|
1373
|
-
return { natsBootstrap,
|
|
1721
|
+
writeAgentInfraError(`bootstrap ok servers=${natsServers.length} eventStream=${jetstream.stream} eventSubject=${jetstream.subject}`);
|
|
1722
|
+
return { natsBootstrap, jetstream };
|
|
1374
1723
|
}
|
|
1375
1724
|
catch (error) {
|
|
1376
1725
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -1396,12 +1745,11 @@ async function main() {
|
|
|
1396
1745
|
throw new Error("user-id and agent-secret are required");
|
|
1397
1746
|
}
|
|
1398
1747
|
const agentToken = agentSecret;
|
|
1399
|
-
|
|
1748
|
+
const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
|
|
1400
1749
|
serverBaseUrl,
|
|
1401
1750
|
userId,
|
|
1402
1751
|
agentToken,
|
|
1403
1752
|
});
|
|
1404
|
-
const maxConcurrency = Math.max(1, parseEnvInteger(process.env.DOER_AGENT_MAX_CONCURRENCY, 5));
|
|
1405
1753
|
const agentVersion = await resolveAgentVersion();
|
|
1406
1754
|
const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
|
|
1407
1755
|
if (!initialAgentId) {
|
|
@@ -1418,11 +1766,6 @@ async function main() {
|
|
|
1418
1766
|
process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
|
|
1419
1767
|
process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
|
|
1420
1768
|
process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
|
|
1421
|
-
process.stdout.write(`- taskStream: ${jetstream.taskStream}\n`);
|
|
1422
|
-
process.stdout.write(`- taskSubject: ${jetstream.taskSubject}\n`);
|
|
1423
|
-
process.stdout.write(`- taskDurable: ${jetstream.taskDurable}\n`);
|
|
1424
|
-
process.stdout.write(`- pendingTasks: ${pendingTaskIds.length}\n`);
|
|
1425
|
-
process.stdout.write(`- maxConcurrency: ${maxConcurrency}\n\n`);
|
|
1426
1769
|
process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
|
|
1427
1770
|
if (requestedServerBaseUrl !== serverBaseUrl) {
|
|
1428
1771
|
writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
|
|
@@ -1444,27 +1787,6 @@ async function main() {
|
|
|
1444
1787
|
heartbeatHealthy = false;
|
|
1445
1788
|
});
|
|
1446
1789
|
}, 10_000);
|
|
1447
|
-
const inFlightTasks = new Set();
|
|
1448
|
-
async function waitForAvailableSlot() {
|
|
1449
|
-
while (inFlightTasks.size >= maxConcurrency) {
|
|
1450
|
-
try {
|
|
1451
|
-
await Promise.race(inFlightTasks);
|
|
1452
|
-
}
|
|
1453
|
-
catch {
|
|
1454
|
-
// keep draining slots even when a task fails.
|
|
1455
|
-
}
|
|
1456
|
-
}
|
|
1457
|
-
}
|
|
1458
|
-
function trackInFlight(taskPromise) {
|
|
1459
|
-
inFlightTasks.add(taskPromise);
|
|
1460
|
-
void taskPromise.finally(() => {
|
|
1461
|
-
inFlightTasks.delete(taskPromise);
|
|
1462
|
-
});
|
|
1463
|
-
}
|
|
1464
|
-
function scheduleTask(taskPromiseFactory) {
|
|
1465
|
-
const taskPromise = taskPromiseFactory();
|
|
1466
|
-
trackInFlight(taskPromise);
|
|
1467
|
-
}
|
|
1468
1790
|
subscribeToFsRpc({
|
|
1469
1791
|
jetstream,
|
|
1470
1792
|
serverBaseUrl,
|
|
@@ -1478,176 +1800,16 @@ async function main() {
|
|
|
1478
1800
|
agentId: initialAgentId,
|
|
1479
1801
|
agentToken,
|
|
1480
1802
|
});
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
if (task) {
|
|
1492
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1493
|
-
}
|
|
1494
|
-
}
|
|
1495
|
-
catch (error) {
|
|
1496
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1497
|
-
writeAgentError(`pending task bootstrap failed taskId=${pendingTaskId}: ${message}`);
|
|
1498
|
-
}
|
|
1499
|
-
});
|
|
1500
|
-
}
|
|
1501
|
-
let connected = false;
|
|
1502
|
-
while (true) {
|
|
1503
|
-
try {
|
|
1504
|
-
const consumer = await jetstream.js.consumers.get(jetstream.taskStream, jetstream.taskDurable);
|
|
1505
|
-
if (!connected) {
|
|
1506
|
-
writeAgentInfo(`connected to task stream (NATS ok) at=${formatLocalTimestamp()} userId=${userId}`);
|
|
1507
|
-
connected = true;
|
|
1508
|
-
}
|
|
1509
|
-
const messages = await consumer.fetch({ max_messages: 200, expires: 5_000 });
|
|
1510
|
-
for await (const msg of messages) {
|
|
1511
|
-
await waitForAvailableSlot();
|
|
1512
|
-
scheduleTask(async () => {
|
|
1513
|
-
let dispatch;
|
|
1514
|
-
try {
|
|
1515
|
-
dispatch = jetstream.taskCodec.decode(msg.data);
|
|
1516
|
-
}
|
|
1517
|
-
catch (error) {
|
|
1518
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1519
|
-
writeAgentError(`task dispatch decode failed: ${message}`);
|
|
1520
|
-
msg.term();
|
|
1521
|
-
return;
|
|
1522
|
-
}
|
|
1523
|
-
writeAgentInfo(`task dispatch received taskId=${dispatch.taskId} createdAt=${dispatch.createdAt} subject=${jetstream.taskSubject} durable=${jetstream.taskDurable}`);
|
|
1524
|
-
const ackKeepAliveIntervalMs = 10_000;
|
|
1525
|
-
let ackKeepAliveTimer = null;
|
|
1526
|
-
const stopAckKeepAlive = () => {
|
|
1527
|
-
if (ackKeepAliveTimer) {
|
|
1528
|
-
clearInterval(ackKeepAliveTimer);
|
|
1529
|
-
ackKeepAliveTimer = null;
|
|
1530
|
-
}
|
|
1531
|
-
};
|
|
1532
|
-
try {
|
|
1533
|
-
ackKeepAliveTimer = setInterval(() => {
|
|
1534
|
-
try {
|
|
1535
|
-
msg.working();
|
|
1536
|
-
}
|
|
1537
|
-
catch (error) {
|
|
1538
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1539
|
-
writeAgentError(`task dispatch keepalive failed taskId=${dispatch.taskId}: ${message}`);
|
|
1540
|
-
}
|
|
1541
|
-
}, ackKeepAliveIntervalMs);
|
|
1542
|
-
ackKeepAliveTimer.unref?.();
|
|
1543
|
-
if (dispatch.type === "cancel") {
|
|
1544
|
-
stopAckKeepAlive();
|
|
1545
|
-
const canceled = requestTaskCancellation(dispatch.taskId, "nats_dispatch");
|
|
1546
|
-
writeAgentInfo(`task cancel dispatch handled taskId=${dispatch.taskId} result=${canceled ? "signaled" : "not-running"}`);
|
|
1547
|
-
msg.ack();
|
|
1548
|
-
return;
|
|
1549
|
-
}
|
|
1550
|
-
const task = await claimTaskById({
|
|
1551
|
-
serverBaseUrl,
|
|
1552
|
-
userId,
|
|
1553
|
-
agentToken,
|
|
1554
|
-
taskId: dispatch.taskId,
|
|
1555
|
-
});
|
|
1556
|
-
if (!task) {
|
|
1557
|
-
stopAckKeepAlive();
|
|
1558
|
-
writeAgentInfo(`task dispatch acked without run taskId=${dispatch.taskId} reason=already-claimed`);
|
|
1559
|
-
msg.ack();
|
|
1560
|
-
return;
|
|
1561
|
-
}
|
|
1562
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1563
|
-
stopAckKeepAlive();
|
|
1564
|
-
msg.ack();
|
|
1565
|
-
writeAgentInfo(`task dispatch acked taskId=${dispatch.taskId}`);
|
|
1566
|
-
}
|
|
1567
|
-
catch (error) {
|
|
1568
|
-
stopAckKeepAlive();
|
|
1569
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1570
|
-
writeAgentError(`task dispatch handle failed taskId=${dispatch.taskId}: ${message}`);
|
|
1571
|
-
writeAgentError(`task dispatch sending nak taskId=${dispatch.taskId}`);
|
|
1572
|
-
msg.nak();
|
|
1573
|
-
}
|
|
1574
|
-
});
|
|
1575
|
-
}
|
|
1576
|
-
}
|
|
1577
|
-
catch (error) {
|
|
1578
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
1579
|
-
if (connected) {
|
|
1580
|
-
writeAgentError(`task stream disconnected at=${formatLocalTimestamp()} reason=${message}`);
|
|
1581
|
-
}
|
|
1582
|
-
connected = false;
|
|
1583
|
-
if (isLikelyNatsAuthError(error)) {
|
|
1584
|
-
writeAgentError(`nats auth error detected. refreshing bootstrap credentials...`);
|
|
1585
|
-
}
|
|
1586
|
-
else if (isLikelyNatsReconnectError(error)) {
|
|
1587
|
-
writeAgentError(`nats connection lost. refreshing bootstrap/session...`);
|
|
1588
|
-
}
|
|
1589
|
-
else {
|
|
1590
|
-
writeAgentError(`task stream error detected. forcing bootstrap/session refresh... reason=${message}`);
|
|
1591
|
-
}
|
|
1592
|
-
if (inFlightTasks.size > 0) {
|
|
1593
|
-
writeAgentInfo(`waiting for in-flight tasks before reconnect count=${inFlightTasks.size}`);
|
|
1594
|
-
await Promise.allSettled(Array.from(inFlightTasks));
|
|
1595
|
-
}
|
|
1596
|
-
try {
|
|
1597
|
-
await jetstream.nc.close();
|
|
1598
|
-
}
|
|
1599
|
-
catch {
|
|
1600
|
-
// noop
|
|
1601
|
-
}
|
|
1602
|
-
const refreshed = await connectBootstrapWithRetry({
|
|
1603
|
-
serverBaseUrl,
|
|
1604
|
-
userId,
|
|
1605
|
-
agentToken,
|
|
1606
|
-
});
|
|
1607
|
-
natsBootstrap = refreshed.natsBootstrap;
|
|
1608
|
-
pendingTaskIds = refreshed.pendingTaskIds;
|
|
1609
|
-
jetstream = refreshed.jetstream;
|
|
1610
|
-
const refreshedAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
|
|
1611
|
-
if (!refreshedAgentId) {
|
|
1612
|
-
throw new Error("agent id missing from refreshed bootstrap");
|
|
1613
|
-
}
|
|
1614
|
-
subscribeToFsRpc({
|
|
1615
|
-
jetstream,
|
|
1616
|
-
serverBaseUrl,
|
|
1617
|
-
userId,
|
|
1618
|
-
agentId: refreshedAgentId,
|
|
1619
|
-
agentToken,
|
|
1620
|
-
});
|
|
1621
|
-
subscribeToShellRpc({
|
|
1622
|
-
jetstream,
|
|
1623
|
-
userId,
|
|
1624
|
-
agentId: refreshedAgentId,
|
|
1625
|
-
agentToken,
|
|
1626
|
-
});
|
|
1627
|
-
for (const pendingTaskId of pendingTaskIds) {
|
|
1628
|
-
await waitForAvailableSlot();
|
|
1629
|
-
scheduleTask(async () => {
|
|
1630
|
-
try {
|
|
1631
|
-
const task = await claimTaskById({
|
|
1632
|
-
serverBaseUrl,
|
|
1633
|
-
userId,
|
|
1634
|
-
agentToken,
|
|
1635
|
-
taskId: pendingTaskId,
|
|
1636
|
-
});
|
|
1637
|
-
if (task) {
|
|
1638
|
-
await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
|
|
1639
|
-
}
|
|
1640
|
-
}
|
|
1641
|
-
catch (pendingError) {
|
|
1642
|
-
const pendingMessage = pendingError instanceof Error ? pendingError.message : String(pendingError);
|
|
1643
|
-
writeAgentError(`pending task refresh failed taskId=${pendingTaskId}: ${pendingMessage}`);
|
|
1644
|
-
}
|
|
1645
|
-
});
|
|
1646
|
-
}
|
|
1647
|
-
writeAgentInfo(`nats credentials refreshed at=${formatLocalTimestamp()} agentId=${typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "unknown"}`);
|
|
1648
|
-
continue;
|
|
1649
|
-
}
|
|
1650
|
-
}
|
|
1803
|
+
subscribeToRunRpc({
|
|
1804
|
+
jetstream,
|
|
1805
|
+
serverBaseUrl,
|
|
1806
|
+
userId,
|
|
1807
|
+
agentId: initialAgentId,
|
|
1808
|
+
agentToken,
|
|
1809
|
+
});
|
|
1810
|
+
await new Promise(() => {
|
|
1811
|
+
// Keep the long-lived agent process alive for RPC subscriptions and heartbeat.
|
|
1812
|
+
});
|
|
1651
1813
|
}
|
|
1652
1814
|
main().catch((error) => {
|
|
1653
1815
|
const message = error instanceof Error ? error.message : String(error);
|