doer-agent 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/agent.js +427 -265
  2. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { spawn, spawnSync } from "node:child_process";
2
- import { existsSync, statSync } from "node:fs";
2
+ import { createWriteStream, existsSync, statSync } from "node:fs";
3
3
  import { chmod, mkdir, open, readFile, readdir, stat, writeFile } from "node:fs/promises";
4
4
  import path from "node:path";
5
5
  import { fileURLToPath } from "node:url";
@@ -13,10 +13,16 @@ const activeTaskCancelRequests = new Map();
13
13
  let workspaceRootOverride = null;
14
14
  const fsRpcCodec = StringCodec();
15
15
  const shellRpcCodec = StringCodec();
16
+ const runRpcCodec = StringCodec();
17
+ const activeRuns = new Map();
18
+ const retainedRuns = new Map();
16
19
  function sanitizeUserId(userId) {
17
20
  const normalized = userId.trim().replace(/[^a-zA-Z0-9_-]/g, "_");
18
21
  return normalized.length > 0 ? normalized : "anonymous";
19
22
  }
23
+ function buildAgentRunRpcSubject(userId, agentId) {
24
+ return `doer.agent.run.rpc.${sanitizeUserId(userId)}.${agentId.trim()}`;
25
+ }
20
26
  function normalizeNatsServers(value) {
21
27
  if (!Array.isArray(value)) {
22
28
  return [];
@@ -92,12 +98,6 @@ async function initJetStreamContext(args) {
92
98
  const nc = await connect(args.token ? { servers: args.servers, token: args.token } : { servers: args.servers });
93
99
  const jsm = await nc.jetstreamManager();
94
100
  await ensureJetStreamInfra({ jsm, stream, subject, durable });
95
- await ensureJetStreamInfra({
96
- jsm,
97
- stream: args.taskStream,
98
- subject: args.taskSubject,
99
- durable: args.taskDurable,
100
- });
101
101
  void nc.closed().then((error) => {
102
102
  if (error) {
103
103
  writeAgentInfraError(`nats connection closed with error: ${error.message}`);
@@ -126,14 +126,10 @@ async function initJetStreamContext(args) {
126
126
  js: nc.jetstream(),
127
127
  jsm,
128
128
  codec: JSONCodec(),
129
- taskCodec: JSONCodec(),
130
129
  subject,
131
130
  stream,
132
131
  durable,
133
132
  servers: args.servers,
134
- taskStream: args.taskStream,
135
- taskSubject: args.taskSubject,
136
- taskDurable: args.taskDurable,
137
133
  };
138
134
  }
139
135
  function resolveCodexHomePath() {
@@ -371,6 +367,311 @@ function writeRpcStream(requestId, stream, chunk) {
371
367
  function writeRpcStatus(requestId, message) {
372
368
  process.stdout.write(`[doer-agent][rpc=${requestId}][status] ${message}\n`);
373
369
  }
370
+ function writeRunStatus(runId, message) {
371
+ process.stdout.write(`[doer-agent][run=${runId}][status] ${message}\n`);
372
+ }
373
+ function writeRunStream(runId, stream, chunk) {
374
+ const target = stream === "stdout" ? process.stdout : process.stderr;
375
+ const lines = chunk.split(/\r?\n/);
376
+ for (let index = 0; index < lines.length; index += 1) {
377
+ const line = lines[index];
378
+ if (!line && index === lines.length - 1) {
379
+ continue;
380
+ }
381
+ target.write(`[doer-agent][run=${runId}][${stream}] ${line}\n`);
382
+ }
383
+ }
384
+ function normalizeRunRpcRequest(args) {
385
+ const requestId = typeof args.request.requestId === "string" ? args.request.requestId.trim() : "";
386
+ if (!requestId) {
387
+ throw new Error("missing requestId");
388
+ }
389
+ const requestAgentId = typeof args.request.agentId === "string" ? args.request.agentId.trim() : "";
390
+ if (!requestAgentId || requestAgentId !== args.agentId) {
391
+ throw new Error("agent id mismatch");
392
+ }
393
+ const actionRaw = typeof args.request.action === "string" ? args.request.action.trim() : "";
394
+ const action = actionRaw === "cancel" || actionRaw === "get" || actionRaw === "list" ? actionRaw : "start";
395
+ const responseSubject = typeof args.request.responseSubject === "string" ? args.request.responseSubject.trim() : "";
396
+ if (!responseSubject) {
397
+ throw new Error("missing responseSubject");
398
+ }
399
+ const runId = typeof args.request.runId === "string" && args.request.runId.trim() ? args.request.runId.trim() : null;
400
+ const command = typeof args.request.command === "string" && args.request.command.trim() ? args.request.command.trim() : null;
401
+ if (action === "start" && !command) {
402
+ throw new Error("missing command");
403
+ }
404
+ if ((action === "get" || action === "cancel") && !runId) {
405
+ throw new Error("missing runId");
406
+ }
407
+ const cwd = typeof args.request.cwd === "string" && args.request.cwd.trim() ? args.request.cwd.trim() : null;
408
+ const chatId = typeof args.request.chatId === "string" && args.request.chatId.trim() ? args.request.chatId.trim() : null;
409
+ const sinceSeqRaw = Number(args.request.sinceSeq);
410
+ const sinceSeq = Number.isInteger(sinceSeqRaw) && sinceSeqRaw >= 0 ? sinceSeqRaw : null;
411
+ const limitRaw = Number(args.request.limit);
412
+ const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(Math.floor(limitRaw), 200)) : 50;
413
+ return {
414
+ requestId,
415
+ action,
416
+ runId,
417
+ command,
418
+ cwd,
419
+ chatId,
420
+ responseSubject,
421
+ sinceSeq,
422
+ limit,
423
+ runtimeEnvPatch: normalizeEnvPatch(args.request.runtimeEnvPatch),
424
+ codexAuthBundle: normalizeShellRpcCodexAuthBundle(args.request.codexAuth),
425
+ };
426
+ }
427
+ function publishRunRpcResponse(args) {
428
+ args.nc.publish(args.responseSubject, runRpcCodec.encode(JSON.stringify(args.payload)));
429
+ }
430
+ async function resolveRunLogsDir() {
431
+ const workspaceRoot = workspaceRootOverride ?? (process.env.WORKSPACE?.trim() || process.cwd());
432
+ const dir = path.join(workspaceRoot, ".doer-agent", "runs");
433
+ await mkdir(dir, { recursive: true });
434
+ return dir;
435
+ }
436
+ function cloneRunTask(task, sinceSeq) {
437
+ return {
438
+ ...task,
439
+ events: task.events
440
+ .filter((event) => typeof sinceSeq === "number" ? event.seq > sinceSeq : true)
441
+ .map((event) => ({ ...event, payload: { ...event.payload } })),
442
+ };
443
+ }
444
+ function appendRunEvent(task, type, payload) {
445
+ const timestamp = formatLocalTimestamp();
446
+ const seq = task.agentEventAckSeq + 1;
447
+ task.agentEventAckSeq = seq;
448
+ task.updatedAt = timestamp;
449
+ task.events.push({ seq, type, timestamp, payload });
450
+ }
451
+ function persistRetainedRun(task) {
452
+ retainedRuns.set(task.id, cloneRunTask(task));
453
+ }
454
+ function getStoredRun(runId) {
455
+ const active = activeRuns.get(runId);
456
+ if (active) {
457
+ return active.task;
458
+ }
459
+ return retainedRuns.get(runId) ?? null;
460
+ }
461
+ async function startManagedRun(args) {
462
+ const prepared = await prepareCommandExecution({
463
+ cwd: args.cwd,
464
+ runtimeEnvPatch: args.runtimeEnvPatch,
465
+ codexAuthBundle: args.codexAuthBundle,
466
+ });
467
+ const child = spawnPreparedCommand({
468
+ kind: "shell",
469
+ command: args.command,
470
+ patch: null,
471
+ shellPath: prepared.shellPath,
472
+ taskWorkspace: prepared.taskWorkspace,
473
+ env: prepared.env,
474
+ agentToken: args.agentToken,
475
+ });
476
+ const logsDir = await resolveRunLogsDir();
477
+ const logPath = path.join(logsDir, `${args.runId}.log`);
478
+ const logStream = createWriteStream(logPath, { flags: "a", encoding: "utf8" });
479
+ const now = formatLocalTimestamp();
480
+ const task = {
481
+ id: args.runId,
482
+ userId: args.userId,
483
+ agentId: args.agentId,
484
+ command: args.command,
485
+ cwd: args.cwd,
486
+ chatId: args.chatId,
487
+ status: "running",
488
+ cancelRequested: false,
489
+ resultExitCode: null,
490
+ resultSignal: null,
491
+ error: null,
492
+ createdAt: now,
493
+ updatedAt: now,
494
+ startedAt: now,
495
+ finishedAt: null,
496
+ agentEventAckSeq: 0,
497
+ events: [],
498
+ };
499
+ appendRunEvent(task, "meta", {
500
+ host: process.platform,
501
+ pid: child.pid ?? null,
502
+ startedAt: now,
503
+ command: args.command,
504
+ cwd: prepared.taskWorkspace,
505
+ requestedCwd: args.cwd,
506
+ shell: prepared.shellPath,
507
+ logPath,
508
+ ...prepared.taskGitMeta,
509
+ ...prepared.codexAuthMeta,
510
+ });
511
+ appendRunEvent(task, "status", { status: "running" });
512
+ const cancellation = createManagedCancellation(child);
513
+ const requestCancel = () => {
514
+ if (task.status === "completed" || task.status === "failed" || task.status === "canceled") {
515
+ return;
516
+ }
517
+ task.cancelRequested = true;
518
+ task.updatedAt = formatLocalTimestamp();
519
+ writeRunStatus(task.id, "cancel requested");
520
+ cancellation.requestCancel();
521
+ };
522
+ const recordChunk = (stream, chunk) => {
523
+ appendRunEvent(task, stream, { chunk, at: formatLocalTimestamp() });
524
+ logStream.write(JSON.stringify({ at: formatLocalTimestamp(), stream, chunk }) + "\n");
525
+ writeRunStream(task.id, stream, chunk);
526
+ };
527
+ child.stdout.on("data", (chunk) => recordChunk("stdout", chunk));
528
+ child.stderr.on("data", (chunk) => recordChunk("stderr", chunk));
529
+ child.once("error", (error) => {
530
+ const message = error instanceof Error ? error.message : String(error);
531
+ task.status = "failed";
532
+ task.error = message;
533
+ task.finishedAt = formatLocalTimestamp();
534
+ appendRunEvent(task, "status", { status: "failed", error: message, finishedAt: task.finishedAt });
535
+ persistRetainedRun(task);
536
+ activeRuns.delete(task.id);
537
+ logStream.end();
538
+ void prepared.codexAuthCleanup().catch(() => undefined);
539
+ writeRunStatus(task.id, `failed error=${message}`);
540
+ });
541
+ child.once("close", (code, signal) => {
542
+ cancellation.clear();
543
+ task.resultExitCode = typeof code === "number" ? code : null;
544
+ task.resultSignal = signal;
545
+ task.finishedAt = formatLocalTimestamp();
546
+ task.status = task.cancelRequested ? "canceled" : (task.resultExitCode ?? 1) === 0 ? "completed" : "failed";
547
+ task.error = task.status === "failed" ? `Command exited with code ${task.resultExitCode ?? "null"}` : null;
548
+ appendRunEvent(task, "status", {
549
+ status: task.status,
550
+ exitCode: task.resultExitCode,
551
+ signal: task.resultSignal,
552
+ error: task.error,
553
+ finishedAt: task.finishedAt,
554
+ });
555
+ persistRetainedRun(task);
556
+ activeRuns.delete(task.id);
557
+ logStream.end();
558
+ void prepared.codexAuthCleanup().catch(() => undefined);
559
+ if ((task.status === "completed" || task.status === "failed") && task.chatId) {
560
+ void notifyServerRunFinished({
561
+ serverBaseUrl: args.serverBaseUrl,
562
+ userId: args.userId,
563
+ agentToken: args.agentToken,
564
+ task,
565
+ }).catch((error) => {
566
+ const message = error instanceof Error ? error.message : String(error);
567
+ writeAgentInfraError(`run completion notify failed runId=${task.id}: ${message}`);
568
+ });
569
+ }
570
+ writeRunStatus(task.id, `completed status=${task.status} exitCode=${task.resultExitCode ?? "null"} signal=${task.resultSignal ?? "null"}`);
571
+ });
572
+ activeRuns.set(task.id, { task, child, logPath, logStream, requestCancel });
573
+ persistRetainedRun(task);
574
+ writeRunStatus(task.id, `started requestId=${args.requestId} cwd=${prepared.taskWorkspace}`);
575
+ return cloneRunTask(task);
576
+ }
577
+ async function notifyServerRunFinished(args) {
578
+ if (!args.task.chatId || (args.task.status !== "completed" && args.task.status !== "failed")) {
579
+ return;
580
+ }
581
+ await postJson(`${args.serverBaseUrl}/api/agent/run-finished`, {
582
+ userId: args.userId,
583
+ agentToken: args.agentToken,
584
+ chatId: args.task.chatId,
585
+ runId: args.task.id,
586
+ command: args.task.command,
587
+ status: args.task.status,
588
+ exitCode: args.task.resultExitCode,
589
+ signal: args.task.resultSignal,
590
+ finishedAt: args.task.finishedAt,
591
+ error: args.task.error,
592
+ });
593
+ }
594
+ async function handleRunRpcMessage(args) {
595
+ let requestId = "unknown";
596
+ let responseSubject = "";
597
+ try {
598
+ const payload = JSON.parse(runRpcCodec.decode(args.msg.data));
599
+ const request = normalizeRunRpcRequest({ request: payload, agentId: args.agentId });
600
+ requestId = request.requestId;
601
+ responseSubject = request.responseSubject;
602
+ if (request.action === "start") {
603
+ const task = await startManagedRun({
604
+ requestId,
605
+ runId: request.runId ?? requestId,
606
+ serverBaseUrl: args.serverBaseUrl,
607
+ userId: args.userId,
608
+ agentId: args.agentId,
609
+ command: request.command ?? "",
610
+ cwd: request.cwd,
611
+ chatId: request.chatId,
612
+ runtimeEnvPatch: request.runtimeEnvPatch,
613
+ codexAuthBundle: request.codexAuthBundle,
614
+ agentToken: args.agentToken,
615
+ });
616
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
617
+ return;
618
+ }
619
+ if (request.action === "list") {
620
+ const tasks = [...activeRuns.values()].map((entry) => cloneRunTask(entry.task));
621
+ const retained = [...retainedRuns.values()].filter((task) => !activeRuns.has(task.id)).map((task) => cloneRunTask(task));
622
+ const merged = [...tasks, ...retained]
623
+ .sort((a, b) => Date.parse(b.updatedAt) - Date.parse(a.updatedAt))
624
+ .slice(0, request.limit);
625
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, tasks: merged } });
626
+ return;
627
+ }
628
+ const stored = request.runId ? getStoredRun(request.runId) : null;
629
+ if (!stored || stored.agentId !== args.agentId || stored.userId !== args.userId) {
630
+ throw new Error("Run not found");
631
+ }
632
+ if (request.action === "cancel") {
633
+ const active = activeRuns.get(stored.id);
634
+ active?.requestCancel();
635
+ const task = cloneRunTask(active?.task ?? stored);
636
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
637
+ return;
638
+ }
639
+ const task = cloneRunTask(stored, request.sinceSeq);
640
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
641
+ }
642
+ catch (error) {
643
+ const message = error instanceof Error ? error.message : String(error);
644
+ if (responseSubject) {
645
+ publishRunRpcResponse({
646
+ nc: args.jetstream.nc,
647
+ responseSubject,
648
+ payload: { requestId, ok: false, error: message },
649
+ });
650
+ }
651
+ writeAgentError(`run rpc failed requestId=${requestId} error=${message}`);
652
+ }
653
+ }
654
+ function subscribeToRunRpc(args) {
655
+ const subject = buildAgentRunRpcSubject(args.userId, args.agentId);
656
+ args.jetstream.nc.subscribe(subject, {
657
+ callback: (error, msg) => {
658
+ if (error) {
659
+ const message = error instanceof Error ? error.message : String(error);
660
+ writeAgentError(`run rpc subscription error: ${message}`);
661
+ return;
662
+ }
663
+ void handleRunRpcMessage({
664
+ msg,
665
+ jetstream: args.jetstream,
666
+ serverBaseUrl: args.serverBaseUrl,
667
+ userId: args.userId,
668
+ agentId: args.agentId,
669
+ agentToken: args.agentToken,
670
+ });
671
+ },
672
+ });
673
+ writeAgentInfo(`run rpc subscribed subject=${subject}`);
674
+ }
374
675
  function isLikelyNatsAuthError(error) {
375
676
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
376
677
  return (message.includes("auth")
@@ -827,53 +1128,21 @@ async function handleShellRpcMessage(args) {
827
1128
  requestId = request.requestId;
828
1129
  responseSubject = request.responseSubject;
829
1130
  const startedAtMs = Date.now();
830
- const shellPath = resolveShellPath();
831
- const taskWorkspace = resolveTaskWorkspace(request.cwd);
832
- const codexAuth = await prepareCodexAuthBundle(request.codexAuthBundle);
833
- const baseTaskEnvPatch = {
834
- ...request.runtimeEnvPatch,
835
- ...(codexAuth?.envPatch ?? {}),
836
- WORKSPACE: taskWorkspace,
837
- };
838
- const taskGitEnv = await prepareTaskGitEnv({
839
- cwd: taskWorkspace,
840
- baseEnvPatch: baseTaskEnvPatch,
1131
+ const prepared = await prepareCommandExecution({
1132
+ cwd: request.cwd,
1133
+ runtimeEnvPatch: request.runtimeEnvPatch,
1134
+ codexAuthBundle: request.codexAuthBundle,
841
1135
  });
842
- const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
843
- const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
844
- const child = request.kind === "apply_patch"
845
- ? spawn("apply_patch", {
846
- cwd: taskWorkspace,
847
- detached: process.platform !== "win32",
848
- env: {
849
- ...process.env,
850
- ...baseTaskEnvPatch,
851
- ...taskGitEnv.envPatch,
852
- PATH: taskPath,
853
- DOER_AGENT_TOKEN: args.agentToken,
854
- },
855
- stdio: ["pipe", "pipe", "pipe"],
856
- })
857
- : spawn(request.command ?? "", {
858
- cwd: taskWorkspace,
859
- shell: shellPath,
860
- detached: process.platform !== "win32",
861
- env: {
862
- ...process.env,
863
- ...baseTaskEnvPatch,
864
- ...taskGitEnv.envPatch,
865
- PATH: taskPath,
866
- DOER_AGENT_TOKEN: args.agentToken,
867
- },
868
- stdio: ["ignore", "pipe", "pipe"],
869
- });
870
- if (request.kind === "apply_patch") {
871
- child.stdin?.write(request.patch ?? "");
872
- child.stdin?.end();
873
- }
874
- writeRpcStatus(requestId, `started kind=${request.kind} cwd=${taskWorkspace} shell=${request.kind === "shell" ? shellPath : "apply_patch"}`);
875
- child.stdout.setEncoding("utf8");
876
- child.stderr.setEncoding("utf8");
1136
+ const child = spawnPreparedCommand({
1137
+ kind: request.kind,
1138
+ command: request.command,
1139
+ patch: request.patch,
1140
+ shellPath: prepared.shellPath,
1141
+ taskWorkspace: prepared.taskWorkspace,
1142
+ env: prepared.env,
1143
+ agentToken: args.agentToken,
1144
+ });
1145
+ writeRpcStatus(requestId, `started kind=${request.kind} cwd=${prepared.taskWorkspace} shell=${request.kind === "shell" ? prepared.shellPath : "apply_patch"}`);
877
1146
  child.stdout.on("data", (chunk) => {
878
1147
  stdout += chunk;
879
1148
  writeRpcStream(requestId, "stdout", chunk);
@@ -899,6 +1168,7 @@ async function handleShellRpcMessage(args) {
899
1168
  }).finally(() => {
900
1169
  clearTimeout(timeout);
901
1170
  });
1171
+ await prepared.codexAuthCleanup().catch(() => undefined);
902
1172
  publishShellRpcResponse({
903
1173
  nc: args.jetstream.nc,
904
1174
  responseSubject,
@@ -1154,6 +1424,93 @@ async function prepareCodexAuthBundle(bundle) {
1154
1424
  },
1155
1425
  };
1156
1426
  }
1427
+ async function prepareCommandExecution(args) {
1428
+ const shellPath = resolveShellPath();
1429
+ const taskWorkspace = resolveTaskWorkspace(args.cwd);
1430
+ const codexAuth = await prepareCodexAuthBundle(args.codexAuthBundle);
1431
+ const baseTaskEnvPatch = {
1432
+ ...args.runtimeEnvPatch,
1433
+ ...(codexAuth?.envPatch ?? {}),
1434
+ WORKSPACE: taskWorkspace,
1435
+ };
1436
+ const taskGitEnv = await prepareTaskGitEnv({
1437
+ cwd: taskWorkspace,
1438
+ baseEnvPatch: baseTaskEnvPatch,
1439
+ });
1440
+ const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
1441
+ const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
1442
+ return {
1443
+ shellPath,
1444
+ taskWorkspace,
1445
+ taskPath,
1446
+ env: {
1447
+ ...process.env,
1448
+ ...baseTaskEnvPatch,
1449
+ ...taskGitEnv.envPatch,
1450
+ PATH: taskPath,
1451
+ },
1452
+ taskGitMeta: taskGitEnv.meta ?? {},
1453
+ codexAuthMeta: codexAuth?.meta ?? { codexAuthSynced: false },
1454
+ codexAuthCleanup: codexAuth?.cleanup ?? (async () => { }),
1455
+ };
1456
+ }
1457
+ function spawnPreparedCommand(args) {
1458
+ const env = {
1459
+ ...args.env,
1460
+ DOER_AGENT_TOKEN: args.agentToken,
1461
+ };
1462
+ const child = args.kind === "apply_patch"
1463
+ ? spawn("apply_patch", {
1464
+ cwd: args.taskWorkspace,
1465
+ detached: process.platform !== "win32",
1466
+ env,
1467
+ stdio: ["pipe", "pipe", "pipe"],
1468
+ })
1469
+ : spawn(args.command ?? "", {
1470
+ cwd: args.taskWorkspace,
1471
+ shell: args.shellPath,
1472
+ detached: process.platform !== "win32",
1473
+ env,
1474
+ stdio: ["ignore", "pipe", "pipe"],
1475
+ });
1476
+ if (args.kind === "apply_patch") {
1477
+ child.stdin?.write(args.patch ?? "");
1478
+ child.stdin?.end();
1479
+ }
1480
+ child.stdout.setEncoding("utf8");
1481
+ child.stderr.setEncoding("utf8");
1482
+ return child;
1483
+ }
1484
+ function createManagedCancellation(child) {
1485
+ let cancelStage1Timer = null;
1486
+ let cancelStage2Timer = null;
1487
+ let cancelSignalSent = false;
1488
+ return {
1489
+ requestCancel: () => {
1490
+ if (cancelSignalSent) {
1491
+ return;
1492
+ }
1493
+ cancelSignalSent = true;
1494
+ sendSignalToTaskProcess(child, "SIGINT");
1495
+ cancelStage1Timer = setTimeout(() => {
1496
+ sendSignalToTaskProcess(child, "SIGTERM");
1497
+ }, 1200);
1498
+ cancelStage1Timer.unref?.();
1499
+ cancelStage2Timer = setTimeout(() => {
1500
+ sendSignalToTaskProcess(child, "SIGKILL");
1501
+ }, 3500);
1502
+ cancelStage2Timer.unref?.();
1503
+ },
1504
+ clear: () => {
1505
+ if (cancelStage1Timer) {
1506
+ clearTimeout(cancelStage1Timer);
1507
+ }
1508
+ if (cancelStage2Timer) {
1509
+ clearTimeout(cancelStage2Timer);
1510
+ }
1511
+ },
1512
+ };
1513
+ }
1157
1514
  async function runTask(args) {
1158
1515
  activeTaskLogContext = {
1159
1516
  jetstream: args.jetstream,
@@ -1355,22 +1712,14 @@ async function connectBootstrapWithRetry(args) {
1355
1712
  if (natsServers.length === 0) {
1356
1713
  throw new Error("No NATS servers configured by server");
1357
1714
  }
1358
- const taskConfig = parseBootstrapTaskConfig(natsBootstrap.tasks);
1359
- if (!taskConfig) {
1360
- throw new Error("Invalid task dispatch config from server");
1361
- }
1362
1715
  const natsToken = normalizeNatsToken(natsBootstrap.auth);
1363
- const pendingTaskIds = normalizeTaskIds(natsBootstrap.pendingTaskIds);
1364
1716
  const jetstream = await initJetStreamContext({
1365
1717
  userId: args.userId,
1366
1718
  servers: natsServers,
1367
1719
  token: natsToken,
1368
- taskStream: taskConfig.stream,
1369
- taskSubject: taskConfig.subject,
1370
- taskDurable: taskConfig.durable,
1371
1720
  });
1372
- writeAgentInfraError(`bootstrap ok servers=${natsServers.length} taskStream=${taskConfig.stream} taskSubject=${taskConfig.subject} taskDurable=${taskConfig.durable}`);
1373
- return { natsBootstrap, pendingTaskIds, jetstream };
1721
+ writeAgentInfraError(`bootstrap ok servers=${natsServers.length} eventStream=${jetstream.stream} eventSubject=${jetstream.subject}`);
1722
+ return { natsBootstrap, jetstream };
1374
1723
  }
1375
1724
  catch (error) {
1376
1725
  const message = error instanceof Error ? error.message : String(error);
@@ -1396,12 +1745,11 @@ async function main() {
1396
1745
  throw new Error("user-id and agent-secret are required");
1397
1746
  }
1398
1747
  const agentToken = agentSecret;
1399
- let { natsBootstrap, pendingTaskIds, jetstream } = await connectBootstrapWithRetry({
1748
+ const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
1400
1749
  serverBaseUrl,
1401
1750
  userId,
1402
1751
  agentToken,
1403
1752
  });
1404
- const maxConcurrency = Math.max(1, parseEnvInteger(process.env.DOER_AGENT_MAX_CONCURRENCY, 5));
1405
1753
  const agentVersion = await resolveAgentVersion();
1406
1754
  const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
1407
1755
  if (!initialAgentId) {
@@ -1418,11 +1766,6 @@ async function main() {
1418
1766
  process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
1419
1767
  process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
1420
1768
  process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
1421
- process.stdout.write(`- taskStream: ${jetstream.taskStream}\n`);
1422
- process.stdout.write(`- taskSubject: ${jetstream.taskSubject}\n`);
1423
- process.stdout.write(`- taskDurable: ${jetstream.taskDurable}\n`);
1424
- process.stdout.write(`- pendingTasks: ${pendingTaskIds.length}\n`);
1425
- process.stdout.write(`- maxConcurrency: ${maxConcurrency}\n\n`);
1426
1769
  process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
1427
1770
  if (requestedServerBaseUrl !== serverBaseUrl) {
1428
1771
  writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
@@ -1444,27 +1787,6 @@ async function main() {
1444
1787
  heartbeatHealthy = false;
1445
1788
  });
1446
1789
  }, 10_000);
1447
- const inFlightTasks = new Set();
1448
- async function waitForAvailableSlot() {
1449
- while (inFlightTasks.size >= maxConcurrency) {
1450
- try {
1451
- await Promise.race(inFlightTasks);
1452
- }
1453
- catch {
1454
- // keep draining slots even when a task fails.
1455
- }
1456
- }
1457
- }
1458
- function trackInFlight(taskPromise) {
1459
- inFlightTasks.add(taskPromise);
1460
- void taskPromise.finally(() => {
1461
- inFlightTasks.delete(taskPromise);
1462
- });
1463
- }
1464
- function scheduleTask(taskPromiseFactory) {
1465
- const taskPromise = taskPromiseFactory();
1466
- trackInFlight(taskPromise);
1467
- }
1468
1790
  subscribeToFsRpc({
1469
1791
  jetstream,
1470
1792
  serverBaseUrl,
@@ -1478,176 +1800,16 @@ async function main() {
1478
1800
  agentId: initialAgentId,
1479
1801
  agentToken,
1480
1802
  });
1481
- for (const pendingTaskId of pendingTaskIds) {
1482
- await waitForAvailableSlot();
1483
- scheduleTask(async () => {
1484
- try {
1485
- const task = await claimTaskById({
1486
- serverBaseUrl,
1487
- userId,
1488
- agentToken,
1489
- taskId: pendingTaskId,
1490
- });
1491
- if (task) {
1492
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1493
- }
1494
- }
1495
- catch (error) {
1496
- const message = error instanceof Error ? error.message : String(error);
1497
- writeAgentError(`pending task bootstrap failed taskId=${pendingTaskId}: ${message}`);
1498
- }
1499
- });
1500
- }
1501
- let connected = false;
1502
- while (true) {
1503
- try {
1504
- const consumer = await jetstream.js.consumers.get(jetstream.taskStream, jetstream.taskDurable);
1505
- if (!connected) {
1506
- writeAgentInfo(`connected to task stream (NATS ok) at=${formatLocalTimestamp()} userId=${userId}`);
1507
- connected = true;
1508
- }
1509
- const messages = await consumer.fetch({ max_messages: 200, expires: 5_000 });
1510
- for await (const msg of messages) {
1511
- await waitForAvailableSlot();
1512
- scheduleTask(async () => {
1513
- let dispatch;
1514
- try {
1515
- dispatch = jetstream.taskCodec.decode(msg.data);
1516
- }
1517
- catch (error) {
1518
- const message = error instanceof Error ? error.message : String(error);
1519
- writeAgentError(`task dispatch decode failed: ${message}`);
1520
- msg.term();
1521
- return;
1522
- }
1523
- writeAgentInfo(`task dispatch received taskId=${dispatch.taskId} createdAt=${dispatch.createdAt} subject=${jetstream.taskSubject} durable=${jetstream.taskDurable}`);
1524
- const ackKeepAliveIntervalMs = 10_000;
1525
- let ackKeepAliveTimer = null;
1526
- const stopAckKeepAlive = () => {
1527
- if (ackKeepAliveTimer) {
1528
- clearInterval(ackKeepAliveTimer);
1529
- ackKeepAliveTimer = null;
1530
- }
1531
- };
1532
- try {
1533
- ackKeepAliveTimer = setInterval(() => {
1534
- try {
1535
- msg.working();
1536
- }
1537
- catch (error) {
1538
- const message = error instanceof Error ? error.message : String(error);
1539
- writeAgentError(`task dispatch keepalive failed taskId=${dispatch.taskId}: ${message}`);
1540
- }
1541
- }, ackKeepAliveIntervalMs);
1542
- ackKeepAliveTimer.unref?.();
1543
- if (dispatch.type === "cancel") {
1544
- stopAckKeepAlive();
1545
- const canceled = requestTaskCancellation(dispatch.taskId, "nats_dispatch");
1546
- writeAgentInfo(`task cancel dispatch handled taskId=${dispatch.taskId} result=${canceled ? "signaled" : "not-running"}`);
1547
- msg.ack();
1548
- return;
1549
- }
1550
- const task = await claimTaskById({
1551
- serverBaseUrl,
1552
- userId,
1553
- agentToken,
1554
- taskId: dispatch.taskId,
1555
- });
1556
- if (!task) {
1557
- stopAckKeepAlive();
1558
- writeAgentInfo(`task dispatch acked without run taskId=${dispatch.taskId} reason=already-claimed`);
1559
- msg.ack();
1560
- return;
1561
- }
1562
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1563
- stopAckKeepAlive();
1564
- msg.ack();
1565
- writeAgentInfo(`task dispatch acked taskId=${dispatch.taskId}`);
1566
- }
1567
- catch (error) {
1568
- stopAckKeepAlive();
1569
- const message = error instanceof Error ? error.message : String(error);
1570
- writeAgentError(`task dispatch handle failed taskId=${dispatch.taskId}: ${message}`);
1571
- writeAgentError(`task dispatch sending nak taskId=${dispatch.taskId}`);
1572
- msg.nak();
1573
- }
1574
- });
1575
- }
1576
- }
1577
- catch (error) {
1578
- const message = error instanceof Error ? error.message : String(error);
1579
- if (connected) {
1580
- writeAgentError(`task stream disconnected at=${formatLocalTimestamp()} reason=${message}`);
1581
- }
1582
- connected = false;
1583
- if (isLikelyNatsAuthError(error)) {
1584
- writeAgentError(`nats auth error detected. refreshing bootstrap credentials...`);
1585
- }
1586
- else if (isLikelyNatsReconnectError(error)) {
1587
- writeAgentError(`nats connection lost. refreshing bootstrap/session...`);
1588
- }
1589
- else {
1590
- writeAgentError(`task stream error detected. forcing bootstrap/session refresh... reason=${message}`);
1591
- }
1592
- if (inFlightTasks.size > 0) {
1593
- writeAgentInfo(`waiting for in-flight tasks before reconnect count=${inFlightTasks.size}`);
1594
- await Promise.allSettled(Array.from(inFlightTasks));
1595
- }
1596
- try {
1597
- await jetstream.nc.close();
1598
- }
1599
- catch {
1600
- // noop
1601
- }
1602
- const refreshed = await connectBootstrapWithRetry({
1603
- serverBaseUrl,
1604
- userId,
1605
- agentToken,
1606
- });
1607
- natsBootstrap = refreshed.natsBootstrap;
1608
- pendingTaskIds = refreshed.pendingTaskIds;
1609
- jetstream = refreshed.jetstream;
1610
- const refreshedAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
1611
- if (!refreshedAgentId) {
1612
- throw new Error("agent id missing from refreshed bootstrap");
1613
- }
1614
- subscribeToFsRpc({
1615
- jetstream,
1616
- serverBaseUrl,
1617
- userId,
1618
- agentId: refreshedAgentId,
1619
- agentToken,
1620
- });
1621
- subscribeToShellRpc({
1622
- jetstream,
1623
- userId,
1624
- agentId: refreshedAgentId,
1625
- agentToken,
1626
- });
1627
- for (const pendingTaskId of pendingTaskIds) {
1628
- await waitForAvailableSlot();
1629
- scheduleTask(async () => {
1630
- try {
1631
- const task = await claimTaskById({
1632
- serverBaseUrl,
1633
- userId,
1634
- agentToken,
1635
- taskId: pendingTaskId,
1636
- });
1637
- if (task) {
1638
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1639
- }
1640
- }
1641
- catch (pendingError) {
1642
- const pendingMessage = pendingError instanceof Error ? pendingError.message : String(pendingError);
1643
- writeAgentError(`pending task refresh failed taskId=${pendingTaskId}: ${pendingMessage}`);
1644
- }
1645
- });
1646
- }
1647
- writeAgentInfo(`nats credentials refreshed at=${formatLocalTimestamp()} agentId=${typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "unknown"}`);
1648
- continue;
1649
- }
1650
- }
1803
+ subscribeToRunRpc({
1804
+ jetstream,
1805
+ serverBaseUrl,
1806
+ userId,
1807
+ agentId: initialAgentId,
1808
+ agentToken,
1809
+ });
1810
+ await new Promise(() => {
1811
+ // Keep the long-lived agent process alive for RPC subscriptions and heartbeat.
1812
+ });
1651
1813
  }
1652
1814
  main().catch((error) => {
1653
1815
  const message = error instanceof Error ? error.message : String(error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "doer-agent",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "Reverse-polling agent runtime for doer",
5
5
  "type": "module",
6
6
  "main": "dist/agent.js",