doer-agent 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/agent.js +417 -248
  2. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { spawn, spawnSync } from "node:child_process";
2
- import { existsSync, statSync } from "node:fs";
2
+ import { createWriteStream, existsSync, statSync } from "node:fs";
3
3
  import { chmod, mkdir, open, readFile, readdir, stat, writeFile } from "node:fs/promises";
4
4
  import path from "node:path";
5
5
  import { fileURLToPath } from "node:url";
@@ -13,10 +13,16 @@ const activeTaskCancelRequests = new Map();
13
13
  let workspaceRootOverride = null;
14
14
  const fsRpcCodec = StringCodec();
15
15
  const shellRpcCodec = StringCodec();
16
+ const runRpcCodec = StringCodec();
17
+ const activeRuns = new Map();
18
+ const retainedRuns = new Map();
16
19
  function sanitizeUserId(userId) {
17
20
  const normalized = userId.trim().replace(/[^a-zA-Z0-9_-]/g, "_");
18
21
  return normalized.length > 0 ? normalized : "anonymous";
19
22
  }
23
+ function buildAgentRunRpcSubject(userId, agentId) {
24
+ return `doer.agent.run.rpc.${sanitizeUserId(userId)}.${agentId.trim()}`;
25
+ }
20
26
  function normalizeNatsServers(value) {
21
27
  if (!Array.isArray(value)) {
22
28
  return [];
@@ -92,12 +98,6 @@ async function initJetStreamContext(args) {
92
98
  const nc = await connect(args.token ? { servers: args.servers, token: args.token } : { servers: args.servers });
93
99
  const jsm = await nc.jetstreamManager();
94
100
  await ensureJetStreamInfra({ jsm, stream, subject, durable });
95
- await ensureJetStreamInfra({
96
- jsm,
97
- stream: args.taskStream,
98
- subject: args.taskSubject,
99
- durable: args.taskDurable,
100
- });
101
101
  void nc.closed().then((error) => {
102
102
  if (error) {
103
103
  writeAgentInfraError(`nats connection closed with error: ${error.message}`);
@@ -126,14 +126,10 @@ async function initJetStreamContext(args) {
126
126
  js: nc.jetstream(),
127
127
  jsm,
128
128
  codec: JSONCodec(),
129
- taskCodec: JSONCodec(),
130
129
  subject,
131
130
  stream,
132
131
  durable,
133
132
  servers: args.servers,
134
- taskStream: args.taskStream,
135
- taskSubject: args.taskSubject,
136
- taskDurable: args.taskDurable,
137
133
  };
138
134
  }
139
135
  function resolveCodexHomePath() {
@@ -357,6 +353,289 @@ function writeTaskStream(taskId, stream, chunk) {
357
353
  function writeTaskUpload(taskId, message) {
358
354
  process.stdout.write(`[doer-agent][task=${taskId}][upload] ${message}\n`);
359
355
  }
356
+ function writeRpcStream(requestId, stream, chunk) {
357
+ const target = stream === "stdout" ? process.stdout : process.stderr;
358
+ const lines = chunk.replace(/\r/g, "\n").split("\n");
359
+ for (let i = 0; i < lines.length; i += 1) {
360
+ const line = lines[i];
361
+ if (line.length === 0 && i === lines.length - 1) {
362
+ continue;
363
+ }
364
+ target.write(`[doer-agent][rpc=${requestId}][${stream}] ${line}\n`);
365
+ }
366
+ }
367
+ function writeRpcStatus(requestId, message) {
368
+ process.stdout.write(`[doer-agent][rpc=${requestId}][status] ${message}\n`);
369
+ }
370
+ function writeRunStatus(runId, message) {
371
+ process.stdout.write(`[doer-agent][run=${runId}][status] ${message}\n`);
372
+ }
373
+ function writeRunStream(runId, stream, chunk) {
374
+ const target = stream === "stdout" ? process.stdout : process.stderr;
375
+ const lines = chunk.split(/\r?\n/);
376
+ for (let index = 0; index < lines.length; index += 1) {
377
+ const line = lines[index];
378
+ if (!line && index === lines.length - 1) {
379
+ continue;
380
+ }
381
+ target.write(`[doer-agent][run=${runId}][${stream}] ${line}\n`);
382
+ }
383
+ }
384
+ function normalizeRunRpcRequest(args) {
385
+ const requestId = typeof args.request.requestId === "string" ? args.request.requestId.trim() : "";
386
+ if (!requestId) {
387
+ throw new Error("missing requestId");
388
+ }
389
+ const requestAgentId = typeof args.request.agentId === "string" ? args.request.agentId.trim() : "";
390
+ if (!requestAgentId || requestAgentId !== args.agentId) {
391
+ throw new Error("agent id mismatch");
392
+ }
393
+ const actionRaw = typeof args.request.action === "string" ? args.request.action.trim() : "";
394
+ const action = actionRaw === "cancel" || actionRaw === "get" || actionRaw === "list" ? actionRaw : "start";
395
+ const responseSubject = typeof args.request.responseSubject === "string" ? args.request.responseSubject.trim() : "";
396
+ if (!responseSubject) {
397
+ throw new Error("missing responseSubject");
398
+ }
399
+ const runId = typeof args.request.runId === "string" && args.request.runId.trim() ? args.request.runId.trim() : null;
400
+ const command = typeof args.request.command === "string" && args.request.command.trim() ? args.request.command.trim() : null;
401
+ if (action === "start" && !command) {
402
+ throw new Error("missing command");
403
+ }
404
+ if ((action === "get" || action === "cancel") && !runId) {
405
+ throw new Error("missing runId");
406
+ }
407
+ const cwd = typeof args.request.cwd === "string" && args.request.cwd.trim() ? args.request.cwd.trim() : null;
408
+ const chatId = typeof args.request.chatId === "string" && args.request.chatId.trim() ? args.request.chatId.trim() : null;
409
+ const sinceSeqRaw = Number(args.request.sinceSeq);
410
+ const sinceSeq = Number.isInteger(sinceSeqRaw) && sinceSeqRaw >= 0 ? sinceSeqRaw : null;
411
+ const limitRaw = Number(args.request.limit);
412
+ const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(Math.floor(limitRaw), 200)) : 50;
413
+ return {
414
+ requestId,
415
+ action,
416
+ runId,
417
+ command,
418
+ cwd,
419
+ chatId,
420
+ responseSubject,
421
+ sinceSeq,
422
+ limit,
423
+ runtimeEnvPatch: normalizeEnvPatch(args.request.runtimeEnvPatch),
424
+ codexAuthBundle: normalizeShellRpcCodexAuthBundle(args.request.codexAuth),
425
+ };
426
+ }
427
+ function publishRunRpcResponse(args) {
428
+ args.nc.publish(args.responseSubject, runRpcCodec.encode(JSON.stringify(args.payload)));
429
+ }
430
+ async function resolveRunLogsDir() {
431
+ const workspaceRoot = workspaceRootOverride ?? (process.env.WORKSPACE?.trim() || process.cwd());
432
+ const dir = path.join(workspaceRoot, ".doer-agent", "runs");
433
+ await mkdir(dir, { recursive: true });
434
+ return dir;
435
+ }
436
+ function cloneRunTask(task, sinceSeq) {
437
+ return {
438
+ ...task,
439
+ events: task.events
440
+ .filter((event) => typeof sinceSeq === "number" ? event.seq > sinceSeq : true)
441
+ .map((event) => ({ ...event, payload: { ...event.payload } })),
442
+ };
443
+ }
444
+ function appendRunEvent(task, type, payload) {
445
+ const timestamp = formatLocalTimestamp();
446
+ const seq = task.agentEventAckSeq + 1;
447
+ task.agentEventAckSeq = seq;
448
+ task.updatedAt = timestamp;
449
+ task.events.push({ seq, type, timestamp, payload });
450
+ }
451
+ function persistRetainedRun(task) {
452
+ retainedRuns.set(task.id, cloneRunTask(task));
453
+ }
454
+ function getStoredRun(runId) {
455
+ const active = activeRuns.get(runId);
456
+ if (active) {
457
+ return active.task;
458
+ }
459
+ return retainedRuns.get(runId) ?? null;
460
+ }
461
+ async function startManagedRun(args) {
462
+ const prepared = await prepareCommandExecution({
463
+ cwd: args.cwd,
464
+ runtimeEnvPatch: args.runtimeEnvPatch,
465
+ codexAuthBundle: args.codexAuthBundle,
466
+ });
467
+ const child = spawnPreparedCommand({
468
+ kind: "shell",
469
+ command: args.command,
470
+ patch: null,
471
+ shellPath: prepared.shellPath,
472
+ taskWorkspace: prepared.taskWorkspace,
473
+ env: prepared.env,
474
+ agentToken: args.agentToken,
475
+ });
476
+ const logsDir = await resolveRunLogsDir();
477
+ const logPath = path.join(logsDir, `${args.runId}.log`);
478
+ const logStream = createWriteStream(logPath, { flags: "a", encoding: "utf8" });
479
+ const now = formatLocalTimestamp();
480
+ const task = {
481
+ id: args.runId,
482
+ userId: args.userId,
483
+ agentId: args.agentId,
484
+ command: args.command,
485
+ cwd: args.cwd,
486
+ chatId: args.chatId,
487
+ status: "running",
488
+ cancelRequested: false,
489
+ resultExitCode: null,
490
+ resultSignal: null,
491
+ error: null,
492
+ createdAt: now,
493
+ updatedAt: now,
494
+ startedAt: now,
495
+ finishedAt: null,
496
+ agentEventAckSeq: 0,
497
+ events: [],
498
+ };
499
+ appendRunEvent(task, "meta", {
500
+ host: process.platform,
501
+ pid: child.pid ?? null,
502
+ startedAt: now,
503
+ command: args.command,
504
+ cwd: prepared.taskWorkspace,
505
+ requestedCwd: args.cwd,
506
+ shell: prepared.shellPath,
507
+ logPath,
508
+ ...prepared.taskGitMeta,
509
+ ...prepared.codexAuthMeta,
510
+ });
511
+ appendRunEvent(task, "status", { status: "running" });
512
+ const cancellation = createManagedCancellation(child);
513
+ const requestCancel = () => {
514
+ if (task.status === "completed" || task.status === "failed" || task.status === "canceled") {
515
+ return;
516
+ }
517
+ task.cancelRequested = true;
518
+ task.updatedAt = formatLocalTimestamp();
519
+ writeRunStatus(task.id, "cancel requested");
520
+ cancellation.requestCancel();
521
+ };
522
+ const recordChunk = (stream, chunk) => {
523
+ appendRunEvent(task, stream, { chunk, at: formatLocalTimestamp() });
524
+ logStream.write(JSON.stringify({ at: formatLocalTimestamp(), stream, chunk }) + "\n");
525
+ writeRunStream(task.id, stream, chunk);
526
+ };
527
+ child.stdout.on("data", (chunk) => recordChunk("stdout", chunk));
528
+ child.stderr.on("data", (chunk) => recordChunk("stderr", chunk));
529
+ child.once("error", (error) => {
530
+ const message = error instanceof Error ? error.message : String(error);
531
+ task.status = "failed";
532
+ task.error = message;
533
+ task.finishedAt = formatLocalTimestamp();
534
+ appendRunEvent(task, "status", { status: "failed", error: message, finishedAt: task.finishedAt });
535
+ persistRetainedRun(task);
536
+ activeRuns.delete(task.id);
537
+ logStream.end();
538
+ void prepared.codexAuthCleanup().catch(() => undefined);
539
+ writeRunStatus(task.id, `failed error=${message}`);
540
+ });
541
+ child.once("close", (code, signal) => {
542
+ cancellation.clear();
543
+ task.resultExitCode = typeof code === "number" ? code : null;
544
+ task.resultSignal = signal;
545
+ task.finishedAt = formatLocalTimestamp();
546
+ task.status = task.cancelRequested ? "canceled" : (task.resultExitCode ?? 1) === 0 ? "completed" : "failed";
547
+ task.error = task.status === "failed" ? `Command exited with code ${task.resultExitCode ?? "null"}` : null;
548
+ appendRunEvent(task, "status", {
549
+ status: task.status,
550
+ exitCode: task.resultExitCode,
551
+ signal: task.resultSignal,
552
+ error: task.error,
553
+ finishedAt: task.finishedAt,
554
+ });
555
+ persistRetainedRun(task);
556
+ activeRuns.delete(task.id);
557
+ logStream.end();
558
+ void prepared.codexAuthCleanup().catch(() => undefined);
559
+ writeRunStatus(task.id, `completed status=${task.status} exitCode=${task.resultExitCode ?? "null"} signal=${task.resultSignal ?? "null"}`);
560
+ });
561
+ activeRuns.set(task.id, { task, child, logPath, logStream, requestCancel });
562
+ persistRetainedRun(task);
563
+ writeRunStatus(task.id, `started requestId=${args.requestId} cwd=${prepared.taskWorkspace}`);
564
+ return cloneRunTask(task);
565
+ }
566
+ async function handleRunRpcMessage(args) {
567
+ let requestId = "unknown";
568
+ let responseSubject = "";
569
+ try {
570
+ const payload = JSON.parse(runRpcCodec.decode(args.msg.data));
571
+ const request = normalizeRunRpcRequest({ request: payload, agentId: args.agentId });
572
+ requestId = request.requestId;
573
+ responseSubject = request.responseSubject;
574
+ if (request.action === "start") {
575
+ const task = await startManagedRun({
576
+ requestId,
577
+ runId: request.runId ?? requestId,
578
+ userId: args.userId,
579
+ agentId: args.agentId,
580
+ command: request.command ?? "",
581
+ cwd: request.cwd,
582
+ chatId: request.chatId,
583
+ runtimeEnvPatch: request.runtimeEnvPatch,
584
+ codexAuthBundle: request.codexAuthBundle,
585
+ agentToken: args.agentToken,
586
+ });
587
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
588
+ return;
589
+ }
590
+ if (request.action === "list") {
591
+ const tasks = [...activeRuns.values()].map((entry) => cloneRunTask(entry.task));
592
+ const retained = [...retainedRuns.values()].filter((task) => !activeRuns.has(task.id)).map((task) => cloneRunTask(task));
593
+ const merged = [...tasks, ...retained]
594
+ .sort((a, b) => Date.parse(b.updatedAt) - Date.parse(a.updatedAt))
595
+ .slice(0, request.limit);
596
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, tasks: merged } });
597
+ return;
598
+ }
599
+ const stored = request.runId ? getStoredRun(request.runId) : null;
600
+ if (!stored || stored.agentId !== args.agentId || stored.userId !== args.userId) {
601
+ throw new Error("Run not found");
602
+ }
603
+ if (request.action === "cancel") {
604
+ const active = activeRuns.get(stored.id);
605
+ active?.requestCancel();
606
+ const task = cloneRunTask(active?.task ?? stored);
607
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
608
+ return;
609
+ }
610
+ const task = cloneRunTask(stored, request.sinceSeq);
611
+ publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
612
+ }
613
+ catch (error) {
614
+ const message = error instanceof Error ? error.message : String(error);
615
+ if (responseSubject) {
616
+ publishRunRpcResponse({
617
+ nc: args.jetstream.nc,
618
+ responseSubject,
619
+ payload: { requestId, ok: false, error: message },
620
+ });
621
+ }
622
+ writeAgentError(`run rpc failed requestId=${requestId} error=${message}`);
623
+ }
624
+ }
625
+ function subscribeToRunRpc(args) {
626
+ const subject = buildAgentRunRpcSubject(args.userId, args.agentId);
627
+ args.jetstream.nc.subscribe(subject, {
628
+ callback: (error, msg) => {
629
+ if (error) {
630
+ const message = error instanceof Error ? error.message : String(error);
631
+ writeAgentError(`run rpc subscription error: ${message}`);
632
+ return;
633
+ }
634
+ void handleRunRpcMessage({ msg, jetstream: args.jetstream, userId: args.userId, agentId: args.agentId, agentToken: args.agentToken });
635
+ },
636
+ });
637
+ writeAgentInfo(`run rpc subscribed subject=${subject}`);
638
+ }
360
639
  function isLikelyNatsAuthError(error) {
361
640
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
362
641
  return (message.includes("auth")
@@ -753,10 +1032,15 @@ function normalizeShellRpcRequest(args) {
753
1032
  if (requestAgentId !== args.agentId) {
754
1033
  throw new Error("agent id mismatch");
755
1034
  }
1035
+ const kind = args.request.kind === "apply_patch" ? "apply_patch" : "shell";
756
1036
  const command = typeof args.request.command === "string" ? args.request.command.trim() : "";
757
- if (!command) {
1037
+ const patch = typeof args.request.patch === "string" ? args.request.patch : "";
1038
+ if (kind === "shell" && !command) {
758
1039
  throw new Error("missing command");
759
1040
  }
1041
+ if (kind === "apply_patch" && !patch.trim()) {
1042
+ throw new Error("missing patch");
1043
+ }
760
1044
  const responseSubject = typeof args.request.responseSubject === "string" ? args.request.responseSubject.trim() : "";
761
1045
  if (!responseSubject) {
762
1046
  throw new Error("missing responseSubject");
@@ -765,8 +1049,10 @@ function normalizeShellRpcRequest(args) {
765
1049
  const timeoutRaw = Number(args.request.timeoutMs);
766
1050
  const timeoutMs = Number.isFinite(timeoutRaw) ? Math.max(1000, Math.min(Math.floor(timeoutRaw), 300000)) : 30000;
767
1051
  return {
1052
+ kind,
768
1053
  requestId,
769
- command,
1054
+ command: kind === "shell" ? command : null,
1055
+ patch: kind === "apply_patch" ? patch : null,
770
1056
  cwd,
771
1057
  timeoutMs,
772
1058
  responseSubject,
@@ -805,40 +1091,29 @@ async function handleShellRpcMessage(args) {
805
1091
  const request = normalizeShellRpcRequest({ request: payload, agentId: args.agentId });
806
1092
  requestId = request.requestId;
807
1093
  responseSubject = request.responseSubject;
808
- const shellPath = resolveShellPath();
809
- const taskWorkspace = resolveTaskWorkspace(request.cwd);
810
- const codexAuth = await prepareCodexAuthBundle(request.codexAuthBundle);
811
- const baseTaskEnvPatch = {
812
- ...request.runtimeEnvPatch,
813
- ...(codexAuth?.envPatch ?? {}),
814
- WORKSPACE: taskWorkspace,
815
- };
816
- const taskGitEnv = await prepareTaskGitEnv({
817
- cwd: taskWorkspace,
818
- baseEnvPatch: baseTaskEnvPatch,
1094
+ const startedAtMs = Date.now();
1095
+ const prepared = await prepareCommandExecution({
1096
+ cwd: request.cwd,
1097
+ runtimeEnvPatch: request.runtimeEnvPatch,
1098
+ codexAuthBundle: request.codexAuthBundle,
819
1099
  });
820
- const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
821
- const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
822
- const child = spawn(request.command, {
823
- cwd: taskWorkspace,
824
- shell: shellPath,
825
- detached: process.platform !== "win32",
826
- env: {
827
- ...process.env,
828
- ...baseTaskEnvPatch,
829
- ...taskGitEnv.envPatch,
830
- PATH: taskPath,
831
- DOER_AGENT_TOKEN: args.agentToken,
832
- },
833
- stdio: ["ignore", "pipe", "pipe"],
1100
+ const child = spawnPreparedCommand({
1101
+ kind: request.kind,
1102
+ command: request.command,
1103
+ patch: request.patch,
1104
+ shellPath: prepared.shellPath,
1105
+ taskWorkspace: prepared.taskWorkspace,
1106
+ env: prepared.env,
1107
+ agentToken: args.agentToken,
834
1108
  });
835
- child.stdout.setEncoding("utf8");
836
- child.stderr.setEncoding("utf8");
1109
+ writeRpcStatus(requestId, `started kind=${request.kind} cwd=${prepared.taskWorkspace} shell=${request.kind === "shell" ? prepared.shellPath : "apply_patch"}`);
837
1110
  child.stdout.on("data", (chunk) => {
838
1111
  stdout += chunk;
1112
+ writeRpcStream(requestId, "stdout", chunk);
839
1113
  });
840
1114
  child.stderr.on("data", (chunk) => {
841
1115
  stderr += chunk;
1116
+ writeRpcStream(requestId, "stderr", chunk);
842
1117
  });
843
1118
  let timedOut = false;
844
1119
  const timeout = setTimeout(() => {
@@ -857,6 +1132,7 @@ async function handleShellRpcMessage(args) {
857
1132
  }).finally(() => {
858
1133
  clearTimeout(timeout);
859
1134
  });
1135
+ await prepared.codexAuthCleanup().catch(() => undefined);
860
1136
  publishShellRpcResponse({
861
1137
  nc: args.jetstream.nc,
862
1138
  responseSubject,
@@ -870,6 +1146,7 @@ async function handleShellRpcMessage(args) {
870
1146
  ...(timedOut ? { error: `Command timed out after ${request.timeoutMs}ms` } : {}),
871
1147
  },
872
1148
  });
1149
+ writeRpcStatus(requestId, `${timedOut ? "timed_out" : "completed"} exitCode=${result.exitCode ?? "null"} signal=${result.signal ?? "null"} durationMs=${Date.now() - startedAtMs}`);
873
1150
  }
874
1151
  catch (error) {
875
1152
  const message = error instanceof Error ? error.message : String(error);
@@ -888,6 +1165,7 @@ async function handleShellRpcMessage(args) {
888
1165
  },
889
1166
  });
890
1167
  }
1168
+ writeRpcStatus(requestId, `failed error=${message}`);
891
1169
  writeAgentError(`shell rpc failed requestId=${requestId} error=${message}`);
892
1170
  }
893
1171
  }
@@ -1110,6 +1388,93 @@ async function prepareCodexAuthBundle(bundle) {
1110
1388
  },
1111
1389
  };
1112
1390
  }
1391
+ async function prepareCommandExecution(args) {
1392
+ const shellPath = resolveShellPath();
1393
+ const taskWorkspace = resolveTaskWorkspace(args.cwd);
1394
+ const codexAuth = await prepareCodexAuthBundle(args.codexAuthBundle);
1395
+ const baseTaskEnvPatch = {
1396
+ ...args.runtimeEnvPatch,
1397
+ ...(codexAuth?.envPatch ?? {}),
1398
+ WORKSPACE: taskWorkspace,
1399
+ };
1400
+ const taskGitEnv = await prepareTaskGitEnv({
1401
+ cwd: taskWorkspace,
1402
+ baseEnvPatch: baseTaskEnvPatch,
1403
+ });
1404
+ const runtimeBinPath = path.join(AGENT_PROJECT_DIR, "runtime/bin");
1405
+ const taskPath = [runtimeBinPath, process.env.PATH || ""].filter(Boolean).join(path.delimiter);
1406
+ return {
1407
+ shellPath,
1408
+ taskWorkspace,
1409
+ taskPath,
1410
+ env: {
1411
+ ...process.env,
1412
+ ...baseTaskEnvPatch,
1413
+ ...taskGitEnv.envPatch,
1414
+ PATH: taskPath,
1415
+ },
1416
+ taskGitMeta: taskGitEnv.meta ?? {},
1417
+ codexAuthMeta: codexAuth?.meta ?? { codexAuthSynced: false },
1418
+ codexAuthCleanup: codexAuth?.cleanup ?? (async () => { }),
1419
+ };
1420
+ }
1421
+ function spawnPreparedCommand(args) {
1422
+ const env = {
1423
+ ...args.env,
1424
+ DOER_AGENT_TOKEN: args.agentToken,
1425
+ };
1426
+ const child = args.kind === "apply_patch"
1427
+ ? spawn("apply_patch", {
1428
+ cwd: args.taskWorkspace,
1429
+ detached: process.platform !== "win32",
1430
+ env,
1431
+ stdio: ["pipe", "pipe", "pipe"],
1432
+ })
1433
+ : spawn(args.command ?? "", {
1434
+ cwd: args.taskWorkspace,
1435
+ shell: args.shellPath,
1436
+ detached: process.platform !== "win32",
1437
+ env,
1438
+ stdio: ["ignore", "pipe", "pipe"],
1439
+ });
1440
+ if (args.kind === "apply_patch") {
1441
+ child.stdin?.write(args.patch ?? "");
1442
+ child.stdin?.end();
1443
+ }
1444
+ child.stdout.setEncoding("utf8");
1445
+ child.stderr.setEncoding("utf8");
1446
+ return child;
1447
+ }
1448
+ function createManagedCancellation(child) {
1449
+ let cancelStage1Timer = null;
1450
+ let cancelStage2Timer = null;
1451
+ let cancelSignalSent = false;
1452
+ return {
1453
+ requestCancel: () => {
1454
+ if (cancelSignalSent) {
1455
+ return;
1456
+ }
1457
+ cancelSignalSent = true;
1458
+ sendSignalToTaskProcess(child, "SIGINT");
1459
+ cancelStage1Timer = setTimeout(() => {
1460
+ sendSignalToTaskProcess(child, "SIGTERM");
1461
+ }, 1200);
1462
+ cancelStage1Timer.unref?.();
1463
+ cancelStage2Timer = setTimeout(() => {
1464
+ sendSignalToTaskProcess(child, "SIGKILL");
1465
+ }, 3500);
1466
+ cancelStage2Timer.unref?.();
1467
+ },
1468
+ clear: () => {
1469
+ if (cancelStage1Timer) {
1470
+ clearTimeout(cancelStage1Timer);
1471
+ }
1472
+ if (cancelStage2Timer) {
1473
+ clearTimeout(cancelStage2Timer);
1474
+ }
1475
+ },
1476
+ };
1477
+ }
1113
1478
  async function runTask(args) {
1114
1479
  activeTaskLogContext = {
1115
1480
  jetstream: args.jetstream,
@@ -1311,22 +1676,14 @@ async function connectBootstrapWithRetry(args) {
1311
1676
  if (natsServers.length === 0) {
1312
1677
  throw new Error("No NATS servers configured by server");
1313
1678
  }
1314
- const taskConfig = parseBootstrapTaskConfig(natsBootstrap.tasks);
1315
- if (!taskConfig) {
1316
- throw new Error("Invalid task dispatch config from server");
1317
- }
1318
1679
  const natsToken = normalizeNatsToken(natsBootstrap.auth);
1319
- const pendingTaskIds = normalizeTaskIds(natsBootstrap.pendingTaskIds);
1320
1680
  const jetstream = await initJetStreamContext({
1321
1681
  userId: args.userId,
1322
1682
  servers: natsServers,
1323
1683
  token: natsToken,
1324
- taskStream: taskConfig.stream,
1325
- taskSubject: taskConfig.subject,
1326
- taskDurable: taskConfig.durable,
1327
1684
  });
1328
- writeAgentInfraError(`bootstrap ok servers=${natsServers.length} taskStream=${taskConfig.stream} taskSubject=${taskConfig.subject} taskDurable=${taskConfig.durable}`);
1329
- return { natsBootstrap, pendingTaskIds, jetstream };
1685
+ writeAgentInfraError(`bootstrap ok servers=${natsServers.length} eventStream=${jetstream.stream} eventSubject=${jetstream.subject}`);
1686
+ return { natsBootstrap, jetstream };
1330
1687
  }
1331
1688
  catch (error) {
1332
1689
  const message = error instanceof Error ? error.message : String(error);
@@ -1352,12 +1709,11 @@ async function main() {
1352
1709
  throw new Error("user-id and agent-secret are required");
1353
1710
  }
1354
1711
  const agentToken = agentSecret;
1355
- let { natsBootstrap, pendingTaskIds, jetstream } = await connectBootstrapWithRetry({
1712
+ const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
1356
1713
  serverBaseUrl,
1357
1714
  userId,
1358
1715
  agentToken,
1359
1716
  });
1360
- const maxConcurrency = Math.max(1, parseEnvInteger(process.env.DOER_AGENT_MAX_CONCURRENCY, 5));
1361
1717
  const agentVersion = await resolveAgentVersion();
1362
1718
  const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
1363
1719
  if (!initialAgentId) {
@@ -1374,11 +1730,6 @@ async function main() {
1374
1730
  process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
1375
1731
  process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
1376
1732
  process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
1377
- process.stdout.write(`- taskStream: ${jetstream.taskStream}\n`);
1378
- process.stdout.write(`- taskSubject: ${jetstream.taskSubject}\n`);
1379
- process.stdout.write(`- taskDurable: ${jetstream.taskDurable}\n`);
1380
- process.stdout.write(`- pendingTasks: ${pendingTaskIds.length}\n`);
1381
- process.stdout.write(`- maxConcurrency: ${maxConcurrency}\n\n`);
1382
1733
  process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
1383
1734
  if (requestedServerBaseUrl !== serverBaseUrl) {
1384
1735
  writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
@@ -1400,27 +1751,6 @@ async function main() {
1400
1751
  heartbeatHealthy = false;
1401
1752
  });
1402
1753
  }, 10_000);
1403
- const inFlightTasks = new Set();
1404
- async function waitForAvailableSlot() {
1405
- while (inFlightTasks.size >= maxConcurrency) {
1406
- try {
1407
- await Promise.race(inFlightTasks);
1408
- }
1409
- catch {
1410
- // keep draining slots even when a task fails.
1411
- }
1412
- }
1413
- }
1414
- function trackInFlight(taskPromise) {
1415
- inFlightTasks.add(taskPromise);
1416
- void taskPromise.finally(() => {
1417
- inFlightTasks.delete(taskPromise);
1418
- });
1419
- }
1420
- function scheduleTask(taskPromiseFactory) {
1421
- const taskPromise = taskPromiseFactory();
1422
- trackInFlight(taskPromise);
1423
- }
1424
1754
  subscribeToFsRpc({
1425
1755
  jetstream,
1426
1756
  serverBaseUrl,
@@ -1434,176 +1764,15 @@ async function main() {
1434
1764
  agentId: initialAgentId,
1435
1765
  agentToken,
1436
1766
  });
1437
- for (const pendingTaskId of pendingTaskIds) {
1438
- await waitForAvailableSlot();
1439
- scheduleTask(async () => {
1440
- try {
1441
- const task = await claimTaskById({
1442
- serverBaseUrl,
1443
- userId,
1444
- agentToken,
1445
- taskId: pendingTaskId,
1446
- });
1447
- if (task) {
1448
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1449
- }
1450
- }
1451
- catch (error) {
1452
- const message = error instanceof Error ? error.message : String(error);
1453
- writeAgentError(`pending task bootstrap failed taskId=${pendingTaskId}: ${message}`);
1454
- }
1455
- });
1456
- }
1457
- let connected = false;
1458
- while (true) {
1459
- try {
1460
- const consumer = await jetstream.js.consumers.get(jetstream.taskStream, jetstream.taskDurable);
1461
- if (!connected) {
1462
- writeAgentInfo(`connected to task stream (NATS ok) at=${formatLocalTimestamp()} userId=${userId}`);
1463
- connected = true;
1464
- }
1465
- const messages = await consumer.fetch({ max_messages: 200, expires: 5_000 });
1466
- for await (const msg of messages) {
1467
- await waitForAvailableSlot();
1468
- scheduleTask(async () => {
1469
- let dispatch;
1470
- try {
1471
- dispatch = jetstream.taskCodec.decode(msg.data);
1472
- }
1473
- catch (error) {
1474
- const message = error instanceof Error ? error.message : String(error);
1475
- writeAgentError(`task dispatch decode failed: ${message}`);
1476
- msg.term();
1477
- return;
1478
- }
1479
- writeAgentInfo(`task dispatch received taskId=${dispatch.taskId} createdAt=${dispatch.createdAt} subject=${jetstream.taskSubject} durable=${jetstream.taskDurable}`);
1480
- const ackKeepAliveIntervalMs = 10_000;
1481
- let ackKeepAliveTimer = null;
1482
- const stopAckKeepAlive = () => {
1483
- if (ackKeepAliveTimer) {
1484
- clearInterval(ackKeepAliveTimer);
1485
- ackKeepAliveTimer = null;
1486
- }
1487
- };
1488
- try {
1489
- ackKeepAliveTimer = setInterval(() => {
1490
- try {
1491
- msg.working();
1492
- }
1493
- catch (error) {
1494
- const message = error instanceof Error ? error.message : String(error);
1495
- writeAgentError(`task dispatch keepalive failed taskId=${dispatch.taskId}: ${message}`);
1496
- }
1497
- }, ackKeepAliveIntervalMs);
1498
- ackKeepAliveTimer.unref?.();
1499
- if (dispatch.type === "cancel") {
1500
- stopAckKeepAlive();
1501
- const canceled = requestTaskCancellation(dispatch.taskId, "nats_dispatch");
1502
- writeAgentInfo(`task cancel dispatch handled taskId=${dispatch.taskId} result=${canceled ? "signaled" : "not-running"}`);
1503
- msg.ack();
1504
- return;
1505
- }
1506
- const task = await claimTaskById({
1507
- serverBaseUrl,
1508
- userId,
1509
- agentToken,
1510
- taskId: dispatch.taskId,
1511
- });
1512
- if (!task) {
1513
- stopAckKeepAlive();
1514
- writeAgentInfo(`task dispatch acked without run taskId=${dispatch.taskId} reason=already-claimed`);
1515
- msg.ack();
1516
- return;
1517
- }
1518
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1519
- stopAckKeepAlive();
1520
- msg.ack();
1521
- writeAgentInfo(`task dispatch acked taskId=${dispatch.taskId}`);
1522
- }
1523
- catch (error) {
1524
- stopAckKeepAlive();
1525
- const message = error instanceof Error ? error.message : String(error);
1526
- writeAgentError(`task dispatch handle failed taskId=${dispatch.taskId}: ${message}`);
1527
- writeAgentError(`task dispatch sending nak taskId=${dispatch.taskId}`);
1528
- msg.nak();
1529
- }
1530
- });
1531
- }
1532
- }
1533
- catch (error) {
1534
- const message = error instanceof Error ? error.message : String(error);
1535
- if (connected) {
1536
- writeAgentError(`task stream disconnected at=${formatLocalTimestamp()} reason=${message}`);
1537
- }
1538
- connected = false;
1539
- if (isLikelyNatsAuthError(error)) {
1540
- writeAgentError(`nats auth error detected. refreshing bootstrap credentials...`);
1541
- }
1542
- else if (isLikelyNatsReconnectError(error)) {
1543
- writeAgentError(`nats connection lost. refreshing bootstrap/session...`);
1544
- }
1545
- else {
1546
- writeAgentError(`task stream error detected. forcing bootstrap/session refresh... reason=${message}`);
1547
- }
1548
- if (inFlightTasks.size > 0) {
1549
- writeAgentInfo(`waiting for in-flight tasks before reconnect count=${inFlightTasks.size}`);
1550
- await Promise.allSettled(Array.from(inFlightTasks));
1551
- }
1552
- try {
1553
- await jetstream.nc.close();
1554
- }
1555
- catch {
1556
- // noop
1557
- }
1558
- const refreshed = await connectBootstrapWithRetry({
1559
- serverBaseUrl,
1560
- userId,
1561
- agentToken,
1562
- });
1563
- natsBootstrap = refreshed.natsBootstrap;
1564
- pendingTaskIds = refreshed.pendingTaskIds;
1565
- jetstream = refreshed.jetstream;
1566
- const refreshedAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
1567
- if (!refreshedAgentId) {
1568
- throw new Error("agent id missing from refreshed bootstrap");
1569
- }
1570
- subscribeToFsRpc({
1571
- jetstream,
1572
- serverBaseUrl,
1573
- userId,
1574
- agentId: refreshedAgentId,
1575
- agentToken,
1576
- });
1577
- subscribeToShellRpc({
1578
- jetstream,
1579
- userId,
1580
- agentId: refreshedAgentId,
1581
- agentToken,
1582
- });
1583
- for (const pendingTaskId of pendingTaskIds) {
1584
- await waitForAvailableSlot();
1585
- scheduleTask(async () => {
1586
- try {
1587
- const task = await claimTaskById({
1588
- serverBaseUrl,
1589
- userId,
1590
- agentToken,
1591
- taskId: pendingTaskId,
1592
- });
1593
- if (task) {
1594
- await runClaimedTask({ task, serverBaseUrl, userId, agentToken, jetstream });
1595
- }
1596
- }
1597
- catch (pendingError) {
1598
- const pendingMessage = pendingError instanceof Error ? pendingError.message : String(pendingError);
1599
- writeAgentError(`pending task refresh failed taskId=${pendingTaskId}: ${pendingMessage}`);
1600
- }
1601
- });
1602
- }
1603
- writeAgentInfo(`nats credentials refreshed at=${formatLocalTimestamp()} agentId=${typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "unknown"}`);
1604
- continue;
1605
- }
1606
- }
1767
+ subscribeToRunRpc({
1768
+ jetstream,
1769
+ userId,
1770
+ agentId: initialAgentId,
1771
+ agentToken,
1772
+ });
1773
+ await new Promise(() => {
1774
+ // Keep the long-lived agent process alive for RPC subscriptions and heartbeat.
1775
+ });
1607
1776
  }
1608
1777
  main().catch((error) => {
1609
1778
  const message = error instanceof Error ? error.message : String(error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "doer-agent",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "Reverse-polling agent runtime for doer",
5
5
  "type": "module",
6
6
  "main": "dist/agent.js",