doer-agent 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/agent.js +212 -320
  2. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -9,16 +9,13 @@ const AGENT_MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
9
9
  const AGENT_PROJECT_DIR = path.join(AGENT_MODULE_DIR, "..");
10
10
  const AGENT_PACKAGE_JSON_PATH = path.join(AGENT_PROJECT_DIR, "package.json");
11
11
  let activeTaskLogContext = null;
12
- const activeTaskCancelRequests = new Map();
13
12
  let workspaceRootOverride = null;
14
13
  const fsRpcCodec = StringCodec();
15
- const shellRpcCodec = StringCodec();
16
14
  const runRpcCodec = StringCodec();
17
15
  const sessionRpcCodec = StringCodec();
18
16
  const codexAuthRpcCodec = StringCodec();
19
17
  const settingsRpcCodec = StringCodec();
20
18
  const gitRpcCodec = StringCodec();
21
- const activeRuns = new Map();
22
19
  const retainedRuns = new Map();
23
20
  const activeSessionWatchers = new Map();
24
21
  const sessionLineIndexCache = new Map();
@@ -118,13 +115,6 @@ async function initJetStreamContext(args) {
118
115
  const nc = await connect(args.token ? { servers: args.servers, token: args.token } : { servers: args.servers });
119
116
  const jsm = await nc.jetstreamManager();
120
117
  await ensureJetStreamInfra({ jsm, stream, subject, durable });
121
- void nc.closed().then((error) => {
122
- if (error) {
123
- writeAgentInfraError(`nats connection closed with error: ${error.message}`);
124
- return;
125
- }
126
- writeAgentInfraError("nats connection closed cleanly");
127
- });
128
118
  void (async () => {
129
119
  try {
130
120
  for await (const status of nc.status()) {
@@ -466,10 +456,13 @@ async function persistRunTask(task) {
466
456
  runId: task.id,
467
457
  agentId: task.agentId,
468
458
  userId: task.userId,
459
+ processPid: task.processPid,
469
460
  sessionId: task.sessionId,
470
461
  sessionFilePath: task.sessionFilePath,
471
462
  status: task.status,
472
463
  cancelRequested: task.cancelRequested,
464
+ resultExitCode: task.resultExitCode,
465
+ resultSignal: task.resultSignal,
473
466
  createdAt: task.createdAt,
474
467
  updatedAt: task.updatedAt,
475
468
  startedAt: task.startedAt,
@@ -1020,10 +1013,71 @@ async function updateRunSessionMetadata(task, metadata) {
1020
1013
  function persistRetainedRun(task) {
1021
1014
  retainedRuns.set(task.id, cloneRunTask(task));
1022
1015
  }
1023
- function getStoredRun(runId) {
1024
- const active = activeRuns.get(runId);
1025
- if (active) {
1026
- return active.task;
1016
+ function normalizePersistedRunTask(value) {
1017
+ if (!value || typeof value !== "object") {
1018
+ return null;
1019
+ }
1020
+ const record = value;
1021
+ const id = typeof record.runId === "string" && record.runId.trim()
1022
+ ? record.runId.trim()
1023
+ : typeof record.id === "string" && record.id.trim()
1024
+ ? record.id.trim()
1025
+ : "";
1026
+ const userId = typeof record.userId === "string" ? record.userId : "";
1027
+ const agentId = typeof record.agentId === "string" ? record.agentId : "";
1028
+ const status = record.status;
1029
+ if (!id || !userId || !agentId || !["queued", "running", "completed", "failed", "canceled"].includes(String(status))) {
1030
+ return null;
1031
+ }
1032
+ return {
1033
+ id,
1034
+ userId,
1035
+ agentId,
1036
+ processPid: typeof record.processPid === "number" ? record.processPid : null,
1037
+ sessionId: typeof record.sessionId === "string" && record.sessionId.trim() ? record.sessionId.trim() : null,
1038
+ sessionFilePath: typeof record.sessionFilePath === "string" && record.sessionFilePath.trim() ? record.sessionFilePath.trim() : null,
1039
+ status: status,
1040
+ cancelRequested: Boolean(record.cancelRequested),
1041
+ resultExitCode: typeof record.resultExitCode === "number" ? record.resultExitCode : null,
1042
+ resultSignal: typeof record.resultSignal === "string" && record.resultSignal.trim() ? record.resultSignal.trim() : null,
1043
+ error: typeof record.error === "string" && record.error.trim() ? record.error : null,
1044
+ createdAt: typeof record.createdAt === "string" ? record.createdAt : "",
1045
+ updatedAt: typeof record.updatedAt === "string" ? record.updatedAt : "",
1046
+ startedAt: typeof record.startedAt === "string" && record.startedAt.trim() ? record.startedAt : null,
1047
+ finishedAt: typeof record.finishedAt === "string" && record.finishedAt.trim() ? record.finishedAt : null,
1048
+ };
1049
+ }
1050
+ async function listPersistedRunTasks() {
1051
+ const dir = await resolveRunsDir();
1052
+ const names = await readdir(dir).catch(() => []);
1053
+ const tasks = await Promise.all(names
1054
+ .filter((name) => name.endsWith(".json"))
1055
+ .map(async (name) => {
1056
+ const raw = await readFile(path.join(dir, name), "utf8").catch(() => null);
1057
+ if (!raw) {
1058
+ return null;
1059
+ }
1060
+ try {
1061
+ return normalizePersistedRunTask(JSON.parse(raw));
1062
+ }
1063
+ catch {
1064
+ return null;
1065
+ }
1066
+ }));
1067
+ return tasks.filter((task) => task !== null);
1068
+ }
1069
+ async function getStoredRun(runId) {
1070
+ const persisted = await readFile(path.join(await resolveRunsDir(), `${runId}.json`), "utf8").catch(() => null);
1071
+ if (persisted) {
1072
+ try {
1073
+ const parsed = normalizePersistedRunTask(JSON.parse(persisted));
1074
+ if (parsed) {
1075
+ return parsed;
1076
+ }
1077
+ }
1078
+ catch {
1079
+ // Ignore malformed persisted state and fall back to retained memory.
1080
+ }
1027
1081
  }
1028
1082
  return retainedRuns.get(runId) ?? null;
1029
1083
  }
@@ -1045,6 +1099,7 @@ async function startManagedRun(args) {
1045
1099
  id: args.runId,
1046
1100
  userId: args.userId,
1047
1101
  agentId: args.agentId,
1102
+ processPid: typeof child.pid === "number" ? child.pid : null,
1048
1103
  sessionId: typeof args.sessionId === "string" && args.sessionId.trim() ? args.sessionId.trim() : null,
1049
1104
  sessionFilePath: null,
1050
1105
  status: "running",
@@ -1057,17 +1112,6 @@ async function startManagedRun(args) {
1057
1112
  startedAt: now,
1058
1113
  finishedAt: null,
1059
1114
  };
1060
- const cancellation = createManagedCancellation(child);
1061
- const requestCancel = () => {
1062
- if (task.status === "completed" || task.status === "failed" || task.status === "canceled") {
1063
- return;
1064
- }
1065
- task.cancelRequested = true;
1066
- task.updatedAt = formatLocalTimestamp();
1067
- void persistRunTask(task).catch(() => undefined);
1068
- writeRunStatus(task.id, "cancel requested");
1069
- cancellation.requestCancel();
1070
- };
1071
1115
  let stdoutBuffer = "";
1072
1116
  const recordChunk = (stream, chunk) => {
1073
1117
  writeRunStream(task.id, stream, chunk);
@@ -1096,33 +1140,33 @@ async function startManagedRun(args) {
1096
1140
  task.error = message;
1097
1141
  task.finishedAt = formatLocalTimestamp();
1098
1142
  persistRetainedRun(task);
1099
- activeRuns.delete(task.id);
1100
1143
  void removeRunTask(task.id).catch(() => undefined);
1101
1144
  void releaseRunStartSlot({ runId: task.id, sessionId: task.sessionId }).catch(() => undefined);
1102
1145
  void prepared.codexAuthCleanup().catch(() => undefined);
1103
1146
  writeRunStatus(task.id, `failed error=${message}`);
1104
1147
  });
1105
- child.once("close", (code, signal) => {
1106
- cancellation.clear();
1148
+ child.once("close", async (code, signal) => {
1107
1149
  if (stdoutBuffer.trim() && (!task.sessionId || !task.sessionFilePath)) {
1108
1150
  const metadata = extractCodexSessionMetadata(stdoutBuffer.trim());
1109
1151
  if (metadata.sessionId || metadata.sessionFilePath) {
1110
1152
  void updateRunSessionMetadata(task, metadata);
1111
1153
  }
1112
1154
  }
1155
+ const latest = await getStoredRun(task.id).catch(() => null);
1156
+ if (latest?.cancelRequested) {
1157
+ task.cancelRequested = true;
1158
+ }
1113
1159
  task.resultExitCode = typeof code === "number" ? code : null;
1114
1160
  task.resultSignal = signal;
1115
1161
  task.finishedAt = formatLocalTimestamp();
1116
1162
  task.status = task.cancelRequested ? "canceled" : (task.resultExitCode ?? 1) === 0 ? "completed" : "failed";
1117
1163
  task.error = task.status === "failed" ? `Command exited with code ${task.resultExitCode ?? "null"}` : null;
1118
1164
  persistRetainedRun(task);
1119
- activeRuns.delete(task.id);
1120
1165
  void removeRunTask(task.id).catch(() => undefined);
1121
1166
  void releaseRunStartSlot({ runId: task.id, sessionId: task.sessionId }).catch(() => undefined);
1122
1167
  void prepared.codexAuthCleanup().catch(() => undefined);
1123
1168
  writeRunStatus(task.id, `completed status=${task.status} exitCode=${task.resultExitCode ?? "null"} signal=${task.resultSignal ?? "null"}`);
1124
1169
  });
1125
- activeRuns.set(task.id, { task, child, requestCancel });
1126
1170
  persistRetainedRun(task);
1127
1171
  void persistRunTask(task).catch(() => undefined);
1128
1172
  writeRunStatus(task.id, `started requestId=${args.requestId} cwd=${prepared.taskWorkspace}`);
@@ -1935,22 +1979,37 @@ async function handleRunRpcMessage(args) {
1935
1979
  return;
1936
1980
  }
1937
1981
  if (request.action === "list") {
1938
- const tasks = [...activeRuns.values()].map((entry) => cloneRunTask(entry.task));
1939
- const retained = [...retainedRuns.values()].filter((task) => !activeRuns.has(task.id)).map((task) => cloneRunTask(task));
1940
- const merged = [...tasks, ...retained]
1982
+ const persisted = await listPersistedRunTasks();
1983
+ const mergedById = new Map();
1984
+ for (const task of persisted) {
1985
+ mergedById.set(task.id, cloneRunTask(task));
1986
+ }
1987
+ for (const task of retainedRuns.values()) {
1988
+ if (!mergedById.has(task.id)) {
1989
+ mergedById.set(task.id, cloneRunTask(task));
1990
+ }
1991
+ }
1992
+ const merged = [...mergedById.values()]
1941
1993
  .sort((a, b) => Date.parse(b.updatedAt) - Date.parse(a.updatedAt))
1942
1994
  .slice(0, request.limit);
1943
1995
  publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, tasks: merged } });
1944
1996
  return;
1945
1997
  }
1946
- const stored = request.runId ? getStoredRun(request.runId) : null;
1998
+ const stored = request.runId ? await getStoredRun(request.runId) : null;
1947
1999
  if (!stored || stored.agentId !== args.agentId || stored.userId !== args.userId) {
1948
2000
  throw new Error("Run not found");
1949
2001
  }
1950
2002
  if (request.action === "cancel") {
1951
- const active = activeRuns.get(stored.id);
1952
- active?.requestCancel();
1953
- const task = cloneRunTask(active?.task ?? stored);
2003
+ const target = stored;
2004
+ if (target.processPid === null) {
2005
+ throw new Error("Run pid not found");
2006
+ }
2007
+ target.cancelRequested = true;
2008
+ target.updatedAt = formatLocalTimestamp();
2009
+ await persistRunTask(target);
2010
+ writeRunStatus(target.id, `cancel requested pid=${target.processPid}`);
2011
+ sendSignalToPid(target.processPid, "SIGINT");
2012
+ const task = cloneRunTask(target);
1954
2013
  publishRunRpcResponse({ nc: args.jetstream.nc, responseSubject, payload: { requestId, ok: true, task } });
1955
2014
  return;
1956
2015
  }
@@ -2026,21 +2085,17 @@ function sendSignalToTaskProcess(child, signal) {
2026
2085
  // noop
2027
2086
  }
2028
2087
  }
2029
- function requestTaskCancellation(taskId, reason) {
2030
- const requestCancel = activeTaskCancelRequests.get(taskId);
2031
- if (!requestCancel) {
2032
- return false;
2033
- }
2034
- try {
2035
- requestCancel();
2036
- writeAgentInfo(`task cancel requested taskId=${taskId} via=${reason}`);
2037
- return true;
2038
- }
2039
- catch (error) {
2040
- const message = error instanceof Error ? error.message : String(error);
2041
- writeAgentError(`task cancel request failed taskId=${taskId} via=${reason}: ${message}`);
2042
- return false;
2088
+ function sendSignalToPid(pid, signal) {
2089
+ if (process.platform !== "win32") {
2090
+ try {
2091
+ process.kill(-pid, signal);
2092
+ return;
2093
+ }
2094
+ catch {
2095
+ // Fall back to direct pid signaling.
2096
+ }
2043
2097
  }
2098
+ process.kill(pid, signal);
2044
2099
  }
2045
2100
  function resolveLogTimeZone() {
2046
2101
  const configured = process.env.DOER_AGENT_LOG_TIMEZONE?.trim() || process.env.TZ?.trim();
@@ -2172,9 +2227,6 @@ function resolveTaskWorkspace(rawCwd) {
2172
2227
  function buildAgentFsRpcSubject(userId, agentId) {
2173
2228
  return `doer.agent.fs.rpc.${sanitizeUserId(userId)}.${agentId.trim()}`;
2174
2229
  }
2175
- function buildAgentShellRpcSubject(userId, agentId) {
2176
- return `doer.agent.shell.rpc.${sanitizeUserId(userId)}.${agentId.trim()}`;
2177
- }
2178
2230
  function normalizeFsRpcPath(rawPath) {
2179
2231
  const root = workspaceRootOverride ?? (process.env.WORKSPACE?.trim() || process.cwd());
2180
2232
  const raw = typeof rawPath === "string" && rawPath.trim() ? rawPath.trim() : ".";
@@ -3000,7 +3052,25 @@ async function deleteAgentSession(filePath, sessionId) {
3000
3052
  }
3001
3053
  }
3002
3054
  function publishSessionRpcResponse(args) {
3003
- args.nc.publish(args.responseSubject, sessionRpcCodec.encode(JSON.stringify(args.payload)));
3055
+ try {
3056
+ args.nc.publish(args.responseSubject, sessionRpcCodec.encode(JSON.stringify(args.payload)));
3057
+ }
3058
+ catch (error) {
3059
+ const message = error instanceof Error ? error.message : String(error);
3060
+ writeAgentError(`session rpc publish failed responseSubject=${args.responseSubject}: ${message}`);
3061
+ }
3062
+ }
3063
+ function stopAllSessionWatchers() {
3064
+ const stops = [...activeSessionWatchers.values()];
3065
+ for (const stop of stops) {
3066
+ try {
3067
+ stop();
3068
+ }
3069
+ catch (error) {
3070
+ const message = error instanceof Error ? error.message : String(error);
3071
+ writeAgentError(`session watcher cleanup failed: ${message}`);
3072
+ }
3073
+ }
3004
3074
  }
3005
3075
  async function startSessionWatch(args) {
3006
3076
  const resolvedFile = resolveSessionFilePath(args.filePath);
@@ -3188,46 +3258,6 @@ function subscribeToFsRpc(args) {
3188
3258
  });
3189
3259
  writeAgentInfo(`fs rpc subscribed subject=${subject}`);
3190
3260
  }
3191
- function normalizeShellRpcRequest(args) {
3192
- const requestId = typeof args.request.requestId === "string" ? args.request.requestId.trim() : "";
3193
- if (!requestId) {
3194
- throw new Error("missing requestId");
3195
- }
3196
- const requestAgentId = typeof args.request.agentId === "string" ? args.request.agentId.trim() : "";
3197
- if (!requestAgentId) {
3198
- throw new Error("missing agentId");
3199
- }
3200
- if (requestAgentId !== args.agentId) {
3201
- throw new Error("agent id mismatch");
3202
- }
3203
- const kind = args.request.kind === "apply_patch" ? "apply_patch" : "shell";
3204
- const command = typeof args.request.command === "string" ? args.request.command.trim() : "";
3205
- const patch = typeof args.request.patch === "string" ? args.request.patch : "";
3206
- if (kind === "shell" && !command) {
3207
- throw new Error("missing command");
3208
- }
3209
- if (kind === "apply_patch" && !patch.trim()) {
3210
- throw new Error("missing patch");
3211
- }
3212
- const responseSubject = typeof args.request.responseSubject === "string" ? args.request.responseSubject.trim() : "";
3213
- if (!responseSubject) {
3214
- throw new Error("missing responseSubject");
3215
- }
3216
- const cwd = typeof args.request.cwd === "string" && args.request.cwd.trim() ? args.request.cwd.trim() : null;
3217
- const timeoutRaw = Number(args.request.timeoutMs);
3218
- const timeoutMs = Number.isFinite(timeoutRaw) ? Math.max(1000, Math.min(Math.floor(timeoutRaw), 300000)) : 30000;
3219
- return {
3220
- kind,
3221
- requestId,
3222
- command: kind === "shell" ? command : null,
3223
- patch: kind === "apply_patch" ? patch : null,
3224
- cwd,
3225
- timeoutMs,
3226
- responseSubject,
3227
- runtimeEnvPatch: normalizeEnvPatch(args.request.runtimeEnvPatch),
3228
- codexAuthBundle: normalizeShellRpcCodexAuthBundle(args.request.codexAuth),
3229
- };
3230
- }
3231
3261
  function normalizeShellRpcCodexAuthBundle(value) {
3232
3262
  if (!value || typeof value !== "object" || Array.isArray(value)) {
3233
3263
  return null;
@@ -3246,118 +3276,6 @@ function normalizeShellRpcCodexAuthBundle(value) {
3246
3276
  apiKey: typeof row.apiKey === "string" || row.apiKey === null ? row.apiKey : undefined,
3247
3277
  };
3248
3278
  }
3249
- function publishShellRpcResponse(args) {
3250
- args.nc.publish(args.responseSubject, shellRpcCodec.encode(JSON.stringify(args.payload)));
3251
- }
3252
- async function handleShellRpcMessage(args) {
3253
- let requestId = "unknown";
3254
- let responseSubject = "";
3255
- let stdout = "";
3256
- let stderr = "";
3257
- try {
3258
- const payload = JSON.parse(shellRpcCodec.decode(args.msg.data));
3259
- const request = normalizeShellRpcRequest({ request: payload, agentId: args.agentId });
3260
- requestId = request.requestId;
3261
- responseSubject = request.responseSubject;
3262
- const startedAtMs = Date.now();
3263
- const prepared = await prepareCommandExecution({
3264
- cwd: request.cwd,
3265
- userId: args.userId,
3266
- taskId: request.requestId,
3267
- codexAuthBundle: request.codexAuthBundle,
3268
- });
3269
- const child = spawnPreparedCommand({
3270
- kind: request.kind,
3271
- command: request.command,
3272
- patch: request.patch,
3273
- shellPath: prepared.shellPath,
3274
- taskWorkspace: prepared.taskWorkspace,
3275
- env: prepared.env,
3276
- agentToken: args.agentToken,
3277
- });
3278
- writeRpcStatus(requestId, `started kind=${request.kind} cwd=${prepared.taskWorkspace} shell=${request.kind === "shell" ? prepared.shellPath : "apply_patch"}`);
3279
- child.stdout.on("data", (chunk) => {
3280
- stdout += chunk;
3281
- writeRpcStream(requestId, "stdout", chunk);
3282
- });
3283
- child.stderr.on("data", (chunk) => {
3284
- stderr += chunk;
3285
- writeRpcStream(requestId, "stderr", chunk);
3286
- });
3287
- let timedOut = false;
3288
- const timeout = setTimeout(() => {
3289
- timedOut = true;
3290
- sendSignalToTaskProcess(child, "SIGTERM");
3291
- setTimeout(() => {
3292
- sendSignalToTaskProcess(child, "SIGKILL");
3293
- }, 1000).unref?.();
3294
- }, request.timeoutMs);
3295
- timeout.unref?.();
3296
- const result = await new Promise((resolve, reject) => {
3297
- child.once("error", reject);
3298
- child.once("close", (code, signal) => {
3299
- resolve({ exitCode: typeof code === "number" ? code : null, signal });
3300
- });
3301
- }).finally(() => {
3302
- clearTimeout(timeout);
3303
- });
3304
- await prepared.codexAuthCleanup().catch(() => undefined);
3305
- publishShellRpcResponse({
3306
- nc: args.jetstream.nc,
3307
- responseSubject,
3308
- payload: {
3309
- requestId,
3310
- ok: !timedOut,
3311
- exitCode: result.exitCode,
3312
- signal: result.signal,
3313
- stdout,
3314
- stderr,
3315
- ...(timedOut ? { error: `Command timed out after ${request.timeoutMs}ms` } : {}),
3316
- },
3317
- });
3318
- writeRpcStatus(requestId, `${timedOut ? "timed_out" : "completed"} exitCode=${result.exitCode ?? "null"} signal=${result.signal ?? "null"} durationMs=${Date.now() - startedAtMs}`);
3319
- }
3320
- catch (error) {
3321
- const message = error instanceof Error ? error.message : String(error);
3322
- if (responseSubject) {
3323
- publishShellRpcResponse({
3324
- nc: args.jetstream.nc,
3325
- responseSubject,
3326
- payload: {
3327
- requestId,
3328
- ok: false,
3329
- exitCode: null,
3330
- signal: null,
3331
- stdout,
3332
- stderr,
3333
- error: message,
3334
- },
3335
- });
3336
- }
3337
- writeRpcStatus(requestId, `failed error=${message}`);
3338
- writeAgentError(`shell rpc failed requestId=${requestId} error=${message}`);
3339
- }
3340
- }
3341
- function subscribeToShellRpc(args) {
3342
- const subject = buildAgentShellRpcSubject(args.userId, args.agentId);
3343
- args.jetstream.nc.subscribe(subject, {
3344
- callback: (error, msg) => {
3345
- if (error) {
3346
- const message = error instanceof Error ? error.message : String(error);
3347
- writeAgentError(`shell rpc subscription error: ${message}`);
3348
- return;
3349
- }
3350
- void handleShellRpcMessage({
3351
- msg,
3352
- jetstream: args.jetstream,
3353
- userId: args.userId,
3354
- agentId: args.agentId,
3355
- agentToken: args.agentToken,
3356
- });
3357
- },
3358
- });
3359
- writeAgentInfo(`shell rpc subscribed subject=${subject}`);
3360
- }
3361
3279
  async function postJson(url, body) {
3362
3280
  const res = await fetch(url, {
3363
3281
  method: "POST",
@@ -3605,36 +3523,6 @@ function spawnPreparedCommand(args) {
3605
3523
  child.stderr.setEncoding("utf8");
3606
3524
  return child;
3607
3525
  }
3608
- function createManagedCancellation(child) {
3609
- let cancelStage1Timer = null;
3610
- let cancelStage2Timer = null;
3611
- let cancelSignalSent = false;
3612
- return {
3613
- requestCancel: () => {
3614
- if (cancelSignalSent) {
3615
- return;
3616
- }
3617
- cancelSignalSent = true;
3618
- sendSignalToTaskProcess(child, "SIGINT");
3619
- cancelStage1Timer = setTimeout(() => {
3620
- sendSignalToTaskProcess(child, "SIGTERM");
3621
- }, 1200);
3622
- cancelStage1Timer.unref?.();
3623
- cancelStage2Timer = setTimeout(() => {
3624
- sendSignalToTaskProcess(child, "SIGKILL");
3625
- }, 3500);
3626
- cancelStage2Timer.unref?.();
3627
- },
3628
- clear: () => {
3629
- if (cancelStage1Timer) {
3630
- clearTimeout(cancelStage1Timer);
3631
- }
3632
- if (cancelStage2Timer) {
3633
- clearTimeout(cancelStage2Timer);
3634
- }
3635
- },
3636
- };
3637
- }
3638
3526
  async function runTask(args) {
3639
3527
  activeTaskLogContext = {
3640
3528
  jetstream: args.jetstream,
@@ -3728,7 +3616,6 @@ async function runTask(args) {
3728
3616
  }, 3500);
3729
3617
  cancelStage2Timer.unref?.();
3730
3618
  };
3731
- activeTaskCancelRequests.set(args.taskId, requestCancel);
3732
3619
  child.stdout.on("data", (chunk) => {
3733
3620
  writeTaskStream(args.taskId, "stdout", chunk);
3734
3621
  const seq = reserveNextEventSeq(args.taskId);
@@ -3823,7 +3710,6 @@ async function runTask(args) {
3823
3710
  writeAgentInfo(`task=${args.taskId} status=${status} exitCode=${typeof result.code === "number" ? result.code : "null"} signal=${result.signal ?? "null"}`);
3824
3711
  }
3825
3712
  finally {
3826
- activeTaskCancelRequests.delete(args.taskId);
3827
3713
  activeTaskLogContext = null;
3828
3714
  await codexAuth?.cleanup().catch(() => undefined);
3829
3715
  }
@@ -3878,91 +3764,97 @@ async function main() {
3878
3764
  throw new Error("user-id and agent-secret are required");
3879
3765
  }
3880
3766
  const agentToken = agentSecret;
3881
- const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
3882
- serverBaseUrl,
3883
- userId,
3884
- agentToken,
3885
- });
3886
3767
  const agentVersion = await resolveAgentVersion();
3887
- const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
3888
- if (!initialAgentId) {
3889
- throw new Error("agent id missing from bootstrap");
3890
- }
3891
- process.stdout.write(`\n[doer-agent v${agentVersion}]\n`);
3892
- if (!usesDefaultServer) {
3893
- process.stdout.write(`- server: ${serverBaseUrl}\n`);
3894
- }
3895
- process.stdout.write(`- userId: ${userId}\n`);
3896
- process.stdout.write(`- agentId: ${initialAgentId}\n`);
3897
- process.stdout.write(`\n- transport: nats\n`);
3898
- process.stdout.write(`- natsServers: ${jetstream.servers.join(",")}\n`);
3899
- process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
3900
- process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
3901
- process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
3902
- process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
3903
- if (requestedServerBaseUrl !== serverBaseUrl) {
3904
- writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
3905
- }
3906
- let heartbeatHealthy = null;
3907
- const heartbeatTimer = setInterval(() => {
3908
- void heartbeatAgent({ serverBaseUrl, userId, agentToken })
3909
- .then(() => {
3910
- if (heartbeatHealthy === false) {
3911
- writeAgentInfraError(`heartbeat reconnected at=${formatLocalTimestamp()}`);
3768
+ let bannerShown = false;
3769
+ while (true) {
3770
+ const { natsBootstrap, jetstream } = await connectBootstrapWithRetry({
3771
+ serverBaseUrl,
3772
+ userId,
3773
+ agentToken,
3774
+ });
3775
+ const initialAgentId = typeof natsBootstrap.agentId === "string" ? natsBootstrap.agentId : "";
3776
+ if (!initialAgentId) {
3777
+ throw new Error("agent id missing from bootstrap");
3778
+ }
3779
+ if (!bannerShown) {
3780
+ process.stdout.write(`\n[doer-agent v${agentVersion}]\n`);
3781
+ if (!usesDefaultServer) {
3782
+ process.stdout.write(`- server: ${serverBaseUrl}\n`);
3912
3783
  }
3913
- heartbeatHealthy = true;
3914
- })
3915
- .catch((error) => {
3916
- const message = error instanceof Error ? error.message : String(error);
3917
- if (heartbeatHealthy !== false) {
3918
- writeAgentInfraError(`heartbeat failed: ${message}`);
3784
+ process.stdout.write(`- userId: ${userId}\n`);
3785
+ process.stdout.write(`- agentId: ${initialAgentId}\n`);
3786
+ process.stdout.write(`\n- transport: nats\n`);
3787
+ process.stdout.write(`- natsServers: ${jetstream.servers.join(",")}\n`);
3788
+ process.stdout.write(`- natsStream: ${jetstream.stream}\n`);
3789
+ process.stdout.write(`- natsSubject: ${jetstream.subject}\n`);
3790
+ process.stdout.write(`- natsDurable: ${jetstream.durable}\n\n`);
3791
+ process.stdout.write(`- workspace: ${process.cwd()}\n\n`);
3792
+ if (requestedServerBaseUrl !== serverBaseUrl) {
3793
+ writeAgentInfo(`detected container runtime, server endpoint rewritten: ${requestedServerBaseUrl} -> ${serverBaseUrl}`);
3919
3794
  }
3920
- heartbeatHealthy = false;
3795
+ bannerShown = true;
3796
+ }
3797
+ else {
3798
+ writeAgentInfraError(`nats session restored agentId=${initialAgentId} servers=${jetstream.servers.join(",")} at=${formatLocalTimestamp()}`);
3799
+ }
3800
+ let heartbeatHealthy = null;
3801
+ const heartbeatTimer = setInterval(() => {
3802
+ void heartbeatAgent({ serverBaseUrl, userId, agentToken })
3803
+ .then(() => {
3804
+ if (heartbeatHealthy === false) {
3805
+ writeAgentInfraError(`heartbeat reconnected at=${formatLocalTimestamp()}`);
3806
+ }
3807
+ heartbeatHealthy = true;
3808
+ })
3809
+ .catch((error) => {
3810
+ const message = error instanceof Error ? error.message : String(error);
3811
+ if (heartbeatHealthy !== false) {
3812
+ writeAgentInfraError(`heartbeat failed: ${message}`);
3813
+ }
3814
+ heartbeatHealthy = false;
3815
+ });
3816
+ }, 10_000);
3817
+ subscribeToFsRpc({
3818
+ jetstream,
3819
+ serverBaseUrl,
3820
+ userId,
3821
+ agentId: initialAgentId,
3822
+ agentToken,
3921
3823
  });
3922
- }, 10_000);
3923
- subscribeToFsRpc({
3924
- jetstream,
3925
- serverBaseUrl,
3926
- userId,
3927
- agentId: initialAgentId,
3928
- agentToken,
3929
- });
3930
- subscribeToShellRpc({
3931
- jetstream,
3932
- userId,
3933
- agentId: initialAgentId,
3934
- agentToken,
3935
- });
3936
- subscribeToSessionRpc({
3937
- jetstream,
3938
- userId,
3939
- agentId: initialAgentId,
3940
- });
3941
- subscribeToCodexAuthRpc({
3942
- jetstream,
3943
- userId,
3944
- agentId: initialAgentId,
3945
- });
3946
- subscribeToSettingsRpc({
3947
- jetstream,
3948
- userId,
3949
- agentId: initialAgentId,
3950
- });
3951
- subscribeToGitRpc({
3952
- jetstream,
3953
- userId,
3954
- agentId: initialAgentId,
3955
- });
3956
- subscribeToRunRpc({
3957
- jetstream,
3958
- serverBaseUrl,
3959
- userId,
3960
- agentId: initialAgentId,
3961
- agentToken,
3962
- });
3963
- await new Promise(() => {
3964
- // Keep the long-lived agent process alive for RPC subscriptions and heartbeat.
3965
- });
3824
+ subscribeToSessionRpc({
3825
+ jetstream,
3826
+ userId,
3827
+ agentId: initialAgentId,
3828
+ });
3829
+ subscribeToCodexAuthRpc({
3830
+ jetstream,
3831
+ userId,
3832
+ agentId: initialAgentId,
3833
+ });
3834
+ subscribeToSettingsRpc({
3835
+ jetstream,
3836
+ userId,
3837
+ agentId: initialAgentId,
3838
+ });
3839
+ subscribeToGitRpc({
3840
+ jetstream,
3841
+ userId,
3842
+ agentId: initialAgentId,
3843
+ });
3844
+ subscribeToRunRpc({
3845
+ jetstream,
3846
+ serverBaseUrl,
3847
+ userId,
3848
+ agentId: initialAgentId,
3849
+ agentToken,
3850
+ });
3851
+ const closeError = await jetstream.nc.closed();
3852
+ clearInterval(heartbeatTimer);
3853
+ stopAllSessionWatchers();
3854
+ const detail = closeError instanceof Error ? closeError.message : "clean close";
3855
+ writeAgentInfraError(`nats session ended: ${detail}; reconnecting`);
3856
+ await sleep(1000);
3857
+ }
3966
3858
  }
3967
3859
  main().catch((error) => {
3968
3860
  const message = error instanceof Error ? error.message : String(error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "doer-agent",
3
- "version": "0.2.6",
3
+ "version": "0.2.8",
4
4
  "description": "Reverse-polling agent runtime for doer",
5
5
  "type": "module",
6
6
  "main": "dist/agent.js",