@agentconnect.md/daemon 1.0.0-rc.36 → 1.0.0-rc.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7376,11 +7376,17 @@ const ConfigSchema = object({
7376
7376
  maxAgents: number().int().default(8),
7377
7377
  maxConcurrentSessions: number().int().default(32),
7378
7378
  agentIdleTimeoutMs: number().int().default(9e5),
7379
+ idleSweepMs: number().int().default(6e4),
7380
+ shutdownDrainMs: number().int().default(25e3),
7381
+ cancelBackstopMs: number().int().default(3e4),
7379
7382
  maxAttachmentBytes: number().int().default(8 * 1024 * 1024)
7380
7383
  }).default({
7381
7384
  maxAgents: 8,
7382
7385
  maxConcurrentSessions: 32,
7383
7386
  agentIdleTimeoutMs: 9e5,
7387
+ idleSweepMs: 6e4,
7388
+ shutdownDrainMs: 25e3,
7389
+ cancelBackstopMs: 3e4,
7384
7390
  maxAttachmentBytes: 8 * 1024 * 1024
7385
7391
  })
7386
7392
  });
@@ -16975,10 +16981,29 @@ var AcpHost = class {
16975
16981
  async cancel(sessionId) {
16976
16982
  await this.conn.agent.notify(methods.agent.session.cancel, { sessionId });
16977
16983
  }
16978
- async stop() {
16979
- if (!this.child) return;
16980
- this.child.kill("SIGTERM");
16981
- await new Promise((resolve) => this.child.once("exit", () => resolve()));
16984
+ /** Stop the adapter child: SIGTERM, then escalate to SIGKILL if it hasn't exited
16985
+ * within `deadlineMs` (a buggy/hung agent must never block daemon shutdown or an
16986
+ * idle reap). Idempotent — the child handle is cleared up front so a concurrent
16987
+ * stop (drain + reconcile racing) is a no-op rather than a double-kill. */
16988
+ async stop(deadlineMs = 5e3) {
16989
+ const child = this.child;
16990
+ if (!child) return;
16991
+ this.child = void 0;
16992
+ child.kill("SIGTERM");
16993
+ await new Promise((resolve) => {
16994
+ let settled = false;
16995
+ const done = () => {
16996
+ if (settled) return;
16997
+ settled = true;
16998
+ clearTimeout(timer);
16999
+ resolve();
17000
+ };
17001
+ const timer = setTimeout(() => {
17002
+ this.opts.log?.warn(`acp: child ignored SIGTERM after ${deadlineMs}ms — sending SIGKILL`);
17003
+ child.kill("SIGKILL");
17004
+ }, deadlineMs);
17005
+ child.once("exit", done);
17006
+ });
16982
17007
  }
16983
17008
  };
16984
17009
  //#endregion
@@ -17338,6 +17363,91 @@ const AgentScopeDenied = object({
17338
17363
  capability: string()
17339
17364
  });
17340
17365
  //#endregion
17366
+ //#region ../protocol/dist/frames/register.js
17367
+ /**
17368
+ * Capability upload + the reconcile snapshot — protocol §3.3.
17369
+ *
17370
+ * `register/ok` is the authoritative source of truth: the daemon converges its
17371
+ * local cache to it. CP wins all conflicts, so re-issuing the same snapshot is
17372
+ * idempotent.
17373
+ */
17374
+ const RegisterReq = object({
17375
+ host: string(),
17376
+ capabilities: object({
17377
+ platforms: array(Platform),
17378
+ runtimes: array(string()),
17379
+ acp: boolean(),
17380
+ features: array(string()).default([])
17381
+ }),
17382
+ maxAgents: number().int(),
17383
+ localState: object({
17384
+ assignments: array(string()),
17385
+ crons: array(string()),
17386
+ leases: array(string())
17387
+ })
17388
+ });
17389
+ const RegisterOk = object({
17390
+ routingEpoch: number().int(),
17391
+ assignments: array(RouteAssign),
17392
+ agents: array(AgentSpec.extend({ agentId: string().uuid() })).default([]),
17393
+ crons: array(CronUpsert),
17394
+ leases: array(SecretsGrant),
17395
+ drop: object({
17396
+ assignments: array(string()),
17397
+ crons: array(string())
17398
+ })
17399
+ });
17400
+ //#endregion
17401
+ //#region ../protocol/dist/frames/session.js
17402
+ /**
17403
+ * Session read-back (C→D REQ → REP) — the console's on-demand pulls.
17404
+ *
17405
+ * The CP stores NO session data — neither the list nor the bodies. Sessions are
17406
+ * created on the Slack→daemon path and live solely in the daemon's local store
17407
+ * (body-locality, §1/§12). So both the session **list** and a session's chat
17408
+ * **history** are pulled live from the owning daemon(s) and proxied to the
17409
+ * console — never persisted on the CP, never on the orchestration hot path.
17410
+ *
17411
+ * - `session/list`: the daemon's live sessions (CP fans this out to all the org's
17412
+ * online daemons and merges, for "all sessions across the workspace").
17413
+ * - `session/history`: one cursor-paginated page of a session's transcript.
17414
+ */
17415
+ /** One row in the session list (metadata + console metrics; NOT the transcript). */
17416
+ const SessionListItem = object({
17417
+ sessionId: string().uuid(),
17418
+ sessionKey: SessionKey,
17419
+ agentId: string().uuid(),
17420
+ title: string().optional(),
17421
+ status: string().optional(),
17422
+ lastActivityAt: string().optional(),
17423
+ tokenUsage: number().int().optional(),
17424
+ triggeredBy: string().optional()
17425
+ });
17426
+ /** C→D REQ: list the daemon's sessions (optionally just one agent's). */
17427
+ const SessionListReq = object({ agentId: string().uuid().optional() });
17428
+ /** D→C REP (corr = req id): the daemon's current sessions. */
17429
+ const SessionListPage = object({ sessions: array(SessionListItem) });
17430
+ /** One message in a session transcript page (a body — returned only for display). */
17431
+ const SessionMessage = object({
17432
+ seq: number().int(),
17433
+ sender: string(),
17434
+ ts: string(),
17435
+ kind: string(),
17436
+ text: string()
17437
+ });
17438
+ /** C→D REQ: fetch one page of a session's history from the owning daemon. */
17439
+ const SessionHistoryReq = object({
17440
+ sessionId: string().uuid(),
17441
+ cursor: string().optional(),
17442
+ limit: number().int().positive().max(200).default(50)
17443
+ });
17444
+ /** D→C REP (corr = the req id): a page of messages + the cursor for the next page. */
17445
+ const SessionHistoryPage = object({
17446
+ sessionId: string().uuid(),
17447
+ messages: array(SessionMessage),
17448
+ nextCursor: string().optional()
17449
+ });
17450
+ //#endregion
17341
17451
  //#region ../protocol/dist/frame.js
17342
17452
  /**
17343
17453
  * The single source of truth for the wire: `type` string → payload zod schema.
@@ -17353,32 +17463,8 @@ const AgentScopeDenied = object({
17353
17463
  const FRAME_SCHEMAS = {
17354
17464
  auth: AuthReq,
17355
17465
  "auth/ok": AuthOk,
17356
- register: object({
17357
- host: string(),
17358
- capabilities: object({
17359
- platforms: array(Platform),
17360
- runtimes: array(string()),
17361
- acp: boolean(),
17362
- features: array(string()).default([])
17363
- }),
17364
- maxAgents: number().int(),
17365
- localState: object({
17366
- assignments: array(string()),
17367
- crons: array(string()),
17368
- leases: array(string())
17369
- })
17370
- }),
17371
- "register/ok": object({
17372
- routingEpoch: number().int(),
17373
- assignments: array(RouteAssign),
17374
- agents: array(AgentSpec.extend({ agentId: string().uuid() })).default([]),
17375
- crons: array(CronUpsert),
17376
- leases: array(SecretsGrant),
17377
- drop: object({
17378
- assignments: array(string()),
17379
- crons: array(string())
17380
- })
17381
- }),
17466
+ register: RegisterReq,
17467
+ "register/ok": RegisterOk,
17382
17468
  heartbeat: object({
17383
17469
  load: object({
17384
17470
  cpu: number(),
@@ -17437,6 +17523,10 @@ const FRAME_SCHEMAS = {
17437
17523
  ]),
17438
17524
  toolCalling: boolean()
17439
17525
  }),
17526
+ "session/list": SessionListReq,
17527
+ "session/list/page": SessionListPage,
17528
+ "session/history": SessionHistoryReq,
17529
+ "session/history/page": SessionHistoryPage,
17440
17530
  "config/push": object({ keys: record(string(), unknown()) }),
17441
17531
  "daemon/restart": object({
17442
17532
  reason: string(),
@@ -17523,6 +17613,10 @@ discriminatedUnion("type", [
17523
17613
  frame("scope-attestation", FRAME_SCHEMAS["scope-attestation"]),
17524
17614
  frame("event/session", FRAME_SCHEMAS["event/session"]),
17525
17615
  frame("facts/runtime-profile", FRAME_SCHEMAS["facts/runtime-profile"]),
17616
+ frame("session/list", FRAME_SCHEMAS["session/list"]),
17617
+ frame("session/list/page", FRAME_SCHEMAS["session/list/page"]),
17618
+ frame("session/history", FRAME_SCHEMAS["session/history"]),
17619
+ frame("session/history/page", FRAME_SCHEMAS["session/history/page"]),
17526
17620
  frame("config/push", FRAME_SCHEMAS["config/push"]),
17527
17621
  frame("daemon/restart", FRAME_SCHEMAS["daemon/restart"]),
17528
17622
  frame("daemon/upgrade", FRAME_SCHEMAS["daemon/upgrade"]),
@@ -23292,6 +23386,27 @@ var LocalStore = class {
23292
23386
  acpSessionId=excluded.acpSessionId, state=excluded.state,
23293
23387
  lastDeliveredTs=excluded.lastDeliveredTs, updatedAt=excluded.updatedAt`).run(rec);
23294
23388
  }
23389
+ /** Targeted state transition for an existing session (§7.3), stamping `updatedAt`
23390
+ * so the change counts as activity for the TTL/idle clocks. No-op if the key is
23391
+ * unknown (the row is created by the SessionManager on first turn). */
23392
+ setSessionState(key, state, updatedAt) {
23393
+ this.db.prepare("UPDATE sessions SET state = ?, updatedAt = ? WHERE key = ?").run(state, updatedAt, key);
23394
+ }
23395
+ /** Most-recent activity across an agent's non-closed sessions (epoch ms), or null
23396
+ * if it has none. Drives idle-host reaping (#111): a host with no recent session
23397
+ * activity AND no in-flight turn is past its idle window. */
23398
+ agentLastActivityTs(agentId) {
23399
+ return this.db.prepare("SELECT MAX(updatedAt) AS ts FROM sessions WHERE agentId = ? AND state != 'closed'").get(agentId)?.ts ?? null;
23400
+ }
23401
+ /** §7.3 TTL close: move every `idle` session untouched since `now - ttlMs` to
23402
+ * `closed`, returning the rows closed (for logging). `prompting`/`cancelling`
23403
+ * sessions are never closed — a live turn keeps the thread open. */
23404
+ closeIdleSessions(now, ttlMs) {
23405
+ const cutoff = now - ttlMs;
23406
+ const rows = this.db.prepare("SELECT key, channel, thread, agentId FROM sessions WHERE state = 'idle' AND updatedAt < ?").all(cutoff);
23407
+ if (rows.length) this.db.prepare("UPDATE sessions SET state = 'closed' WHERE state = 'idle' AND updatedAt < ?").run(cutoff);
23408
+ return rows;
23409
+ }
23295
23410
  appendTranscript(e) {
23296
23411
  this.db.prepare("INSERT OR IGNORE INTO transcript (channel, thread, ts, sender, kind, text) VALUES (@channel, @thread, @ts, @sender, @kind, @text)").run(e);
23297
23412
  }
@@ -23443,10 +23558,13 @@ var SessionManager = class {
23443
23558
  } else if (host.hasSession?.(rec.acpSessionId) === false) {
23444
23559
  const cwd = await prepareWorkspace(agent);
23445
23560
  let resumed = false;
23446
- if (host.loadSupported?.()) try {
23447
- await host.loadSession(rec.acpSessionId, cwd);
23448
- resumed = true;
23449
- } catch {}
23561
+ if (host.loadSupported?.()) {
23562
+ this.deps.store.setSessionState(key, "resuming", Date.now());
23563
+ try {
23564
+ await host.loadSession(rec.acpSessionId, cwd);
23565
+ resumed = true;
23566
+ } catch {}
23567
+ }
23450
23568
  if (!resumed) {
23451
23569
  const mcpServers = this.deps.mcpServersFor?.({
23452
23570
  agent,
@@ -81628,14 +81746,13 @@ var CpClient = class {
81628
81746
  }
81629
81747
  armHeartbeat() {
81630
81748
  this.heartbeatTimer = this.deps.clock.setTimeout(() => {
81631
- if (this.state !== "READY") return;
81632
- this.transport?.send(encode(buildEnvelope("heartbeat", {
81749
+ if (this.state === "READY") this.transport?.send(encode(buildEnvelope("heartbeat", {
81633
81750
  load: this.deps.loadSnapshot(),
81634
81751
  health: "ok",
81635
81752
  activeSessions: this.deps.activeSessions(),
81636
81753
  degradedScopes: this.deps.degradedScopes?.() ?? []
81637
81754
  })));
81638
- this.armHeartbeat();
81755
+ if (this.state === "READY" || this.state === "DRAINING") this.armHeartbeat();
81639
81756
  }, this.heartbeatMs);
81640
81757
  }
81641
81758
  stopHeartbeat() {
@@ -81682,13 +81799,36 @@ var CpClient = class {
81682
81799
  case "agent/remove":
81683
81800
  this.deps.configApply.applyAgentRemove(frame.payload.agentId);
81684
81801
  return;
81685
- case "agent/launch":
81686
- case "agent/stop":
81687
- case "agent/prompt":
81688
- case "daemon/drain":
81802
+ case "agent/launch": {
81803
+ const launch = frame.payload;
81804
+ this.deps.configApply.applyAgentLaunch(launch).then((launched) => this.reply(frame, "agent/launched", launched)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/launch failed: ${err.message}`, false));
81805
+ return;
81806
+ }
81807
+ case "agent/stop": {
81808
+ const stop = frame.payload;
81809
+ this.deps.configApply.applyAgentStop(stop).then((ack) => this.reply(frame, "ack", ack)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/stop failed: ${err.message}`, false));
81810
+ return;
81811
+ }
81812
+ case "daemon/drain": {
81813
+ const drain = frame.payload;
81814
+ this.state = "DRAINING";
81815
+ this.deps.configApply.applyDaemonDrain(drain, (p) => this.emit("drain/progress", p)).then((done) => {
81816
+ this.reply(frame, "drain/done", done);
81817
+ if (this.state === "DRAINING") this.state = "READY";
81818
+ }).catch((err) => {
81819
+ this.sendError(frame.id, "INTERNAL", `drain failed: ${err.message}`, false);
81820
+ if (this.state === "DRAINING") this.state = "READY";
81821
+ });
81822
+ return;
81823
+ }
81689
81824
  case "daemon/restart":
81825
+ this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonRestart(frame.payload));
81826
+ return;
81690
81827
  case "daemon/upgrade":
81691
- this.sendError(frame.id, "INTERNAL", `${frame.type} not implemented`, false);
81828
+ this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonUpgrade(frame.payload));
81829
+ return;
81830
+ case "agent/prompt":
81831
+ this.sendError(frame.id, "INTERNAL", "agent/prompt not implemented", false);
81692
81832
  return;
81693
81833
  default:
81694
81834
  this.deps.log.debug(`cp: ignoring ${frame.type}`);
@@ -81698,6 +81838,10 @@ var CpClient = class {
81698
81838
  reply(req, type, payload) {
81699
81839
  this.transport?.send(encode(buildEnvelope(type, payload, { corr: req.id })));
81700
81840
  }
81841
+ /** Emit an uncorrelated EVT (e.g. `drain/progress`). */
81842
+ emit(type, payload) {
81843
+ this.transport?.send(encode(buildEnvelope(type, payload)));
81844
+ }
81701
81845
  };
81702
81846
  //#endregion
81703
81847
  //#region src/cp/cp-cron.ts
@@ -81978,6 +82122,21 @@ function formatErr(err) {
81978
82122
  }
81979
82123
  const MAX_QUEUED_PER_SESSION = 10;
81980
82124
  const IDLE_FLUSH_MS = 2e3;
82125
+ /** Build the wire SessionKey (protocol §5) for a pending turn — what `drain/done`
82126
+ * reports as released so the CP may reassign it. Uses the real `thread` (absent for
82127
+ * a channel-root message), NOT `statusThread` (which falls back to msgId): the CP
82128
+ * keys assignments by `thread ?? "-"`, so reporting the msgId would miss the match. */
82129
+ function pendingSessionKey(p) {
82130
+ const platform = p.platform;
82131
+ return p.thread !== void 0 ? {
82132
+ platform,
82133
+ channel: p.channel,
82134
+ thread: p.thread
82135
+ } : {
82136
+ platform,
82137
+ channel: p.channel
82138
+ };
82139
+ }
81981
82140
  var Daemon = class {
81982
82141
  opts;
81983
82142
  store;
@@ -82003,8 +82162,17 @@ var Daemon = class {
82003
82162
  cpAgents;
82004
82163
  botUserIds = {};
82005
82164
  cpRouting;
82165
+ clock;
82166
+ requestExit;
82167
+ draining = false;
82168
+ drainingAgents = /* @__PURE__ */ new Set();
82169
+ hostStopping = /* @__PURE__ */ new Map();
82170
+ idleSweepTimer;
82171
+ cancelTimers = /* @__PURE__ */ new Map();
82006
82172
  constructor(opts = {}) {
82007
82173
  this.opts = opts;
82174
+ this.clock = opts.clock ?? systemClock;
82175
+ this.requestExit = opts.requestExit ?? ((code) => process.exit(code));
82008
82176
  }
82009
82177
  async start() {
82010
82178
  const root = resolveRoot(this.opts.root);
@@ -82144,6 +82312,7 @@ var Daemon = class {
82144
82312
  this.watcher.on("add", debounced).on("change", debounced).on("unlink", debounced);
82145
82313
  this.log.info(`watching ${this.agentsDir} for agent changes`);
82146
82314
  this.startCpClient(root);
82315
+ this.armIdleSweep();
82147
82316
  this.log.info("daemon ready");
82148
82317
  }
82149
82318
  loadAgentList() {
@@ -82316,6 +82485,10 @@ var Daemon = class {
82316
82485
  }
82317
82486
  seenMsgIds = /* @__PURE__ */ new Set();
82318
82487
  onInbound(msg) {
82488
+ if (this.draining) {
82489
+ this.log.debug(`routing: dropping inbound ${msg.msgId} (daemon draining)`);
82490
+ return;
82491
+ }
82319
82492
  if (this.seenMsgIds.has(msg.msgId)) {
82320
82493
  this.log.debug(`routing: duplicate ${msg.msgId} ignored`);
82321
82494
  return;
@@ -82333,6 +82506,10 @@ var Daemon = class {
82333
82506
  this.log.debug(`routing: dropped message in ch=${msg.channel} (no agent matched — not a mention of a known bot, not a subscribed 'all' channel, not a thread/DM hit)`);
82334
82507
  return;
82335
82508
  }
82509
+ if (this.drainingAgents.has(result.agentId)) {
82510
+ this.log.debug(`routing: dropping ${msg.msgId} for agent "${result.agentId}" (draining)`);
82511
+ return;
82512
+ }
82336
82513
  this.log.info(`routing: ch=${msg.channel} → agent "${result.agentId}" (integration ${result.integrationId})`);
82337
82514
  this.dispatch(result.agentId, msg, result.integrationId).catch((err) => this.log.error(`dispatch failed for agent "${result.agentId}": ${formatErr(err)}`));
82338
82515
  }
@@ -82402,7 +82579,8 @@ var Daemon = class {
82402
82579
  }
82403
82580
  const conn = this.replyConnFor(target.agentId, target.integrationId);
82404
82581
  const thread = msg.thread ?? msg.msgId;
82405
- const acpSessionId = this.store.getSession(sessionKey(msg.platform, msg.channel, thread, target.agentId))?.acpSessionId;
82582
+ const key = sessionKey(msg.platform, msg.channel, thread, target.agentId);
82583
+ const acpSessionId = this.store.getSession(key)?.acpSessionId;
82406
82584
  const inflight = !!(acpSessionId && this.pending.has(acpSessionId));
82407
82585
  if (command.kind === "stop") {
82408
82586
  if (!inflight) {
@@ -82411,7 +82589,9 @@ var Daemon = class {
82411
82589
  }
82412
82590
  this.queued.delete(acpSessionId);
82413
82591
  this.log.info(`command: stop → agent "${target.agentId}" session ${acpSessionId}`);
82592
+ this.store.setSessionState(key, "cancelling", this.clock.now());
82414
82593
  this.hosts.get(target.agentId)?.cancel(acpSessionId).catch((err) => this.log.error(`command stop: cancel failed: ${err.message}`));
82594
+ this.armCancelBackstop(target.agentId, acpSessionId, key);
82415
82595
  conn?.postMessage(msg.channel, "🛑 Stopped.", thread);
82416
82596
  return;
82417
82597
  }
@@ -82474,22 +82654,40 @@ var Daemon = class {
82474
82654
  return [...out];
82475
82655
  }
82476
82656
  async dispatch(agentId, msg, integrationId) {
82657
+ if (this.draining || this.drainingAgents.has(agentId)) {
82658
+ this.log.debug(`dispatch: skipped for agent "${agentId}" (draining)`);
82659
+ return;
82660
+ }
82477
82661
  const conv = new OutputConverger(this.agents.get(agentId).output.mode);
82478
82662
  const rec = new TranscriptRecorder();
82479
82663
  const replyConn = this.replyConnFor(agentId, integrationId);
82480
82664
  const wasRunning = this.hostStarts.has(agentId);
82481
82665
  const statusThread = msg.thread ?? msg.msgId;
82666
+ const key = sessionKey(msg.platform, msg.channel, statusThread, agentId);
82482
82667
  replyConn?.setStatus(msg.channel, statusThread, wasRunning ? "is thinking…" : "is starting up…");
82483
- const { sessionId, blocks } = await this.sessions.handle(agentId, msg);
82668
+ let handled;
82669
+ try {
82670
+ handled = await this.sessions.handle(agentId, msg);
82671
+ } catch (err) {
82672
+ this.store.setSessionState(key, "idle", this.clock.now());
82673
+ throw err;
82674
+ }
82675
+ const { sessionId, blocks } = handled;
82676
+ let resolveDone;
82677
+ const done = new Promise((r) => resolveDone = r);
82484
82678
  const p = {
82485
82679
  conv,
82486
82680
  rec,
82487
82681
  agentId,
82682
+ platform: msg.platform,
82683
+ sessionKey: key,
82488
82684
  channel: msg.channel,
82489
82685
  thread: msg.thread,
82490
82686
  statusThread,
82491
82687
  conn: replyConn,
82492
- applyChain: Promise.resolve()
82688
+ applyChain: Promise.resolve(),
82689
+ done,
82690
+ resolveDone
82493
82691
  };
82494
82692
  this.pending.set(sessionId, p);
82495
82693
  try {
@@ -82504,9 +82702,35 @@ var Daemon = class {
82504
82702
  } finally {
82505
82703
  this.clearIdle(p);
82506
82704
  this.pending.delete(sessionId);
82705
+ this.store.setSessionState(key, "idle", this.clock.now());
82706
+ this.clearCancelBackstop(sessionId);
82707
+ p.resolveDone();
82507
82708
  }
82508
82709
  this.flushQueued(agentId, sessionId, integrationId);
82509
82710
  }
82711
+ /** After `!stop` sends session/cancel, give the agent `cancelBackstopMs` to yield.
82712
+ * If the turn is still in flight when the timer fires, the agent ignored the
82713
+ * cancel — force-stop its host (the only hard kill available) so the session
82714
+ * can't be stuck in `cancelling` forever. dispatch's finally clears this timer
82715
+ * the moment the turn yields on its own. */
82716
+ armCancelBackstop(agentId, acpSessionId, key) {
82717
+ this.clearCancelBackstop(acpSessionId);
82718
+ const ms = this.cfg.limits.cancelBackstopMs;
82719
+ this.cancelTimers.set(acpSessionId, this.clock.setTimeout(() => {
82720
+ this.cancelTimers.delete(acpSessionId);
82721
+ if (!this.pending.has(acpSessionId)) return;
82722
+ this.log.warn(`command stop: agent "${agentId}" ignored session/cancel for ${ms}ms — force-stopping host (session ${acpSessionId})`);
82723
+ this.stopHost(agentId, 0);
82724
+ this.store.setSessionState(key, "idle", this.clock.now());
82725
+ }, ms));
82726
+ }
82727
+ clearCancelBackstop(acpSessionId) {
82728
+ const t = this.cancelTimers.get(acpSessionId);
82729
+ if (t !== void 0) {
82730
+ this.clock.clearTimeout(t);
82731
+ this.cancelTimers.delete(acpSessionId);
82732
+ }
82733
+ }
82510
82734
  /**
82511
82735
  * Apply one converger action against the session's Slack connection:
82512
82736
  * - set-status → assistant.threads.setStatus (best-effort; '' clears)
@@ -82595,6 +82819,8 @@ var Daemon = class {
82595
82819
  });
82596
82820
  }
82597
82821
  async ensureHostAsync(agentId) {
82822
+ const stopping = this.hostStopping.get(agentId);
82823
+ if (stopping) await stopping;
82598
82824
  const host = this.ensureHost(agentId, this.cfg);
82599
82825
  let p = this.hostStarts.get(agentId);
82600
82826
  if (!p) {
@@ -82608,6 +82834,171 @@ var Daemon = class {
82608
82834
  const intId = integrationId ?? this.agents.get(agentId)?.integrations[0]?.id;
82609
82835
  return intId ? this.connByIntegration.get(intId) : void 0;
82610
82836
  }
82837
+ /** Stop and evict an ACP adapter child, returning the agent to `provisioned`
82838
+ * (config kept; the next message lazily re-spawns it). Idempotent. The teardown
82839
+ * is registered in `hostStopping` so a concurrent ensureHostAsync waits for it
82840
+ * instead of spawning a second live child. */
82841
+ async stopHost(agentId, deadlineMs) {
82842
+ const host = this.hosts.get(agentId);
82843
+ this.hosts.delete(agentId);
82844
+ this.hostStarts.delete(agentId);
82845
+ if (!host) return;
82846
+ const stop = host.stop(deadlineMs).finally(() => {
82847
+ if (this.hostStopping.get(agentId) === stop) this.hostStopping.delete(agentId);
82848
+ });
82849
+ this.hostStopping.set(agentId, stop);
82850
+ await stop;
82851
+ }
82852
+ /** Recurring idle sweep (§7.2/§7.3): reap idle adapter children and TTL-close
82853
+ * idle sessions. Driven by the injected Clock so a FakeClock advances it in tests. */
82854
+ armIdleSweep() {
82855
+ const interval = this.cfg.limits.idleSweepMs;
82856
+ if (interval <= 0) return;
82857
+ this.idleSweepTimer = this.clock.setTimeout(() => {
82858
+ this.idleSweepTimer = void 0;
82859
+ try {
82860
+ this.sweepIdle();
82861
+ } catch (err) {
82862
+ this.log.error(`idle sweep failed: ${formatErr(err)}`);
82863
+ }
82864
+ if (!this.draining) this.armIdleSweep();
82865
+ }, interval);
82866
+ }
82867
+ sweepIdle() {
82868
+ const now = this.clock.now();
82869
+ const ttl = this.cfg.limits.agentIdleTimeoutMs;
82870
+ const closed = this.store.closeIdleSessions(now, ttl);
82871
+ if (closed.length) this.log.info(`idle: TTL-closed ${closed.length} session(s) (>${Math.round(ttl / 1e3)}s)`);
82872
+ for (const [agentId] of [...this.hosts]) {
82873
+ if (this.drainingAgents.has(agentId)) continue;
82874
+ if ([...this.pending.values()].some((p) => p.agentId === agentId)) continue;
82875
+ const last = this.store.agentLastActivityTs(agentId) ?? 0;
82876
+ if (now - last <= ttl) continue;
82877
+ this.log.info(`idle: reclaiming host "${agentId}" (idle ${Math.round((now - last) / 1e3)}s) → provisioned`);
82878
+ this.stopHost(agentId).catch((err) => this.log.error(`idle: stop host "${agentId}" failed: ${formatErr(err)}`));
82879
+ }
82880
+ }
82881
+ /** Race `work` against a Clock-driven deadline, always clearing the timer so a
82882
+ * finished drain never leaves a dangling timer holding the process open. */
82883
+ async raceDeadline(work, ms) {
82884
+ const delay = Math.min(Math.max(0, ms), 2147483647);
82885
+ let handle;
82886
+ const timeout = new Promise((resolve) => {
82887
+ handle = this.clock.setTimeout(() => resolve("timeout"), delay);
82888
+ });
82889
+ try {
82890
+ return await Promise.race([work.then(() => "done"), timeout]);
82891
+ } finally {
82892
+ if (handle !== void 0) this.clock.clearTimeout(handle);
82893
+ }
82894
+ }
82895
+ /**
82896
+ * §5.3 drain: gate new turns for the scope, await in-flight turns up to
82897
+ * `deadlineMs` (emitting `drain/progress` as each yields), then cancel any
82898
+ * straggler past the deadline. Returns BOTH the in-scope set (`matched`) and the
82899
+ * subset that actually finished (`drained`). The caller decides which to report
82900
+ * as released: a session is only safe to release once it is genuinely no longer
82901
+ * being served — either it drained, or the caller force-stops its host. Reporting
82902
+ * a still-running straggler would let the CP reassign it and double-serve.
82903
+ */
82904
+ async drainScope(scope, deadlineMs, onProgress) {
82905
+ const match = (p) => {
82906
+ if (scope.kind === "daemon") return true;
82907
+ if (scope.kind === "agent") return p.agentId === scope.agentId;
82908
+ const k = scope.sessionKey;
82909
+ return p.platform === k.platform && p.channel === k.channel && (k.thread === void 0 || p.statusThread === k.thread);
82910
+ };
82911
+ if (scope.kind === "daemon") this.draining = true;
82912
+ else if (scope.kind === "agent") this.drainingAgents.add(scope.agentId);
82913
+ const keyOf = (sk) => `${sk.platform}:${sk.channel}:${sk.thread ?? "-"}`;
82914
+ const targets = [...this.pending.entries()].filter(([, p]) => match(p));
82915
+ const matched = /* @__PURE__ */ new Map();
82916
+ for (const [, p] of targets) matched.set(keyOf(pendingSessionKey(p)), pendingSessionKey(p));
82917
+ if (targets.length) this.log.info(`drain[${scope.kind}]: awaiting ${targets.length} turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
82918
+ let settled = false;
82919
+ const drained = /* @__PURE__ */ new Map();
82920
+ let remaining = targets.length;
82921
+ const work = Promise.all(targets.map(([, p]) => p.done.then(() => {
82922
+ const sk = pendingSessionKey(p);
82923
+ drained.set(keyOf(sk), sk);
82924
+ remaining--;
82925
+ if (!settled) onProgress?.({
82926
+ remaining,
82927
+ drained: [...drained.values()]
82928
+ });
82929
+ })));
82930
+ const res = await this.raceDeadline(work, deadlineMs);
82931
+ settled = true;
82932
+ if (res === "timeout") for (const [sid, p] of this.pending) {
82933
+ if (!match(p)) continue;
82934
+ this.log.warn(`drain[${scope.kind}]: cancelling straggler turn (session ${sid})`);
82935
+ await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
82936
+ }
82937
+ return {
82938
+ matched: [...matched.values()],
82939
+ drained: [...drained.values()]
82940
+ };
82941
+ }
82942
+ /** Handle a CP `daemon/drain` (§5.3). A bare drain is a rebalance: after
82943
+ * releasing sessions the daemon reclaims hosts and re-opens its gate (a teardown
82944
+ * arrives separately via daemon/restart). */
82945
+ async runDrain(drain, onProgress) {
82946
+ const deadlineMs = Math.max(0, new Date(drain.deadline).getTime() - this.clock.now());
82947
+ const { matched, drained } = await this.drainScope(drain.scope, deadlineMs, onProgress);
82948
+ let released;
82949
+ if (drain.scope.kind === "daemon") {
82950
+ for (const id of [...this.hosts.keys()]) await this.stopHost(id);
82951
+ this.draining = false;
82952
+ released = matched;
82953
+ } else if (drain.scope.kind === "agent") {
82954
+ await this.stopHost(drain.scope.agentId);
82955
+ this.drainingAgents.delete(drain.scope.agentId);
82956
+ released = matched;
82957
+ } else released = drained;
82958
+ this.log.info(`drain[${drain.scope.kind}]: done — released ${released.length} session(s)`);
82959
+ return { released };
82960
+ }
82961
+ /** `agent/stop` (§8.2): drain the agent's in-flight turns, stop its host, and
82962
+ * leave it gated so it stays down until a matching `agent/launch` revives it. */
82963
+ async stopAgent(agentId) {
82964
+ await this.drainScope({
82965
+ kind: "agent",
82966
+ agentId
82967
+ }, this.cfg.limits.shutdownDrainMs);
82968
+ await this.stopHost(agentId);
82969
+ }
82970
+ /** §2.5 SIGTERM / daemon shutdown: gate new turns, then await in-flight turns up
82971
+ * to `shutdownDrainMs`, cancelling stragglers. Safe to call repeatedly. */
82972
+ async drainForShutdown() {
82973
+ this.draining = true;
82974
+ const live = [...this.pending.values()];
82975
+ if (live.length === 0) return;
82976
+ const deadlineMs = this.cfg.limits.shutdownDrainMs;
82977
+ this.log.info(`shutdown: draining ${live.length} in-flight turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
82978
+ if (await this.raceDeadline(Promise.all(live.map((p) => p.done)), deadlineMs) === "timeout") {
82979
+ this.log.warn(`shutdown: deadline hit with ${this.pending.size} turn(s) still in flight — cancelling`);
82980
+ for (const [sid, p] of this.pending) await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
82981
+ }
82982
+ }
82983
+ /** daemon/restart + daemon/upgrade (§8.3): ack now, then drain + stop + exit so
82984
+ * the supervisor relaunches (the new binary, for upgrade). */
82985
+ scheduleFleetExit(kind, targetVersion) {
82986
+ const willDrainUntil = new Date(this.clock.now() + this.cfg.limits.shutdownDrainMs).toISOString();
82987
+ this.log.info(`cp: ${kind}${targetVersion ? ` → ${targetVersion}` : ""} requested — draining then exiting`);
82988
+ (async () => {
82989
+ try {
82990
+ await this.stop();
82991
+ } catch (err) {
82992
+ this.log.error(`cp: ${kind} shutdown failed: ${formatErr(err)}`);
82993
+ } finally {
82994
+ this.requestExit(0);
82995
+ }
82996
+ })();
82997
+ return {
82998
+ accepted: true,
82999
+ willDrainUntil
83000
+ };
83001
+ }
82611
83002
  cpConfigApply() {
82612
83003
  return {
82613
83004
  applyConfigPush: (keys) => {
@@ -82633,7 +83024,26 @@ var Daemon = class {
82633
83024
  upsertCron: (cron) => this.cpCrons.upsert(cron),
82634
83025
  removeCron: (cronId) => this.cpCrons.remove(cronId),
82635
83026
  applyRouteAssign: (a) => this.cpRouting?.upsertAssign(a),
82636
- applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u)
83027
+ applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u),
83028
+ applyAgentLaunch: async (launch) => {
83029
+ const agent = this.agents.get(launch.agentId);
83030
+ if (!agent) throw new Error(`agent/launch: unknown agent ${launch.agentId}`);
83031
+ this.drainingAgents.delete(launch.agentId);
83032
+ await this.ensureHostAsync(launch.agentId);
83033
+ return {
83034
+ agentId: launch.agentId,
83035
+ launchId: randomUUID(),
83036
+ startedAt: new Date(this.clock.now()).toISOString(),
83037
+ runtime: agent.runtime
83038
+ };
83039
+ },
83040
+ applyAgentStop: async (stop) => {
83041
+ await this.stopAgent(stop.agentId);
83042
+ return { ok: true };
83043
+ },
83044
+ applyDaemonDrain: (drain, onProgress) => this.runDrain(drain, onProgress),
83045
+ applyDaemonRestart: (_req) => this.scheduleFleetExit("restart"),
83046
+ applyDaemonUpgrade: (req) => this.scheduleFleetExit("upgrade", req.targetVersion)
82637
83047
  };
82638
83048
  }
82639
83049
  /** A CP cron fired: build a synthetic message and run it through the normal routing path. */
@@ -82706,8 +83116,16 @@ var Daemon = class {
82706
83116
  this.log.info(`cp: connecting to ${url}…`);
82707
83117
  }
82708
83118
  async stop() {
83119
+ this.draining = true;
82709
83120
  clearTimeout(this.debounceTimer);
83121
+ if (this.idleSweepTimer !== void 0) {
83122
+ this.clock.clearTimeout(this.idleSweepTimer);
83123
+ this.idleSweepTimer = void 0;
83124
+ }
83125
+ for (const t of this.cancelTimers.values()) this.clock.clearTimeout(t);
83126
+ this.cancelTimers.clear();
82710
83127
  await this.watcher?.close();
83128
+ await this.drainForShutdown();
82711
83129
  const errors = [];
82712
83130
  this.scheduler?.stop();
82713
83131
  await Promise.resolve(this.cpClient?.stop()).catch((e) => errors.push(e));