@agentconnect.md/daemon 1.0.0-rc.37 → 1.0.0-rc.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7376,11 +7376,17 @@ const ConfigSchema = object({
7376
7376
  maxAgents: number().int().default(8),
7377
7377
  maxConcurrentSessions: number().int().default(32),
7378
7378
  agentIdleTimeoutMs: number().int().default(9e5),
7379
+ idleSweepMs: number().int().default(6e4),
7380
+ shutdownDrainMs: number().int().default(25e3),
7381
+ cancelBackstopMs: number().int().default(3e4),
7379
7382
  maxAttachmentBytes: number().int().default(8 * 1024 * 1024)
7380
7383
  }).default({
7381
7384
  maxAgents: 8,
7382
7385
  maxConcurrentSessions: 32,
7383
7386
  agentIdleTimeoutMs: 9e5,
7387
+ idleSweepMs: 6e4,
7388
+ shutdownDrainMs: 25e3,
7389
+ cancelBackstopMs: 3e4,
7384
7390
  maxAttachmentBytes: 8 * 1024 * 1024
7385
7391
  })
7386
7392
  });
@@ -16975,10 +16981,29 @@ var AcpHost = class {
16975
16981
  async cancel(sessionId) {
16976
16982
  await this.conn.agent.notify(methods.agent.session.cancel, { sessionId });
16977
16983
  }
16978
- async stop() {
16979
- if (!this.child) return;
16980
- this.child.kill("SIGTERM");
16981
- await new Promise((resolve) => this.child.once("exit", () => resolve()));
16984
+ /** Stop the adapter child: SIGTERM, then escalate to SIGKILL if it hasn't exited
16985
+ * within `deadlineMs` (a buggy/hung agent must never block daemon shutdown or an
16986
+ * idle reap). Idempotent — the child handle is cleared up front so a concurrent
16987
+ * stop (drain + reconcile racing) is a no-op rather than a double-kill. */
16988
+ async stop(deadlineMs = 5e3) {
16989
+ const child = this.child;
16990
+ if (!child) return;
16991
+ this.child = void 0;
16992
+ child.kill("SIGTERM");
16993
+ await new Promise((resolve) => {
16994
+ let settled = false;
16995
+ const done = () => {
16996
+ if (settled) return;
16997
+ settled = true;
16998
+ clearTimeout(timer);
16999
+ resolve();
17000
+ };
17001
+ const timer = setTimeout(() => {
17002
+ this.opts.log?.warn(`acp: child ignored SIGTERM after ${deadlineMs}ms — sending SIGKILL`);
17003
+ child.kill("SIGKILL");
17004
+ }, deadlineMs);
17005
+ child.once("exit", done);
17006
+ });
16982
17007
  }
16983
17008
  };
16984
17009
  //#endregion
@@ -23361,6 +23386,27 @@ var LocalStore = class {
23361
23386
  acpSessionId=excluded.acpSessionId, state=excluded.state,
23362
23387
  lastDeliveredTs=excluded.lastDeliveredTs, updatedAt=excluded.updatedAt`).run(rec);
23363
23388
  }
23389
+ /** Targeted state transition for an existing session (§7.3), stamping `updatedAt`
23390
+ * so the change counts as activity for the TTL/idle clocks. No-op if the key is
23391
+ * unknown (the row is created by the SessionManager on first turn). */
23392
+ setSessionState(key, state, updatedAt) {
23393
+ this.db.prepare("UPDATE sessions SET state = ?, updatedAt = ? WHERE key = ?").run(state, updatedAt, key);
23394
+ }
23395
+ /** Most-recent activity across an agent's non-closed sessions (epoch ms), or null
23396
+ * if it has none. Drives idle-host reaping (#111): a host with no recent session
23397
+ * activity AND no in-flight turn is past its idle window. */
23398
+ agentLastActivityTs(agentId) {
23399
+ return this.db.prepare("SELECT MAX(updatedAt) AS ts FROM sessions WHERE agentId = ? AND state != 'closed'").get(agentId)?.ts ?? null;
23400
+ }
23401
+ /** §7.3 TTL close: move every `idle` session untouched since `now - ttlMs` to
23402
+ * `closed`, returning the rows closed (for logging). `prompting`/`cancelling`
23403
+ * sessions are never closed — a live turn keeps the thread open. */
23404
+ closeIdleSessions(now, ttlMs) {
23405
+ const cutoff = now - ttlMs;
23406
+ const rows = this.db.prepare("SELECT key, channel, thread, agentId FROM sessions WHERE state = 'idle' AND updatedAt < ?").all(cutoff);
23407
+ if (rows.length) this.db.prepare("UPDATE sessions SET state = 'closed' WHERE state = 'idle' AND updatedAt < ?").run(cutoff);
23408
+ return rows;
23409
+ }
23364
23410
  appendTranscript(e) {
23365
23411
  this.db.prepare("INSERT OR IGNORE INTO transcript (channel, thread, ts, sender, kind, text) VALUES (@channel, @thread, @ts, @sender, @kind, @text)").run(e);
23366
23412
  }
@@ -23512,10 +23558,13 @@ var SessionManager = class {
23512
23558
  } else if (host.hasSession?.(rec.acpSessionId) === false) {
23513
23559
  const cwd = await prepareWorkspace(agent);
23514
23560
  let resumed = false;
23515
- if (host.loadSupported?.()) try {
23516
- await host.loadSession(rec.acpSessionId, cwd);
23517
- resumed = true;
23518
- } catch {}
23561
+ if (host.loadSupported?.()) {
23562
+ this.deps.store.setSessionState(key, "resuming", Date.now());
23563
+ try {
23564
+ await host.loadSession(rec.acpSessionId, cwd);
23565
+ resumed = true;
23566
+ } catch {}
23567
+ }
23519
23568
  if (!resumed) {
23520
23569
  const mcpServers = this.deps.mcpServersFor?.({
23521
23570
  agent,
@@ -81697,14 +81746,13 @@ var CpClient = class {
81697
81746
  }
81698
81747
  armHeartbeat() {
81699
81748
  this.heartbeatTimer = this.deps.clock.setTimeout(() => {
81700
- if (this.state !== "READY") return;
81701
- this.transport?.send(encode(buildEnvelope("heartbeat", {
81749
+ if (this.state === "READY") this.transport?.send(encode(buildEnvelope("heartbeat", {
81702
81750
  load: this.deps.loadSnapshot(),
81703
81751
  health: "ok",
81704
81752
  activeSessions: this.deps.activeSessions(),
81705
81753
  degradedScopes: this.deps.degradedScopes?.() ?? []
81706
81754
  })));
81707
- this.armHeartbeat();
81755
+ if (this.state === "READY" || this.state === "DRAINING") this.armHeartbeat();
81708
81756
  }, this.heartbeatMs);
81709
81757
  }
81710
81758
  stopHeartbeat() {
@@ -81751,13 +81799,36 @@ var CpClient = class {
81751
81799
  case "agent/remove":
81752
81800
  this.deps.configApply.applyAgentRemove(frame.payload.agentId);
81753
81801
  return;
81754
- case "agent/launch":
81755
- case "agent/stop":
81756
- case "agent/prompt":
81757
- case "daemon/drain":
81802
+ case "agent/launch": {
81803
+ const launch = frame.payload;
81804
+ this.deps.configApply.applyAgentLaunch(launch).then((launched) => this.reply(frame, "agent/launched", launched)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/launch failed: ${err.message}`, false));
81805
+ return;
81806
+ }
81807
+ case "agent/stop": {
81808
+ const stop = frame.payload;
81809
+ this.deps.configApply.applyAgentStop(stop).then((ack) => this.reply(frame, "ack", ack)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/stop failed: ${err.message}`, false));
81810
+ return;
81811
+ }
81812
+ case "daemon/drain": {
81813
+ const drain = frame.payload;
81814
+ this.state = "DRAINING";
81815
+ this.deps.configApply.applyDaemonDrain(drain, (p) => this.emit("drain/progress", p)).then((done) => {
81816
+ this.reply(frame, "drain/done", done);
81817
+ if (this.state === "DRAINING") this.state = "READY";
81818
+ }).catch((err) => {
81819
+ this.sendError(frame.id, "INTERNAL", `drain failed: ${err.message}`, false);
81820
+ if (this.state === "DRAINING") this.state = "READY";
81821
+ });
81822
+ return;
81823
+ }
81758
81824
  case "daemon/restart":
81825
+ this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonRestart(frame.payload));
81826
+ return;
81759
81827
  case "daemon/upgrade":
81760
- this.sendError(frame.id, "INTERNAL", `${frame.type} not implemented`, false);
81828
+ this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonUpgrade(frame.payload));
81829
+ return;
81830
+ case "agent/prompt":
81831
+ this.sendError(frame.id, "INTERNAL", "agent/prompt not implemented", false);
81761
81832
  return;
81762
81833
  default:
81763
81834
  this.deps.log.debug(`cp: ignoring ${frame.type}`);
@@ -81767,6 +81838,10 @@ var CpClient = class {
81767
81838
  reply(req, type, payload) {
81768
81839
  this.transport?.send(encode(buildEnvelope(type, payload, { corr: req.id })));
81769
81840
  }
81841
+ /** Emit an uncorrelated EVT (e.g. `drain/progress`). */
81842
+ emit(type, payload) {
81843
+ this.transport?.send(encode(buildEnvelope(type, payload)));
81844
+ }
81770
81845
  };
81771
81846
  //#endregion
81772
81847
  //#region src/cp/cp-cron.ts
@@ -82047,6 +82122,21 @@ function formatErr(err) {
82047
82122
  }
82048
82123
  const MAX_QUEUED_PER_SESSION = 10;
82049
82124
  const IDLE_FLUSH_MS = 2e3;
82125
+ /** Build the wire SessionKey (protocol §5) for a pending turn — what `drain/done`
82126
+ * reports as released so the CP may reassign it. Uses the real `thread` (absent for
82127
+ * a channel-root message), NOT `statusThread` (which falls back to msgId): the CP
82128
+ * keys assignments by `thread ?? "-"`, so reporting the msgId would miss the match. */
82129
+ function pendingSessionKey(p) {
82130
+ const platform = p.platform;
82131
+ return p.thread !== void 0 ? {
82132
+ platform,
82133
+ channel: p.channel,
82134
+ thread: p.thread
82135
+ } : {
82136
+ platform,
82137
+ channel: p.channel
82138
+ };
82139
+ }
82050
82140
  var Daemon = class {
82051
82141
  opts;
82052
82142
  store;
@@ -82072,8 +82162,17 @@ var Daemon = class {
82072
82162
  cpAgents;
82073
82163
  botUserIds = {};
82074
82164
  cpRouting;
82165
+ clock;
82166
+ requestExit;
82167
+ draining = false;
82168
+ drainingAgents = /* @__PURE__ */ new Set();
82169
+ hostStopping = /* @__PURE__ */ new Map();
82170
+ idleSweepTimer;
82171
+ cancelTimers = /* @__PURE__ */ new Map();
82075
82172
  constructor(opts = {}) {
82076
82173
  this.opts = opts;
82174
+ this.clock = opts.clock ?? systemClock;
82175
+ this.requestExit = opts.requestExit ?? ((code) => process.exit(code));
82077
82176
  }
82078
82177
  async start() {
82079
82178
  const root = resolveRoot(this.opts.root);
@@ -82213,6 +82312,7 @@ var Daemon = class {
82213
82312
  this.watcher.on("add", debounced).on("change", debounced).on("unlink", debounced);
82214
82313
  this.log.info(`watching ${this.agentsDir} for agent changes`);
82215
82314
  this.startCpClient(root);
82315
+ this.armIdleSweep();
82216
82316
  this.log.info("daemon ready");
82217
82317
  }
82218
82318
  loadAgentList() {
@@ -82385,6 +82485,10 @@ var Daemon = class {
82385
82485
  }
82386
82486
  seenMsgIds = /* @__PURE__ */ new Set();
82387
82487
  onInbound(msg) {
82488
+ if (this.draining) {
82489
+ this.log.debug(`routing: dropping inbound ${msg.msgId} (daemon draining)`);
82490
+ return;
82491
+ }
82388
82492
  if (this.seenMsgIds.has(msg.msgId)) {
82389
82493
  this.log.debug(`routing: duplicate ${msg.msgId} ignored`);
82390
82494
  return;
@@ -82402,6 +82506,10 @@ var Daemon = class {
82402
82506
  this.log.debug(`routing: dropped message in ch=${msg.channel} (no agent matched — not a mention of a known bot, not a subscribed 'all' channel, not a thread/DM hit)`);
82403
82507
  return;
82404
82508
  }
82509
+ if (this.drainingAgents.has(result.agentId)) {
82510
+ this.log.debug(`routing: dropping ${msg.msgId} for agent "${result.agentId}" (draining)`);
82511
+ return;
82512
+ }
82405
82513
  this.log.info(`routing: ch=${msg.channel} → agent "${result.agentId}" (integration ${result.integrationId})`);
82406
82514
  this.dispatch(result.agentId, msg, result.integrationId).catch((err) => this.log.error(`dispatch failed for agent "${result.agentId}": ${formatErr(err)}`));
82407
82515
  }
@@ -82471,7 +82579,8 @@ var Daemon = class {
82471
82579
  }
82472
82580
  const conn = this.replyConnFor(target.agentId, target.integrationId);
82473
82581
  const thread = msg.thread ?? msg.msgId;
82474
- const acpSessionId = this.store.getSession(sessionKey(msg.platform, msg.channel, thread, target.agentId))?.acpSessionId;
82582
+ const key = sessionKey(msg.platform, msg.channel, thread, target.agentId);
82583
+ const acpSessionId = this.store.getSession(key)?.acpSessionId;
82475
82584
  const inflight = !!(acpSessionId && this.pending.has(acpSessionId));
82476
82585
  if (command.kind === "stop") {
82477
82586
  if (!inflight) {
@@ -82480,7 +82589,9 @@ var Daemon = class {
82480
82589
  }
82481
82590
  this.queued.delete(acpSessionId);
82482
82591
  this.log.info(`command: stop → agent "${target.agentId}" session ${acpSessionId}`);
82592
+ this.store.setSessionState(key, "cancelling", this.clock.now());
82483
82593
  this.hosts.get(target.agentId)?.cancel(acpSessionId).catch((err) => this.log.error(`command stop: cancel failed: ${err.message}`));
82594
+ this.armCancelBackstop(target.agentId, acpSessionId, key);
82484
82595
  conn?.postMessage(msg.channel, "🛑 Stopped.", thread);
82485
82596
  return;
82486
82597
  }
@@ -82543,22 +82654,40 @@ var Daemon = class {
82543
82654
  return [...out];
82544
82655
  }
82545
82656
  async dispatch(agentId, msg, integrationId) {
82657
+ if (this.draining || this.drainingAgents.has(agentId)) {
82658
+ this.log.debug(`dispatch: skipped for agent "${agentId}" (draining)`);
82659
+ return;
82660
+ }
82546
82661
  const conv = new OutputConverger(this.agents.get(agentId).output.mode);
82547
82662
  const rec = new TranscriptRecorder();
82548
82663
  const replyConn = this.replyConnFor(agentId, integrationId);
82549
82664
  const wasRunning = this.hostStarts.has(agentId);
82550
82665
  const statusThread = msg.thread ?? msg.msgId;
82666
+ const key = sessionKey(msg.platform, msg.channel, statusThread, agentId);
82551
82667
  replyConn?.setStatus(msg.channel, statusThread, wasRunning ? "is thinking…" : "is starting up…");
82552
- const { sessionId, blocks } = await this.sessions.handle(agentId, msg);
82668
+ let handled;
82669
+ try {
82670
+ handled = await this.sessions.handle(agentId, msg);
82671
+ } catch (err) {
82672
+ this.store.setSessionState(key, "idle", this.clock.now());
82673
+ throw err;
82674
+ }
82675
+ const { sessionId, blocks } = handled;
82676
+ let resolveDone;
82677
+ const done = new Promise((r) => resolveDone = r);
82553
82678
  const p = {
82554
82679
  conv,
82555
82680
  rec,
82556
82681
  agentId,
82682
+ platform: msg.platform,
82683
+ sessionKey: key,
82557
82684
  channel: msg.channel,
82558
82685
  thread: msg.thread,
82559
82686
  statusThread,
82560
82687
  conn: replyConn,
82561
- applyChain: Promise.resolve()
82688
+ applyChain: Promise.resolve(),
82689
+ done,
82690
+ resolveDone
82562
82691
  };
82563
82692
  this.pending.set(sessionId, p);
82564
82693
  try {
@@ -82573,9 +82702,35 @@ var Daemon = class {
82573
82702
  } finally {
82574
82703
  this.clearIdle(p);
82575
82704
  this.pending.delete(sessionId);
82705
+ this.store.setSessionState(key, "idle", this.clock.now());
82706
+ this.clearCancelBackstop(sessionId);
82707
+ p.resolveDone();
82576
82708
  }
82577
82709
  this.flushQueued(agentId, sessionId, integrationId);
82578
82710
  }
82711
+ /** After `!stop` sends session/cancel, give the agent `cancelBackstopMs` to yield.
82712
+ * If the turn is still in flight when the timer fires, the agent ignored the
82713
+ * cancel — force-stop its host (the only hard kill available) so the session
82714
+ * can't be stuck in `cancelling` forever. dispatch's finally clears this timer
82715
+ * the moment the turn yields on its own. */
82716
+ armCancelBackstop(agentId, acpSessionId, key) {
82717
+ this.clearCancelBackstop(acpSessionId);
82718
+ const ms = this.cfg.limits.cancelBackstopMs;
82719
+ this.cancelTimers.set(acpSessionId, this.clock.setTimeout(() => {
82720
+ this.cancelTimers.delete(acpSessionId);
82721
+ if (!this.pending.has(acpSessionId)) return;
82722
+ this.log.warn(`command stop: agent "${agentId}" ignored session/cancel for ${ms}ms — force-stopping host (session ${acpSessionId})`);
82723
+ this.stopHost(agentId, 0);
82724
+ this.store.setSessionState(key, "idle", this.clock.now());
82725
+ }, ms));
82726
+ }
82727
+ clearCancelBackstop(acpSessionId) {
82728
+ const t = this.cancelTimers.get(acpSessionId);
82729
+ if (t !== void 0) {
82730
+ this.clock.clearTimeout(t);
82731
+ this.cancelTimers.delete(acpSessionId);
82732
+ }
82733
+ }
82579
82734
  /**
82580
82735
  * Apply one converger action against the session's Slack connection:
82581
82736
  * - set-status → assistant.threads.setStatus (best-effort; '' clears)
@@ -82664,6 +82819,8 @@ var Daemon = class {
82664
82819
  });
82665
82820
  }
82666
82821
  async ensureHostAsync(agentId) {
82822
+ const stopping = this.hostStopping.get(agentId);
82823
+ if (stopping) await stopping;
82667
82824
  const host = this.ensureHost(agentId, this.cfg);
82668
82825
  let p = this.hostStarts.get(agentId);
82669
82826
  if (!p) {
@@ -82677,6 +82834,171 @@ var Daemon = class {
82677
82834
  const intId = integrationId ?? this.agents.get(agentId)?.integrations[0]?.id;
82678
82835
  return intId ? this.connByIntegration.get(intId) : void 0;
82679
82836
  }
82837
+ /** Stop and evict an ACP adapter child, returning the agent to `provisioned`
82838
+ * (config kept; the next message lazily re-spawns it). Idempotent. The teardown
82839
+ * is registered in `hostStopping` so a concurrent ensureHostAsync waits for it
82840
+ * instead of spawning a second live child. */
82841
+ async stopHost(agentId, deadlineMs) {
82842
+ const host = this.hosts.get(agentId);
82843
+ this.hosts.delete(agentId);
82844
+ this.hostStarts.delete(agentId);
82845
+ if (!host) return;
82846
+ const stop = host.stop(deadlineMs).finally(() => {
82847
+ if (this.hostStopping.get(agentId) === stop) this.hostStopping.delete(agentId);
82848
+ });
82849
+ this.hostStopping.set(agentId, stop);
82850
+ await stop;
82851
+ }
82852
+ /** Recurring idle sweep (§7.2/§7.3): reap idle adapter children and TTL-close
82853
+ * idle sessions. Driven by the injected Clock so a FakeClock advances it in tests. */
82854
+ armIdleSweep() {
82855
+ const interval = this.cfg.limits.idleSweepMs;
82856
+ if (interval <= 0) return;
82857
+ this.idleSweepTimer = this.clock.setTimeout(() => {
82858
+ this.idleSweepTimer = void 0;
82859
+ try {
82860
+ this.sweepIdle();
82861
+ } catch (err) {
82862
+ this.log.error(`idle sweep failed: ${formatErr(err)}`);
82863
+ }
82864
+ if (!this.draining) this.armIdleSweep();
82865
+ }, interval);
82866
+ }
82867
+ sweepIdle() {
82868
+ const now = this.clock.now();
82869
+ const ttl = this.cfg.limits.agentIdleTimeoutMs;
82870
+ const closed = this.store.closeIdleSessions(now, ttl);
82871
+ if (closed.length) this.log.info(`idle: TTL-closed ${closed.length} session(s) (>${Math.round(ttl / 1e3)}s)`);
82872
+ for (const [agentId] of [...this.hosts]) {
82873
+ if (this.drainingAgents.has(agentId)) continue;
82874
+ if ([...this.pending.values()].some((p) => p.agentId === agentId)) continue;
82875
+ const last = this.store.agentLastActivityTs(agentId) ?? 0;
82876
+ if (now - last <= ttl) continue;
82877
+ this.log.info(`idle: reclaiming host "${agentId}" (idle ${Math.round((now - last) / 1e3)}s) → provisioned`);
82878
+ this.stopHost(agentId).catch((err) => this.log.error(`idle: stop host "${agentId}" failed: ${formatErr(err)}`));
82879
+ }
82880
+ }
82881
+ /** Race `work` against a Clock-driven deadline, always clearing the timer so a
82882
+ * finished drain never leaves a dangling timer holding the process open. */
82883
+ async raceDeadline(work, ms) {
82884
+ const delay = Math.min(Math.max(0, ms), 2147483647);
82885
+ let handle;
82886
+ const timeout = new Promise((resolve) => {
82887
+ handle = this.clock.setTimeout(() => resolve("timeout"), delay);
82888
+ });
82889
+ try {
82890
+ return await Promise.race([work.then(() => "done"), timeout]);
82891
+ } finally {
82892
+ if (handle !== void 0) this.clock.clearTimeout(handle);
82893
+ }
82894
+ }
82895
+ /**
82896
+ * §5.3 drain: gate new turns for the scope, await in-flight turns up to
82897
+ * `deadlineMs` (emitting `drain/progress` as each yields), then cancel any
82898
+ * straggler past the deadline. Returns BOTH the in-scope set (`matched`) and the
82899
+ * subset that actually finished (`drained`). The caller decides which to report
82900
+ * as released: a session is only safe to release once it is genuinely no longer
82901
+ * being served — either it drained, or the caller force-stops its host. Reporting
82902
+ * a still-running straggler would let the CP reassign it and double-serve.
82903
+ */
82904
+ async drainScope(scope, deadlineMs, onProgress) {
82905
+ const match = (p) => {
82906
+ if (scope.kind === "daemon") return true;
82907
+ if (scope.kind === "agent") return p.agentId === scope.agentId;
82908
+ const k = scope.sessionKey;
82909
+ return p.platform === k.platform && p.channel === k.channel && (k.thread === void 0 || p.statusThread === k.thread);
82910
+ };
82911
+ if (scope.kind === "daemon") this.draining = true;
82912
+ else if (scope.kind === "agent") this.drainingAgents.add(scope.agentId);
82913
+ const keyOf = (sk) => `${sk.platform}:${sk.channel}:${sk.thread ?? "-"}`;
82914
+ const targets = [...this.pending.entries()].filter(([, p]) => match(p));
82915
+ const matched = /* @__PURE__ */ new Map();
82916
+ for (const [, p] of targets) matched.set(keyOf(pendingSessionKey(p)), pendingSessionKey(p));
82917
+ if (targets.length) this.log.info(`drain[${scope.kind}]: awaiting ${targets.length} turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
82918
+ let settled = false;
82919
+ const drained = /* @__PURE__ */ new Map();
82920
+ let remaining = targets.length;
82921
+ const work = Promise.all(targets.map(([, p]) => p.done.then(() => {
82922
+ const sk = pendingSessionKey(p);
82923
+ drained.set(keyOf(sk), sk);
82924
+ remaining--;
82925
+ if (!settled) onProgress?.({
82926
+ remaining,
82927
+ drained: [...drained.values()]
82928
+ });
82929
+ })));
82930
+ const res = await this.raceDeadline(work, deadlineMs);
82931
+ settled = true;
82932
+ if (res === "timeout") for (const [sid, p] of this.pending) {
82933
+ if (!match(p)) continue;
82934
+ this.log.warn(`drain[${scope.kind}]: cancelling straggler turn (session ${sid})`);
82935
+ await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
82936
+ }
82937
+ return {
82938
+ matched: [...matched.values()],
82939
+ drained: [...drained.values()]
82940
+ };
82941
+ }
82942
+ /** Handle a CP `daemon/drain` (§5.3). A bare drain is a rebalance: after
82943
+ * releasing sessions the daemon reclaims hosts and re-opens its gate (a teardown
82944
+ * arrives separately via daemon/restart). */
82945
+ async runDrain(drain, onProgress) {
82946
+ const deadlineMs = Math.max(0, new Date(drain.deadline).getTime() - this.clock.now());
82947
+ const { matched, drained } = await this.drainScope(drain.scope, deadlineMs, onProgress);
82948
+ let released;
82949
+ if (drain.scope.kind === "daemon") {
82950
+ for (const id of [...this.hosts.keys()]) await this.stopHost(id);
82951
+ this.draining = false;
82952
+ released = matched;
82953
+ } else if (drain.scope.kind === "agent") {
82954
+ await this.stopHost(drain.scope.agentId);
82955
+ this.drainingAgents.delete(drain.scope.agentId);
82956
+ released = matched;
82957
+ } else released = drained;
82958
+ this.log.info(`drain[${drain.scope.kind}]: done — released ${released.length} session(s)`);
82959
+ return { released };
82960
+ }
82961
+ /** `agent/stop` (§8.2): drain the agent's in-flight turns, stop its host, and
82962
+ * leave it gated so it stays down until a matching `agent/launch` revives it. */
82963
+ async stopAgent(agentId) {
82964
+ await this.drainScope({
82965
+ kind: "agent",
82966
+ agentId
82967
+ }, this.cfg.limits.shutdownDrainMs);
82968
+ await this.stopHost(agentId);
82969
+ }
82970
+ /** §2.5 SIGTERM / daemon shutdown: gate new turns, then await in-flight turns up
82971
+ * to `shutdownDrainMs`, cancelling stragglers. Safe to call repeatedly. */
82972
+ async drainForShutdown() {
82973
+ this.draining = true;
82974
+ const live = [...this.pending.values()];
82975
+ if (live.length === 0) return;
82976
+ const deadlineMs = this.cfg.limits.shutdownDrainMs;
82977
+ this.log.info(`shutdown: draining ${live.length} in-flight turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
82978
+ if (await this.raceDeadline(Promise.all(live.map((p) => p.done)), deadlineMs) === "timeout") {
82979
+ this.log.warn(`shutdown: deadline hit with ${this.pending.size} turn(s) still in flight — cancelling`);
82980
+ for (const [sid, p] of this.pending) await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
82981
+ }
82982
+ }
82983
+ /** daemon/restart + daemon/upgrade (§8.3): ack now, then drain + stop + exit so
82984
+ * the supervisor relaunches (the new binary, for upgrade). */
82985
+ scheduleFleetExit(kind, targetVersion) {
82986
+ const willDrainUntil = new Date(this.clock.now() + this.cfg.limits.shutdownDrainMs).toISOString();
82987
+ this.log.info(`cp: ${kind}${targetVersion ? ` → ${targetVersion}` : ""} requested — draining then exiting`);
82988
+ (async () => {
82989
+ try {
82990
+ await this.stop();
82991
+ } catch (err) {
82992
+ this.log.error(`cp: ${kind} shutdown failed: ${formatErr(err)}`);
82993
+ } finally {
82994
+ this.requestExit(0);
82995
+ }
82996
+ })();
82997
+ return {
82998
+ accepted: true,
82999
+ willDrainUntil
83000
+ };
83001
+ }
82680
83002
  cpConfigApply() {
82681
83003
  return {
82682
83004
  applyConfigPush: (keys) => {
@@ -82702,7 +83024,26 @@ var Daemon = class {
82702
83024
  upsertCron: (cron) => this.cpCrons.upsert(cron),
82703
83025
  removeCron: (cronId) => this.cpCrons.remove(cronId),
82704
83026
  applyRouteAssign: (a) => this.cpRouting?.upsertAssign(a),
82705
- applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u)
83027
+ applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u),
83028
+ applyAgentLaunch: async (launch) => {
83029
+ const agent = this.agents.get(launch.agentId);
83030
+ if (!agent) throw new Error(`agent/launch: unknown agent ${launch.agentId}`);
83031
+ this.drainingAgents.delete(launch.agentId);
83032
+ await this.ensureHostAsync(launch.agentId);
83033
+ return {
83034
+ agentId: launch.agentId,
83035
+ launchId: randomUUID(),
83036
+ startedAt: new Date(this.clock.now()).toISOString(),
83037
+ runtime: agent.runtime
83038
+ };
83039
+ },
83040
+ applyAgentStop: async (stop) => {
83041
+ await this.stopAgent(stop.agentId);
83042
+ return { ok: true };
83043
+ },
83044
+ applyDaemonDrain: (drain, onProgress) => this.runDrain(drain, onProgress),
83045
+ applyDaemonRestart: (_req) => this.scheduleFleetExit("restart"),
83046
+ applyDaemonUpgrade: (req) => this.scheduleFleetExit("upgrade", req.targetVersion)
82706
83047
  };
82707
83048
  }
82708
83049
  /** A CP cron fired: build a synthetic message and run it through the normal routing path. */
@@ -82775,8 +83116,16 @@ var Daemon = class {
82775
83116
  this.log.info(`cp: connecting to ${url}…`);
82776
83117
  }
82777
83118
  async stop() {
83119
+ this.draining = true;
82778
83120
  clearTimeout(this.debounceTimer);
83121
+ if (this.idleSweepTimer !== void 0) {
83122
+ this.clock.clearTimeout(this.idleSweepTimer);
83123
+ this.idleSweepTimer = void 0;
83124
+ }
83125
+ for (const t of this.cancelTimers.values()) this.clock.clearTimeout(t);
83126
+ this.cancelTimers.clear();
82779
83127
  await this.watcher?.close();
83128
+ await this.drainForShutdown();
82780
83129
  const errors = [];
82781
83130
  this.scheduler?.stop();
82782
83131
  await Promise.resolve(this.cpClient?.stop()).catch((e) => errors.push(e));