@agentconnect.md/daemon 1.0.0-rc.37 → 1.0.0-rc.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +369 -20
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -7376,11 +7376,17 @@ const ConfigSchema = object({
|
|
|
7376
7376
|
maxAgents: number().int().default(8),
|
|
7377
7377
|
maxConcurrentSessions: number().int().default(32),
|
|
7378
7378
|
agentIdleTimeoutMs: number().int().default(9e5),
|
|
7379
|
+
idleSweepMs: number().int().default(6e4),
|
|
7380
|
+
shutdownDrainMs: number().int().default(25e3),
|
|
7381
|
+
cancelBackstopMs: number().int().default(3e4),
|
|
7379
7382
|
maxAttachmentBytes: number().int().default(8 * 1024 * 1024)
|
|
7380
7383
|
}).default({
|
|
7381
7384
|
maxAgents: 8,
|
|
7382
7385
|
maxConcurrentSessions: 32,
|
|
7383
7386
|
agentIdleTimeoutMs: 9e5,
|
|
7387
|
+
idleSweepMs: 6e4,
|
|
7388
|
+
shutdownDrainMs: 25e3,
|
|
7389
|
+
cancelBackstopMs: 3e4,
|
|
7384
7390
|
maxAttachmentBytes: 8 * 1024 * 1024
|
|
7385
7391
|
})
|
|
7386
7392
|
});
|
|
@@ -16975,10 +16981,29 @@ var AcpHost = class {
|
|
|
16975
16981
|
async cancel(sessionId) {
|
|
16976
16982
|
await this.conn.agent.notify(methods.agent.session.cancel, { sessionId });
|
|
16977
16983
|
}
|
|
16978
|
-
|
|
16979
|
-
|
|
16980
|
-
|
|
16981
|
-
|
|
16984
|
+
/** Stop the adapter child: SIGTERM, then escalate to SIGKILL if it hasn't exited
|
|
16985
|
+
* within `deadlineMs` (a buggy/hung agent must never block daemon shutdown or an
|
|
16986
|
+
* idle reap). Idempotent — the child handle is cleared up front so a concurrent
|
|
16987
|
+
* stop (drain + reconcile racing) is a no-op rather than a double-kill. */
|
|
16988
|
+
async stop(deadlineMs = 5e3) {
|
|
16989
|
+
const child = this.child;
|
|
16990
|
+
if (!child) return;
|
|
16991
|
+
this.child = void 0;
|
|
16992
|
+
child.kill("SIGTERM");
|
|
16993
|
+
await new Promise((resolve) => {
|
|
16994
|
+
let settled = false;
|
|
16995
|
+
const done = () => {
|
|
16996
|
+
if (settled) return;
|
|
16997
|
+
settled = true;
|
|
16998
|
+
clearTimeout(timer);
|
|
16999
|
+
resolve();
|
|
17000
|
+
};
|
|
17001
|
+
const timer = setTimeout(() => {
|
|
17002
|
+
this.opts.log?.warn(`acp: child ignored SIGTERM after ${deadlineMs}ms — sending SIGKILL`);
|
|
17003
|
+
child.kill("SIGKILL");
|
|
17004
|
+
}, deadlineMs);
|
|
17005
|
+
child.once("exit", done);
|
|
17006
|
+
});
|
|
16982
17007
|
}
|
|
16983
17008
|
};
|
|
16984
17009
|
//#endregion
|
|
@@ -23361,6 +23386,27 @@ var LocalStore = class {
|
|
|
23361
23386
|
acpSessionId=excluded.acpSessionId, state=excluded.state,
|
|
23362
23387
|
lastDeliveredTs=excluded.lastDeliveredTs, updatedAt=excluded.updatedAt`).run(rec);
|
|
23363
23388
|
}
|
|
23389
|
+
/** Targeted state transition for an existing session (§7.3), stamping `updatedAt`
|
|
23390
|
+
* so the change counts as activity for the TTL/idle clocks. No-op if the key is
|
|
23391
|
+
* unknown (the row is created by the SessionManager on first turn). */
|
|
23392
|
+
setSessionState(key, state, updatedAt) {
|
|
23393
|
+
this.db.prepare("UPDATE sessions SET state = ?, updatedAt = ? WHERE key = ?").run(state, updatedAt, key);
|
|
23394
|
+
}
|
|
23395
|
+
/** Most-recent activity across an agent's non-closed sessions (epoch ms), or null
|
|
23396
|
+
* if it has none. Drives idle-host reaping (#111): a host with no recent session
|
|
23397
|
+
* activity AND no in-flight turn is past its idle window. */
|
|
23398
|
+
agentLastActivityTs(agentId) {
|
|
23399
|
+
return this.db.prepare("SELECT MAX(updatedAt) AS ts FROM sessions WHERE agentId = ? AND state != 'closed'").get(agentId)?.ts ?? null;
|
|
23400
|
+
}
|
|
23401
|
+
/** §7.3 TTL close: move every `idle` session untouched since `now - ttlMs` to
|
|
23402
|
+
* `closed`, returning the rows closed (for logging). `prompting`/`cancelling`
|
|
23403
|
+
* sessions are never closed — a live turn keeps the thread open. */
|
|
23404
|
+
closeIdleSessions(now, ttlMs) {
|
|
23405
|
+
const cutoff = now - ttlMs;
|
|
23406
|
+
const rows = this.db.prepare("SELECT key, channel, thread, agentId FROM sessions WHERE state = 'idle' AND updatedAt < ?").all(cutoff);
|
|
23407
|
+
if (rows.length) this.db.prepare("UPDATE sessions SET state = 'closed' WHERE state = 'idle' AND updatedAt < ?").run(cutoff);
|
|
23408
|
+
return rows;
|
|
23409
|
+
}
|
|
23364
23410
|
appendTranscript(e) {
|
|
23365
23411
|
this.db.prepare("INSERT OR IGNORE INTO transcript (channel, thread, ts, sender, kind, text) VALUES (@channel, @thread, @ts, @sender, @kind, @text)").run(e);
|
|
23366
23412
|
}
|
|
@@ -23512,10 +23558,13 @@ var SessionManager = class {
|
|
|
23512
23558
|
} else if (host.hasSession?.(rec.acpSessionId) === false) {
|
|
23513
23559
|
const cwd = await prepareWorkspace(agent);
|
|
23514
23560
|
let resumed = false;
|
|
23515
|
-
if (host.loadSupported?.())
|
|
23516
|
-
|
|
23517
|
-
|
|
23518
|
-
|
|
23561
|
+
if (host.loadSupported?.()) {
|
|
23562
|
+
this.deps.store.setSessionState(key, "resuming", Date.now());
|
|
23563
|
+
try {
|
|
23564
|
+
await host.loadSession(rec.acpSessionId, cwd);
|
|
23565
|
+
resumed = true;
|
|
23566
|
+
} catch {}
|
|
23567
|
+
}
|
|
23519
23568
|
if (!resumed) {
|
|
23520
23569
|
const mcpServers = this.deps.mcpServersFor?.({
|
|
23521
23570
|
agent,
|
|
@@ -81697,14 +81746,13 @@ var CpClient = class {
|
|
|
81697
81746
|
}
|
|
81698
81747
|
armHeartbeat() {
|
|
81699
81748
|
this.heartbeatTimer = this.deps.clock.setTimeout(() => {
|
|
81700
|
-
if (this.state
|
|
81701
|
-
this.transport?.send(encode(buildEnvelope("heartbeat", {
|
|
81749
|
+
if (this.state === "READY") this.transport?.send(encode(buildEnvelope("heartbeat", {
|
|
81702
81750
|
load: this.deps.loadSnapshot(),
|
|
81703
81751
|
health: "ok",
|
|
81704
81752
|
activeSessions: this.deps.activeSessions(),
|
|
81705
81753
|
degradedScopes: this.deps.degradedScopes?.() ?? []
|
|
81706
81754
|
})));
|
|
81707
|
-
this.armHeartbeat();
|
|
81755
|
+
if (this.state === "READY" || this.state === "DRAINING") this.armHeartbeat();
|
|
81708
81756
|
}, this.heartbeatMs);
|
|
81709
81757
|
}
|
|
81710
81758
|
stopHeartbeat() {
|
|
@@ -81751,13 +81799,36 @@ var CpClient = class {
|
|
|
81751
81799
|
case "agent/remove":
|
|
81752
81800
|
this.deps.configApply.applyAgentRemove(frame.payload.agentId);
|
|
81753
81801
|
return;
|
|
81754
|
-
case "agent/launch":
|
|
81755
|
-
|
|
81756
|
-
|
|
81757
|
-
|
|
81802
|
+
case "agent/launch": {
|
|
81803
|
+
const launch = frame.payload;
|
|
81804
|
+
this.deps.configApply.applyAgentLaunch(launch).then((launched) => this.reply(frame, "agent/launched", launched)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/launch failed: ${err.message}`, false));
|
|
81805
|
+
return;
|
|
81806
|
+
}
|
|
81807
|
+
case "agent/stop": {
|
|
81808
|
+
const stop = frame.payload;
|
|
81809
|
+
this.deps.configApply.applyAgentStop(stop).then((ack) => this.reply(frame, "ack", ack)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/stop failed: ${err.message}`, false));
|
|
81810
|
+
return;
|
|
81811
|
+
}
|
|
81812
|
+
case "daemon/drain": {
|
|
81813
|
+
const drain = frame.payload;
|
|
81814
|
+
this.state = "DRAINING";
|
|
81815
|
+
this.deps.configApply.applyDaemonDrain(drain, (p) => this.emit("drain/progress", p)).then((done) => {
|
|
81816
|
+
this.reply(frame, "drain/done", done);
|
|
81817
|
+
if (this.state === "DRAINING") this.state = "READY";
|
|
81818
|
+
}).catch((err) => {
|
|
81819
|
+
this.sendError(frame.id, "INTERNAL", `drain failed: ${err.message}`, false);
|
|
81820
|
+
if (this.state === "DRAINING") this.state = "READY";
|
|
81821
|
+
});
|
|
81822
|
+
return;
|
|
81823
|
+
}
|
|
81758
81824
|
case "daemon/restart":
|
|
81825
|
+
this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonRestart(frame.payload));
|
|
81826
|
+
return;
|
|
81759
81827
|
case "daemon/upgrade":
|
|
81760
|
-
this.
|
|
81828
|
+
this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonUpgrade(frame.payload));
|
|
81829
|
+
return;
|
|
81830
|
+
case "agent/prompt":
|
|
81831
|
+
this.sendError(frame.id, "INTERNAL", "agent/prompt not implemented", false);
|
|
81761
81832
|
return;
|
|
81762
81833
|
default:
|
|
81763
81834
|
this.deps.log.debug(`cp: ignoring ${frame.type}`);
|
|
@@ -81767,6 +81838,10 @@ var CpClient = class {
|
|
|
81767
81838
|
reply(req, type, payload) {
|
|
81768
81839
|
this.transport?.send(encode(buildEnvelope(type, payload, { corr: req.id })));
|
|
81769
81840
|
}
|
|
81841
|
+
/** Emit an uncorrelated EVT (e.g. `drain/progress`). */
|
|
81842
|
+
emit(type, payload) {
|
|
81843
|
+
this.transport?.send(encode(buildEnvelope(type, payload)));
|
|
81844
|
+
}
|
|
81770
81845
|
};
|
|
81771
81846
|
//#endregion
|
|
81772
81847
|
//#region src/cp/cp-cron.ts
|
|
@@ -82047,6 +82122,21 @@ function formatErr(err) {
|
|
|
82047
82122
|
}
|
|
82048
82123
|
const MAX_QUEUED_PER_SESSION = 10;
|
|
82049
82124
|
const IDLE_FLUSH_MS = 2e3;
|
|
82125
|
+
/** Build the wire SessionKey (protocol §5) for a pending turn — the value that
 * `drain/done` reports as released so the CP may reassign it.
 *
 * The key carries the turn's real `thread`, and omits the field altogether when
 * `thread` is undefined (a channel-root message). It deliberately does NOT use
 * `statusThread` (which falls back to msgId): the CP keys assignments by
 * `thread ?? "-"`, so reporting the msgId would miss the match.
 *
 * @param p Pending-turn record; `platform`, `channel` and `thread` are read.
 * @returns `{ platform, channel, thread }`, or `{ platform, channel }` when
 *   `p.thread` is undefined.
 */
function pendingSessionKey(p) {
  const { platform, channel, thread } = p;
  const key = { platform, channel };
  if (thread !== void 0) key.thread = thread;
  return key;
}
|
|
82050
82140
|
var Daemon = class {
|
|
82051
82141
|
opts;
|
|
82052
82142
|
store;
|
|
@@ -82072,8 +82162,17 @@ var Daemon = class {
|
|
|
82072
82162
|
cpAgents;
|
|
82073
82163
|
botUserIds = {};
|
|
82074
82164
|
cpRouting;
|
|
82165
|
+
clock;
|
|
82166
|
+
requestExit;
|
|
82167
|
+
draining = false;
|
|
82168
|
+
drainingAgents = /* @__PURE__ */ new Set();
|
|
82169
|
+
hostStopping = /* @__PURE__ */ new Map();
|
|
82170
|
+
idleSweepTimer;
|
|
82171
|
+
cancelTimers = /* @__PURE__ */ new Map();
|
|
82075
82172
|
constructor(opts = {}) {
|
|
82076
82173
|
this.opts = opts;
|
|
82174
|
+
this.clock = opts.clock ?? systemClock;
|
|
82175
|
+
this.requestExit = opts.requestExit ?? ((code) => process.exit(code));
|
|
82077
82176
|
}
|
|
82078
82177
|
async start() {
|
|
82079
82178
|
const root = resolveRoot(this.opts.root);
|
|
@@ -82213,6 +82312,7 @@ var Daemon = class {
|
|
|
82213
82312
|
this.watcher.on("add", debounced).on("change", debounced).on("unlink", debounced);
|
|
82214
82313
|
this.log.info(`watching ${this.agentsDir} for agent changes`);
|
|
82215
82314
|
this.startCpClient(root);
|
|
82315
|
+
this.armIdleSweep();
|
|
82216
82316
|
this.log.info("daemon ready");
|
|
82217
82317
|
}
|
|
82218
82318
|
loadAgentList() {
|
|
@@ -82385,6 +82485,10 @@ var Daemon = class {
|
|
|
82385
82485
|
}
|
|
82386
82486
|
seenMsgIds = /* @__PURE__ */ new Set();
|
|
82387
82487
|
onInbound(msg) {
|
|
82488
|
+
if (this.draining) {
|
|
82489
|
+
this.log.debug(`routing: dropping inbound ${msg.msgId} (daemon draining)`);
|
|
82490
|
+
return;
|
|
82491
|
+
}
|
|
82388
82492
|
if (this.seenMsgIds.has(msg.msgId)) {
|
|
82389
82493
|
this.log.debug(`routing: duplicate ${msg.msgId} ignored`);
|
|
82390
82494
|
return;
|
|
@@ -82402,6 +82506,10 @@ var Daemon = class {
|
|
|
82402
82506
|
this.log.debug(`routing: dropped message in ch=${msg.channel} (no agent matched — not a mention of a known bot, not a subscribed 'all' channel, not a thread/DM hit)`);
|
|
82403
82507
|
return;
|
|
82404
82508
|
}
|
|
82509
|
+
if (this.drainingAgents.has(result.agentId)) {
|
|
82510
|
+
this.log.debug(`routing: dropping ${msg.msgId} for agent "${result.agentId}" (draining)`);
|
|
82511
|
+
return;
|
|
82512
|
+
}
|
|
82405
82513
|
this.log.info(`routing: ch=${msg.channel} → agent "${result.agentId}" (integration ${result.integrationId})`);
|
|
82406
82514
|
this.dispatch(result.agentId, msg, result.integrationId).catch((err) => this.log.error(`dispatch failed for agent "${result.agentId}": ${formatErr(err)}`));
|
|
82407
82515
|
}
|
|
@@ -82471,7 +82579,8 @@ var Daemon = class {
|
|
|
82471
82579
|
}
|
|
82472
82580
|
const conn = this.replyConnFor(target.agentId, target.integrationId);
|
|
82473
82581
|
const thread = msg.thread ?? msg.msgId;
|
|
82474
|
-
const
|
|
82582
|
+
const key = sessionKey(msg.platform, msg.channel, thread, target.agentId);
|
|
82583
|
+
const acpSessionId = this.store.getSession(key)?.acpSessionId;
|
|
82475
82584
|
const inflight = !!(acpSessionId && this.pending.has(acpSessionId));
|
|
82476
82585
|
if (command.kind === "stop") {
|
|
82477
82586
|
if (!inflight) {
|
|
@@ -82480,7 +82589,9 @@ var Daemon = class {
|
|
|
82480
82589
|
}
|
|
82481
82590
|
this.queued.delete(acpSessionId);
|
|
82482
82591
|
this.log.info(`command: stop → agent "${target.agentId}" session ${acpSessionId}`);
|
|
82592
|
+
this.store.setSessionState(key, "cancelling", this.clock.now());
|
|
82483
82593
|
this.hosts.get(target.agentId)?.cancel(acpSessionId).catch((err) => this.log.error(`command stop: cancel failed: ${err.message}`));
|
|
82594
|
+
this.armCancelBackstop(target.agentId, acpSessionId, key);
|
|
82484
82595
|
conn?.postMessage(msg.channel, "🛑 Stopped.", thread);
|
|
82485
82596
|
return;
|
|
82486
82597
|
}
|
|
@@ -82543,22 +82654,40 @@ var Daemon = class {
|
|
|
82543
82654
|
return [...out];
|
|
82544
82655
|
}
|
|
82545
82656
|
async dispatch(agentId, msg, integrationId) {
|
|
82657
|
+
if (this.draining || this.drainingAgents.has(agentId)) {
|
|
82658
|
+
this.log.debug(`dispatch: skipped for agent "${agentId}" (draining)`);
|
|
82659
|
+
return;
|
|
82660
|
+
}
|
|
82546
82661
|
const conv = new OutputConverger(this.agents.get(agentId).output.mode);
|
|
82547
82662
|
const rec = new TranscriptRecorder();
|
|
82548
82663
|
const replyConn = this.replyConnFor(agentId, integrationId);
|
|
82549
82664
|
const wasRunning = this.hostStarts.has(agentId);
|
|
82550
82665
|
const statusThread = msg.thread ?? msg.msgId;
|
|
82666
|
+
const key = sessionKey(msg.platform, msg.channel, statusThread, agentId);
|
|
82551
82667
|
replyConn?.setStatus(msg.channel, statusThread, wasRunning ? "is thinking…" : "is starting up…");
|
|
82552
|
-
|
|
82668
|
+
let handled;
|
|
82669
|
+
try {
|
|
82670
|
+
handled = await this.sessions.handle(agentId, msg);
|
|
82671
|
+
} catch (err) {
|
|
82672
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82673
|
+
throw err;
|
|
82674
|
+
}
|
|
82675
|
+
const { sessionId, blocks } = handled;
|
|
82676
|
+
let resolveDone;
|
|
82677
|
+
const done = new Promise((r) => resolveDone = r);
|
|
82553
82678
|
const p = {
|
|
82554
82679
|
conv,
|
|
82555
82680
|
rec,
|
|
82556
82681
|
agentId,
|
|
82682
|
+
platform: msg.platform,
|
|
82683
|
+
sessionKey: key,
|
|
82557
82684
|
channel: msg.channel,
|
|
82558
82685
|
thread: msg.thread,
|
|
82559
82686
|
statusThread,
|
|
82560
82687
|
conn: replyConn,
|
|
82561
|
-
applyChain: Promise.resolve()
|
|
82688
|
+
applyChain: Promise.resolve(),
|
|
82689
|
+
done,
|
|
82690
|
+
resolveDone
|
|
82562
82691
|
};
|
|
82563
82692
|
this.pending.set(sessionId, p);
|
|
82564
82693
|
try {
|
|
@@ -82573,9 +82702,35 @@ var Daemon = class {
|
|
|
82573
82702
|
} finally {
|
|
82574
82703
|
this.clearIdle(p);
|
|
82575
82704
|
this.pending.delete(sessionId);
|
|
82705
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82706
|
+
this.clearCancelBackstop(sessionId);
|
|
82707
|
+
p.resolveDone();
|
|
82576
82708
|
}
|
|
82577
82709
|
this.flushQueued(agentId, sessionId, integrationId);
|
|
82578
82710
|
}
|
|
82711
|
+
/** After `!stop` sends session/cancel, give the agent `cancelBackstopMs` to yield.
|
|
82712
|
+
* If the turn is still in flight when the timer fires, the agent ignored the
|
|
82713
|
+
* cancel — force-stop its host (the only hard kill available) so the session
|
|
82714
|
+
* can't be stuck in `cancelling` forever. dispatch's finally clears this timer
|
|
82715
|
+
* the moment the turn yields on its own. */
|
|
82716
|
+
armCancelBackstop(agentId, acpSessionId, key) {
|
|
82717
|
+
this.clearCancelBackstop(acpSessionId);
|
|
82718
|
+
const ms = this.cfg.limits.cancelBackstopMs;
|
|
82719
|
+
this.cancelTimers.set(acpSessionId, this.clock.setTimeout(() => {
|
|
82720
|
+
this.cancelTimers.delete(acpSessionId);
|
|
82721
|
+
if (!this.pending.has(acpSessionId)) return;
|
|
82722
|
+
this.log.warn(`command stop: agent "${agentId}" ignored session/cancel for ${ms}ms — force-stopping host (session ${acpSessionId})`);
|
|
82723
|
+
this.stopHost(agentId, 0);
|
|
82724
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82725
|
+
}, ms));
|
|
82726
|
+
}
|
|
82727
|
+
clearCancelBackstop(acpSessionId) {
|
|
82728
|
+
const t = this.cancelTimers.get(acpSessionId);
|
|
82729
|
+
if (t !== void 0) {
|
|
82730
|
+
this.clock.clearTimeout(t);
|
|
82731
|
+
this.cancelTimers.delete(acpSessionId);
|
|
82732
|
+
}
|
|
82733
|
+
}
|
|
82579
82734
|
/**
|
|
82580
82735
|
* Apply one converger action against the session's Slack connection:
|
|
82581
82736
|
* - set-status → assistant.threads.setStatus (best-effort; '' clears)
|
|
@@ -82664,6 +82819,8 @@ var Daemon = class {
|
|
|
82664
82819
|
});
|
|
82665
82820
|
}
|
|
82666
82821
|
async ensureHostAsync(agentId) {
|
|
82822
|
+
const stopping = this.hostStopping.get(agentId);
|
|
82823
|
+
if (stopping) await stopping;
|
|
82667
82824
|
const host = this.ensureHost(agentId, this.cfg);
|
|
82668
82825
|
let p = this.hostStarts.get(agentId);
|
|
82669
82826
|
if (!p) {
|
|
@@ -82677,6 +82834,171 @@ var Daemon = class {
|
|
|
82677
82834
|
const intId = integrationId ?? this.agents.get(agentId)?.integrations[0]?.id;
|
|
82678
82835
|
return intId ? this.connByIntegration.get(intId) : void 0;
|
|
82679
82836
|
}
|
|
82837
|
+
/** Stop and evict an ACP adapter child, returning the agent to `provisioned`
|
|
82838
|
+
* (config kept; the next message lazily re-spawns it). Idempotent. The teardown
|
|
82839
|
+
* is registered in `hostStopping` so a concurrent ensureHostAsync waits for it
|
|
82840
|
+
* instead of spawning a second live child. */
|
|
82841
|
+
async stopHost(agentId, deadlineMs) {
|
|
82842
|
+
const host = this.hosts.get(agentId);
|
|
82843
|
+
this.hosts.delete(agentId);
|
|
82844
|
+
this.hostStarts.delete(agentId);
|
|
82845
|
+
if (!host) return;
|
|
82846
|
+
const stop = host.stop(deadlineMs).finally(() => {
|
|
82847
|
+
if (this.hostStopping.get(agentId) === stop) this.hostStopping.delete(agentId);
|
|
82848
|
+
});
|
|
82849
|
+
this.hostStopping.set(agentId, stop);
|
|
82850
|
+
await stop;
|
|
82851
|
+
}
|
|
82852
|
+
/** Recurring idle sweep (§7.2/§7.3): reap idle adapter children and TTL-close
|
|
82853
|
+
* idle sessions. Driven by the injected Clock so a FakeClock advances it in tests. */
|
|
82854
|
+
armIdleSweep() {
|
|
82855
|
+
const interval = this.cfg.limits.idleSweepMs;
|
|
82856
|
+
if (interval <= 0) return;
|
|
82857
|
+
this.idleSweepTimer = this.clock.setTimeout(() => {
|
|
82858
|
+
this.idleSweepTimer = void 0;
|
|
82859
|
+
try {
|
|
82860
|
+
this.sweepIdle();
|
|
82861
|
+
} catch (err) {
|
|
82862
|
+
this.log.error(`idle sweep failed: ${formatErr(err)}`);
|
|
82863
|
+
}
|
|
82864
|
+
if (!this.draining) this.armIdleSweep();
|
|
82865
|
+
}, interval);
|
|
82866
|
+
}
|
|
82867
|
+
sweepIdle() {
|
|
82868
|
+
const now = this.clock.now();
|
|
82869
|
+
const ttl = this.cfg.limits.agentIdleTimeoutMs;
|
|
82870
|
+
const closed = this.store.closeIdleSessions(now, ttl);
|
|
82871
|
+
if (closed.length) this.log.info(`idle: TTL-closed ${closed.length} session(s) (>${Math.round(ttl / 1e3)}s)`);
|
|
82872
|
+
for (const [agentId] of [...this.hosts]) {
|
|
82873
|
+
if (this.drainingAgents.has(agentId)) continue;
|
|
82874
|
+
if ([...this.pending.values()].some((p) => p.agentId === agentId)) continue;
|
|
82875
|
+
const last = this.store.agentLastActivityTs(agentId) ?? 0;
|
|
82876
|
+
if (now - last <= ttl) continue;
|
|
82877
|
+
this.log.info(`idle: reclaiming host "${agentId}" (idle ${Math.round((now - last) / 1e3)}s) → provisioned`);
|
|
82878
|
+
this.stopHost(agentId).catch((err) => this.log.error(`idle: stop host "${agentId}" failed: ${formatErr(err)}`));
|
|
82879
|
+
}
|
|
82880
|
+
}
|
|
82881
|
+
/** Race `work` against a Clock-driven deadline, always clearing the timer so a
|
|
82882
|
+
* finished drain never leaves a dangling timer holding the process open. */
|
|
82883
|
+
async raceDeadline(work, ms) {
|
|
82884
|
+
const delay = Math.min(Math.max(0, ms), 2147483647);
|
|
82885
|
+
let handle;
|
|
82886
|
+
const timeout = new Promise((resolve) => {
|
|
82887
|
+
handle = this.clock.setTimeout(() => resolve("timeout"), delay);
|
|
82888
|
+
});
|
|
82889
|
+
try {
|
|
82890
|
+
return await Promise.race([work.then(() => "done"), timeout]);
|
|
82891
|
+
} finally {
|
|
82892
|
+
if (handle !== void 0) this.clock.clearTimeout(handle);
|
|
82893
|
+
}
|
|
82894
|
+
}
|
|
82895
|
+
/**
|
|
82896
|
+
* §5.3 drain: gate new turns for the scope, await in-flight turns up to
|
|
82897
|
+
* `deadlineMs` (emitting `drain/progress` as each yields), then cancel any
|
|
82898
|
+
* straggler past the deadline. Returns BOTH the in-scope set (`matched`) and the
|
|
82899
|
+
* subset that actually finished (`drained`). The caller decides which to report
|
|
82900
|
+
* as released: a session is only safe to release once it is genuinely no longer
|
|
82901
|
+
* being served — either it drained, or the caller force-stops its host. Reporting
|
|
82902
|
+
* a still-running straggler would let the CP reassign it and double-serve.
|
|
82903
|
+
*/
|
|
82904
|
+
async drainScope(scope, deadlineMs, onProgress) {
|
|
82905
|
+
const match = (p) => {
|
|
82906
|
+
if (scope.kind === "daemon") return true;
|
|
82907
|
+
if (scope.kind === "agent") return p.agentId === scope.agentId;
|
|
82908
|
+
const k = scope.sessionKey;
|
|
82909
|
+
return p.platform === k.platform && p.channel === k.channel && (k.thread === void 0 || p.statusThread === k.thread);
|
|
82910
|
+
};
|
|
82911
|
+
if (scope.kind === "daemon") this.draining = true;
|
|
82912
|
+
else if (scope.kind === "agent") this.drainingAgents.add(scope.agentId);
|
|
82913
|
+
const keyOf = (sk) => `${sk.platform}:${sk.channel}:${sk.thread ?? "-"}`;
|
|
82914
|
+
const targets = [...this.pending.entries()].filter(([, p]) => match(p));
|
|
82915
|
+
const matched = /* @__PURE__ */ new Map();
|
|
82916
|
+
for (const [, p] of targets) matched.set(keyOf(pendingSessionKey(p)), pendingSessionKey(p));
|
|
82917
|
+
if (targets.length) this.log.info(`drain[${scope.kind}]: awaiting ${targets.length} turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
|
|
82918
|
+
let settled = false;
|
|
82919
|
+
const drained = /* @__PURE__ */ new Map();
|
|
82920
|
+
let remaining = targets.length;
|
|
82921
|
+
const work = Promise.all(targets.map(([, p]) => p.done.then(() => {
|
|
82922
|
+
const sk = pendingSessionKey(p);
|
|
82923
|
+
drained.set(keyOf(sk), sk);
|
|
82924
|
+
remaining--;
|
|
82925
|
+
if (!settled) onProgress?.({
|
|
82926
|
+
remaining,
|
|
82927
|
+
drained: [...drained.values()]
|
|
82928
|
+
});
|
|
82929
|
+
})));
|
|
82930
|
+
const res = await this.raceDeadline(work, deadlineMs);
|
|
82931
|
+
settled = true;
|
|
82932
|
+
if (res === "timeout") for (const [sid, p] of this.pending) {
|
|
82933
|
+
if (!match(p)) continue;
|
|
82934
|
+
this.log.warn(`drain[${scope.kind}]: cancelling straggler turn (session ${sid})`);
|
|
82935
|
+
await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
|
|
82936
|
+
}
|
|
82937
|
+
return {
|
|
82938
|
+
matched: [...matched.values()],
|
|
82939
|
+
drained: [...drained.values()]
|
|
82940
|
+
};
|
|
82941
|
+
}
|
|
82942
|
+
/** Handle a CP `daemon/drain` (§5.3). A bare drain is a rebalance: after
|
|
82943
|
+
* releasing sessions the daemon reclaims hosts and re-opens its gate (a teardown
|
|
82944
|
+
* arrives separately via daemon/restart). */
|
|
82945
|
+
async runDrain(drain, onProgress) {
|
|
82946
|
+
const deadlineMs = Math.max(0, new Date(drain.deadline).getTime() - this.clock.now());
|
|
82947
|
+
const { matched, drained } = await this.drainScope(drain.scope, deadlineMs, onProgress);
|
|
82948
|
+
let released;
|
|
82949
|
+
if (drain.scope.kind === "daemon") {
|
|
82950
|
+
for (const id of [...this.hosts.keys()]) await this.stopHost(id);
|
|
82951
|
+
this.draining = false;
|
|
82952
|
+
released = matched;
|
|
82953
|
+
} else if (drain.scope.kind === "agent") {
|
|
82954
|
+
await this.stopHost(drain.scope.agentId);
|
|
82955
|
+
this.drainingAgents.delete(drain.scope.agentId);
|
|
82956
|
+
released = matched;
|
|
82957
|
+
} else released = drained;
|
|
82958
|
+
this.log.info(`drain[${drain.scope.kind}]: done — released ${released.length} session(s)`);
|
|
82959
|
+
return { released };
|
|
82960
|
+
}
|
|
82961
|
+
/** `agent/stop` (§8.2): drain the agent's in-flight turns, stop its host, and
|
|
82962
|
+
* leave it gated so it stays down until a matching `agent/launch` revives it. */
|
|
82963
|
+
async stopAgent(agentId) {
|
|
82964
|
+
await this.drainScope({
|
|
82965
|
+
kind: "agent",
|
|
82966
|
+
agentId
|
|
82967
|
+
}, this.cfg.limits.shutdownDrainMs);
|
|
82968
|
+
await this.stopHost(agentId);
|
|
82969
|
+
}
|
|
82970
|
+
/** §2.5 SIGTERM / daemon shutdown: gate new turns, then await in-flight turns up
|
|
82971
|
+
* to `shutdownDrainMs`, cancelling stragglers. Safe to call repeatedly. */
|
|
82972
|
+
async drainForShutdown() {
|
|
82973
|
+
this.draining = true;
|
|
82974
|
+
const live = [...this.pending.values()];
|
|
82975
|
+
if (live.length === 0) return;
|
|
82976
|
+
const deadlineMs = this.cfg.limits.shutdownDrainMs;
|
|
82977
|
+
this.log.info(`shutdown: draining ${live.length} in-flight turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
|
|
82978
|
+
if (await this.raceDeadline(Promise.all(live.map((p) => p.done)), deadlineMs) === "timeout") {
|
|
82979
|
+
this.log.warn(`shutdown: deadline hit with ${this.pending.size} turn(s) still in flight — cancelling`);
|
|
82980
|
+
for (const [sid, p] of this.pending) await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
|
|
82981
|
+
}
|
|
82982
|
+
}
|
|
82983
|
+
/** daemon/restart + daemon/upgrade (§8.3): ack now, then drain + stop + exit so
|
|
82984
|
+
* the supervisor relaunches (the new binary, for upgrade). */
|
|
82985
|
+
scheduleFleetExit(kind, targetVersion) {
|
|
82986
|
+
const willDrainUntil = new Date(this.clock.now() + this.cfg.limits.shutdownDrainMs).toISOString();
|
|
82987
|
+
this.log.info(`cp: ${kind}${targetVersion ? ` → ${targetVersion}` : ""} requested — draining then exiting`);
|
|
82988
|
+
(async () => {
|
|
82989
|
+
try {
|
|
82990
|
+
await this.stop();
|
|
82991
|
+
} catch (err) {
|
|
82992
|
+
this.log.error(`cp: ${kind} shutdown failed: ${formatErr(err)}`);
|
|
82993
|
+
} finally {
|
|
82994
|
+
this.requestExit(0);
|
|
82995
|
+
}
|
|
82996
|
+
})();
|
|
82997
|
+
return {
|
|
82998
|
+
accepted: true,
|
|
82999
|
+
willDrainUntil
|
|
83000
|
+
};
|
|
83001
|
+
}
|
|
82680
83002
|
cpConfigApply() {
|
|
82681
83003
|
return {
|
|
82682
83004
|
applyConfigPush: (keys) => {
|
|
@@ -82702,7 +83024,26 @@ var Daemon = class {
|
|
|
82702
83024
|
upsertCron: (cron) => this.cpCrons.upsert(cron),
|
|
82703
83025
|
removeCron: (cronId) => this.cpCrons.remove(cronId),
|
|
82704
83026
|
applyRouteAssign: (a) => this.cpRouting?.upsertAssign(a),
|
|
82705
|
-
applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u)
|
|
83027
|
+
applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u),
|
|
83028
|
+
applyAgentLaunch: async (launch) => {
|
|
83029
|
+
const agent = this.agents.get(launch.agentId);
|
|
83030
|
+
if (!agent) throw new Error(`agent/launch: unknown agent ${launch.agentId}`);
|
|
83031
|
+
this.drainingAgents.delete(launch.agentId);
|
|
83032
|
+
await this.ensureHostAsync(launch.agentId);
|
|
83033
|
+
return {
|
|
83034
|
+
agentId: launch.agentId,
|
|
83035
|
+
launchId: randomUUID(),
|
|
83036
|
+
startedAt: new Date(this.clock.now()).toISOString(),
|
|
83037
|
+
runtime: agent.runtime
|
|
83038
|
+
};
|
|
83039
|
+
},
|
|
83040
|
+
applyAgentStop: async (stop) => {
|
|
83041
|
+
await this.stopAgent(stop.agentId);
|
|
83042
|
+
return { ok: true };
|
|
83043
|
+
},
|
|
83044
|
+
applyDaemonDrain: (drain, onProgress) => this.runDrain(drain, onProgress),
|
|
83045
|
+
applyDaemonRestart: (_req) => this.scheduleFleetExit("restart"),
|
|
83046
|
+
applyDaemonUpgrade: (req) => this.scheduleFleetExit("upgrade", req.targetVersion)
|
|
82706
83047
|
};
|
|
82707
83048
|
}
|
|
82708
83049
|
/** A CP cron fired: build a synthetic message and run it through the normal routing path. */
|
|
@@ -82775,8 +83116,16 @@ var Daemon = class {
|
|
|
82775
83116
|
this.log.info(`cp: connecting to ${url}…`);
|
|
82776
83117
|
}
|
|
82777
83118
|
async stop() {
|
|
83119
|
+
this.draining = true;
|
|
82778
83120
|
clearTimeout(this.debounceTimer);
|
|
83121
|
+
if (this.idleSweepTimer !== void 0) {
|
|
83122
|
+
this.clock.clearTimeout(this.idleSweepTimer);
|
|
83123
|
+
this.idleSweepTimer = void 0;
|
|
83124
|
+
}
|
|
83125
|
+
for (const t of this.cancelTimers.values()) this.clock.clearTimeout(t);
|
|
83126
|
+
this.cancelTimers.clear();
|
|
82779
83127
|
await this.watcher?.close();
|
|
83128
|
+
await this.drainForShutdown();
|
|
82780
83129
|
const errors = [];
|
|
82781
83130
|
this.scheduler?.stop();
|
|
82782
83131
|
await Promise.resolve(this.cpClient?.stop()).catch((e) => errors.push(e));
|