@agentconnect.md/daemon 1.0.0-rc.36 → 1.0.0-rc.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +464 -46
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -7376,11 +7376,17 @@ const ConfigSchema = object({
|
|
|
7376
7376
|
maxAgents: number().int().default(8),
|
|
7377
7377
|
maxConcurrentSessions: number().int().default(32),
|
|
7378
7378
|
agentIdleTimeoutMs: number().int().default(9e5),
|
|
7379
|
+
idleSweepMs: number().int().default(6e4),
|
|
7380
|
+
shutdownDrainMs: number().int().default(25e3),
|
|
7381
|
+
cancelBackstopMs: number().int().default(3e4),
|
|
7379
7382
|
maxAttachmentBytes: number().int().default(8 * 1024 * 1024)
|
|
7380
7383
|
}).default({
|
|
7381
7384
|
maxAgents: 8,
|
|
7382
7385
|
maxConcurrentSessions: 32,
|
|
7383
7386
|
agentIdleTimeoutMs: 9e5,
|
|
7387
|
+
idleSweepMs: 6e4,
|
|
7388
|
+
shutdownDrainMs: 25e3,
|
|
7389
|
+
cancelBackstopMs: 3e4,
|
|
7384
7390
|
maxAttachmentBytes: 8 * 1024 * 1024
|
|
7385
7391
|
})
|
|
7386
7392
|
});
|
|
@@ -16975,10 +16981,29 @@ var AcpHost = class {
|
|
|
16975
16981
|
async cancel(sessionId) {
|
|
16976
16982
|
await this.conn.agent.notify(methods.agent.session.cancel, { sessionId });
|
|
16977
16983
|
}
|
|
16978
|
-
|
|
16979
|
-
|
|
16980
|
-
|
|
16981
|
-
|
|
16984
|
+
/** Stop the adapter child: SIGTERM, then escalate to SIGKILL if it hasn't exited
|
|
16985
|
+
* within `deadlineMs` (a buggy/hung agent must never block daemon shutdown or an
|
|
16986
|
+
* idle reap). Idempotent — the child handle is cleared up front so a concurrent
|
|
16987
|
+
* stop (drain + reconcile racing) is a no-op rather than a double-kill. */
|
|
16988
|
+
async stop(deadlineMs = 5e3) {
|
|
16989
|
+
const child = this.child;
|
|
16990
|
+
if (!child) return;
|
|
16991
|
+
this.child = void 0;
|
|
16992
|
+
child.kill("SIGTERM");
|
|
16993
|
+
await new Promise((resolve) => {
|
|
16994
|
+
let settled = false;
|
|
16995
|
+
const done = () => {
|
|
16996
|
+
if (settled) return;
|
|
16997
|
+
settled = true;
|
|
16998
|
+
clearTimeout(timer);
|
|
16999
|
+
resolve();
|
|
17000
|
+
};
|
|
17001
|
+
const timer = setTimeout(() => {
|
|
17002
|
+
this.opts.log?.warn(`acp: child ignored SIGTERM after ${deadlineMs}ms — sending SIGKILL`);
|
|
17003
|
+
child.kill("SIGKILL");
|
|
17004
|
+
}, deadlineMs);
|
|
17005
|
+
child.once("exit", done);
|
|
17006
|
+
});
|
|
16982
17007
|
}
|
|
16983
17008
|
};
|
|
16984
17009
|
//#endregion
|
|
@@ -17338,6 +17363,91 @@ const AgentScopeDenied = object({
|
|
|
17338
17363
|
capability: string()
|
|
17339
17364
|
});
|
|
17340
17365
|
//#endregion
|
|
17366
|
+
//#region ../protocol/dist/frames/register.js
|
|
17367
|
+
/**
 * Capability upload and the reconcile snapshot — protocol §3.3.
 *
 * `register/ok` is the authoritative source of truth: the daemon converges its
 * local cache to it. CP wins all conflicts, so re-issuing the same snapshot is
 * idempotent.
 */

/** Payload of `register`: what this daemon offers plus the ids it holds locally. */
const RegisterReq = object({
  host: string(),
  // Advertised capabilities; `features` falls back to an empty list when omitted.
  capabilities: object({
    platforms: array(Platform),
    runtimes: array(string()),
    acp: boolean(),
    features: array(string()).default([])
  }),
  maxAgents: number().int(),
  // String ids of the assignments / crons / leases currently held by the daemon.
  localState: object({
    assignments: array(string()),
    crons: array(string()),
    leases: array(string())
  })
});

/** Payload of `register/ok`: the snapshot the daemon converges to. */
const RegisterOk = object({
  routingEpoch: number().int(),
  assignments: array(RouteAssign),
  // Agent specs extended with a UUID `agentId`; defaults to an empty list.
  agents: array(AgentSpec.extend({ agentId: string().uuid() })).default([]),
  crons: array(CronUpsert),
  leases: array(SecretsGrant),
  // Ids listed for removal from the daemon's local state.
  drop: object({
    assignments: array(string()),
    crons: array(string())
  })
});
|
|
17400
|
+
//#endregion
|
|
17401
|
+
//#region ../protocol/dist/frames/session.js
|
|
17402
|
+
/**
 * Session read-back (C→D REQ → REP) — the console's on-demand pulls.
 *
 * The CP stores NO session data — neither the list nor the bodies. Sessions are
 * created on the Slack→daemon path and live solely in the daemon's local store
 * (body-locality, §1/§12). Both the session **list** and a session's chat
 * **history** are therefore pulled live from the owning daemon(s) and proxied to
 * the console — never persisted on the CP, never on the orchestration hot path.
 *
 * - `session/list`: the daemon's live sessions (the CP fans this out to all of the
 *   org's online daemons and merges, for "all sessions across the workspace").
 * - `session/history`: one cursor-paginated page of a session's transcript.
 */

/** One row of the session list: metadata and console metrics, NOT the transcript. */
const SessionListItem = object({
  sessionId: string().uuid(),
  sessionKey: SessionKey,
  agentId: string().uuid(),
  title: string().optional(),
  status: string().optional(),
  lastActivityAt: string().optional(),
  tokenUsage: number().int().optional(),
  triggeredBy: string().optional()
});

/** C→D REQ: list the daemon's sessions, optionally restricted to one agent. */
const SessionListReq = object({
  agentId: string().uuid().optional()
});

/** D→C REP (corr = req id): the daemon's current sessions. */
const SessionListPage = object({
  sessions: array(SessionListItem)
});

/** One transcript message in a history page (a body — returned only for display). */
const SessionMessage = object({
  seq: number().int(),
  sender: string(),
  ts: string(),
  kind: string(),
  text: string()
});

/** C→D REQ: fetch one history page; `limit` is 1–200 and defaults to 50. */
const SessionHistoryReq = object({
  sessionId: string().uuid(),
  cursor: string().optional(),
  limit: number().int().positive().max(200).default(50)
});

/** D→C REP (corr = the req id): a page of messages plus the cursor for the next page. */
const SessionHistoryPage = object({
  sessionId: string().uuid(),
  messages: array(SessionMessage),
  nextCursor: string().optional()
});
|
|
17450
|
+
//#endregion
|
|
17341
17451
|
//#region ../protocol/dist/frame.js
|
|
17342
17452
|
/**
|
|
17343
17453
|
* The single source of truth for the wire: `type` string → payload zod schema.
|
|
@@ -17353,32 +17463,8 @@ const AgentScopeDenied = object({
|
|
|
17353
17463
|
const FRAME_SCHEMAS = {
|
|
17354
17464
|
auth: AuthReq,
|
|
17355
17465
|
"auth/ok": AuthOk,
|
|
17356
|
-
register:
|
|
17357
|
-
|
|
17358
|
-
capabilities: object({
|
|
17359
|
-
platforms: array(Platform),
|
|
17360
|
-
runtimes: array(string()),
|
|
17361
|
-
acp: boolean(),
|
|
17362
|
-
features: array(string()).default([])
|
|
17363
|
-
}),
|
|
17364
|
-
maxAgents: number().int(),
|
|
17365
|
-
localState: object({
|
|
17366
|
-
assignments: array(string()),
|
|
17367
|
-
crons: array(string()),
|
|
17368
|
-
leases: array(string())
|
|
17369
|
-
})
|
|
17370
|
-
}),
|
|
17371
|
-
"register/ok": object({
|
|
17372
|
-
routingEpoch: number().int(),
|
|
17373
|
-
assignments: array(RouteAssign),
|
|
17374
|
-
agents: array(AgentSpec.extend({ agentId: string().uuid() })).default([]),
|
|
17375
|
-
crons: array(CronUpsert),
|
|
17376
|
-
leases: array(SecretsGrant),
|
|
17377
|
-
drop: object({
|
|
17378
|
-
assignments: array(string()),
|
|
17379
|
-
crons: array(string())
|
|
17380
|
-
})
|
|
17381
|
-
}),
|
|
17466
|
+
register: RegisterReq,
|
|
17467
|
+
"register/ok": RegisterOk,
|
|
17382
17468
|
heartbeat: object({
|
|
17383
17469
|
load: object({
|
|
17384
17470
|
cpu: number(),
|
|
@@ -17437,6 +17523,10 @@ const FRAME_SCHEMAS = {
|
|
|
17437
17523
|
]),
|
|
17438
17524
|
toolCalling: boolean()
|
|
17439
17525
|
}),
|
|
17526
|
+
"session/list": SessionListReq,
|
|
17527
|
+
"session/list/page": SessionListPage,
|
|
17528
|
+
"session/history": SessionHistoryReq,
|
|
17529
|
+
"session/history/page": SessionHistoryPage,
|
|
17440
17530
|
"config/push": object({ keys: record(string(), unknown()) }),
|
|
17441
17531
|
"daemon/restart": object({
|
|
17442
17532
|
reason: string(),
|
|
@@ -17523,6 +17613,10 @@ discriminatedUnion("type", [
|
|
|
17523
17613
|
frame("scope-attestation", FRAME_SCHEMAS["scope-attestation"]),
|
|
17524
17614
|
frame("event/session", FRAME_SCHEMAS["event/session"]),
|
|
17525
17615
|
frame("facts/runtime-profile", FRAME_SCHEMAS["facts/runtime-profile"]),
|
|
17616
|
+
frame("session/list", FRAME_SCHEMAS["session/list"]),
|
|
17617
|
+
frame("session/list/page", FRAME_SCHEMAS["session/list/page"]),
|
|
17618
|
+
frame("session/history", FRAME_SCHEMAS["session/history"]),
|
|
17619
|
+
frame("session/history/page", FRAME_SCHEMAS["session/history/page"]),
|
|
17526
17620
|
frame("config/push", FRAME_SCHEMAS["config/push"]),
|
|
17527
17621
|
frame("daemon/restart", FRAME_SCHEMAS["daemon/restart"]),
|
|
17528
17622
|
frame("daemon/upgrade", FRAME_SCHEMAS["daemon/upgrade"]),
|
|
@@ -23292,6 +23386,27 @@ var LocalStore = class {
|
|
|
23292
23386
|
acpSessionId=excluded.acpSessionId, state=excluded.state,
|
|
23293
23387
|
lastDeliveredTs=excluded.lastDeliveredTs, updatedAt=excluded.updatedAt`).run(rec);
|
|
23294
23388
|
}
|
|
23389
|
+
/** Targeted state transition for an existing session (§7.3), stamping `updatedAt`
|
|
23390
|
+
* so the change counts as activity for the TTL/idle clocks. No-op if the key is
|
|
23391
|
+
* unknown (the row is created by the SessionManager on first turn). */
|
|
23392
|
+
setSessionState(key, state, updatedAt) {
|
|
23393
|
+
this.db.prepare("UPDATE sessions SET state = ?, updatedAt = ? WHERE key = ?").run(state, updatedAt, key);
|
|
23394
|
+
}
|
|
23395
|
+
/** Most-recent activity across an agent's non-closed sessions (epoch ms), or null
|
|
23396
|
+
* if it has none. Drives idle-host reaping (#111): a host with no recent session
|
|
23397
|
+
* activity AND no in-flight turn is past its idle window. */
|
|
23398
|
+
agentLastActivityTs(agentId) {
|
|
23399
|
+
return this.db.prepare("SELECT MAX(updatedAt) AS ts FROM sessions WHERE agentId = ? AND state != 'closed'").get(agentId)?.ts ?? null;
|
|
23400
|
+
}
|
|
23401
|
+
/** §7.3 TTL close: move every `idle` session untouched since `now - ttlMs` to
|
|
23402
|
+
* `closed`, returning the rows closed (for logging). `prompting`/`cancelling`
|
|
23403
|
+
* sessions are never closed — a live turn keeps the thread open. */
|
|
23404
|
+
closeIdleSessions(now, ttlMs) {
|
|
23405
|
+
const cutoff = now - ttlMs;
|
|
23406
|
+
const rows = this.db.prepare("SELECT key, channel, thread, agentId FROM sessions WHERE state = 'idle' AND updatedAt < ?").all(cutoff);
|
|
23407
|
+
if (rows.length) this.db.prepare("UPDATE sessions SET state = 'closed' WHERE state = 'idle' AND updatedAt < ?").run(cutoff);
|
|
23408
|
+
return rows;
|
|
23409
|
+
}
|
|
23295
23410
|
appendTranscript(e) {
|
|
23296
23411
|
this.db.prepare("INSERT OR IGNORE INTO transcript (channel, thread, ts, sender, kind, text) VALUES (@channel, @thread, @ts, @sender, @kind, @text)").run(e);
|
|
23297
23412
|
}
|
|
@@ -23443,10 +23558,13 @@ var SessionManager = class {
|
|
|
23443
23558
|
} else if (host.hasSession?.(rec.acpSessionId) === false) {
|
|
23444
23559
|
const cwd = await prepareWorkspace(agent);
|
|
23445
23560
|
let resumed = false;
|
|
23446
|
-
if (host.loadSupported?.())
|
|
23447
|
-
|
|
23448
|
-
|
|
23449
|
-
|
|
23561
|
+
if (host.loadSupported?.()) {
|
|
23562
|
+
this.deps.store.setSessionState(key, "resuming", Date.now());
|
|
23563
|
+
try {
|
|
23564
|
+
await host.loadSession(rec.acpSessionId, cwd);
|
|
23565
|
+
resumed = true;
|
|
23566
|
+
} catch {}
|
|
23567
|
+
}
|
|
23450
23568
|
if (!resumed) {
|
|
23451
23569
|
const mcpServers = this.deps.mcpServersFor?.({
|
|
23452
23570
|
agent,
|
|
@@ -81628,14 +81746,13 @@ var CpClient = class {
|
|
|
81628
81746
|
}
|
|
81629
81747
|
armHeartbeat() {
|
|
81630
81748
|
this.heartbeatTimer = this.deps.clock.setTimeout(() => {
|
|
81631
|
-
if (this.state
|
|
81632
|
-
this.transport?.send(encode(buildEnvelope("heartbeat", {
|
|
81749
|
+
if (this.state === "READY") this.transport?.send(encode(buildEnvelope("heartbeat", {
|
|
81633
81750
|
load: this.deps.loadSnapshot(),
|
|
81634
81751
|
health: "ok",
|
|
81635
81752
|
activeSessions: this.deps.activeSessions(),
|
|
81636
81753
|
degradedScopes: this.deps.degradedScopes?.() ?? []
|
|
81637
81754
|
})));
|
|
81638
|
-
this.armHeartbeat();
|
|
81755
|
+
if (this.state === "READY" || this.state === "DRAINING") this.armHeartbeat();
|
|
81639
81756
|
}, this.heartbeatMs);
|
|
81640
81757
|
}
|
|
81641
81758
|
stopHeartbeat() {
|
|
@@ -81682,13 +81799,36 @@ var CpClient = class {
|
|
|
81682
81799
|
case "agent/remove":
|
|
81683
81800
|
this.deps.configApply.applyAgentRemove(frame.payload.agentId);
|
|
81684
81801
|
return;
|
|
81685
|
-
case "agent/launch":
|
|
81686
|
-
|
|
81687
|
-
|
|
81688
|
-
|
|
81802
|
+
case "agent/launch": {
|
|
81803
|
+
const launch = frame.payload;
|
|
81804
|
+
this.deps.configApply.applyAgentLaunch(launch).then((launched) => this.reply(frame, "agent/launched", launched)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/launch failed: ${err.message}`, false));
|
|
81805
|
+
return;
|
|
81806
|
+
}
|
|
81807
|
+
case "agent/stop": {
|
|
81808
|
+
const stop = frame.payload;
|
|
81809
|
+
this.deps.configApply.applyAgentStop(stop).then((ack) => this.reply(frame, "ack", ack)).catch((err) => this.sendError(frame.id, "INTERNAL", `agent/stop failed: ${err.message}`, false));
|
|
81810
|
+
return;
|
|
81811
|
+
}
|
|
81812
|
+
case "daemon/drain": {
|
|
81813
|
+
const drain = frame.payload;
|
|
81814
|
+
this.state = "DRAINING";
|
|
81815
|
+
this.deps.configApply.applyDaemonDrain(drain, (p) => this.emit("drain/progress", p)).then((done) => {
|
|
81816
|
+
this.reply(frame, "drain/done", done);
|
|
81817
|
+
if (this.state === "DRAINING") this.state = "READY";
|
|
81818
|
+
}).catch((err) => {
|
|
81819
|
+
this.sendError(frame.id, "INTERNAL", `drain failed: ${err.message}`, false);
|
|
81820
|
+
if (this.state === "DRAINING") this.state = "READY";
|
|
81821
|
+
});
|
|
81822
|
+
return;
|
|
81823
|
+
}
|
|
81689
81824
|
case "daemon/restart":
|
|
81825
|
+
this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonRestart(frame.payload));
|
|
81826
|
+
return;
|
|
81690
81827
|
case "daemon/upgrade":
|
|
81691
|
-
this.
|
|
81828
|
+
this.reply(frame, "daemon/control/ack", this.deps.configApply.applyDaemonUpgrade(frame.payload));
|
|
81829
|
+
return;
|
|
81830
|
+
case "agent/prompt":
|
|
81831
|
+
this.sendError(frame.id, "INTERNAL", "agent/prompt not implemented", false);
|
|
81692
81832
|
return;
|
|
81693
81833
|
default:
|
|
81694
81834
|
this.deps.log.debug(`cp: ignoring ${frame.type}`);
|
|
@@ -81698,6 +81838,10 @@ var CpClient = class {
|
|
|
81698
81838
|
reply(req, type, payload) {
|
|
81699
81839
|
this.transport?.send(encode(buildEnvelope(type, payload, { corr: req.id })));
|
|
81700
81840
|
}
|
|
81841
|
+
/** Emit an uncorrelated EVT (e.g. `drain/progress`). */
|
|
81842
|
+
emit(type, payload) {
|
|
81843
|
+
this.transport?.send(encode(buildEnvelope(type, payload)));
|
|
81844
|
+
}
|
|
81701
81845
|
};
|
|
81702
81846
|
//#endregion
|
|
81703
81847
|
//#region src/cp/cp-cron.ts
|
|
@@ -81978,6 +82122,21 @@ function formatErr(err) {
|
|
|
81978
82122
|
}
|
|
81979
82123
|
const MAX_QUEUED_PER_SESSION = 10;
|
|
81980
82124
|
const IDLE_FLUSH_MS = 2e3;
|
|
82125
|
+
/**
 * Build the wire SessionKey (protocol §5) for a pending turn — what `drain/done`
 * reports as released so the CP may reassign it.
 *
 * The key carries the real `thread` (absent for a channel-root message), NOT
 * `statusThread` (which falls back to the msgId): the CP keys assignments by
 * `thread ?? "-"`, so reporting the msgId would miss the match.
 *
 * @param {{platform: string, channel: string, thread?: string}} p Pending turn.
 * @returns {{platform: string, channel: string, thread?: string}} Key with
 *   `thread` present only when the turn has one.
 */
function pendingSessionKey(p) {
  const key = {
    platform: p.platform,
    channel: p.channel
  };
  // Leave the property off entirely (rather than `thread: undefined`) for
  // channel-root turns.
  if (p.thread !== void 0) key.thread = p.thread;
  return key;
}
|
|
81981
82140
|
var Daemon = class {
|
|
81982
82141
|
opts;
|
|
81983
82142
|
store;
|
|
@@ -82003,8 +82162,17 @@ var Daemon = class {
|
|
|
82003
82162
|
cpAgents;
|
|
82004
82163
|
botUserIds = {};
|
|
82005
82164
|
cpRouting;
|
|
82165
|
+
clock;
|
|
82166
|
+
requestExit;
|
|
82167
|
+
draining = false;
|
|
82168
|
+
drainingAgents = /* @__PURE__ */ new Set();
|
|
82169
|
+
hostStopping = /* @__PURE__ */ new Map();
|
|
82170
|
+
idleSweepTimer;
|
|
82171
|
+
cancelTimers = /* @__PURE__ */ new Map();
|
|
82006
82172
|
constructor(opts = {}) {
|
|
82007
82173
|
this.opts = opts;
|
|
82174
|
+
this.clock = opts.clock ?? systemClock;
|
|
82175
|
+
this.requestExit = opts.requestExit ?? ((code) => process.exit(code));
|
|
82008
82176
|
}
|
|
82009
82177
|
async start() {
|
|
82010
82178
|
const root = resolveRoot(this.opts.root);
|
|
@@ -82144,6 +82312,7 @@ var Daemon = class {
|
|
|
82144
82312
|
this.watcher.on("add", debounced).on("change", debounced).on("unlink", debounced);
|
|
82145
82313
|
this.log.info(`watching ${this.agentsDir} for agent changes`);
|
|
82146
82314
|
this.startCpClient(root);
|
|
82315
|
+
this.armIdleSweep();
|
|
82147
82316
|
this.log.info("daemon ready");
|
|
82148
82317
|
}
|
|
82149
82318
|
loadAgentList() {
|
|
@@ -82316,6 +82485,10 @@ var Daemon = class {
|
|
|
82316
82485
|
}
|
|
82317
82486
|
seenMsgIds = /* @__PURE__ */ new Set();
|
|
82318
82487
|
onInbound(msg) {
|
|
82488
|
+
if (this.draining) {
|
|
82489
|
+
this.log.debug(`routing: dropping inbound ${msg.msgId} (daemon draining)`);
|
|
82490
|
+
return;
|
|
82491
|
+
}
|
|
82319
82492
|
if (this.seenMsgIds.has(msg.msgId)) {
|
|
82320
82493
|
this.log.debug(`routing: duplicate ${msg.msgId} ignored`);
|
|
82321
82494
|
return;
|
|
@@ -82333,6 +82506,10 @@ var Daemon = class {
|
|
|
82333
82506
|
this.log.debug(`routing: dropped message in ch=${msg.channel} (no agent matched — not a mention of a known bot, not a subscribed 'all' channel, not a thread/DM hit)`);
|
|
82334
82507
|
return;
|
|
82335
82508
|
}
|
|
82509
|
+
if (this.drainingAgents.has(result.agentId)) {
|
|
82510
|
+
this.log.debug(`routing: dropping ${msg.msgId} for agent "${result.agentId}" (draining)`);
|
|
82511
|
+
return;
|
|
82512
|
+
}
|
|
82336
82513
|
this.log.info(`routing: ch=${msg.channel} → agent "${result.agentId}" (integration ${result.integrationId})`);
|
|
82337
82514
|
this.dispatch(result.agentId, msg, result.integrationId).catch((err) => this.log.error(`dispatch failed for agent "${result.agentId}": ${formatErr(err)}`));
|
|
82338
82515
|
}
|
|
@@ -82402,7 +82579,8 @@ var Daemon = class {
|
|
|
82402
82579
|
}
|
|
82403
82580
|
const conn = this.replyConnFor(target.agentId, target.integrationId);
|
|
82404
82581
|
const thread = msg.thread ?? msg.msgId;
|
|
82405
|
-
const
|
|
82582
|
+
const key = sessionKey(msg.platform, msg.channel, thread, target.agentId);
|
|
82583
|
+
const acpSessionId = this.store.getSession(key)?.acpSessionId;
|
|
82406
82584
|
const inflight = !!(acpSessionId && this.pending.has(acpSessionId));
|
|
82407
82585
|
if (command.kind === "stop") {
|
|
82408
82586
|
if (!inflight) {
|
|
@@ -82411,7 +82589,9 @@ var Daemon = class {
|
|
|
82411
82589
|
}
|
|
82412
82590
|
this.queued.delete(acpSessionId);
|
|
82413
82591
|
this.log.info(`command: stop → agent "${target.agentId}" session ${acpSessionId}`);
|
|
82592
|
+
this.store.setSessionState(key, "cancelling", this.clock.now());
|
|
82414
82593
|
this.hosts.get(target.agentId)?.cancel(acpSessionId).catch((err) => this.log.error(`command stop: cancel failed: ${err.message}`));
|
|
82594
|
+
this.armCancelBackstop(target.agentId, acpSessionId, key);
|
|
82415
82595
|
conn?.postMessage(msg.channel, "🛑 Stopped.", thread);
|
|
82416
82596
|
return;
|
|
82417
82597
|
}
|
|
@@ -82474,22 +82654,40 @@ var Daemon = class {
|
|
|
82474
82654
|
return [...out];
|
|
82475
82655
|
}
|
|
82476
82656
|
async dispatch(agentId, msg, integrationId) {
|
|
82657
|
+
if (this.draining || this.drainingAgents.has(agentId)) {
|
|
82658
|
+
this.log.debug(`dispatch: skipped for agent "${agentId}" (draining)`);
|
|
82659
|
+
return;
|
|
82660
|
+
}
|
|
82477
82661
|
const conv = new OutputConverger(this.agents.get(agentId).output.mode);
|
|
82478
82662
|
const rec = new TranscriptRecorder();
|
|
82479
82663
|
const replyConn = this.replyConnFor(agentId, integrationId);
|
|
82480
82664
|
const wasRunning = this.hostStarts.has(agentId);
|
|
82481
82665
|
const statusThread = msg.thread ?? msg.msgId;
|
|
82666
|
+
const key = sessionKey(msg.platform, msg.channel, statusThread, agentId);
|
|
82482
82667
|
replyConn?.setStatus(msg.channel, statusThread, wasRunning ? "is thinking…" : "is starting up…");
|
|
82483
|
-
|
|
82668
|
+
let handled;
|
|
82669
|
+
try {
|
|
82670
|
+
handled = await this.sessions.handle(agentId, msg);
|
|
82671
|
+
} catch (err) {
|
|
82672
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82673
|
+
throw err;
|
|
82674
|
+
}
|
|
82675
|
+
const { sessionId, blocks } = handled;
|
|
82676
|
+
let resolveDone;
|
|
82677
|
+
const done = new Promise((r) => resolveDone = r);
|
|
82484
82678
|
const p = {
|
|
82485
82679
|
conv,
|
|
82486
82680
|
rec,
|
|
82487
82681
|
agentId,
|
|
82682
|
+
platform: msg.platform,
|
|
82683
|
+
sessionKey: key,
|
|
82488
82684
|
channel: msg.channel,
|
|
82489
82685
|
thread: msg.thread,
|
|
82490
82686
|
statusThread,
|
|
82491
82687
|
conn: replyConn,
|
|
82492
|
-
applyChain: Promise.resolve()
|
|
82688
|
+
applyChain: Promise.resolve(),
|
|
82689
|
+
done,
|
|
82690
|
+
resolveDone
|
|
82493
82691
|
};
|
|
82494
82692
|
this.pending.set(sessionId, p);
|
|
82495
82693
|
try {
|
|
@@ -82504,9 +82702,35 @@ var Daemon = class {
|
|
|
82504
82702
|
} finally {
|
|
82505
82703
|
this.clearIdle(p);
|
|
82506
82704
|
this.pending.delete(sessionId);
|
|
82705
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82706
|
+
this.clearCancelBackstop(sessionId);
|
|
82707
|
+
p.resolveDone();
|
|
82507
82708
|
}
|
|
82508
82709
|
this.flushQueued(agentId, sessionId, integrationId);
|
|
82509
82710
|
}
|
|
82711
|
+
/** After `!stop` sends session/cancel, give the agent `cancelBackstopMs` to yield.
|
|
82712
|
+
* If the turn is still in flight when the timer fires, the agent ignored the
|
|
82713
|
+
* cancel — force-stop its host (the only hard kill available) so the session
|
|
82714
|
+
* can't be stuck in `cancelling` forever. dispatch's finally clears this timer
|
|
82715
|
+
* the moment the turn yields on its own. */
|
|
82716
|
+
armCancelBackstop(agentId, acpSessionId, key) {
|
|
82717
|
+
this.clearCancelBackstop(acpSessionId);
|
|
82718
|
+
const ms = this.cfg.limits.cancelBackstopMs;
|
|
82719
|
+
this.cancelTimers.set(acpSessionId, this.clock.setTimeout(() => {
|
|
82720
|
+
this.cancelTimers.delete(acpSessionId);
|
|
82721
|
+
if (!this.pending.has(acpSessionId)) return;
|
|
82722
|
+
this.log.warn(`command stop: agent "${agentId}" ignored session/cancel for ${ms}ms — force-stopping host (session ${acpSessionId})`);
|
|
82723
|
+
this.stopHost(agentId, 0);
|
|
82724
|
+
this.store.setSessionState(key, "idle", this.clock.now());
|
|
82725
|
+
}, ms));
|
|
82726
|
+
}
|
|
82727
|
+
clearCancelBackstop(acpSessionId) {
|
|
82728
|
+
const t = this.cancelTimers.get(acpSessionId);
|
|
82729
|
+
if (t !== void 0) {
|
|
82730
|
+
this.clock.clearTimeout(t);
|
|
82731
|
+
this.cancelTimers.delete(acpSessionId);
|
|
82732
|
+
}
|
|
82733
|
+
}
|
|
82510
82734
|
/**
|
|
82511
82735
|
* Apply one converger action against the session's Slack connection:
|
|
82512
82736
|
* - set-status → assistant.threads.setStatus (best-effort; '' clears)
|
|
@@ -82595,6 +82819,8 @@ var Daemon = class {
|
|
|
82595
82819
|
});
|
|
82596
82820
|
}
|
|
82597
82821
|
async ensureHostAsync(agentId) {
|
|
82822
|
+
const stopping = this.hostStopping.get(agentId);
|
|
82823
|
+
if (stopping) await stopping;
|
|
82598
82824
|
const host = this.ensureHost(agentId, this.cfg);
|
|
82599
82825
|
let p = this.hostStarts.get(agentId);
|
|
82600
82826
|
if (!p) {
|
|
@@ -82608,6 +82834,171 @@ var Daemon = class {
|
|
|
82608
82834
|
const intId = integrationId ?? this.agents.get(agentId)?.integrations[0]?.id;
|
|
82609
82835
|
return intId ? this.connByIntegration.get(intId) : void 0;
|
|
82610
82836
|
}
|
|
82837
|
+
/** Stop and evict an ACP adapter child, returning the agent to `provisioned`
|
|
82838
|
+
* (config kept; the next message lazily re-spawns it). Idempotent. The teardown
|
|
82839
|
+
* is registered in `hostStopping` so a concurrent ensureHostAsync waits for it
|
|
82840
|
+
* instead of spawning a second live child. */
|
|
82841
|
+
async stopHost(agentId, deadlineMs) {
|
|
82842
|
+
const host = this.hosts.get(agentId);
|
|
82843
|
+
this.hosts.delete(agentId);
|
|
82844
|
+
this.hostStarts.delete(agentId);
|
|
82845
|
+
if (!host) return;
|
|
82846
|
+
const stop = host.stop(deadlineMs).finally(() => {
|
|
82847
|
+
if (this.hostStopping.get(agentId) === stop) this.hostStopping.delete(agentId);
|
|
82848
|
+
});
|
|
82849
|
+
this.hostStopping.set(agentId, stop);
|
|
82850
|
+
await stop;
|
|
82851
|
+
}
|
|
82852
|
+
/** Recurring idle sweep (§7.2/§7.3): reap idle adapter children and TTL-close
|
|
82853
|
+
* idle sessions. Driven by the injected Clock so a FakeClock advances it in tests. */
|
|
82854
|
+
armIdleSweep() {
|
|
82855
|
+
const interval = this.cfg.limits.idleSweepMs;
|
|
82856
|
+
if (interval <= 0) return;
|
|
82857
|
+
this.idleSweepTimer = this.clock.setTimeout(() => {
|
|
82858
|
+
this.idleSweepTimer = void 0;
|
|
82859
|
+
try {
|
|
82860
|
+
this.sweepIdle();
|
|
82861
|
+
} catch (err) {
|
|
82862
|
+
this.log.error(`idle sweep failed: ${formatErr(err)}`);
|
|
82863
|
+
}
|
|
82864
|
+
if (!this.draining) this.armIdleSweep();
|
|
82865
|
+
}, interval);
|
|
82866
|
+
}
|
|
82867
|
+
sweepIdle() {
|
|
82868
|
+
const now = this.clock.now();
|
|
82869
|
+
const ttl = this.cfg.limits.agentIdleTimeoutMs;
|
|
82870
|
+
const closed = this.store.closeIdleSessions(now, ttl);
|
|
82871
|
+
if (closed.length) this.log.info(`idle: TTL-closed ${closed.length} session(s) (>${Math.round(ttl / 1e3)}s)`);
|
|
82872
|
+
for (const [agentId] of [...this.hosts]) {
|
|
82873
|
+
if (this.drainingAgents.has(agentId)) continue;
|
|
82874
|
+
if ([...this.pending.values()].some((p) => p.agentId === agentId)) continue;
|
|
82875
|
+
const last = this.store.agentLastActivityTs(agentId) ?? 0;
|
|
82876
|
+
if (now - last <= ttl) continue;
|
|
82877
|
+
this.log.info(`idle: reclaiming host "${agentId}" (idle ${Math.round((now - last) / 1e3)}s) → provisioned`);
|
|
82878
|
+
this.stopHost(agentId).catch((err) => this.log.error(`idle: stop host "${agentId}" failed: ${formatErr(err)}`));
|
|
82879
|
+
}
|
|
82880
|
+
}
|
|
82881
|
+
/** Race `work` against a Clock-driven deadline, always clearing the timer so a
|
|
82882
|
+
* finished drain never leaves a dangling timer holding the process open. */
|
|
82883
|
+
async raceDeadline(work, ms) {
|
|
82884
|
+
const delay = Math.min(Math.max(0, ms), 2147483647);
|
|
82885
|
+
let handle;
|
|
82886
|
+
const timeout = new Promise((resolve) => {
|
|
82887
|
+
handle = this.clock.setTimeout(() => resolve("timeout"), delay);
|
|
82888
|
+
});
|
|
82889
|
+
try {
|
|
82890
|
+
return await Promise.race([work.then(() => "done"), timeout]);
|
|
82891
|
+
} finally {
|
|
82892
|
+
if (handle !== void 0) this.clock.clearTimeout(handle);
|
|
82893
|
+
}
|
|
82894
|
+
}
|
|
82895
|
+
/**
|
|
82896
|
+
* §5.3 drain: gate new turns for the scope, await in-flight turns up to
|
|
82897
|
+
* `deadlineMs` (emitting `drain/progress` as each yields), then cancel any
|
|
82898
|
+
* straggler past the deadline. Returns BOTH the in-scope set (`matched`) and the
|
|
82899
|
+
* subset that actually finished (`drained`). The caller decides which to report
|
|
82900
|
+
* as released: a session is only safe to release once it is genuinely no longer
|
|
82901
|
+
* being served — either it drained, or the caller force-stops its host. Reporting
|
|
82902
|
+
* a still-running straggler would let the CP reassign it and double-serve.
|
|
82903
|
+
*/
|
|
82904
|
+
async drainScope(scope, deadlineMs, onProgress) {
|
|
82905
|
+
const match = (p) => {
|
|
82906
|
+
if (scope.kind === "daemon") return true;
|
|
82907
|
+
if (scope.kind === "agent") return p.agentId === scope.agentId;
|
|
82908
|
+
const k = scope.sessionKey;
|
|
82909
|
+
return p.platform === k.platform && p.channel === k.channel && (k.thread === void 0 || p.statusThread === k.thread);
|
|
82910
|
+
};
|
|
82911
|
+
if (scope.kind === "daemon") this.draining = true;
|
|
82912
|
+
else if (scope.kind === "agent") this.drainingAgents.add(scope.agentId);
|
|
82913
|
+
const keyOf = (sk) => `${sk.platform}:${sk.channel}:${sk.thread ?? "-"}`;
|
|
82914
|
+
const targets = [...this.pending.entries()].filter(([, p]) => match(p));
|
|
82915
|
+
const matched = /* @__PURE__ */ new Map();
|
|
82916
|
+
for (const [, p] of targets) matched.set(keyOf(pendingSessionKey(p)), pendingSessionKey(p));
|
|
82917
|
+
if (targets.length) this.log.info(`drain[${scope.kind}]: awaiting ${targets.length} turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
|
|
82918
|
+
let settled = false;
|
|
82919
|
+
const drained = /* @__PURE__ */ new Map();
|
|
82920
|
+
let remaining = targets.length;
|
|
82921
|
+
const work = Promise.all(targets.map(([, p]) => p.done.then(() => {
|
|
82922
|
+
const sk = pendingSessionKey(p);
|
|
82923
|
+
drained.set(keyOf(sk), sk);
|
|
82924
|
+
remaining--;
|
|
82925
|
+
if (!settled) onProgress?.({
|
|
82926
|
+
remaining,
|
|
82927
|
+
drained: [...drained.values()]
|
|
82928
|
+
});
|
|
82929
|
+
})));
|
|
82930
|
+
const res = await this.raceDeadline(work, deadlineMs);
|
|
82931
|
+
settled = true;
|
|
82932
|
+
if (res === "timeout") for (const [sid, p] of this.pending) {
|
|
82933
|
+
if (!match(p)) continue;
|
|
82934
|
+
this.log.warn(`drain[${scope.kind}]: cancelling straggler turn (session ${sid})`);
|
|
82935
|
+
await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
|
|
82936
|
+
}
|
|
82937
|
+
return {
|
|
82938
|
+
matched: [...matched.values()],
|
|
82939
|
+
drained: [...drained.values()]
|
|
82940
|
+
};
|
|
82941
|
+
}
|
|
82942
|
+
/** Handle a CP `daemon/drain` (§5.3). A bare drain is a rebalance: after
|
|
82943
|
+
* releasing sessions the daemon reclaims hosts and re-opens its gate (a teardown
|
|
82944
|
+
* arrives separately via daemon/restart). */
|
|
82945
|
+
async runDrain(drain, onProgress) {
|
|
82946
|
+
const deadlineMs = Math.max(0, new Date(drain.deadline).getTime() - this.clock.now());
|
|
82947
|
+
const { matched, drained } = await this.drainScope(drain.scope, deadlineMs, onProgress);
|
|
82948
|
+
let released;
|
|
82949
|
+
if (drain.scope.kind === "daemon") {
|
|
82950
|
+
for (const id of [...this.hosts.keys()]) await this.stopHost(id);
|
|
82951
|
+
this.draining = false;
|
|
82952
|
+
released = matched;
|
|
82953
|
+
} else if (drain.scope.kind === "agent") {
|
|
82954
|
+
await this.stopHost(drain.scope.agentId);
|
|
82955
|
+
this.drainingAgents.delete(drain.scope.agentId);
|
|
82956
|
+
released = matched;
|
|
82957
|
+
} else released = drained;
|
|
82958
|
+
this.log.info(`drain[${drain.scope.kind}]: done — released ${released.length} session(s)`);
|
|
82959
|
+
return { released };
|
|
82960
|
+
}
|
|
82961
|
+
/** `agent/stop` (§8.2): drain the agent's in-flight turns, stop its host, and
|
|
82962
|
+
* leave it gated so it stays down until a matching `agent/launch` revives it. */
|
|
82963
|
+
async stopAgent(agentId) {
|
|
82964
|
+
await this.drainScope({
|
|
82965
|
+
kind: "agent",
|
|
82966
|
+
agentId
|
|
82967
|
+
}, this.cfg.limits.shutdownDrainMs);
|
|
82968
|
+
await this.stopHost(agentId);
|
|
82969
|
+
}
|
|
82970
|
+
/** §2.5 SIGTERM / daemon shutdown: gate new turns, then await in-flight turns up
|
|
82971
|
+
* to `shutdownDrainMs`, cancelling stragglers. Safe to call repeatedly. */
|
|
82972
|
+
async drainForShutdown() {
|
|
82973
|
+
this.draining = true;
|
|
82974
|
+
const live = [...this.pending.values()];
|
|
82975
|
+
if (live.length === 0) return;
|
|
82976
|
+
const deadlineMs = this.cfg.limits.shutdownDrainMs;
|
|
82977
|
+
this.log.info(`shutdown: draining ${live.length} in-flight turn(s) (deadline ${Math.round(deadlineMs / 1e3)}s)`);
|
|
82978
|
+
if (await this.raceDeadline(Promise.all(live.map((p) => p.done)), deadlineMs) === "timeout") {
|
|
82979
|
+
this.log.warn(`shutdown: deadline hit with ${this.pending.size} turn(s) still in flight — cancelling`);
|
|
82980
|
+
for (const [sid, p] of this.pending) await this.hosts.get(p.agentId)?.cancel(sid).catch(() => {});
|
|
82981
|
+
}
|
|
82982
|
+
}
|
|
82983
|
+
/** daemon/restart + daemon/upgrade (§8.3): ack now, then drain + stop + exit so
|
|
82984
|
+
* the supervisor relaunches (the new binary, for upgrade). */
|
|
82985
|
+
scheduleFleetExit(kind, targetVersion) {
|
|
82986
|
+
const willDrainUntil = new Date(this.clock.now() + this.cfg.limits.shutdownDrainMs).toISOString();
|
|
82987
|
+
this.log.info(`cp: ${kind}${targetVersion ? ` → ${targetVersion}` : ""} requested — draining then exiting`);
|
|
82988
|
+
(async () => {
|
|
82989
|
+
try {
|
|
82990
|
+
await this.stop();
|
|
82991
|
+
} catch (err) {
|
|
82992
|
+
this.log.error(`cp: ${kind} shutdown failed: ${formatErr(err)}`);
|
|
82993
|
+
} finally {
|
|
82994
|
+
this.requestExit(0);
|
|
82995
|
+
}
|
|
82996
|
+
})();
|
|
82997
|
+
return {
|
|
82998
|
+
accepted: true,
|
|
82999
|
+
willDrainUntil
|
|
83000
|
+
};
|
|
83001
|
+
}
|
|
82611
83002
|
cpConfigApply() {
|
|
82612
83003
|
return {
|
|
82613
83004
|
applyConfigPush: (keys) => {
|
|
@@ -82633,7 +83024,26 @@ var Daemon = class {
|
|
|
82633
83024
|
upsertCron: (cron) => this.cpCrons.upsert(cron),
|
|
82634
83025
|
removeCron: (cronId) => this.cpCrons.remove(cronId),
|
|
82635
83026
|
applyRouteAssign: (a) => this.cpRouting?.upsertAssign(a),
|
|
82636
|
-
applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u)
|
|
83027
|
+
applyRouteUpdate: (u) => this.cpRouting?.applyUpdate(u),
|
|
83028
|
+
applyAgentLaunch: async (launch) => {
|
|
83029
|
+
const agent = this.agents.get(launch.agentId);
|
|
83030
|
+
if (!agent) throw new Error(`agent/launch: unknown agent ${launch.agentId}`);
|
|
83031
|
+
this.drainingAgents.delete(launch.agentId);
|
|
83032
|
+
await this.ensureHostAsync(launch.agentId);
|
|
83033
|
+
return {
|
|
83034
|
+
agentId: launch.agentId,
|
|
83035
|
+
launchId: randomUUID(),
|
|
83036
|
+
startedAt: new Date(this.clock.now()).toISOString(),
|
|
83037
|
+
runtime: agent.runtime
|
|
83038
|
+
};
|
|
83039
|
+
},
|
|
83040
|
+
applyAgentStop: async (stop) => {
|
|
83041
|
+
await this.stopAgent(stop.agentId);
|
|
83042
|
+
return { ok: true };
|
|
83043
|
+
},
|
|
83044
|
+
applyDaemonDrain: (drain, onProgress) => this.runDrain(drain, onProgress),
|
|
83045
|
+
applyDaemonRestart: (_req) => this.scheduleFleetExit("restart"),
|
|
83046
|
+
applyDaemonUpgrade: (req) => this.scheduleFleetExit("upgrade", req.targetVersion)
|
|
82637
83047
|
};
|
|
82638
83048
|
}
|
|
82639
83049
|
/** A CP cron fired: build a synthetic message and run it through the normal routing path. */
|
|
@@ -82706,8 +83116,16 @@ var Daemon = class {
|
|
|
82706
83116
|
this.log.info(`cp: connecting to ${url}…`);
|
|
82707
83117
|
}
|
|
82708
83118
|
async stop() {
|
|
83119
|
+
this.draining = true;
|
|
82709
83120
|
clearTimeout(this.debounceTimer);
|
|
83121
|
+
if (this.idleSweepTimer !== void 0) {
|
|
83122
|
+
this.clock.clearTimeout(this.idleSweepTimer);
|
|
83123
|
+
this.idleSweepTimer = void 0;
|
|
83124
|
+
}
|
|
83125
|
+
for (const t of this.cancelTimers.values()) this.clock.clearTimeout(t);
|
|
83126
|
+
this.cancelTimers.clear();
|
|
82710
83127
|
await this.watcher?.close();
|
|
83128
|
+
await this.drainForShutdown();
|
|
82711
83129
|
const errors = [];
|
|
82712
83130
|
this.scheduler?.stop();
|
|
82713
83131
|
await Promise.resolve(this.cpClient?.stop()).catch((e) => errors.push(e));
|