agent-relay-server 0.10.7 → 0.10.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/public/index.html +49 -47
- package/src/config-store.ts +380 -0
- package/src/db.ts +223 -5
- package/src/index.ts +2 -0
- package/src/lifecycle-manager.ts +369 -0
- package/src/routes.ts +407 -8
- package/src/sse.ts +26 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
import { createCommand } from "./commands-db";
|
|
2
|
+
import { getAgent, getDb, getOrchestrator, resolveQueuedPolicyMessages } from "./db";
|
|
3
|
+
import {
|
|
4
|
+
getManagedAgentState,
|
|
5
|
+
listSpawnPolicies,
|
|
6
|
+
updateManagedAgentState,
|
|
7
|
+
upsertManagedAgentState,
|
|
8
|
+
} from "./config-store";
|
|
9
|
+
import { emitRelayEvent } from "./events";
|
|
10
|
+
import type { Command, ManagedAgentState, SpawnPolicy } from "./types";
|
|
11
|
+
|
|
12
|
+
/** Reconcile interval, in milliseconds, used when no `tickMs` option is supplied. */
const DEFAULT_TICK_MS = 10 * 1000;
|
|
13
|
+
/** Number of milliseconds in 24 hours. */
const DAY_MS = 86_400_000;
|
|
14
|
+
|
|
15
|
+
/** Optional overrides accepted by the `LifecycleManager` constructor. */
interface LifecycleManagerOptions {
  /** Clock used for every timestamp the manager produces; defaults to `Date.now`. */
  now?: () => number;
  /** Milliseconds between reconcile passes; defaults to `DEFAULT_TICK_MS`. */
  tickMs?: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Drives managed agents toward what their spawn policy asks for.
 *
 * Each reconcile pass compares every policy returned by `listSpawnPolicies()`
 * with its persisted `ManagedAgentState`, then issues `agent.spawn` /
 * `agent.shutdown` commands to the policy's orchestrator. Every state write is
 * followed by a `policy.state.changed` event and every command by a
 * `command.requested` event.
 *
 * Statuses written by this class: `starting`, `running`, `stopping`,
 * `stopped`, `backoff`.
 */
export class LifecycleManager {
  /** Backoff delay (seconds) used when a policy's schedule yields no usable entry. */
  private static readonly FALLBACK_BACKOFF_SECONDS = 30;

  // ReturnType<typeof setInterval> instead of a runtime-specific global type,
  // so the field type-checks under whichever runtime provides setInterval.
  private timer?: ReturnType<typeof setInterval>;
  private readonly tickMs: number;
  private readonly now: () => number;
  /** Names of policies that should be restarted once idle after a relay update. */
  private readonly updateRestartPending = new Set<string>();

  /**
   * @param options Optional tick interval and clock overrides; the clock
   *   override makes time-dependent behavior controllable by the caller.
   */
  constructor(options: LifecycleManagerOptions = {}) {
    this.tickMs = options.tickMs ?? DEFAULT_TICK_MS;
    this.now = options.now ?? (() => Date.now());
  }

  /** Runs one reconcile pass immediately and then every `tickMs`. No-op if already started. */
  start(): void {
    if (this.timer) return;
    this.runScheduledTick();
    this.timer = setInterval(() => this.runScheduledTick(), this.tickMs);
  }

  /** Cancels the periodic reconcile pass. No-op if not started. */
  stop(): void {
    if (!this.timer) return;
    clearInterval(this.timer);
    this.timer = undefined;
  }

  /** Returns the current spawn policies from the config store. */
  loadPolicies(): SpawnPolicy[] {
    return listSpawnPolicies().map((entry) => entry.value);
  }

  /**
   * Reconciles every policy once.
   *
   * A failure in one policy is logged and does not prevent the remaining
   * policies from being reconciled.
   *
   * @throws The first error encountered (policy loading or reconcile), after
   *   all policies have been attempted, so direct callers still observe failure.
   */
  async tick(): Promise<void> {
    let policies: SpawnPolicy[];
    try {
      policies = this.loadPolicies();
    } catch (error: unknown) {
      console.error("[lifecycle-manager] failed to load spawn policies", error);
      throw error;
    }

    let firstFailure: { error: unknown } | undefined;
    for (const policy of policies) {
      try {
        this.reconcilePolicy(policy);
      } catch (error: unknown) {
        console.error(`[lifecycle-manager] reconcile failed for policy "${policy.name}"`, error);
        firstFailure ??= { error };
      }
    }
    if (firstFailure) throw firstFailure.error;
  }

  /**
   * Spawns the agent for an on-demand policy when a message arrives for it.
   *
   * Nothing happens if the policy is unknown, is not `on-demand`, already has
   * an agent starting/running/stopping, or is inside an unexpired backoff
   * window.
   */
  onMessageForPolicy(policyName: string): void {
    const policy = this.loadPolicies().find((item) => item.name === policyName);
    if (!policy || policy.mode !== "on-demand") return;
    const state = getManagedAgentState(policy.name);
    if (state) {
      if (state.status === "starting" || state.status === "running" || state.status === "stopping") return;
      // Honour backoff the same way reconcilePolicy does; once it expires the
      // reconcile pass spawns the agent if messages are still queued.
      // NOTE(review): relies on the triggering message being stored as
      // 'queued' for `policy:<name>` — confirm against the caller.
      if (state.status === "backoff" && (state.backoffUntil ?? 0) > this.now()) return;
    }
    this.spawnAgent(policy, "message-trigger");
  }

  /**
   * Marks a policy as `running` when the agent spawned for it registers.
   *
   * The registration is ignored unless it carries both a policy name and the
   * `spawnRequestId` currently stored in that policy's state. After the state
   * update, queued policy messages are resolved to the agent and, if any were
   * resolved, a `message.available` event is emitted.
   */
  onAgentRegistered(agentId: string, meta: { policyName?: string; spawnRequestId?: string }): void {
    const { policyName, spawnRequestId } = meta;
    if (!policyName || !spawnRequestId) return;
    const state = getManagedAgentState(policyName);
    if (!state || state.spawnRequestId !== spawnRequestId) return;

    const next = updateManagedAgentState(policyName, {
      status: "running",
      agentId,
      healthySince: this.now(),
      backoffUntil: undefined,
      lastError: undefined,
    });
    if (next) this.emitState(next);

    const available = resolveQueuedPolicyMessages(policyName, agentId);
    if (available.length) {
      emitRelayEvent({
        type: "message.available",
        source: "server",
        subject: `policy:${policyName}`,
        data: { policyName, agentId, messageIds: available.map((message) => message.id), count: available.length },
      });
    }
  }

  /**
   * Handles the loss of an agent.
   *
   * The first policy whose state references `agentId` is updated: to `stopped`
   * when a stop was in progress, otherwise to `backoff`.
   */
  onAgentDisappeared(agentId: string): void {
    for (const policy of this.loadPolicies()) {
      const state = getManagedAgentState(policy.name);
      if (!state || state.agentId !== agentId) continue;

      if (state.status === "stopping") {
        const next = updateManagedAgentState(policy.name, {
          status: "stopped",
          agentId: undefined,
          tmuxSession: undefined,
          lastStopAt: this.now(),
        });
        if (next) this.emitState(next);
      } else {
        this.markBackoff(policy, state, "agent disappeared");
      }
      return;
    }
  }

  /** Reconciles the named policy right away when a `spawn-policy` config entry changes. */
  onConfigChanged(namespace: string, key: string): void {
    if (namespace !== "spawn-policy") return;
    const policy = this.loadPolicies().find((item) => item.name === key);
    if (policy) this.reconcilePolicy(policy);
  }

  /** Flags every policy with `restartOnUpdate` for a restart; the restart itself happens in a later reconcile pass. */
  onRelayUpdated(_newVersion: string): void {
    for (const policy of this.loadPolicies()) {
      if (policy.restartOnUpdate) this.updateRestartPending.add(policy.name);
    }
  }

  /**
   * Requests a new agent for `policy` from its orchestrator.
   *
   * @param policy Policy describing the agent to spawn.
   * @param reason Free-form reason recorded in the command params.
   * @returns The created `agent.spawn` command, or `null` when the orchestrator
   *   is missing or not online (the policy is then put into `backoff`).
   */
  spawnAgent(policy: SpawnPolicy, reason = "reconcile"): Command | null {
    const orch = getOrchestrator(policy.orchestratorId);
    const previous = getManagedAgentState(policy.name);
    const restartCount = previous?.restartCount ?? 0;
    const consecutiveFailures = previous?.consecutiveFailures ?? 0;

    if (!orch || orch.status !== "online") {
      const state = upsertManagedAgentState({
        policyName: policy.name,
        status: "backoff",
        orchestratorId: policy.orchestratorId,
        provider: policy.provider,
        // Carry the counters over like every other upsert in this class.
        // NOTE(review): assumes upsert may reset omitted fields — confirm
        // against config-store; passing them is harmless if it merges.
        restartCount,
        consecutiveFailures,
        backoffUntil: this.now() + this.backoffDelay(policy, previous) * 1000,
        lastError: "orchestrator offline",
      });
      this.emitState(state);
      return null;
    }

    const spawnRequestId = `sp_${crypto.randomUUID()}`;
    const state = upsertManagedAgentState({
      policyName: policy.name,
      status: "starting",
      orchestratorId: policy.orchestratorId,
      provider: policy.provider,
      spawnRequestId,
      lastSpawnAt: this.now(),
      restartCount,
      consecutiveFailures,
    });
    this.emitState(state);

    const command = createCommand({
      type: "agent.spawn",
      source: "system",
      target: orch.agentId,
      correlationId: spawnRequestId,
      params: {
        action: "spawn",
        provider: policy.provider,
        cwd: policy.cwd,
        label: policy.label,
        tags: policy.tags,
        capabilities: policy.capabilities,
        // Both keys carry the policy's permission mode.
        approvalMode: policy.permissionMode,
        permissionMode: policy.permissionMode,
        providerArgs: policy.providerArgs,
        prompt: policy.prompt,
        headless: true,
        policyName: policy.name,
        spawnRequestId,
        reason,
        requestedBy: "lifecycle-manager",
        requestedAt: this.now(),
      },
    });
    this.emitCommand(command);
    return command;
  }

  /**
   * Marks `policy` as `stopping` and asks its orchestrator to shut the agent down.
   *
   * @param policy Policy whose agent should stop.
   * @param graceful Forwarded to the shutdown command.
   * @param reason Free-form reason recorded in the command params.
   * @returns The created `agent.shutdown` command, or `null` (with no state
   *   change) when the orchestrator cannot be found.
   */
  stopAgent(policy: SpawnPolicy, graceful = true, reason = "reconcile-stop"): Command | null {
    const orch = getOrchestrator(policy.orchestratorId);
    if (!orch) return null;

    const state = getManagedAgentState(policy.name);
    const next = upsertManagedAgentState({
      policyName: policy.name,
      status: "stopping",
      agentId: state?.agentId,
      orchestratorId: policy.orchestratorId,
      provider: policy.provider,
      tmuxSession: state?.tmuxSession,
      spawnRequestId: state?.spawnRequestId,
      lastSpawnAt: state?.lastSpawnAt,
      lastStopAt: this.now(),
      restartCount: state?.restartCount ?? 0,
      consecutiveFailures: state?.consecutiveFailures ?? 0,
    });
    this.emitState(next);

    const command = createCommand({
      type: "agent.shutdown",
      source: "system",
      target: orch.agentId,
      correlationId: state?.spawnRequestId,
      params: {
        action: "shutdown",
        policyName: policy.name,
        spawnRequestId: state?.spawnRequestId,
        agentId: state?.agentId,
        tmuxSession: state?.tmuxSession,
        graceful,
        timeoutMs: 10_000,
        reason,
        requestedBy: "lifecycle-manager",
        requestedAt: this.now(),
      },
    });
    this.emitCommand(command);
    return command;
  }

  /**
   * Restarts the agent of `policy` by bumping `restartCount` and stopping it;
   * a later reconcile pass spawns it again when the policy calls for it.
   *
   * @returns The `agent.shutdown` command, or `null` (with no state change)
   *   when the orchestrator cannot be found.
   */
  restartAgent(policy: SpawnPolicy, reason = "reconcile-restart"): Command | null {
    // Check first: stopAgent() would return null without sending a command,
    // and a state already written as "stopping" is never acted on by
    // reconcilePolicy, leaving the policy stuck.
    if (!getOrchestrator(policy.orchestratorId)) return null;

    const state = getManagedAgentState(policy.name);
    const restarted = upsertManagedAgentState({
      policyName: policy.name,
      status: "stopping",
      agentId: state?.agentId,
      orchestratorId: policy.orchestratorId,
      provider: policy.provider,
      tmuxSession: state?.tmuxSession,
      spawnRequestId: state?.spawnRequestId,
      lastSpawnAt: state?.lastSpawnAt,
      lastStopAt: this.now(),
      restartCount: (state?.restartCount ?? 0) + 1,
      consecutiveFailures: state?.consecutiveFailures ?? 0,
    });
    this.emitState(restarted);
    return this.stopAgent(policy, true, reason);
  }

  /**
   * Applies one reconcile step to a single policy based on its stored status.
   *
   * NOTE(review): `starting` and `stopping` are not acted on here; if a spawn
   * or shutdown command is lost the policy stays in that status — confirm a
   * timeout exists elsewhere.
   */
  private reconcilePolicy(policy: SpawnPolicy): void {
    const state = getManagedAgentState(policy.name);
    const alwaysOn = policy.mode === "always-on";

    if (!state) {
      if (alwaysOn) this.spawnAgent(policy, "always-on");
      else if (this.hasQueuedMessages(policy.name)) this.spawnAgent(policy, "message-trigger");
      else this.ensureStoppedState(policy);
      return;
    }

    switch (state.status) {
      case "running":
        this.reconcileRunning(policy, state);
        return;
      case "backoff":
        if ((state.backoffUntil ?? 0) <= this.now() && (alwaysOn || this.hasQueuedMessages(policy.name))) {
          this.spawnAgent(policy, "backoff-expired");
        }
        return;
      case "stopped":
        if (alwaysOn || this.hasQueuedMessages(policy.name)) {
          this.spawnAgent(policy, alwaysOn ? "always-on" : "message-trigger");
        }
        return;
      default:
        return;
    }
  }

  /** Reconcile step for a policy whose stored status is `running`. */
  private reconcileRunning(policy: SpawnPolicy, state: ManagedAgentState): void {
    const agent = state.agentId ? getAgent(state.agentId) : null;
    if (!agent || agent.status === "offline") {
      this.markBackoff(policy, state, "agent offline");
      return;
    }

    this.resetBackoffAfterHealthyRun(policy, state);

    if (policy.mode === "on-demand" && this.isIdle(policy, state)) {
      this.stopAgent(policy, true, "idle-timeout");
      return;
    }
    // Daily restart only when the agent has been idle for an hour.
    if (policy.scheduledDailyRestart && this.dailyRestartDue(policy, state) && this.isIdle(policy, state, 3600)) {
      this.restartAgent(policy, "daily-restart");
      return;
    }
    // Post-update restart only when the agent has been idle for a minute.
    if (this.updateRestartPending.has(policy.name) && this.isIdle(policy, state, 60)) {
      // Keep the flag when the restart could not be issued so it is retried.
      if (this.restartAgent(policy, "restart-on-update")) {
        this.updateRestartPending.delete(policy.name);
      }
    }
  }

  /** Persists and announces a `stopped` state for `policy`. */
  private ensureStoppedState(policy: SpawnPolicy): void {
    const state = upsertManagedAgentState({
      policyName: policy.name,
      status: "stopped",
      orchestratorId: policy.orchestratorId,
      provider: policy.provider,
    });
    this.emitState(state);
  }

  /**
   * Records a failure: increments `consecutiveFailures` and moves the policy to
   * `backoff` until the delay for the pre-increment failure count has elapsed.
   * Does nothing while the policy is `stopping`.
   */
  private markBackoff(policy: SpawnPolicy, state: ManagedAgentState, error: string): void {
    if (state.status === "stopping") return;
    const delaySeconds = this.backoffDelay(policy, state);
    const next = upsertManagedAgentState({
      policyName: policy.name,
      status: "backoff",
      agentId: undefined,
      orchestratorId: policy.orchestratorId,
      provider: policy.provider,
      spawnRequestId: state.spawnRequestId,
      lastSpawnAt: state.lastSpawnAt,
      restartCount: state.restartCount,
      consecutiveFailures: state.consecutiveFailures + 1,
      backoffUntil: this.now() + delaySeconds * 1000,
      lastError: error,
    });
    this.emitState(next);
  }

  /**
   * Picks the backoff delay, in seconds, for the state's current failure count.
   *
   * The failure count indexes into `policy.backoff.schedule`, clamped to the
   * last entry; an empty schedule or an unusable index yields the fallback.
   */
  private backoffDelay(policy: SpawnPolicy, state: ManagedAgentState | null): number {
    const fallback = LifecycleManager.FALLBACK_BACKOFF_SECONDS;
    const schedule = policy.backoff.schedule.length ? policy.backoff.schedule : [fallback];
    const index = Math.min(state?.consecutiveFailures ?? 0, schedule.length - 1);
    // `??` instead of `!`: a negative or NaN counter would otherwise leak
    // `undefined` into the backoffUntil arithmetic.
    return schedule[index] ?? fallback;
  }

  /**
   * Clears the failure counter once the agent has been up for at least
   * `policy.backoff.resetAfterSeconds`, measured from `healthySince` (or
   * `lastSpawnAt` when that is unset).
   */
  private resetBackoffAfterHealthyRun(policy: SpawnPolicy, state: ManagedAgentState): void {
    if (state.consecutiveFailures === 0) return;
    const healthySince = state.healthySince ?? state.lastSpawnAt;
    if (!healthySince) return;
    if (this.now() - healthySince < policy.backoff.resetAfterSeconds * 1000) return;
    const next = updateManagedAgentState(policy.name, {
      consecutiveFailures: 0,
      backoffUntil: undefined,
      lastError: undefined,
    });
    if (next) this.emitState(next);
  }

  /** True when the agent was last spawned at least 24 hours ago. */
  private dailyRestartDue(_policy: SpawnPolicy, state: ManagedAgentState): boolean {
    return Boolean(state.lastSpawnAt && this.now() - state.lastSpawnAt >= DAY_MS);
  }

  /**
   * Reports whether the policy's agent has been idle long enough.
   *
   * @param overrideSeconds Idle threshold in seconds; when omitted the policy's
   *   `onDemand.keepaliveSeconds` is used. A threshold of 0 never counts as idle.
   * @returns `false` when there is no agent, the agent is unknown or `busy`, or
   *   the threshold is 0; otherwise whether the threshold has elapsed since the
   *   later of the agent's last message activity and the moment it came up.
   */
  private isIdle(policy: SpawnPolicy, state: ManagedAgentState, overrideSeconds?: number): boolean {
    if (!state.agentId) return false;
    const agent = getAgent(state.agentId);
    if (!agent || agent.status === "busy") return false;
    const idleSeconds = overrideSeconds ?? policy.onDemand?.keepaliveSeconds ?? 0;
    if (idleSeconds === 0) return false;
    // lastActivityAt() is 0 without message history (and may predate the
    // spawn), so also anchor on when the agent came up; otherwise a freshly
    // registered agent looks idle since epoch and is stopped on the next pass.
    const upSince = state.healthySince ?? state.lastSpawnAt ?? 0;
    const idleSince = Math.max(this.lastActivityAt(state.agentId), upSince);
    return this.now() - idleSince >= idleSeconds * 1000;
  }

  /**
   * Latest `created_at` among messages sent by, addressed to, or resolved to
   * the agent; 0 when there are none.
   * NOTE(review): compared against `this.now()`, so `created_at` is presumably
   * epoch milliseconds — confirm against the schema.
   */
  private lastActivityAt(agentId: string): number {
    const row = getDb().prepare(`
      SELECT max(created_at) AS at
      FROM messages
      WHERE from_agent = ? OR to_target = ? OR resolved_to_agent = ?
    `).get(agentId, agentId, agentId) as { at?: number | null } | undefined;
    return row?.at ?? 0;
  }

  /** True when at least one `queued` message is addressed to `policy:<policyName>`. */
  private hasQueuedMessages(policyName: string): boolean {
    const row = getDb().prepare("SELECT 1 FROM messages WHERE to_target = ? AND delivery_status = 'queued' LIMIT 1").get(`policy:${policyName}`);
    return Boolean(row);
  }

  /** Emits a `policy.state.changed` event carrying the full state as its payload. */
  private emitState(state: ManagedAgentState): void {
    emitRelayEvent({
      type: "policy.state.changed",
      source: "lifecycle-manager",
      subject: state.policyName,
      // Cast kept from the original: the state object is passed through as-is.
      data: state as unknown as Record<string, unknown>,
    });
  }

  /** Emits a `command.requested` event for a freshly created command. */
  private emitCommand(command: Command): void {
    emitRelayEvent({
      type: "command.requested",
      source: command.source,
      subject: command.id,
      data: { command },
    });
  }
}
|
|
363
|
+
|
|
364
|
+
/** Shared manager instance; stays `null` until it is first requested. */
let singleton: LifecycleManager | null = null;

/**
 * Returns the shared `LifecycleManager`, creating it with default options on
 * the first call. Every later call returns that same instance.
 */
export function getLifecycleManager(): LifecycleManager {
  singleton ??= new LifecycleManager();
  return singleton;
}
|