@orgloop/agentctl 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon/metrics.d.ts +6 -3
- package/dist/daemon/metrics.js +11 -4
- package/dist/daemon/server.js +142 -40
- package/dist/daemon/session-tracker.d.ts +42 -43
- package/dist/daemon/session-tracker.js +141 -274
- package/package.json +1 -1
package/dist/daemon/metrics.d.ts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
import type { FuseEngine } from "./fuse-engine.js";
|
|
2
2
|
import type { LockManager } from "./lock-manager.js";
|
|
3
|
-
import type { SessionTracker } from "./session-tracker.js";
|
|
4
3
|
export declare class MetricsRegistry {
|
|
5
|
-
private sessionTracker;
|
|
6
4
|
private lockManager;
|
|
7
5
|
private fuseEngine;
|
|
8
6
|
sessionsTotalCompleted: number;
|
|
@@ -10,7 +8,12 @@ export declare class MetricsRegistry {
|
|
|
10
8
|
sessionsTotalStopped: number;
|
|
11
9
|
fusesExpiredTotal: number;
|
|
12
10
|
sessionDurations: number[];
|
|
13
|
-
|
|
11
|
+
/** Last-known active session count, updated by session.list fan-out */
|
|
12
|
+
private _activeSessionCount;
|
|
13
|
+
constructor(lockManager: LockManager, fuseEngine: FuseEngine);
|
|
14
|
+
/** Update the active session gauge (called after session.list fan-out) */
|
|
15
|
+
setActiveSessionCount(count: number): void;
|
|
16
|
+
get activeSessionCount(): number;
|
|
14
17
|
recordSessionCompleted(durationSeconds?: number): void;
|
|
15
18
|
recordSessionFailed(durationSeconds?: number): void;
|
|
16
19
|
recordSessionStopped(durationSeconds?: number): void;
|
package/dist/daemon/metrics.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
export class MetricsRegistry {
|
|
2
|
-
sessionTracker;
|
|
3
2
|
lockManager;
|
|
4
3
|
fuseEngine;
|
|
5
4
|
sessionsTotalCompleted = 0;
|
|
@@ -7,11 +6,19 @@ export class MetricsRegistry {
|
|
|
7
6
|
sessionsTotalStopped = 0;
|
|
8
7
|
fusesExpiredTotal = 0;
|
|
9
8
|
sessionDurations = []; // seconds
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
/** Last-known active session count, updated by session.list fan-out */
|
|
10
|
+
_activeSessionCount = 0;
|
|
11
|
+
constructor(lockManager, fuseEngine) {
|
|
12
12
|
this.lockManager = lockManager;
|
|
13
13
|
this.fuseEngine = fuseEngine;
|
|
14
14
|
}
|
|
15
|
+
/** Update the active session gauge (called after session.list fan-out) */
|
|
16
|
+
setActiveSessionCount(count) {
|
|
17
|
+
this._activeSessionCount = count;
|
|
18
|
+
}
|
|
19
|
+
get activeSessionCount() {
|
|
20
|
+
return this._activeSessionCount;
|
|
21
|
+
}
|
|
15
22
|
recordSessionCompleted(durationSeconds) {
|
|
16
23
|
this.sessionsTotalCompleted++;
|
|
17
24
|
if (durationSeconds != null)
|
|
@@ -43,7 +50,7 @@ export class MetricsRegistry {
|
|
|
43
50
|
lines.push(labels ? `${name}{${labels}} ${value}` : `${name} ${value}`);
|
|
44
51
|
};
|
|
45
52
|
// Gauges
|
|
46
|
-
g("agentctl_sessions_active", "Number of active sessions", this.
|
|
53
|
+
g("agentctl_sessions_active", "Number of active sessions", this._activeSessionCount);
|
|
47
54
|
const locks = this.lockManager.listAll();
|
|
48
55
|
g("agentctl_locks_active", "Number of active locks", locks.filter((l) => l.type === "auto").length, 'type="auto"');
|
|
49
56
|
g("agentctl_locks_active", "Number of active locks", locks.filter((l) => l.type === "manual").length, 'type="manual"');
|
package/dist/daemon/server.js
CHANGED
|
@@ -60,17 +60,25 @@ export async function startDaemon(opts = {}) {
|
|
|
60
60
|
emitter,
|
|
61
61
|
});
|
|
62
62
|
const sessionTracker = new SessionTracker(state, { adapters });
|
|
63
|
-
const metrics = new MetricsRegistry(
|
|
63
|
+
const metrics = new MetricsRegistry(lockManager, fuseEngine);
|
|
64
64
|
// Wire up events
|
|
65
65
|
emitter.on("fuse.expired", () => {
|
|
66
66
|
metrics.recordFuseExpired();
|
|
67
67
|
});
|
|
68
|
-
// 9.
|
|
69
|
-
|
|
68
|
+
// 9. Initial PID liveness cleanup for daemon-launched sessions
|
|
69
|
+
// (replaces the old validateAllSessions — much simpler, only checks launches)
|
|
70
|
+
const initialDead = sessionTracker.cleanupDeadLaunches();
|
|
71
|
+
if (initialDead.length > 0) {
|
|
72
|
+
for (const id of initialDead)
|
|
73
|
+
lockManager.autoUnlock(id);
|
|
74
|
+
console.error(`Startup cleanup: marked ${initialDead.length} dead launches as stopped`);
|
|
75
|
+
}
|
|
70
76
|
// 10. Resume fuse timers
|
|
71
77
|
fuseEngine.resumeTimers();
|
|
72
|
-
// 11. Start
|
|
73
|
-
sessionTracker.
|
|
78
|
+
// 11. Start periodic PID liveness check for lock cleanup (30s interval)
|
|
79
|
+
sessionTracker.startLaunchCleanup((deadId) => {
|
|
80
|
+
lockManager.autoUnlock(deadId);
|
|
81
|
+
});
|
|
74
82
|
// 12. Create request handler
|
|
75
83
|
const handleRequest = createRequestHandler({
|
|
76
84
|
sessionTracker,
|
|
@@ -140,7 +148,7 @@ export async function startDaemon(opts = {}) {
|
|
|
140
148
|
});
|
|
141
149
|
// Shutdown function
|
|
142
150
|
const shutdown = async () => {
|
|
143
|
-
sessionTracker.
|
|
151
|
+
sessionTracker.stopLaunchCleanup();
|
|
144
152
|
fuseEngine.shutdown();
|
|
145
153
|
state.flush();
|
|
146
154
|
await state.persist();
|
|
@@ -247,20 +255,104 @@ function createRequestHandler(ctx) {
|
|
|
247
255
|
const params = (req.params || {});
|
|
248
256
|
switch (req.method) {
|
|
249
257
|
case "session.list": {
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
258
|
+
const adapterFilter = params.adapter;
|
|
259
|
+
const statusFilter = params.status;
|
|
260
|
+
const showAll = params.all;
|
|
261
|
+
const groupFilter = params.group;
|
|
262
|
+
// Fan out discover() to adapters (or just one if filtered)
|
|
263
|
+
const adapterEntries = adapterFilter
|
|
264
|
+
? Object.entries(ctx.adapters).filter(([name]) => name === adapterFilter)
|
|
265
|
+
: Object.entries(ctx.adapters);
|
|
266
|
+
const ADAPTER_TIMEOUT_MS = 5000;
|
|
267
|
+
const succeededAdapters = new Set();
|
|
268
|
+
const results = await Promise.allSettled(adapterEntries.map(([name, adapter]) => Promise.race([
|
|
269
|
+
adapter.discover().then((sessions) => {
|
|
270
|
+
succeededAdapters.add(name);
|
|
271
|
+
return sessions.map((s) => ({ ...s, adapter: name }));
|
|
272
|
+
}),
|
|
273
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error(`Adapter ${name} timed out`)), ADAPTER_TIMEOUT_MS)),
|
|
274
|
+
])));
|
|
275
|
+
// Merge fulfilled results, skip failed adapters
|
|
276
|
+
const discovered = results
|
|
277
|
+
.filter((r) => r.status === "fulfilled")
|
|
278
|
+
.flatMap((r) => r.value);
|
|
279
|
+
// Reconcile with launch metadata and enrich
|
|
280
|
+
const { sessions: allSessions, stoppedLaunchIds } = ctx.sessionTracker.reconcileAndEnrich(discovered, succeededAdapters);
|
|
281
|
+
// Release locks for sessions that disappeared from adapter results
|
|
282
|
+
for (const id of stoppedLaunchIds) {
|
|
283
|
+
ctx.lockManager.autoUnlock(id);
|
|
256
284
|
}
|
|
285
|
+
// Apply filters
|
|
286
|
+
let sessions = allSessions;
|
|
287
|
+
if (statusFilter) {
|
|
288
|
+
sessions = sessions.filter((s) => s.status === statusFilter);
|
|
289
|
+
}
|
|
290
|
+
else if (!showAll) {
|
|
291
|
+
sessions = sessions.filter((s) => s.status === "running" || s.status === "idle");
|
|
292
|
+
}
|
|
293
|
+
if (groupFilter) {
|
|
294
|
+
sessions = sessions.filter((s) => s.group === groupFilter);
|
|
295
|
+
}
|
|
296
|
+
// Sort: running first, then by most recent
|
|
297
|
+
sessions.sort((a, b) => {
|
|
298
|
+
if (a.status === "running" && b.status !== "running")
|
|
299
|
+
return -1;
|
|
300
|
+
if (b.status === "running" && a.status !== "running")
|
|
301
|
+
return 1;
|
|
302
|
+
return (new Date(b.startedAt).getTime() - new Date(a.startedAt).getTime());
|
|
303
|
+
});
|
|
304
|
+
// Update metrics gauge
|
|
305
|
+
ctx.metrics.setActiveSessionCount(allSessions.filter((s) => s.status === "running" || s.status === "idle").length);
|
|
257
306
|
return sessions;
|
|
258
307
|
}
|
|
259
308
|
case "session.status": {
|
|
260
|
-
const
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
309
|
+
const id = params.id;
|
|
310
|
+
// Check launch metadata to determine adapter
|
|
311
|
+
const launchRecord = ctx.sessionTracker.getSession(id);
|
|
312
|
+
const adapterName = params.adapter || launchRecord?.adapter;
|
|
313
|
+
// Determine which adapters to search
|
|
314
|
+
const adaptersToSearch = adapterName
|
|
315
|
+
? Object.entries(ctx.adapters).filter(([name]) => name === adapterName)
|
|
316
|
+
: Object.entries(ctx.adapters);
|
|
317
|
+
// Search adapters for the session
|
|
318
|
+
for (const [name, adapter] of adaptersToSearch) {
|
|
319
|
+
try {
|
|
320
|
+
const discovered = await adapter.discover();
|
|
321
|
+
let match = discovered.find((d) => d.id === id);
|
|
322
|
+
// Prefix match
|
|
323
|
+
if (!match) {
|
|
324
|
+
const prefixMatches = discovered.filter((d) => d.id.startsWith(id));
|
|
325
|
+
if (prefixMatches.length === 1)
|
|
326
|
+
match = prefixMatches[0];
|
|
327
|
+
}
|
|
328
|
+
if (match) {
|
|
329
|
+
const meta = ctx.sessionTracker.getSession(match.id);
|
|
330
|
+
return {
|
|
331
|
+
id: match.id,
|
|
332
|
+
adapter: name,
|
|
333
|
+
status: match.status,
|
|
334
|
+
startedAt: match.startedAt?.toISOString() ?? new Date().toISOString(),
|
|
335
|
+
stoppedAt: match.stoppedAt?.toISOString(),
|
|
336
|
+
cwd: match.cwd ?? meta?.cwd,
|
|
337
|
+
model: match.model ?? meta?.model,
|
|
338
|
+
prompt: match.prompt ?? meta?.prompt,
|
|
339
|
+
tokens: match.tokens,
|
|
340
|
+
cost: match.cost,
|
|
341
|
+
pid: match.pid,
|
|
342
|
+
spec: meta?.spec,
|
|
343
|
+
group: meta?.group,
|
|
344
|
+
meta: match.nativeMetadata ?? meta?.meta ?? {},
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
catch {
|
|
349
|
+
// Adapter failed — try next
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
// Fall back to launch metadata if adapters didn't find it
|
|
353
|
+
if (launchRecord)
|
|
354
|
+
return launchRecord;
|
|
355
|
+
throw new Error(`Session not found: ${id}`);
|
|
264
356
|
}
|
|
265
357
|
case "session.peek": {
|
|
266
358
|
// Auto-detect adapter from tracked session, fall back to param or claude-code
|
|
@@ -315,47 +407,57 @@ function createRequestHandler(ctx) {
|
|
|
315
407
|
return record;
|
|
316
408
|
}
|
|
317
409
|
case "session.stop": {
|
|
318
|
-
const
|
|
319
|
-
|
|
320
|
-
throw new Error(`Session not found: ${params.id}`);
|
|
410
|
+
const id = params.id;
|
|
411
|
+
const launchRecord = ctx.sessionTracker.getSession(id);
|
|
321
412
|
// Ghost pending entry with dead PID: remove from state with --force
|
|
322
|
-
if (
|
|
413
|
+
if (launchRecord?.id.startsWith("pending-") &&
|
|
323
414
|
params.force &&
|
|
324
|
-
|
|
325
|
-
!isProcessAlive(
|
|
326
|
-
ctx.lockManager.autoUnlock(
|
|
327
|
-
ctx.sessionTracker.removeSession(
|
|
415
|
+
launchRecord.pid &&
|
|
416
|
+
!isProcessAlive(launchRecord.pid)) {
|
|
417
|
+
ctx.lockManager.autoUnlock(launchRecord.id);
|
|
418
|
+
ctx.sessionTracker.removeSession(launchRecord.id);
|
|
328
419
|
return null;
|
|
329
420
|
}
|
|
330
|
-
const
|
|
421
|
+
const adapterName = params.adapter || launchRecord?.adapter;
|
|
422
|
+
if (!adapterName)
|
|
423
|
+
throw new Error(`Session not found: ${id}. Specify --adapter to stop a non-daemon session.`);
|
|
424
|
+
const adapter = ctx.adapters[adapterName];
|
|
331
425
|
if (!adapter)
|
|
332
|
-
throw new Error(`Unknown adapter: ${
|
|
333
|
-
|
|
426
|
+
throw new Error(`Unknown adapter: ${adapterName}`);
|
|
427
|
+
const sessionId = launchRecord?.id || id;
|
|
428
|
+
await adapter.stop(sessionId, {
|
|
334
429
|
force: params.force,
|
|
335
430
|
});
|
|
336
431
|
// Remove auto-lock
|
|
337
|
-
ctx.lockManager.autoUnlock(
|
|
338
|
-
// Mark stopped
|
|
339
|
-
const stopped = ctx.sessionTracker.onSessionExit(
|
|
432
|
+
ctx.lockManager.autoUnlock(sessionId);
|
|
433
|
+
// Mark stopped in launch metadata
|
|
434
|
+
const stopped = ctx.sessionTracker.onSessionExit(sessionId);
|
|
340
435
|
if (stopped) {
|
|
341
436
|
ctx.metrics.recordSessionStopped();
|
|
342
437
|
}
|
|
343
438
|
return null;
|
|
344
439
|
}
|
|
345
440
|
case "session.resume": {
|
|
346
|
-
const
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
441
|
+
const id = params.id;
|
|
442
|
+
const launchRecord = ctx.sessionTracker.getSession(id);
|
|
443
|
+
const adapterName = params.adapter || launchRecord?.adapter;
|
|
444
|
+
if (!adapterName)
|
|
445
|
+
throw new Error(`Session not found: ${id}. Specify --adapter to resume a non-daemon session.`);
|
|
446
|
+
const adapter = ctx.adapters[adapterName];
|
|
350
447
|
if (!adapter)
|
|
351
|
-
throw new Error(`Unknown adapter: ${
|
|
352
|
-
await adapter.resume(
|
|
448
|
+
throw new Error(`Unknown adapter: ${adapterName}`);
|
|
449
|
+
await adapter.resume(launchRecord?.id || id, params.message);
|
|
353
450
|
return null;
|
|
354
451
|
}
|
|
355
|
-
// --- Prune command (#40) ---
|
|
452
|
+
// --- Prune command (#40) --- kept for CLI backward compat
|
|
356
453
|
case "session.prune": {
|
|
357
|
-
|
|
358
|
-
|
|
454
|
+
// In the stateless model, there's no session registry to prune.
|
|
455
|
+
// Clean up dead launches (PID liveness check) as a best-effort action.
|
|
456
|
+
const deadIds = ctx.sessionTracker.cleanupDeadLaunches();
|
|
457
|
+
for (const id of deadIds) {
|
|
458
|
+
ctx.lockManager.autoUnlock(id);
|
|
459
|
+
}
|
|
460
|
+
return { pruned: deadIds.length };
|
|
359
461
|
}
|
|
360
462
|
case "lock.list":
|
|
361
463
|
return ctx.lockManager.listAll();
|
|
@@ -388,7 +490,7 @@ function createRequestHandler(ctx) {
|
|
|
388
490
|
return {
|
|
389
491
|
pid: process.pid,
|
|
390
492
|
uptime: Date.now() - startTime,
|
|
391
|
-
sessions: ctx.
|
|
493
|
+
sessions: ctx.metrics.activeSessionCount,
|
|
392
494
|
locks: ctx.lockManager.listAll().length,
|
|
393
495
|
fuses: ctx.fuseEngine.listActive().length,
|
|
394
496
|
};
|
|
@@ -1,61 +1,60 @@
|
|
|
1
|
-
import type { AgentAdapter, AgentSession } from "../core/types.js";
|
|
1
|
+
import type { AgentAdapter, AgentSession, DiscoveredSession } from "../core/types.js";
|
|
2
2
|
import type { SessionRecord, StateManager } from "./state.js";
|
|
3
3
|
export interface SessionTrackerOpts {
|
|
4
4
|
adapters: Record<string, AgentAdapter>;
|
|
5
|
-
pollIntervalMs?: number;
|
|
6
5
|
/** Override PID liveness check for testing (default: process.kill(pid, 0)) */
|
|
7
6
|
isProcessAlive?: (pid: number) => boolean;
|
|
8
7
|
}
|
|
8
|
+
/**
|
|
9
|
+
* Simplified session tracker for the stateless daemon core (ADR 004).
|
|
10
|
+
*
|
|
11
|
+
* Adapters own session truth. The daemon only tracks:
|
|
12
|
+
* - Launch metadata (prompt, group, spec, cwd) for sessions launched via agentctl
|
|
13
|
+
* - Locks and fuses (handled by LockManager / FuseEngine)
|
|
14
|
+
*
|
|
15
|
+
* The old polling loop, pruning, and state-based session registry are removed.
|
|
16
|
+
* session.list now fans out adapter.discover() at call time.
|
|
17
|
+
*/
|
|
9
18
|
export declare class SessionTracker {
|
|
10
19
|
private state;
|
|
11
20
|
private adapters;
|
|
12
|
-
private pollIntervalMs;
|
|
13
|
-
private pollHandle;
|
|
14
|
-
private polling;
|
|
15
21
|
private readonly isProcessAlive;
|
|
22
|
+
private cleanupHandle;
|
|
16
23
|
constructor(state: StateManager, opts: SessionTrackerOpts);
|
|
17
|
-
startPolling(): void;
|
|
18
|
-
/** Run poll() with a guard to skip if the previous cycle is still running */
|
|
19
|
-
private guardedPoll;
|
|
20
|
-
stopPolling(): void;
|
|
21
|
-
private poll;
|
|
22
24
|
/**
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
25
|
+
* Start periodic PID liveness check for daemon-launched sessions.
|
|
26
|
+
* This is a lightweight check (no adapter fan-out) that runs every 30s
|
|
27
|
+
* to detect dead sessions and return their IDs for lock cleanup.
|
|
26
28
|
*/
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
* Any session marked as "running" or "idle" whose PID is dead gets
|
|
31
|
-
* immediately marked as "stopped". This prevents unbounded growth of
|
|
32
|
-
* ghost sessions across daemon restarts.
|
|
33
|
-
*/
|
|
34
|
-
validateAllSessions(): void;
|
|
35
|
-
/**
|
|
36
|
-
* Aggressively prune all clearly-dead sessions (#40).
|
|
37
|
-
* Returns the number of sessions pruned.
|
|
38
|
-
* Called via `agentctl prune` command.
|
|
39
|
-
*/
|
|
40
|
-
pruneDeadSessions(): number;
|
|
41
|
-
/**
|
|
42
|
-
* Remove stopped sessions from state that have been stopped for more than 7 days.
|
|
43
|
-
* This reduces overhead from accumulating hundreds of historical sessions.
|
|
44
|
-
*/
|
|
45
|
-
private pruneOldSessions;
|
|
46
|
-
/** Track a newly launched session */
|
|
29
|
+
startLaunchCleanup(onDead?: (sessionId: string) => void): void;
|
|
30
|
+
stopLaunchCleanup(): void;
|
|
31
|
+
/** Track a newly launched session (stores launch metadata in state) */
|
|
47
32
|
track(session: AgentSession, adapterName: string): SessionRecord;
|
|
48
|
-
/** Get session
|
|
33
|
+
/** Get session launch metadata by id (exact or prefix match) */
|
|
49
34
|
getSession(id: string): SessionRecord | undefined;
|
|
50
|
-
/**
|
|
51
|
-
listSessions(opts?: {
|
|
52
|
-
status?: string;
|
|
53
|
-
all?: boolean;
|
|
54
|
-
adapter?: string;
|
|
55
|
-
}): SessionRecord[];
|
|
56
|
-
activeCount(): number;
|
|
57
|
-
/** Remove a session from state entirely (used for ghost cleanup) */
|
|
35
|
+
/** Remove a session from launch metadata */
|
|
58
36
|
removeSession(sessionId: string): void;
|
|
59
|
-
/** Called when a session stops —
|
|
37
|
+
/** Called when a session stops — marks it in launch metadata, returns the record */
|
|
60
38
|
onSessionExit(sessionId: string): SessionRecord | undefined;
|
|
39
|
+
/**
|
|
40
|
+
* Merge adapter-discovered sessions with daemon launch metadata.
|
|
41
|
+
*
|
|
42
|
+
* 1. Enrich discovered sessions with launch metadata (prompt, group, spec, etc.)
|
|
43
|
+
* 2. Reconcile: mark daemon-launched sessions as stopped if their adapter
|
|
44
|
+
* succeeded but didn't return them (and they're past the grace period).
|
|
45
|
+
* 3. Include recently-launched sessions that adapters haven't discovered yet.
|
|
46
|
+
*
|
|
47
|
+
* Returns the merged session list and IDs of sessions that were marked stopped
|
|
48
|
+
* (for lock cleanup by the caller).
|
|
49
|
+
*/
|
|
50
|
+
reconcileAndEnrich(discovered: DiscoveredSession[], succeededAdapters: Set<string>): {
|
|
51
|
+
sessions: SessionRecord[];
|
|
52
|
+
stoppedLaunchIds: string[];
|
|
53
|
+
};
|
|
54
|
+
/**
|
|
55
|
+
* Check PID liveness for daemon-launched sessions.
|
|
56
|
+
* Returns IDs of sessions whose PIDs have died.
|
|
57
|
+
* This is a lightweight check (no adapter fan-out) for lock cleanup.
|
|
58
|
+
*/
|
|
59
|
+
cleanupDeadLaunches(): string[];
|
|
61
60
|
}
|
|
@@ -1,220 +1,53 @@
|
|
|
1
|
-
/**
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Grace period for recently-launched sessions.
|
|
3
|
+
* If a session was launched less than this many ms ago and the adapter
|
|
4
|
+
* doesn't return it yet, don't mark it stopped — the adapter may not
|
|
5
|
+
* have discovered it yet.
|
|
6
|
+
*/
|
|
7
|
+
const LAUNCH_GRACE_PERIOD_MS = 30_000;
|
|
8
|
+
/**
|
|
9
|
+
* Simplified session tracker for the stateless daemon core (ADR 004).
|
|
10
|
+
*
|
|
11
|
+
* Adapters own session truth. The daemon only tracks:
|
|
12
|
+
* - Launch metadata (prompt, group, spec, cwd) for sessions launched via agentctl
|
|
13
|
+
* - Locks and fuses (handled by LockManager / FuseEngine)
|
|
14
|
+
*
|
|
15
|
+
* The old polling loop, pruning, and state-based session registry are removed.
|
|
16
|
+
* session.list now fans out adapter.discover() at call time.
|
|
17
|
+
*/
|
|
3
18
|
export class SessionTracker {
|
|
4
19
|
state;
|
|
5
20
|
adapters;
|
|
6
|
-
pollIntervalMs;
|
|
7
|
-
pollHandle = null;
|
|
8
|
-
polling = false;
|
|
9
21
|
isProcessAlive;
|
|
22
|
+
cleanupHandle = null;
|
|
10
23
|
constructor(state, opts) {
|
|
11
24
|
this.state = state;
|
|
12
25
|
this.adapters = opts.adapters;
|
|
13
|
-
this.pollIntervalMs = opts.pollIntervalMs ?? 5000;
|
|
14
26
|
this.isProcessAlive = opts.isProcessAlive ?? defaultIsProcessAlive;
|
|
15
27
|
}
|
|
16
|
-
startPolling() {
|
|
17
|
-
if (this.pollHandle)
|
|
18
|
-
return;
|
|
19
|
-
// Prune old stopped sessions on startup
|
|
20
|
-
this.pruneOldSessions();
|
|
21
|
-
// Initial poll
|
|
22
|
-
this.guardedPoll();
|
|
23
|
-
this.pollHandle = setInterval(() => {
|
|
24
|
-
this.guardedPoll();
|
|
25
|
-
}, this.pollIntervalMs);
|
|
26
|
-
}
|
|
27
|
-
/** Run poll() with a guard to skip if the previous cycle is still running */
|
|
28
|
-
guardedPoll() {
|
|
29
|
-
if (this.polling)
|
|
30
|
-
return;
|
|
31
|
-
this.polling = true;
|
|
32
|
-
this.poll()
|
|
33
|
-
.catch((err) => console.error("Poll error:", err))
|
|
34
|
-
.finally(() => {
|
|
35
|
-
this.polling = false;
|
|
36
|
-
});
|
|
37
|
-
}
|
|
38
|
-
stopPolling() {
|
|
39
|
-
if (this.pollHandle) {
|
|
40
|
-
clearInterval(this.pollHandle);
|
|
41
|
-
this.pollHandle = null;
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
async poll() {
|
|
45
|
-
// Collect PIDs from all adapter-discovered sessions (the source of truth)
|
|
46
|
-
const adapterPidToId = new Map();
|
|
47
|
-
for (const [adapterName, adapter] of Object.entries(this.adapters)) {
|
|
48
|
-
try {
|
|
49
|
-
// Discover-first: adapter.discover() is the ground truth
|
|
50
|
-
const discovered = await adapter.discover();
|
|
51
|
-
for (const disc of discovered) {
|
|
52
|
-
if (disc.pid) {
|
|
53
|
-
adapterPidToId.set(disc.pid, disc.id);
|
|
54
|
-
}
|
|
55
|
-
const existing = this.state.getSession(disc.id);
|
|
56
|
-
const record = discoveredToRecord(disc, adapterName);
|
|
57
|
-
if (!existing) {
|
|
58
|
-
this.state.setSession(disc.id, record);
|
|
59
|
-
}
|
|
60
|
-
else if (existing.status !== record.status ||
|
|
61
|
-
(!existing.model && record.model)) {
|
|
62
|
-
// Status changed or model resolved — update, preserving metadata
|
|
63
|
-
this.state.setSession(disc.id, {
|
|
64
|
-
...existing,
|
|
65
|
-
status: record.status,
|
|
66
|
-
stoppedAt: record.stoppedAt,
|
|
67
|
-
model: record.model || existing.model,
|
|
68
|
-
tokens: record.tokens,
|
|
69
|
-
cost: record.cost,
|
|
70
|
-
prompt: record.prompt || existing.prompt,
|
|
71
|
-
pid: record.pid,
|
|
72
|
-
});
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
catch {
|
|
77
|
-
// Adapter unavailable — skip
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
// Reap stale entries from daemon state
|
|
81
|
-
this.reapStaleEntries(adapterPidToId);
|
|
82
|
-
}
|
|
83
28
|
/**
|
|
84
|
-
*
|
|
85
|
-
*
|
|
86
|
-
*
|
|
29
|
+
* Start periodic PID liveness check for daemon-launched sessions.
|
|
30
|
+
* This is a lightweight check (no adapter fan-out) that runs every 30s
|
|
31
|
+
* to detect dead sessions and return their IDs for lock cleanup.
|
|
87
32
|
*/
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
if (
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
this.state.removeSession(id);
|
|
97
|
-
continue;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
// Bug 1: If session is "running"/"idle" but PID is dead, mark stopped
|
|
101
|
-
if ((record.status === "running" || record.status === "idle") &&
|
|
102
|
-
record.pid) {
|
|
103
|
-
// Only reap if the adapter didn't return this session as running
|
|
104
|
-
// (adapter is the source of truth for sessions it knows about)
|
|
105
|
-
const adapterId = adapterPidToId.get(record.pid);
|
|
106
|
-
if (adapterId === id)
|
|
107
|
-
continue; // Adapter confirmed this PID is active
|
|
108
|
-
if (!this.isProcessAlive(record.pid)) {
|
|
109
|
-
this.state.setSession(id, {
|
|
110
|
-
...record,
|
|
111
|
-
status: "stopped",
|
|
112
|
-
stoppedAt: new Date().toISOString(),
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
/**
|
|
119
|
-
* Validate all sessions on daemon startup (#40).
|
|
120
|
-
* Any session marked as "running" or "idle" whose PID is dead gets
|
|
121
|
-
* immediately marked as "stopped". This prevents unbounded growth of
|
|
122
|
-
* ghost sessions across daemon restarts.
|
|
123
|
-
*/
|
|
124
|
-
validateAllSessions() {
|
|
125
|
-
const sessions = this.state.getSessions();
|
|
126
|
-
let cleaned = 0;
|
|
127
|
-
for (const [id, record] of Object.entries(sessions)) {
|
|
128
|
-
if (record.status !== "running" && record.status !== "idle")
|
|
129
|
-
continue;
|
|
130
|
-
if (record.pid) {
|
|
131
|
-
if (!this.isProcessAlive(record.pid)) {
|
|
132
|
-
this.state.setSession(id, {
|
|
133
|
-
...record,
|
|
134
|
-
status: "stopped",
|
|
135
|
-
stoppedAt: new Date().toISOString(),
|
|
136
|
-
});
|
|
137
|
-
cleaned++;
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
else {
|
|
141
|
-
// No PID recorded — can't verify, mark as stopped
|
|
142
|
-
this.state.setSession(id, {
|
|
143
|
-
...record,
|
|
144
|
-
status: "stopped",
|
|
145
|
-
stoppedAt: new Date().toISOString(),
|
|
146
|
-
});
|
|
147
|
-
cleaned++;
|
|
33
|
+
startLaunchCleanup(onDead) {
|
|
34
|
+
if (this.cleanupHandle)
|
|
35
|
+
return;
|
|
36
|
+
this.cleanupHandle = setInterval(() => {
|
|
37
|
+
const dead = this.cleanupDeadLaunches();
|
|
38
|
+
if (onDead) {
|
|
39
|
+
for (const id of dead)
|
|
40
|
+
onDead(id);
|
|
148
41
|
}
|
|
149
|
-
}
|
|
150
|
-
if (cleaned > 0) {
|
|
151
|
-
console.error(`Validated sessions on startup: marked ${cleaned} dead sessions as stopped`);
|
|
152
|
-
}
|
|
42
|
+
}, 30_000);
|
|
153
43
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
*/
|
|
159
|
-
pruneDeadSessions() {
|
|
160
|
-
const sessions = this.state.getSessions();
|
|
161
|
-
let pruned = 0;
|
|
162
|
-
for (const [id, record] of Object.entries(sessions)) {
|
|
163
|
-
// Remove stopped/completed/failed sessions older than 24h
|
|
164
|
-
if (record.status === "stopped" ||
|
|
165
|
-
record.status === "completed" ||
|
|
166
|
-
record.status === "failed") {
|
|
167
|
-
const stoppedAt = record.stoppedAt
|
|
168
|
-
? new Date(record.stoppedAt).getTime()
|
|
169
|
-
: new Date(record.startedAt).getTime();
|
|
170
|
-
const age = Date.now() - stoppedAt;
|
|
171
|
-
if (age > 24 * 60 * 60 * 1000) {
|
|
172
|
-
this.state.removeSession(id);
|
|
173
|
-
pruned++;
|
|
174
|
-
}
|
|
175
|
-
continue;
|
|
176
|
-
}
|
|
177
|
-
// Remove running/idle sessions whose PID is dead
|
|
178
|
-
if (record.status === "running" || record.status === "idle") {
|
|
179
|
-
if (record.pid && !this.isProcessAlive(record.pid)) {
|
|
180
|
-
this.state.removeSession(id);
|
|
181
|
-
pruned++;
|
|
182
|
-
}
|
|
183
|
-
else if (!record.pid) {
|
|
184
|
-
this.state.removeSession(id);
|
|
185
|
-
pruned++;
|
|
186
|
-
}
|
|
187
|
-
}
|
|
44
|
+
stopLaunchCleanup() {
|
|
45
|
+
if (this.cleanupHandle) {
|
|
46
|
+
clearInterval(this.cleanupHandle);
|
|
47
|
+
this.cleanupHandle = null;
|
|
188
48
|
}
|
|
189
|
-
return pruned;
|
|
190
49
|
}
|
|
191
|
-
/**
|
|
192
|
-
* Remove stopped sessions from state that have been stopped for more than 7 days.
|
|
193
|
-
* This reduces overhead from accumulating hundreds of historical sessions.
|
|
194
|
-
*/
|
|
195
|
-
pruneOldSessions() {
|
|
196
|
-
const sessions = this.state.getSessions();
|
|
197
|
-
const now = Date.now();
|
|
198
|
-
let pruned = 0;
|
|
199
|
-
for (const [id, record] of Object.entries(sessions)) {
|
|
200
|
-
if (record.status !== "stopped" &&
|
|
201
|
-
record.status !== "completed" &&
|
|
202
|
-
record.status !== "failed") {
|
|
203
|
-
continue;
|
|
204
|
-
}
|
|
205
|
-
const stoppedAt = record.stoppedAt
|
|
206
|
-
? new Date(record.stoppedAt).getTime()
|
|
207
|
-
: new Date(record.startedAt).getTime();
|
|
208
|
-
if (now - stoppedAt > STOPPED_SESSION_PRUNE_AGE_MS) {
|
|
209
|
-
this.state.removeSession(id);
|
|
210
|
-
pruned++;
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
if (pruned > 0) {
|
|
214
|
-
console.error(`Pruned ${pruned} sessions stopped >7 days ago from state`);
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
/** Track a newly launched session */
|
|
50
|
+
/** Track a newly launched session (stores launch metadata in state) */
|
|
218
51
|
track(session, adapterName) {
|
|
219
52
|
const record = sessionToRecord(session, adapterName);
|
|
220
53
|
// Pending→UUID reconciliation: if this is a real session (not pending),
|
|
@@ -229,7 +62,7 @@ export class SessionTracker {
|
|
|
229
62
|
this.state.setSession(session.id, record);
|
|
230
63
|
return record;
|
|
231
64
|
}
|
|
232
|
-
/** Get session
|
|
65
|
+
/** Get session launch metadata by id (exact or prefix match) */
|
|
233
66
|
getSession(id) {
|
|
234
67
|
// Exact match
|
|
235
68
|
const exact = this.state.getSession(id);
|
|
@@ -242,48 +75,11 @@ export class SessionTracker {
|
|
|
242
75
|
return matches[0][1];
|
|
243
76
|
return undefined;
|
|
244
77
|
}
|
|
245
|
-
/**
|
|
246
|
-
listSessions(opts) {
|
|
247
|
-
const sessions = Object.values(this.state.getSessions());
|
|
248
|
-
// Liveness check: mark sessions with dead PIDs as stopped
|
|
249
|
-
for (const s of sessions) {
|
|
250
|
-
if ((s.status === "running" || s.status === "idle") && s.pid) {
|
|
251
|
-
if (!this.isProcessAlive(s.pid)) {
|
|
252
|
-
s.status = "stopped";
|
|
253
|
-
s.stoppedAt = new Date().toISOString();
|
|
254
|
-
this.state.setSession(s.id, s);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
let filtered = sessions;
|
|
259
|
-
if (opts?.adapter) {
|
|
260
|
-
filtered = filtered.filter((s) => s.adapter === opts.adapter);
|
|
261
|
-
}
|
|
262
|
-
if (opts?.status) {
|
|
263
|
-
filtered = filtered.filter((s) => s.status === opts.status);
|
|
264
|
-
}
|
|
265
|
-
else if (!opts?.all) {
|
|
266
|
-
filtered = filtered.filter((s) => s.status === "running" || s.status === "idle");
|
|
267
|
-
}
|
|
268
|
-
// Dedup: if a pending-* entry shares a PID with a resolved entry, show only the resolved one
|
|
269
|
-
filtered = deduplicatePendingSessions(filtered);
|
|
270
|
-
return filtered.sort((a, b) => {
|
|
271
|
-
// Running first, then by recency
|
|
272
|
-
if (a.status === "running" && b.status !== "running")
|
|
273
|
-
return -1;
|
|
274
|
-
if (b.status === "running" && a.status !== "running")
|
|
275
|
-
return 1;
|
|
276
|
-
return new Date(b.startedAt).getTime() - new Date(a.startedAt).getTime();
|
|
277
|
-
});
|
|
278
|
-
}
|
|
279
|
-
activeCount() {
|
|
280
|
-
return Object.values(this.state.getSessions()).filter((s) => s.status === "running" || s.status === "idle").length;
|
|
281
|
-
}
|
|
282
|
-
/** Remove a session from state entirely (used for ghost cleanup) */
|
|
78
|
+
/** Remove a session from launch metadata */
|
|
283
79
|
removeSession(sessionId) {
|
|
284
80
|
this.state.removeSession(sessionId);
|
|
285
81
|
}
|
|
286
|
-
/** Called when a session stops —
|
|
82
|
+
/** Called when a session stops — marks it in launch metadata, returns the record */
|
|
287
83
|
onSessionExit(sessionId) {
|
|
288
84
|
const session = this.state.getSession(sessionId);
|
|
289
85
|
if (session) {
|
|
@@ -293,6 +89,91 @@ export class SessionTracker {
|
|
|
293
89
|
}
|
|
294
90
|
return session;
|
|
295
91
|
}
|
|
92
|
+
/**
|
|
93
|
+
* Merge adapter-discovered sessions with daemon launch metadata.
|
|
94
|
+
*
|
|
95
|
+
* 1. Enrich discovered sessions with launch metadata (prompt, group, spec, etc.)
|
|
96
|
+
* 2. Reconcile: mark daemon-launched sessions as stopped if their adapter
|
|
97
|
+
* succeeded but didn't return them (and they're past the grace period).
|
|
98
|
+
* 3. Include recently-launched sessions that adapters haven't discovered yet.
|
|
99
|
+
*
|
|
100
|
+
* Returns the merged session list and IDs of sessions that were marked stopped
|
|
101
|
+
* (for lock cleanup by the caller).
|
|
102
|
+
*/
|
|
103
|
+
reconcileAndEnrich(discovered, succeededAdapters) {
|
|
104
|
+
// Build lookups for discovered sessions
|
|
105
|
+
const discoveredIds = new Set(discovered.map((d) => d.id));
|
|
106
|
+
const discoveredPids = new Map();
|
|
107
|
+
for (const d of discovered) {
|
|
108
|
+
if (d.pid)
|
|
109
|
+
discoveredPids.set(d.pid, d.id);
|
|
110
|
+
}
|
|
111
|
+
// 1. Convert discovered sessions to records, enriching with launch metadata
|
|
112
|
+
const sessions = discovered.map((disc) => enrichDiscovered(disc, this.state.getSession(disc.id)));
|
|
113
|
+
// 2. Reconcile daemon-launched sessions that disappeared from adapter results
|
|
114
|
+
const stoppedLaunchIds = [];
|
|
115
|
+
const now = Date.now();
|
|
116
|
+
for (const [id, record] of Object.entries(this.state.getSessions())) {
|
|
117
|
+
if (record.status !== "running" &&
|
|
118
|
+
record.status !== "idle" &&
|
|
119
|
+
record.status !== "pending")
|
|
120
|
+
continue;
|
|
121
|
+
// If adapter for this session didn't succeed, include as-is from launch metadata
|
|
122
|
+
// (we can't verify status, so trust the last-known state)
|
|
123
|
+
if (!succeededAdapters.has(record.adapter)) {
|
|
124
|
+
sessions.push(record);
|
|
125
|
+
continue;
|
|
126
|
+
}
|
|
127
|
+
// Skip if adapter returned this session (it's still active)
|
|
128
|
+
if (discoveredIds.has(id))
|
|
129
|
+
continue;
|
|
130
|
+
// Check if this session's PID was resolved to a different ID (pending→UUID)
|
|
131
|
+
if (record.pid && discoveredPids.has(record.pid)) {
|
|
132
|
+
// PID was resolved to a real session — remove stale launch entry
|
|
133
|
+
this.state.removeSession(id);
|
|
134
|
+
stoppedLaunchIds.push(id);
|
|
135
|
+
continue;
|
|
136
|
+
}
|
|
137
|
+
// Grace period: don't mark recently-launched sessions as stopped
|
|
138
|
+
const launchAge = now - new Date(record.startedAt).getTime();
|
|
139
|
+
if (launchAge < LAUNCH_GRACE_PERIOD_MS) {
|
|
140
|
+
// Still within grace period — include as-is in results
|
|
141
|
+
sessions.push(record);
|
|
142
|
+
continue;
|
|
143
|
+
}
|
|
144
|
+
// Session disappeared from adapter results — mark stopped
|
|
145
|
+
this.state.setSession(id, {
|
|
146
|
+
...record,
|
|
147
|
+
status: "stopped",
|
|
148
|
+
stoppedAt: new Date().toISOString(),
|
|
149
|
+
});
|
|
150
|
+
stoppedLaunchIds.push(id);
|
|
151
|
+
}
|
|
152
|
+
return { sessions, stoppedLaunchIds };
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Check PID liveness for daemon-launched sessions.
|
|
156
|
+
* Returns IDs of sessions whose PIDs have died.
|
|
157
|
+
* This is a lightweight check (no adapter fan-out) for lock cleanup.
|
|
158
|
+
*/
|
|
159
|
+
cleanupDeadLaunches() {
|
|
160
|
+
const dead = [];
|
|
161
|
+
for (const [id, record] of Object.entries(this.state.getSessions())) {
|
|
162
|
+
if (record.status !== "running" &&
|
|
163
|
+
record.status !== "idle" &&
|
|
164
|
+
record.status !== "pending")
|
|
165
|
+
continue;
|
|
166
|
+
if (record.pid && !this.isProcessAlive(record.pid)) {
|
|
167
|
+
this.state.setSession(id, {
|
|
168
|
+
...record,
|
|
169
|
+
status: "stopped",
|
|
170
|
+
stoppedAt: new Date().toISOString(),
|
|
171
|
+
});
|
|
172
|
+
dead.push(id);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
return dead;
|
|
176
|
+
}
|
|
296
177
|
}
|
|
297
178
|
/** Check if a process is alive via kill(pid, 0) signal check */
|
|
298
179
|
function defaultIsProcessAlive(pid) {
|
|
@@ -305,22 +186,25 @@ function defaultIsProcessAlive(pid) {
|
|
|
305
186
|
}
|
|
306
187
|
}
|
|
307
188
|
/**
|
|
308
|
-
*
|
|
309
|
-
* This is a safety net for list output — the poll() reaper handles cleanup in state.
|
|
189
|
+
* Convert a discovered session to a SessionRecord, enriching with launch metadata.
|
|
310
190
|
*/
|
|
311
|
-
function
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
191
|
+
function enrichDiscovered(disc, launchMeta) {
|
|
192
|
+
return {
|
|
193
|
+
id: disc.id,
|
|
194
|
+
adapter: disc.adapter,
|
|
195
|
+
status: disc.status,
|
|
196
|
+
startedAt: disc.startedAt?.toISOString() ?? new Date().toISOString(),
|
|
197
|
+
stoppedAt: disc.stoppedAt?.toISOString(),
|
|
198
|
+
cwd: disc.cwd ?? launchMeta?.cwd,
|
|
199
|
+
model: disc.model ?? launchMeta?.model,
|
|
200
|
+
prompt: disc.prompt ?? launchMeta?.prompt,
|
|
201
|
+
tokens: disc.tokens,
|
|
202
|
+
cost: disc.cost,
|
|
203
|
+
pid: disc.pid,
|
|
204
|
+
spec: launchMeta?.spec,
|
|
205
|
+
group: launchMeta?.group,
|
|
206
|
+
meta: disc.nativeMetadata ?? launchMeta?.meta ?? {},
|
|
207
|
+
};
|
|
324
208
|
}
|
|
325
209
|
function sessionToRecord(session, adapterName) {
|
|
326
210
|
return {
|
|
@@ -340,20 +224,3 @@ function sessionToRecord(session, adapterName) {
|
|
|
340
224
|
meta: session.meta,
|
|
341
225
|
};
|
|
342
226
|
}
|
|
343
|
-
/** Convert a DiscoveredSession (adapter ground truth) to a SessionRecord for state */
|
|
344
|
-
function discoveredToRecord(disc, adapterName) {
|
|
345
|
-
return {
|
|
346
|
-
id: disc.id,
|
|
347
|
-
adapter: adapterName,
|
|
348
|
-
status: disc.status,
|
|
349
|
-
startedAt: disc.startedAt?.toISOString() ?? new Date().toISOString(),
|
|
350
|
-
stoppedAt: disc.stoppedAt?.toISOString(),
|
|
351
|
-
cwd: disc.cwd,
|
|
352
|
-
model: disc.model,
|
|
353
|
-
prompt: disc.prompt,
|
|
354
|
-
tokens: disc.tokens,
|
|
355
|
-
cost: disc.cost,
|
|
356
|
-
pid: disc.pid,
|
|
357
|
-
meta: disc.nativeMetadata ?? {},
|
|
358
|
-
};
|
|
359
|
-
}
|