aegis-bridge 2.2.5 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth.d.ts +2 -0
- package/dist/auth.js +10 -0
- package/dist/channels/types.d.ts +1 -1
- package/dist/events.d.ts +2 -0
- package/dist/events.js +9 -0
- package/dist/mcp-server.js +2 -1
- package/dist/monitor.d.ts +9 -0
- package/dist/monitor.js +72 -3
- package/dist/server.js +44 -6
- package/dist/session.d.ts +11 -1
- package/dist/session.js +82 -7
- package/dist/tmux.d.ts +9 -0
- package/dist/tmux.js +26 -0
- package/package.json +1 -1
package/dist/auth.d.ts
CHANGED
|
@@ -53,6 +53,8 @@ export declare class AuthManager {
|
|
|
53
53
|
};
|
|
54
54
|
/** Hash a key with SHA-256. */
|
|
55
55
|
static hashKey(key: string): string;
|
|
56
|
+
/** #398: Sweep stale rate limit buckets. Prune entries with expired windows. */
|
|
57
|
+
sweepStaleRateLimits(): void;
|
|
56
58
|
/** Check if auth is enabled (master token or any keys). */
|
|
57
59
|
get authEnabled(): boolean;
|
|
58
60
|
/**
|
package/dist/auth.js
CHANGED
|
@@ -127,6 +127,16 @@ export class AuthManager {
|
|
|
127
127
|
static hashKey(key) {
|
|
128
128
|
return createHash('sha256').update(key).digest('hex');
|
|
129
129
|
}
|
|
130
|
+
/** #398: Sweep stale rate limit buckets. Prune entries with expired windows. */
|
|
131
|
+
sweepStaleRateLimits() {
|
|
132
|
+
const now = Date.now();
|
|
133
|
+
const windowMs = 60_000; // 1 minute
|
|
134
|
+
for (const [keyId, bucket] of this.rateLimits) {
|
|
135
|
+
if (now - bucket.windowStart > windowMs) {
|
|
136
|
+
this.rateLimits.delete(keyId);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
130
140
|
/** Check if auth is enabled (master token or any keys). */
|
|
131
141
|
get authEnabled() {
|
|
132
142
|
return !!this.masterToken || this.store.keys.length > 0;
|
package/dist/channels/types.d.ts
CHANGED
|
@@ -14,7 +14,7 @@ export interface ChannelHealthStatus {
|
|
|
14
14
|
pendingCount: number;
|
|
15
15
|
}
|
|
16
16
|
/** Events a channel can subscribe to. */
|
|
17
|
-
export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
|
|
17
|
+
export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'status.recovered' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
|
|
18
18
|
/** Payload for all session events. */
|
|
19
19
|
export interface SessionEventPayload {
|
|
20
20
|
event: SessionEvent;
|
package/dist/events.d.ts
CHANGED
|
@@ -81,6 +81,8 @@ export declare class SessionEventBus {
|
|
|
81
81
|
id: number;
|
|
82
82
|
event: GlobalSSEEvent;
|
|
83
83
|
}>;
|
|
84
|
+
/** #398: Clean up per-session state (call when session is killed). */
|
|
85
|
+
cleanupSession(sessionId: string): void;
|
|
84
86
|
/** Clean up all emitters. */
|
|
85
87
|
destroy(): void;
|
|
86
88
|
}
|
package/dist/events.js
CHANGED
|
@@ -243,6 +243,15 @@ export class SessionEventBus {
|
|
|
243
243
|
getGlobalEventsSince(lastEventId) {
|
|
244
244
|
return this.globalEventBuffer.filter(e => e.id > lastEventId);
|
|
245
245
|
}
|
|
246
|
+
/** #398: Clean up per-session state (call when session is killed). */
|
|
247
|
+
cleanupSession(sessionId) {
|
|
248
|
+
this.eventBuffers.delete(sessionId);
|
|
249
|
+
const emitter = this.emitters.get(sessionId);
|
|
250
|
+
if (emitter) {
|
|
251
|
+
emitter.removeAllListeners();
|
|
252
|
+
this.emitters.delete(sessionId);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
246
255
|
/** Clean up all emitters. */
|
|
247
256
|
destroy() {
|
|
248
257
|
for (const emitter of this.emitters.values()) {
|
package/dist/mcp-server.js
CHANGED
|
@@ -36,8 +36,9 @@ export class AegisClient {
|
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
38
|
async request(path, opts) {
|
|
39
|
+
const hasBody = opts?.body !== undefined;
|
|
39
40
|
const headers = {
|
|
40
|
-
'Content-Type': 'application/json',
|
|
41
|
+
...(hasBody ? { 'Content-Type': 'application/json' } : {}),
|
|
41
42
|
...(this.authToken ? { Authorization: `Bearer ${this.authToken}` } : {}),
|
|
42
43
|
};
|
|
43
44
|
let res;
|
package/dist/monitor.d.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
* 3. Routes events to the ChannelManager (which fans out to Telegram, webhooks, etc.)
|
|
8
8
|
*/
|
|
9
9
|
import { type SessionManager } from './session.js';
|
|
10
|
+
import { type TmuxManager } from './tmux.js';
|
|
10
11
|
import { type ChannelManager } from './channels/index.js';
|
|
11
12
|
import { type SessionEventBus } from './events.js';
|
|
12
13
|
import { type JsonlWatcher } from './jsonl-watcher.js';
|
|
@@ -40,6 +41,9 @@ export declare class SessionMonitor {
|
|
|
40
41
|
private deadNotified;
|
|
41
42
|
private prevStatusForStall;
|
|
42
43
|
private rateLimitedSessions;
|
|
44
|
+
private tmuxWasDown;
|
|
45
|
+
private lastTmuxHealthCheck;
|
|
46
|
+
private static readonly TMUX_HEALTH_CHECK_INTERVAL_MS;
|
|
43
47
|
/** Issue #89 L4: Debounce status change broadcasts per session.
|
|
44
48
|
* If multiple status changes happen within 500ms, only emit the last one.
|
|
45
49
|
* Prevents rapid-fire notifications during state transitions. */
|
|
@@ -51,6 +55,9 @@ export declare class SessionMonitor {
|
|
|
51
55
|
constructor(sessions: SessionManager, channels: ChannelManager, config?: MonitorConfig);
|
|
52
56
|
/** Issue #32: Set the event bus for SSE streaming. */
|
|
53
57
|
setEventBus(bus: SessionEventBus): void;
|
|
58
|
+
/** Issue #397: Set the TmuxManager reference for tmux health checks. */
|
|
59
|
+
private tmux?;
|
|
60
|
+
setTmuxManager(tmuxManager: TmuxManager): void;
|
|
54
61
|
/** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
|
|
55
62
|
setJsonlWatcher(watcher: JsonlWatcher): void;
|
|
56
63
|
start(): void;
|
|
@@ -79,6 +86,8 @@ export declare class SessionMonitor {
|
|
|
79
86
|
private makePayload;
|
|
80
87
|
/** Check for dead tmux windows and notify via channels. */
|
|
81
88
|
private checkDeadSessions;
|
|
89
|
+
/** Issue #397: Check tmux server health. Detect crashes and trigger reconciliation. */
|
|
90
|
+
private checkTmuxHealth;
|
|
82
91
|
/** Clean up tracking for a killed session. */
|
|
83
92
|
removeSession(sessionId: string): void;
|
|
84
93
|
}
|
package/dist/monitor.js
CHANGED
|
@@ -43,6 +43,10 @@ export class SessionMonitor {
|
|
|
43
43
|
deadNotified = new Set(); // don't spam dead session events
|
|
44
44
|
prevStatusForStall = new Map(); // track previous status for stall transition detection
|
|
45
45
|
rateLimitedSessions = new Set(); // sessions in rate-limit backoff
|
|
46
|
+
// Issue #397: Track tmux server health for crash recovery
|
|
47
|
+
tmuxWasDown = false;
|
|
48
|
+
lastTmuxHealthCheck = 0;
|
|
49
|
+
static TMUX_HEALTH_CHECK_INTERVAL_MS = 10_000; // check every 10s
|
|
46
50
|
/** Issue #89 L4: Debounce status change broadcasts per session.
|
|
47
51
|
* If multiple status changes happen within 500ms, only emit the last one.
|
|
48
52
|
* Prevents rapid-fire notifications during state transitions. */
|
|
@@ -61,6 +65,11 @@ export class SessionMonitor {
|
|
|
61
65
|
setEventBus(bus) {
|
|
62
66
|
this.eventBus = bus;
|
|
63
67
|
}
|
|
68
|
+
/** Issue #397: Set the TmuxManager reference for tmux health checks. */
|
|
69
|
+
tmux;
|
|
70
|
+
setTmuxManager(tmuxManager) {
|
|
71
|
+
this.tmux = tmuxManager;
|
|
72
|
+
}
|
|
64
73
|
/** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
|
|
65
74
|
setJsonlWatcher(watcher) {
|
|
66
75
|
this.jsonlWatcher = watcher;
|
|
@@ -130,6 +139,11 @@ export class SessionMonitor {
|
|
|
130
139
|
this.lastDeadCheck = now;
|
|
131
140
|
await this.checkDeadSessions();
|
|
132
141
|
}
|
|
142
|
+
// Issue #397: Tmux server health check (every 10s)
|
|
143
|
+
if (now - this.lastTmuxHealthCheck >= SessionMonitor.TMUX_HEALTH_CHECK_INTERVAL_MS) {
|
|
144
|
+
this.lastTmuxHealthCheck = now;
|
|
145
|
+
await this.checkTmuxHealth();
|
|
146
|
+
}
|
|
133
147
|
}
|
|
134
148
|
/** Smart stall detection: multiple stall types with graduated thresholds.
|
|
135
149
|
*
|
|
@@ -256,6 +270,23 @@ export class SessionMonitor {
|
|
|
256
270
|
}
|
|
257
271
|
}
|
|
258
272
|
}
|
|
273
|
+
// --- Type 5: Extended working stall (working too long regardless of byte changes, ---
|
|
274
|
+
// Catches CC stuck in "Misting" state where internal loop detection
|
|
275
|
+
if (currentStatus === 'working') {
|
|
276
|
+
const entry = this.stateSince.get(session.id);
|
|
277
|
+
if (entry && entry.state === 'working') {
|
|
278
|
+
const workingDuration = now - entry.since;
|
|
279
|
+
const maxWorkingMs = this.config.stallThresholdMs * 3; // 15 min default
|
|
280
|
+
if (workingDuration >= maxWorkingMs && !this.stallNotified.has(`${session.id}:stall:extended_working`)) {
|
|
281
|
+
this.stallNotified.add(`${session.id}:stall:extended_working`);
|
|
282
|
+
const minutes = Math.round(workingDuration / 60000);
|
|
283
|
+
const detail = `Session stalled: in "working" state for ${minutes}min. ` +
|
|
284
|
+
`CC may be stuck in an internal loop (e.g., Misting). Consider: POST /v1/sessions/${session.id}/interrupt or /kill`;
|
|
285
|
+
this.eventBus?.emitStall(session.id, 'extended_working', detail);
|
|
286
|
+
await this.channels.statusChange(this.makePayload('status.stall', session, detail));
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
}
|
|
259
290
|
// Clean up stall notifications on state transitions (using prevStallStatus)
|
|
260
291
|
if (prevStallStatus && prevStallStatus !== currentStatus) {
|
|
261
292
|
const exitedPermission = prevStallStatus === 'permission_prompt' || prevStallStatus === 'bash_approval';
|
|
@@ -361,12 +392,20 @@ export class SessionMonitor {
|
|
|
361
392
|
// Update last activity
|
|
362
393
|
session.lastActivity = Date.now();
|
|
363
394
|
}
|
|
364
|
-
// Update JSONL stall tracking —
|
|
395
|
+
// Update JSONL stall tracking — only reset stall timer when real messages arrive
|
|
396
|
+
// When no messages, only update bytes tracking (keep timestamp)
|
|
365
397
|
const now = Date.now();
|
|
366
398
|
const prev = this.lastBytesSeen.get(event.sessionId);
|
|
367
399
|
if (event.newOffset > (prev?.bytes ?? -1)) {
|
|
368
|
-
|
|
369
|
-
|
|
400
|
+
if (event.messages.length > 0) {
|
|
401
|
+
// Real output — reset stall timer
|
|
402
|
+
this.lastBytesSeen.set(event.sessionId, { bytes: event.newOffset, at: now });
|
|
403
|
+
this.stallNotified.delete(`${event.sessionId}:stall:jsonl`);
|
|
404
|
+
}
|
|
405
|
+
else {
|
|
406
|
+
// File grew but no messages — only update bytes, keep timestamp
|
|
407
|
+
this.lastBytesSeen.set(event.sessionId, { bytes: event.newOffset, at: prev?.at ?? now });
|
|
408
|
+
}
|
|
370
409
|
}
|
|
371
410
|
}
|
|
372
411
|
async checkSession(session) {
|
|
@@ -407,6 +446,9 @@ export class SessionMonitor {
|
|
|
407
446
|
const latestResult = { statusText: result.statusText, interactiveContent: result.interactiveContent };
|
|
408
447
|
this.statusChangeDebounce.set(session.id, setTimeout(() => {
|
|
409
448
|
this.statusChangeDebounce.delete(session.id);
|
|
449
|
+
// #511: Skip broadcast if session was killed while debounce was pending
|
|
450
|
+
if (!this.lastStatus.has(session.id))
|
|
451
|
+
return;
|
|
410
452
|
void this.broadcastStatusChange(session, latestStatus, latestPrevStatus, latestResult)
|
|
411
453
|
.catch(e => console.error(`Monitor: broadcastStatusChange failed for ${session.id}:`, e));
|
|
412
454
|
}, STATUS_CHANGE_DEBOUNCE_MS));
|
|
@@ -519,6 +561,33 @@ export class SessionMonitor {
|
|
|
519
561
|
}
|
|
520
562
|
}
|
|
521
563
|
}
|
|
564
|
+
/** Issue #397: Check tmux server health. Detect crashes and trigger reconciliation. */
|
|
565
|
+
async checkTmuxHealth() {
|
|
566
|
+
if (!this.tmux)
|
|
567
|
+
return;
|
|
568
|
+
const { healthy } = await this.tmux.isServerHealthy();
|
|
569
|
+
if (!healthy) {
|
|
570
|
+
if (!this.tmuxWasDown) {
|
|
571
|
+
console.warn('Monitor: tmux server is unreachable — sessions may be orphaned');
|
|
572
|
+
this.tmuxWasDown = true;
|
|
573
|
+
}
|
|
574
|
+
return;
|
|
575
|
+
}
|
|
576
|
+
// Tmux is healthy now
|
|
577
|
+
if (this.tmuxWasDown) {
|
|
578
|
+
console.log('Monitor: tmux server recovered — triggering crash reconciliation');
|
|
579
|
+
this.tmuxWasDown = false;
|
|
580
|
+
// Trigger crash reconciliation to re-attach or mark orphaned sessions
|
|
581
|
+
const result = await this.sessions.reconcileTmuxCrash();
|
|
582
|
+
if (result.recovered > 0 || result.orphaned > 0) {
|
|
583
|
+
console.log(`Monitor: crash reconciliation complete — recovered: ${result.recovered}, orphaned: ${result.orphaned}`);
|
|
584
|
+
// Notify channels about recovery
|
|
585
|
+
for (const session of this.sessions.listSessions()) {
|
|
586
|
+
await this.channels.statusChange(this.makePayload('status.recovered', session, `tmux server recovered. Session ${session.windowName} re-attached.`));
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
}
|
|
522
591
|
/** Clean up tracking for a killed session. */
|
|
523
592
|
removeSession(sessionId) {
|
|
524
593
|
// Issue #84: Stop watching JSONL file for this session
|
package/dist/server.js
CHANGED
|
@@ -230,16 +230,19 @@ const createSessionSchema = z.object({
|
|
|
230
230
|
permissionMode: z.enum(['default', 'bypassPermissions', 'plan']).optional(),
|
|
231
231
|
autoApprove: z.boolean().optional(),
|
|
232
232
|
}).strict();
|
|
233
|
-
// Health
|
|
233
|
+
// Health — Issue #397: includes tmux server health check
|
|
234
234
|
app.get('/v1/health', async () => {
|
|
235
235
|
const pkg = await import('../package.json', { with: { type: 'json' } });
|
|
236
236
|
const activeCount = sessions.listSessions().length;
|
|
237
237
|
const totalCount = metrics.getTotalSessionsCreated();
|
|
238
|
+
const tmuxHealth = await tmux.isServerHealthy();
|
|
239
|
+
const status = tmuxHealth.healthy ? 'ok' : 'degraded';
|
|
238
240
|
return {
|
|
239
|
-
status
|
|
241
|
+
status,
|
|
240
242
|
version: pkg.default.version,
|
|
241
243
|
uptime: process.uptime(),
|
|
242
244
|
sessions: { active: activeCount, total: totalCount },
|
|
245
|
+
tmux: tmuxHealth,
|
|
243
246
|
timestamp: new Date().toISOString(),
|
|
244
247
|
};
|
|
245
248
|
});
|
|
@@ -248,11 +251,14 @@ app.get('/health', async () => {
|
|
|
248
251
|
const pkg = await import('../package.json', { with: { type: 'json' } });
|
|
249
252
|
const activeCount = sessions.listSessions().length;
|
|
250
253
|
const totalCount = metrics.getTotalSessionsCreated();
|
|
254
|
+
const tmuxHealth = await tmux.isServerHealthy();
|
|
255
|
+
const status = tmuxHealth.healthy ? 'ok' : 'degraded';
|
|
251
256
|
return {
|
|
252
|
-
status
|
|
257
|
+
status,
|
|
253
258
|
version: pkg.default.version,
|
|
254
259
|
uptime: process.uptime(),
|
|
255
260
|
sessions: { active: activeCount, total: totalCount },
|
|
261
|
+
tmux: tmuxHealth,
|
|
256
262
|
timestamp: new Date().toISOString(),
|
|
257
263
|
};
|
|
258
264
|
});
|
|
@@ -517,13 +523,13 @@ app.get('/v1/sessions/:id', async (req, reply) => {
|
|
|
517
523
|
const session = sessions.getSession(req.params.id);
|
|
518
524
|
if (!session)
|
|
519
525
|
return reply.status(404).send({ error: 'Session not found' });
|
|
520
|
-
return addActionHints(session);
|
|
526
|
+
return addActionHints(session, sessions);
|
|
521
527
|
});
|
|
522
528
|
app.get('/sessions/:id', async (req, reply) => {
|
|
523
529
|
const session = sessions.getSession(req.params.id);
|
|
524
530
|
if (!session)
|
|
525
531
|
return reply.status(404).send({ error: 'Session not found' });
|
|
526
|
-
return addActionHints(session);
|
|
532
|
+
return addActionHints(session, sessions);
|
|
527
533
|
});
|
|
528
534
|
// #128: Bulk health check — returns health for all sessions in one request
|
|
529
535
|
app.get('/v1/sessions/health', async () => {
|
|
@@ -1110,6 +1116,7 @@ async function reapStaleSessions(maxAgeMs) {
|
|
|
1110
1116
|
session: { id: session.id, name: session.windowName, workDir: session.workDir },
|
|
1111
1117
|
detail: `Auto-killed: exceeded ${maxAgeMs / 3600000}h time limit`,
|
|
1112
1118
|
});
|
|
1119
|
+
eventBus.cleanupSession(session.id);
|
|
1113
1120
|
await sessions.killSession(session.id);
|
|
1114
1121
|
monitor.removeSession(session.id);
|
|
1115
1122
|
metrics.cleanupSession(session.id);
|
|
@@ -1139,6 +1146,7 @@ async function reapZombieSessions() {
|
|
|
1139
1146
|
console.log(`Reaper: removing zombie session ${session.windowName} (${session.id.slice(0, 8)})`);
|
|
1140
1147
|
try {
|
|
1141
1148
|
monitor.removeSession(session.id);
|
|
1149
|
+
eventBus.cleanupSession(session.id);
|
|
1142
1150
|
await sessions.killSession(session.id);
|
|
1143
1151
|
metrics.cleanupSession(session.id);
|
|
1144
1152
|
await channels.sessionEnded({
|
|
@@ -1155,7 +1163,7 @@ async function reapZombieSessions() {
|
|
|
1155
1163
|
}
|
|
1156
1164
|
// ── Helpers ──────────────────────────────────────────────────────────
|
|
1157
1165
|
/** Issue #20: Add actionHints to session response for interactive states. */
|
|
1158
|
-
function addActionHints(session) {
|
|
1166
|
+
function addActionHints(session, sessions) {
|
|
1159
1167
|
// #357: Convert Set to array for JSON serialization
|
|
1160
1168
|
const result = {
|
|
1161
1169
|
...session,
|
|
@@ -1167,8 +1175,33 @@ function addActionHints(session) {
|
|
|
1167
1175
|
reject: { method: 'POST', url: `/v1/sessions/${session.id}/reject`, description: 'Reject the pending permission' },
|
|
1168
1176
|
};
|
|
1169
1177
|
}
|
|
1178
|
+
// #599: Expose pending question data for MCP/REST callers
|
|
1179
|
+
if (session.status === 'ask_question' && sessions) {
|
|
1180
|
+
const info = sessions.getPendingQuestionInfo(session.id);
|
|
1181
|
+
if (info) {
|
|
1182
|
+
result.pendingQuestion = {
|
|
1183
|
+
toolUseId: info.toolUseId,
|
|
1184
|
+
content: info.question,
|
|
1185
|
+
options: extractQuestionOptions(info.question),
|
|
1186
|
+
since: info.timestamp,
|
|
1187
|
+
};
|
|
1188
|
+
}
|
|
1189
|
+
}
|
|
1170
1190
|
return result;
|
|
1171
1191
|
}
|
|
1192
|
+
/**
 * #599: Extract selectable options from AskUserQuestion text.
 *
 * Looks for lines of the form "<number>. <label>" (leading whitespace
 * allowed) and collects the trimmed labels in order of appearance.
 *
 * @param text - question text to scan.
 * @returns the first four labels when at least two were found, otherwise null.
 */
function extractQuestionOptions(text) {
    // Numbered options: "1. Foo\n2. Bar"
    const numberedLine = /^\s*(\d+)\.\s+(.+)$/gm;
    const labels = Array.from(String(text).matchAll(numberedLine), (hit) => hit[2].trim());
    // A single numbered line is not a choice list.
    return labels.length >= 2 ? labels.slice(0, 4) : null;
}
|
|
1172
1205
|
function makePayload(event, sessionId, detail, meta) {
|
|
1173
1206
|
const session = sessions.getSession(sessionId);
|
|
1174
1207
|
return {
|
|
@@ -1400,6 +1433,8 @@ async function main() {
|
|
|
1400
1433
|
await channels.init(handleInbound);
|
|
1401
1434
|
// Wire SSE event bus (Issue #32)
|
|
1402
1435
|
monitor.setEventBus(eventBus);
|
|
1436
|
+
// Issue #397: Wire TmuxManager for tmux health monitoring
|
|
1437
|
+
monitor.setTmuxManager(tmux);
|
|
1403
1438
|
// Issue #84: Wire JSONL watcher for fs.watch-based message detection
|
|
1404
1439
|
jsonlWatcher = new JsonlWatcher();
|
|
1405
1440
|
monitor.setJsonlWatcher(jsonlWatcher);
|
|
@@ -1422,6 +1457,8 @@ async function main() {
|
|
|
1422
1457
|
const metricsSaveInterval = setInterval(() => { void metrics.save(); }, 5 * 60 * 1000);
|
|
1423
1458
|
// #357: Prune stale IP rate-limit entries every minute
|
|
1424
1459
|
const ipPruneInterval = setInterval(pruneIpRateLimits, 60_000);
|
|
1460
|
+
// #398: Sweep stale API key rate limit buckets every 5 minutes
|
|
1461
|
+
const authSweepInterval = setInterval(() => auth.sweepStaleRateLimits(), 5 * 60_000);
|
|
1425
1462
|
// Issue #361: Graceful shutdown handler
|
|
1426
1463
|
// Issue #415: Reentrance guard at handler level prevents double execution on rapid SIGINT
|
|
1427
1464
|
let shuttingDown = false;
|
|
@@ -1441,6 +1478,7 @@ async function main() {
|
|
|
1441
1478
|
clearInterval(zombieReaperInterval);
|
|
1442
1479
|
clearInterval(metricsSaveInterval);
|
|
1443
1480
|
clearInterval(ipPruneInterval);
|
|
1481
|
+
clearInterval(authSweepInterval);
|
|
1444
1482
|
// 3. Destroy channels (awaits Telegram poll loop)
|
|
1445
1483
|
try {
|
|
1446
1484
|
await channels.destroy();
|
package/dist/session.d.ts
CHANGED
|
@@ -61,6 +61,7 @@ export declare class SessionManager {
|
|
|
61
61
|
private static readonly SAVE_DEBOUNCE_MS;
|
|
62
62
|
private pendingPermissions;
|
|
63
63
|
private pendingQuestions;
|
|
64
|
+
private static readonly MAX_CACHE_ENTRIES_PER_SESSION;
|
|
64
65
|
private parsedEntriesCache;
|
|
65
66
|
constructor(tmux: TmuxManager, config: Config);
|
|
66
67
|
/** Validate that parsed data looks like a valid SessionState. */
|
|
@@ -69,8 +70,16 @@ export declare class SessionManager {
|
|
|
69
70
|
private cleanTmpFiles;
|
|
70
71
|
/** Load state from disk. */
|
|
71
72
|
load(): Promise<void>;
|
|
72
|
-
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
73
|
+
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
74
|
+
* Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
|
|
73
75
|
private reconcile;
|
|
76
|
+
/** Issue #397: Reconcile after tmux server crash recovery.
|
|
77
|
+
* Called when the monitor detects tmux server came back after a crash.
|
|
78
|
+
* Returns counts for observability. */
|
|
79
|
+
reconcileTmuxCrash(): Promise<{
|
|
80
|
+
recovered: number;
|
|
81
|
+
orphaned: number;
|
|
82
|
+
}>;
|
|
74
83
|
/** Save state to disk atomically (write to temp, then rename).
|
|
75
84
|
* #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
|
|
76
85
|
save(): Promise<void>;
|
|
@@ -227,6 +236,7 @@ export declare class SessionManager {
|
|
|
227
236
|
getPendingQuestionInfo(sessionId: string): {
|
|
228
237
|
toolUseId: string;
|
|
229
238
|
question: string;
|
|
239
|
+
timestamp: number;
|
|
230
240
|
} | null;
|
|
231
241
|
/** Issue #336: Clean up any pending question for a session. */
|
|
232
242
|
cleanupPendingQuestion(sessionId: string): void;
|
package/dist/session.js
CHANGED
|
@@ -43,6 +43,8 @@ export class SessionManager {
|
|
|
43
43
|
pendingPermissions = new Map();
|
|
44
44
|
pendingQuestions = new Map();
|
|
45
45
|
// #357: Cache of all parsed JSONL entries per session to avoid re-reading from offset 0
|
|
46
|
+
// #424: Evict oldest entries when cache exceeds max to prevent unbounded growth
|
|
47
|
+
static MAX_CACHE_ENTRIES_PER_SESSION = 10_000;
|
|
46
48
|
parsedEntriesCache = new Map();
|
|
47
49
|
constructor(tmux, config) {
|
|
48
50
|
this.tmux = tmux;
|
|
@@ -141,15 +143,19 @@ export class SessionManager {
|
|
|
141
143
|
// Reconcile: verify tmux windows still exist, clean up dead sessions
|
|
142
144
|
await this.reconcile();
|
|
143
145
|
}
|
|
144
|
-
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
146
|
+
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
147
|
+
* Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
|
|
145
148
|
async reconcile() {
|
|
146
149
|
const windows = await this.tmux.listWindows();
|
|
147
150
|
const windowIds = new Set(windows.map(w => w.windowId));
|
|
148
|
-
const
|
|
151
|
+
const windowByName = new Map();
|
|
152
|
+
for (const w of windows)
|
|
153
|
+
windowByName.set(w.windowName, w);
|
|
149
154
|
let changed = false;
|
|
150
155
|
for (const [id, session] of Object.entries(this.state.sessions)) {
|
|
151
|
-
const
|
|
152
|
-
|
|
156
|
+
const windowIdAlive = windowIds.has(session.windowId);
|
|
157
|
+
const windowNameAlive = windowByName.has(session.windowName);
|
|
158
|
+
if (!windowIdAlive && !windowNameAlive) {
|
|
153
159
|
console.log(`Reconcile: session ${session.windowName} (${id.slice(0, 8)}) — tmux window gone, removing`);
|
|
154
160
|
// Restore patched settings before removing dead session
|
|
155
161
|
if (session.settingsPatched) {
|
|
@@ -158,6 +164,19 @@ export class SessionManager {
|
|
|
158
164
|
delete this.state.sessions[id];
|
|
159
165
|
changed = true;
|
|
160
166
|
}
|
|
167
|
+
else if (!windowIdAlive && windowNameAlive) {
|
|
168
|
+
// Issue #397: Window exists with same name but different ID (tmux restarted).
|
|
169
|
+
// Re-attach by updating the windowId to the new one.
|
|
170
|
+
const win = windowByName.get(session.windowName);
|
|
171
|
+
const oldWindowId = session.windowId;
|
|
172
|
+
session.windowId = win.windowId;
|
|
173
|
+
console.log(`Reconcile: session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
|
|
174
|
+
// Restart discovery if needed
|
|
175
|
+
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
176
|
+
this.startSessionIdDiscovery(id);
|
|
177
|
+
}
|
|
178
|
+
changed = true;
|
|
179
|
+
}
|
|
161
180
|
else {
|
|
162
181
|
// Session is alive — restart discovery if needed
|
|
163
182
|
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
@@ -170,7 +189,9 @@ export class SessionManager {
|
|
|
170
189
|
}
|
|
171
190
|
}
|
|
172
191
|
// P0 fix: On startup, purge session_map entries that don't correspond to active sessions.
|
|
173
|
-
|
|
192
|
+
const finalWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
|
|
193
|
+
const finalWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
|
|
194
|
+
await this.purgeStaleSessionMapEntries(finalWindowIds, finalWindowNames);
|
|
174
195
|
// Issue #35: Adopt orphaned tmux windows (cc-* prefix) not in state
|
|
175
196
|
const knownWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
|
|
176
197
|
const knownWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
|
|
@@ -205,6 +226,56 @@ export class SessionManager {
|
|
|
205
226
|
await this.save();
|
|
206
227
|
}
|
|
207
228
|
}
|
|
229
|
+
/** Issue #397: Reconcile after tmux server crash recovery.
|
|
230
|
+
* Called when the monitor detects tmux server came back after a crash.
|
|
231
|
+
* Returns counts for observability. */
|
|
232
|
+
async reconcileTmuxCrash() {
|
|
233
|
+
console.log('Reconcile: tmux crash recovery — checking all sessions');
|
|
234
|
+
const windows = await this.tmux.listWindows();
|
|
235
|
+
const windowIds = new Set(windows.map(w => w.windowId));
|
|
236
|
+
const windowByName = new Map();
|
|
237
|
+
for (const w of windows)
|
|
238
|
+
windowByName.set(w.windowName, w);
|
|
239
|
+
let recovered = 0;
|
|
240
|
+
let orphaned = 0;
|
|
241
|
+
let changed = false;
|
|
242
|
+
for (const [id, session] of Object.entries(this.state.sessions)) {
|
|
243
|
+
const windowIdAlive = windowIds.has(session.windowId);
|
|
244
|
+
const windowNameAlive = windowByName.has(session.windowName);
|
|
245
|
+
if (windowIdAlive) {
|
|
246
|
+
// Window ID still matches — session survived the crash
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
if (windowNameAlive) {
|
|
250
|
+
// Window exists by name but ID changed — re-attach
|
|
251
|
+
const win = windowByName.get(session.windowName);
|
|
252
|
+
const oldWindowId = session.windowId;
|
|
253
|
+
session.windowId = win.windowId;
|
|
254
|
+
session.status = 'unknown';
|
|
255
|
+
session.lastActivity = Date.now();
|
|
256
|
+
console.log(`Reconcile (crash): session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
|
|
257
|
+
// Restart discovery in case the session state is stale
|
|
258
|
+
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
259
|
+
this.startSessionIdDiscovery(id);
|
|
260
|
+
this.startFilesystemDiscovery(id, session.workDir);
|
|
261
|
+
}
|
|
262
|
+
recovered++;
|
|
263
|
+
changed = true;
|
|
264
|
+
}
|
|
265
|
+
else {
|
|
266
|
+
// Window gone entirely — session is orphaned
|
|
267
|
+
console.log(`Reconcile (crash): session ${session.windowName} (${id.slice(0, 8)}) — window gone, marking orphaned`);
|
|
268
|
+
session.status = 'unknown';
|
|
269
|
+
session.lastDeadAt = Date.now();
|
|
270
|
+
orphaned++;
|
|
271
|
+
changed = true;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
if (changed) {
|
|
275
|
+
await this.save();
|
|
276
|
+
}
|
|
277
|
+
return { recovered, orphaned };
|
|
278
|
+
}
|
|
208
279
|
/** Save state to disk atomically (write to temp, then rename).
|
|
209
280
|
* #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
|
|
210
281
|
async save() {
|
|
@@ -808,7 +879,7 @@ export class SessionManager {
|
|
|
808
879
|
console.log(`Hooks: AskUserQuestion timeout for session ${sessionId} — allowing without answer`);
|
|
809
880
|
resolve(null);
|
|
810
881
|
}, timeoutMs);
|
|
811
|
-
this.pendingQuestions.set(sessionId, { resolve, timer, toolUseId, question });
|
|
882
|
+
this.pendingQuestions.set(sessionId, { resolve, timer, toolUseId, question, timestamp: Date.now() });
|
|
812
883
|
});
|
|
813
884
|
}
|
|
814
885
|
/** Issue #336: Submit an answer to a pending question. Returns true if resolved. */
|
|
@@ -830,7 +901,7 @@ export class SessionManager {
|
|
|
830
901
|
/** Issue #336: Get info about a pending question. */
|
|
831
902
|
getPendingQuestionInfo(sessionId) {
|
|
832
903
|
const pending = this.pendingQuestions.get(sessionId);
|
|
833
|
-
return pending ? { toolUseId: pending.toolUseId, question: pending.question } : null;
|
|
904
|
+
return pending ? { toolUseId: pending.toolUseId, question: pending.question, timestamp: pending.timestamp } : null;
|
|
834
905
|
}
|
|
835
906
|
/** Issue #336: Clean up any pending question for a session. */
|
|
836
907
|
cleanupPendingQuestion(sessionId) {
|
|
@@ -946,6 +1017,10 @@ export class SessionManager {
|
|
|
946
1017
|
if (cached) {
|
|
947
1018
|
cached.entries.push(...result.entries);
|
|
948
1019
|
cached.offset = result.newOffset;
|
|
1020
|
+
// #424: Evict oldest entries when cache exceeds per-session cap
|
|
1021
|
+
if (cached.entries.length > SessionManager.MAX_CACHE_ENTRIES_PER_SESSION) {
|
|
1022
|
+
cached.entries.splice(0, cached.entries.length - SessionManager.MAX_CACHE_ENTRIES_PER_SESSION);
|
|
1023
|
+
}
|
|
949
1024
|
return cached.entries;
|
|
950
1025
|
}
|
|
951
1026
|
// First read — cache it
|
package/dist/tmux.d.ts
CHANGED
|
@@ -154,6 +154,15 @@ export declare class TmuxManager {
|
|
|
154
154
|
resizePane(windowId: string, cols: number, rows: number): Promise<void>;
|
|
155
155
|
/** Kill a window. */
|
|
156
156
|
killWindow(windowId: string): Promise<void>;
|
|
157
|
+
/** Issue #397: Check if the tmux server is reachable and healthy.
|
|
158
|
+
* Returns { healthy, error } — does not throw. */
|
|
159
|
+
isServerHealthy(): Promise<{
|
|
160
|
+
healthy: boolean;
|
|
161
|
+
error: string | null;
|
|
162
|
+
}>;
|
|
163
|
+
/** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
|
|
164
|
+
* Server crash errors contain specific patterns from tmux CLI. */
|
|
165
|
+
isTmuxServerError(error: unknown): boolean;
|
|
157
166
|
/** Kill the entire tmux session. Used for cleanup on shutdown. */
|
|
158
167
|
killSession(sessionName?: string): Promise<void>;
|
|
159
168
|
/** #357: Poll until condition returns true or timeout elapses. */
|
package/dist/tmux.js
CHANGED
|
@@ -697,6 +697,32 @@ export class TmuxManager {
|
|
|
697
697
|
console.warn(`Tmux: killWindow failed for ${target}: ${e.message}`);
|
|
698
698
|
}
|
|
699
699
|
}
|
|
700
|
+
/** Issue #397: Check if the tmux server is reachable and healthy.
|
|
701
|
+
* Returns { healthy, error } — does not throw. */
|
|
702
|
+
async isServerHealthy() {
|
|
703
|
+
try {
|
|
704
|
+
await this.tmuxInternal('list-sessions');
|
|
705
|
+
return { healthy: true, error: null };
|
|
706
|
+
}
|
|
707
|
+
catch (e) {
|
|
708
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
709
|
+
return { healthy: false, error: msg };
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
/** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
|
|
713
|
+
* Server crash errors contain specific patterns from tmux CLI. */
|
|
714
|
+
isTmuxServerError(error) {
|
|
715
|
+
if (!(error instanceof Error))
|
|
716
|
+
return false;
|
|
717
|
+
const msg = error.message.toLowerCase();
|
|
718
|
+
// "no server running" = tmux server not started
|
|
719
|
+
// "failed to connect to server" = socket/protocol error
|
|
720
|
+
// "connection refused" = server died mid-operation
|
|
721
|
+
return msg.includes('no server running')
|
|
722
|
+
|| msg.includes('failed to connect')
|
|
723
|
+
|| msg.includes('connection refused')
|
|
724
|
+
|| msg.includes('no tmux server');
|
|
725
|
+
}
|
|
700
726
|
/** Kill the entire tmux session. Used for cleanup on shutdown. */
|
|
701
727
|
async killSession(sessionName) {
|
|
702
728
|
const target = sessionName ?? this.sessionName;
|