aegis-bridge 2.3.0 → 2.3.1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/channels/types.d.ts +1 -1
- package/dist/monitor.d.ts +9 -0
- package/dist/monitor.js +41 -0
- package/dist/server.js +11 -3
- package/dist/session.d.ts +9 -1
- package/dist/session.js +74 -5
- package/dist/tmux.d.ts +9 -0
- package/dist/tmux.js +26 -0
- package/package.json +1 -1
package/dist/channels/types.d.ts
CHANGED
|
@@ -14,7 +14,7 @@ export interface ChannelHealthStatus {
|
|
|
14
14
|
pendingCount: number;
|
|
15
15
|
}
|
|
16
16
|
/** Events a channel can subscribe to. */
|
|
17
|
-
export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
|
|
17
|
+
export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'status.recovered' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
|
|
18
18
|
/** Payload for all session events. */
|
|
19
19
|
export interface SessionEventPayload {
|
|
20
20
|
event: SessionEvent;
|
package/dist/monitor.d.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
* 3. Routes events to the ChannelManager (which fans out to Telegram, webhooks, etc.)
|
|
8
8
|
*/
|
|
9
9
|
import { type SessionManager } from './session.js';
|
|
10
|
+
import { type TmuxManager } from './tmux.js';
|
|
10
11
|
import { type ChannelManager } from './channels/index.js';
|
|
11
12
|
import { type SessionEventBus } from './events.js';
|
|
12
13
|
import { type JsonlWatcher } from './jsonl-watcher.js';
|
|
@@ -40,6 +41,9 @@ export declare class SessionMonitor {
|
|
|
40
41
|
private deadNotified;
|
|
41
42
|
private prevStatusForStall;
|
|
42
43
|
private rateLimitedSessions;
|
|
44
|
+
private tmuxWasDown;
|
|
45
|
+
private lastTmuxHealthCheck;
|
|
46
|
+
private static readonly TMUX_HEALTH_CHECK_INTERVAL_MS;
|
|
43
47
|
/** Issue #89 L4: Debounce status change broadcasts per session.
|
|
44
48
|
* If multiple status changes happen within 500ms, only emit the last one.
|
|
45
49
|
* Prevents rapid-fire notifications during state transitions. */
|
|
@@ -51,6 +55,9 @@ export declare class SessionMonitor {
|
|
|
51
55
|
constructor(sessions: SessionManager, channels: ChannelManager, config?: MonitorConfig);
|
|
52
56
|
/** Issue #32: Set the event bus for SSE streaming. */
|
|
53
57
|
setEventBus(bus: SessionEventBus): void;
|
|
58
|
+
/** Issue #397: Set the TmuxManager reference for tmux health checks. */
|
|
59
|
+
private tmux?;
|
|
60
|
+
setTmuxManager(tmuxManager: TmuxManager): void;
|
|
54
61
|
/** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
|
|
55
62
|
setJsonlWatcher(watcher: JsonlWatcher): void;
|
|
56
63
|
start(): void;
|
|
@@ -79,6 +86,8 @@ export declare class SessionMonitor {
|
|
|
79
86
|
private makePayload;
|
|
80
87
|
/** Check for dead tmux windows and notify via channels. */
|
|
81
88
|
private checkDeadSessions;
|
|
89
|
+
/** Issue #397: Check tmux server health. Detect crashes and trigger reconciliation. */
|
|
90
|
+
private checkTmuxHealth;
|
|
82
91
|
/** Clean up tracking for a killed session. */
|
|
83
92
|
removeSession(sessionId: string): void;
|
|
84
93
|
}
|
package/dist/monitor.js
CHANGED
|
@@ -43,6 +43,10 @@ export class SessionMonitor {
|
|
|
43
43
|
deadNotified = new Set(); // don't spam dead session events
|
|
44
44
|
prevStatusForStall = new Map(); // track previous status for stall transition detection
|
|
45
45
|
rateLimitedSessions = new Set(); // sessions in rate-limit backoff
|
|
46
|
+
// Issue #397: Track tmux server health for crash recovery
|
|
47
|
+
tmuxWasDown = false;
|
|
48
|
+
lastTmuxHealthCheck = 0;
|
|
49
|
+
static TMUX_HEALTH_CHECK_INTERVAL_MS = 10_000; // check every 10s
|
|
46
50
|
/** Issue #89 L4: Debounce status change broadcasts per session.
|
|
47
51
|
* If multiple status changes happen within 500ms, only emit the last one.
|
|
48
52
|
* Prevents rapid-fire notifications during state transitions. */
|
|
@@ -61,6 +65,11 @@ export class SessionMonitor {
|
|
|
61
65
|
setEventBus(bus) {
|
|
62
66
|
this.eventBus = bus;
|
|
63
67
|
}
|
|
68
|
+
/** Issue #397: Set the TmuxManager reference for tmux health checks. */
|
|
69
|
+
tmux;
|
|
70
|
+
setTmuxManager(tmuxManager) {
|
|
71
|
+
this.tmux = tmuxManager;
|
|
72
|
+
}
|
|
64
73
|
/** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
|
|
65
74
|
setJsonlWatcher(watcher) {
|
|
66
75
|
this.jsonlWatcher = watcher;
|
|
@@ -130,6 +139,11 @@ export class SessionMonitor {
|
|
|
130
139
|
this.lastDeadCheck = now;
|
|
131
140
|
await this.checkDeadSessions();
|
|
132
141
|
}
|
|
142
|
+
// Issue #397: Tmux server health check (every 10s)
|
|
143
|
+
if (now - this.lastTmuxHealthCheck >= SessionMonitor.TMUX_HEALTH_CHECK_INTERVAL_MS) {
|
|
144
|
+
this.lastTmuxHealthCheck = now;
|
|
145
|
+
await this.checkTmuxHealth();
|
|
146
|
+
}
|
|
133
147
|
}
|
|
134
148
|
/** Smart stall detection: multiple stall types with graduated thresholds.
|
|
135
149
|
*
|
|
@@ -547,6 +561,33 @@ export class SessionMonitor {
|
|
|
547
561
|
}
|
|
548
562
|
}
|
|
549
563
|
}
|
|
564
|
+
/** Issue #397: Check tmux server health. Detect crashes and trigger reconciliation. */
|
|
565
|
+
async checkTmuxHealth() {
|
|
566
|
+
if (!this.tmux)
|
|
567
|
+
return;
|
|
568
|
+
const { healthy } = await this.tmux.isServerHealthy();
|
|
569
|
+
if (!healthy) {
|
|
570
|
+
if (!this.tmuxWasDown) {
|
|
571
|
+
console.warn('Monitor: tmux server is unreachable — sessions may be orphaned');
|
|
572
|
+
this.tmuxWasDown = true;
|
|
573
|
+
}
|
|
574
|
+
return;
|
|
575
|
+
}
|
|
576
|
+
// Tmux is healthy now
|
|
577
|
+
if (this.tmuxWasDown) {
|
|
578
|
+
console.log('Monitor: tmux server recovered — triggering crash reconciliation');
|
|
579
|
+
this.tmuxWasDown = false;
|
|
580
|
+
// Trigger crash reconciliation to re-attach or mark orphaned sessions
|
|
581
|
+
const result = await this.sessions.reconcileTmuxCrash();
|
|
582
|
+
if (result.recovered > 0 || result.orphaned > 0) {
|
|
583
|
+
console.log(`Monitor: crash reconciliation complete — recovered: ${result.recovered}, orphaned: ${result.orphaned}`);
|
|
584
|
+
// Notify channels about recovery
|
|
585
|
+
for (const session of this.sessions.listSessions()) {
|
|
586
|
+
await this.channels.statusChange(this.makePayload('status.recovered', session, `tmux server recovered. Session ${session.windowName} re-attached.`));
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
}
|
|
550
591
|
/** Clean up tracking for a killed session. */
|
|
551
592
|
removeSession(sessionId) {
|
|
552
593
|
// Issue #84: Stop watching JSONL file for this session
|
package/dist/server.js
CHANGED
|
@@ -230,16 +230,19 @@ const createSessionSchema = z.object({
|
|
|
230
230
|
permissionMode: z.enum(['default', 'bypassPermissions', 'plan']).optional(),
|
|
231
231
|
autoApprove: z.boolean().optional(),
|
|
232
232
|
}).strict();
|
|
233
|
-
// Health
|
|
233
|
+
// Health — Issue #397: includes tmux server health check
|
|
234
234
|
app.get('/v1/health', async () => {
|
|
235
235
|
const pkg = await import('../package.json', { with: { type: 'json' } });
|
|
236
236
|
const activeCount = sessions.listSessions().length;
|
|
237
237
|
const totalCount = metrics.getTotalSessionsCreated();
|
|
238
|
+
const tmuxHealth = await tmux.isServerHealthy();
|
|
239
|
+
const status = tmuxHealth.healthy ? 'ok' : 'degraded';
|
|
238
240
|
return {
|
|
239
|
-
status
|
|
241
|
+
status,
|
|
240
242
|
version: pkg.default.version,
|
|
241
243
|
uptime: process.uptime(),
|
|
242
244
|
sessions: { active: activeCount, total: totalCount },
|
|
245
|
+
tmux: tmuxHealth,
|
|
243
246
|
timestamp: new Date().toISOString(),
|
|
244
247
|
};
|
|
245
248
|
});
|
|
@@ -248,11 +251,14 @@ app.get('/health', async () => {
|
|
|
248
251
|
const pkg = await import('../package.json', { with: { type: 'json' } });
|
|
249
252
|
const activeCount = sessions.listSessions().length;
|
|
250
253
|
const totalCount = metrics.getTotalSessionsCreated();
|
|
254
|
+
const tmuxHealth = await tmux.isServerHealthy();
|
|
255
|
+
const status = tmuxHealth.healthy ? 'ok' : 'degraded';
|
|
251
256
|
return {
|
|
252
|
-
status
|
|
257
|
+
status,
|
|
253
258
|
version: pkg.default.version,
|
|
254
259
|
uptime: process.uptime(),
|
|
255
260
|
sessions: { active: activeCount, total: totalCount },
|
|
261
|
+
tmux: tmuxHealth,
|
|
256
262
|
timestamp: new Date().toISOString(),
|
|
257
263
|
};
|
|
258
264
|
});
|
|
@@ -1427,6 +1433,8 @@ async function main() {
|
|
|
1427
1433
|
await channels.init(handleInbound);
|
|
1428
1434
|
// Wire SSE event bus (Issue #32)
|
|
1429
1435
|
monitor.setEventBus(eventBus);
|
|
1436
|
+
// Issue #397: Wire TmuxManager for tmux health monitoring
|
|
1437
|
+
monitor.setTmuxManager(tmux);
|
|
1430
1438
|
// Issue #84: Wire JSONL watcher for fs.watch-based message detection
|
|
1431
1439
|
jsonlWatcher = new JsonlWatcher();
|
|
1432
1440
|
monitor.setJsonlWatcher(jsonlWatcher);
|
package/dist/session.d.ts
CHANGED
|
@@ -70,8 +70,16 @@ export declare class SessionManager {
|
|
|
70
70
|
private cleanTmpFiles;
|
|
71
71
|
/** Load state from disk. */
|
|
72
72
|
load(): Promise<void>;
|
|
73
|
-
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
73
|
+
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
74
|
+
* Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
|
|
74
75
|
private reconcile;
|
|
76
|
+
/** Issue #397: Reconcile after tmux server crash recovery.
|
|
77
|
+
* Called when the monitor detects tmux server came back after a crash.
|
|
78
|
+
* Returns counts for observability. */
|
|
79
|
+
reconcileTmuxCrash(): Promise<{
|
|
80
|
+
recovered: number;
|
|
81
|
+
orphaned: number;
|
|
82
|
+
}>;
|
|
75
83
|
/** Save state to disk atomically (write to temp, then rename).
|
|
76
84
|
* #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
|
|
77
85
|
save(): Promise<void>;
|
package/dist/session.js
CHANGED
|
@@ -143,15 +143,19 @@ export class SessionManager {
|
|
|
143
143
|
// Reconcile: verify tmux windows still exist, clean up dead sessions
|
|
144
144
|
await this.reconcile();
|
|
145
145
|
}
|
|
146
|
-
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
146
|
+
/** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
|
|
147
|
+
* Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
|
|
147
148
|
async reconcile() {
|
|
148
149
|
const windows = await this.tmux.listWindows();
|
|
149
150
|
const windowIds = new Set(windows.map(w => w.windowId));
|
|
150
|
-
const
|
|
151
|
+
const windowByName = new Map();
|
|
152
|
+
for (const w of windows)
|
|
153
|
+
windowByName.set(w.windowName, w);
|
|
151
154
|
let changed = false;
|
|
152
155
|
for (const [id, session] of Object.entries(this.state.sessions)) {
|
|
153
|
-
const
|
|
154
|
-
|
|
156
|
+
const windowIdAlive = windowIds.has(session.windowId);
|
|
157
|
+
const windowNameAlive = windowByName.has(session.windowName);
|
|
158
|
+
if (!windowIdAlive && !windowNameAlive) {
|
|
155
159
|
console.log(`Reconcile: session ${session.windowName} (${id.slice(0, 8)}) — tmux window gone, removing`);
|
|
156
160
|
// Restore patched settings before removing dead session
|
|
157
161
|
if (session.settingsPatched) {
|
|
@@ -160,6 +164,19 @@ export class SessionManager {
|
|
|
160
164
|
delete this.state.sessions[id];
|
|
161
165
|
changed = true;
|
|
162
166
|
}
|
|
167
|
+
else if (!windowIdAlive && windowNameAlive) {
|
|
168
|
+
// Issue #397: Window exists with same name but different ID (tmux restarted).
|
|
169
|
+
// Re-attach by updating the windowId to the new one.
|
|
170
|
+
const win = windowByName.get(session.windowName);
|
|
171
|
+
const oldWindowId = session.windowId;
|
|
172
|
+
session.windowId = win.windowId;
|
|
173
|
+
console.log(`Reconcile: session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
|
|
174
|
+
// Restart discovery if needed
|
|
175
|
+
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
176
|
+
this.startSessionIdDiscovery(id);
|
|
177
|
+
}
|
|
178
|
+
changed = true;
|
|
179
|
+
}
|
|
163
180
|
else {
|
|
164
181
|
// Session is alive — restart discovery if needed
|
|
165
182
|
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
@@ -172,7 +189,9 @@ export class SessionManager {
|
|
|
172
189
|
}
|
|
173
190
|
}
|
|
174
191
|
// P0 fix: On startup, purge session_map entries that don't correspond to active sessions.
|
|
175
|
-
|
|
192
|
+
const finalWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
|
|
193
|
+
const finalWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
|
|
194
|
+
await this.purgeStaleSessionMapEntries(finalWindowIds, finalWindowNames);
|
|
176
195
|
// Issue #35: Adopt orphaned tmux windows (cc-* prefix) not in state
|
|
177
196
|
const knownWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
|
|
178
197
|
const knownWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
|
|
@@ -207,6 +226,56 @@ export class SessionManager {
|
|
|
207
226
|
await this.save();
|
|
208
227
|
}
|
|
209
228
|
}
|
|
229
|
+
/** Issue #397: Reconcile after tmux server crash recovery.
|
|
230
|
+
* Called when the monitor detects tmux server came back after a crash.
|
|
231
|
+
* Returns counts for observability. */
|
|
232
|
+
async reconcileTmuxCrash() {
|
|
233
|
+
console.log('Reconcile: tmux crash recovery — checking all sessions');
|
|
234
|
+
const windows = await this.tmux.listWindows();
|
|
235
|
+
const windowIds = new Set(windows.map(w => w.windowId));
|
|
236
|
+
const windowByName = new Map();
|
|
237
|
+
for (const w of windows)
|
|
238
|
+
windowByName.set(w.windowName, w);
|
|
239
|
+
let recovered = 0;
|
|
240
|
+
let orphaned = 0;
|
|
241
|
+
let changed = false;
|
|
242
|
+
for (const [id, session] of Object.entries(this.state.sessions)) {
|
|
243
|
+
const windowIdAlive = windowIds.has(session.windowId);
|
|
244
|
+
const windowNameAlive = windowByName.has(session.windowName);
|
|
245
|
+
if (windowIdAlive) {
|
|
246
|
+
// Window ID still matches — session survived the crash
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
if (windowNameAlive) {
|
|
250
|
+
// Window exists by name but ID changed — re-attach
|
|
251
|
+
const win = windowByName.get(session.windowName);
|
|
252
|
+
const oldWindowId = session.windowId;
|
|
253
|
+
session.windowId = win.windowId;
|
|
254
|
+
session.status = 'unknown';
|
|
255
|
+
session.lastActivity = Date.now();
|
|
256
|
+
console.log(`Reconcile (crash): session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
|
|
257
|
+
// Restart discovery in case the session state is stale
|
|
258
|
+
if (!session.claudeSessionId || !session.jsonlPath) {
|
|
259
|
+
this.startSessionIdDiscovery(id);
|
|
260
|
+
this.startFilesystemDiscovery(id, session.workDir);
|
|
261
|
+
}
|
|
262
|
+
recovered++;
|
|
263
|
+
changed = true;
|
|
264
|
+
}
|
|
265
|
+
else {
|
|
266
|
+
// Window gone entirely — session is orphaned
|
|
267
|
+
console.log(`Reconcile (crash): session ${session.windowName} (${id.slice(0, 8)}) — window gone, marking orphaned`);
|
|
268
|
+
session.status = 'unknown';
|
|
269
|
+
session.lastDeadAt = Date.now();
|
|
270
|
+
orphaned++;
|
|
271
|
+
changed = true;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
if (changed) {
|
|
275
|
+
await this.save();
|
|
276
|
+
}
|
|
277
|
+
return { recovered, orphaned };
|
|
278
|
+
}
|
|
210
279
|
/** Save state to disk atomically (write to temp, then rename).
|
|
211
280
|
* #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
|
|
212
281
|
async save() {
|
package/dist/tmux.d.ts
CHANGED
|
@@ -154,6 +154,15 @@ export declare class TmuxManager {
|
|
|
154
154
|
resizePane(windowId: string, cols: number, rows: number): Promise<void>;
|
|
155
155
|
/** Kill a window. */
|
|
156
156
|
killWindow(windowId: string): Promise<void>;
|
|
157
|
+
/** Issue #397: Check if the tmux server is reachable and healthy.
|
|
158
|
+
* Returns { healthy, error } — does not throw. */
|
|
159
|
+
isServerHealthy(): Promise<{
|
|
160
|
+
healthy: boolean;
|
|
161
|
+
error: string | null;
|
|
162
|
+
}>;
|
|
163
|
+
/** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
|
|
164
|
+
* Server crash errors contain specific patterns from tmux CLI. */
|
|
165
|
+
isTmuxServerError(error: unknown): boolean;
|
|
157
166
|
/** Kill the entire tmux session. Used for cleanup on shutdown. */
|
|
158
167
|
killSession(sessionName?: string): Promise<void>;
|
|
159
168
|
/** #357: Poll until condition returns true or timeout elapses. */
|
package/dist/tmux.js
CHANGED
|
@@ -697,6 +697,32 @@ export class TmuxManager {
|
|
|
697
697
|
console.warn(`Tmux: killWindow failed for ${target}: ${e.message}`);
|
|
698
698
|
}
|
|
699
699
|
}
|
|
700
|
+
/** Issue #397: Check if the tmux server is reachable and healthy.
|
|
701
|
+
* Returns { healthy, error } — does not throw. */
|
|
702
|
+
async isServerHealthy() {
|
|
703
|
+
try {
|
|
704
|
+
await this.tmuxInternal('list-sessions');
|
|
705
|
+
return { healthy: true, error: null };
|
|
706
|
+
}
|
|
707
|
+
catch (e) {
|
|
708
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
709
|
+
return { healthy: false, error: msg };
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
/** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
|
|
713
|
+
* Server crash errors contain specific patterns from tmux CLI. */
|
|
714
|
+
isTmuxServerError(error) {
|
|
715
|
+
if (!(error instanceof Error))
|
|
716
|
+
return false;
|
|
717
|
+
const msg = error.message.toLowerCase();
|
|
718
|
+
// "no server running" = tmux server not started
|
|
719
|
+
// "failed to connect to server" = socket/protocol error
|
|
720
|
+
// "connection refused" = server died mid-operation
|
|
721
|
+
return msg.includes('no server running')
|
|
722
|
+
|| msg.includes('failed to connect')
|
|
723
|
+
|| msg.includes('connection refused')
|
|
724
|
+
|| msg.includes('no tmux server');
|
|
725
|
+
}
|
|
700
726
|
/** Kill the entire tmux session. Used for cleanup on shutdown. */
|
|
701
727
|
async killSession(sessionName) {
|
|
702
728
|
const target = sessionName ?? this.sessionName;
|