aegis-bridge 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ export interface ChannelHealthStatus {
14
14
  pendingCount: number;
15
15
  }
16
16
  /** Events a channel can subscribe to. */
17
- export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
17
+ export type SessionEvent = 'session.created' | 'session.ended' | 'message.user' | 'message.assistant' | 'message.thinking' | 'message.tool_use' | 'message.tool_result' | 'status.idle' | 'status.working' | 'status.permission' | 'status.question' | 'status.plan' | 'status.stall' | 'status.dead' | 'status.stopped' | 'status.error' | 'status.rate_limited' | 'status.permission_timeout' | 'status.recovered' | 'swarm.teammate_spawned' | 'swarm.teammate_finished';
18
18
  /** Payload for all session events. */
19
19
  export interface SessionEventPayload {
20
20
  event: SessionEvent;
package/dist/monitor.d.ts CHANGED
@@ -7,6 +7,7 @@
7
7
  * 3. Routes events to the ChannelManager (which fans out to Telegram, webhooks, etc.)
8
8
  */
9
9
  import { type SessionManager } from './session.js';
10
+ import { type TmuxManager } from './tmux.js';
10
11
  import { type ChannelManager } from './channels/index.js';
11
12
  import { type SessionEventBus } from './events.js';
12
13
  import { type JsonlWatcher } from './jsonl-watcher.js';
@@ -40,6 +41,9 @@ export declare class SessionMonitor {
40
41
  private deadNotified;
41
42
  private prevStatusForStall;
42
43
  private rateLimitedSessions;
44
+ private tmuxWasDown;
45
+ private lastTmuxHealthCheck;
46
+ private static readonly TMUX_HEALTH_CHECK_INTERVAL_MS;
43
47
  /** Issue #89 L4: Debounce status change broadcasts per session.
44
48
  * If multiple status changes happen within 500ms, only emit the last one.
45
49
  * Prevents rapid-fire notifications during state transitions. */
@@ -51,6 +55,9 @@ export declare class SessionMonitor {
51
55
  constructor(sessions: SessionManager, channels: ChannelManager, config?: MonitorConfig);
52
56
  /** Issue #32: Set the event bus for SSE streaming. */
53
57
  setEventBus(bus: SessionEventBus): void;
58
+ /** Issue #397: Set the TmuxManager reference for tmux health checks. */
59
+ private tmux?;
60
+ setTmuxManager(tmuxManager: TmuxManager): void;
54
61
  /** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
55
62
  setJsonlWatcher(watcher: JsonlWatcher): void;
56
63
  start(): void;
@@ -79,6 +86,8 @@ export declare class SessionMonitor {
79
86
  private makePayload;
80
87
  /** Check for dead tmux windows and notify via channels. */
81
88
  private checkDeadSessions;
89
+ /** Issue #397: Check tmux server health. Detect crashes and trigger reconciliation. */
90
+ private checkTmuxHealth;
82
91
  /** Clean up tracking for a killed session. */
83
92
  removeSession(sessionId: string): void;
84
93
  }
package/dist/monitor.js CHANGED
@@ -43,6 +43,10 @@ export class SessionMonitor {
43
43
  deadNotified = new Set(); // don't spam dead session events
44
44
  prevStatusForStall = new Map(); // track previous status for stall transition detection
45
45
  rateLimitedSessions = new Set(); // sessions in rate-limit backoff
46
+ // Issue #397: Track tmux server health for crash recovery
47
+ tmuxWasDown = false;
48
+ lastTmuxHealthCheck = 0;
49
+ static TMUX_HEALTH_CHECK_INTERVAL_MS = 10_000; // check every 10s
46
50
  /** Issue #89 L4: Debounce status change broadcasts per session.
47
51
  * If multiple status changes happen within 500ms, only emit the last one.
48
52
  * Prevents rapid-fire notifications during state transitions. */
@@ -61,6 +65,11 @@ export class SessionMonitor {
61
65
  setEventBus(bus) {
62
66
  this.eventBus = bus;
63
67
  }
68
+ /** Issue #397: TmuxManager reference used for tmux health checks, assigned via setTmuxManager(); checkTmuxHealth() returns early while it is unset. */
69
+ tmux;
70
+ setTmuxManager(tmuxManager) {
71
+ this.tmux = tmuxManager;
72
+ }
64
73
  /** Issue #84: Set the JSONL watcher for fs.watch-based message detection. */
65
74
  setJsonlWatcher(watcher) {
66
75
  this.jsonlWatcher = watcher;
@@ -130,6 +139,11 @@ export class SessionMonitor {
130
139
  this.lastDeadCheck = now;
131
140
  await this.checkDeadSessions();
132
141
  }
142
+ // Issue #397: Tmux server health check (every 10s)
143
+ if (now - this.lastTmuxHealthCheck >= SessionMonitor.TMUX_HEALTH_CHECK_INTERVAL_MS) {
144
+ this.lastTmuxHealthCheck = now;
145
+ await this.checkTmuxHealth();
146
+ }
133
147
  }
134
148
  /** Smart stall detection: multiple stall types with graduated thresholds.
135
149
  *
@@ -547,6 +561,33 @@ export class SessionMonitor {
547
561
  }
548
562
  }
549
563
  }
564
+ /** Issue #397: Check tmux server health. Does nothing until a TmuxManager has been set. Warns once when the server becomes unreachable; on the first healthy check after an outage it runs sessions.reconcileTmuxCrash() and, when any session was recovered or orphaned, sends 'status.recovered' to the channels. */
565
+ async checkTmuxHealth() {
566
+ if (!this.tmux)
567
+ return;
568
+ const { healthy } = await this.tmux.isServerHealthy();
569
+ if (!healthy) {
570
+ if (!this.tmuxWasDown) {
571
+ console.warn('Monitor: tmux server is unreachable — sessions may be orphaned');
572
+ this.tmuxWasDown = true;
573
+ }
574
+ return;
575
+ }
576
+ // Tmux is healthy now; the block below only runs if an earlier check saw it down
577
+ if (this.tmuxWasDown) {
578
+ console.log('Monitor: tmux server recovered — triggering crash reconciliation');
579
+ this.tmuxWasDown = false;
580
+ // Trigger crash reconciliation to re-attach or mark orphaned sessions. NOTE(review): tmuxWasDown is already cleared above, so if this call rejects the reconciliation is not retried on a later check — confirm that is intended.
581
+ const result = await this.sessions.reconcileTmuxCrash();
582
+ if (result.recovered > 0 || result.orphaned > 0) {
583
+ console.log(`Monitor: crash reconciliation complete — recovered: ${result.recovered}, orphaned: ${result.orphaned}`);
584
+ // Notify channels about recovery. NOTE(review): this iterates every session returned by listSessions(), not only the re-attached ones, so orphaned or unaffected sessions may also receive the "re-attached" message — confirm.
585
+ for (const session of this.sessions.listSessions()) {
586
+ await this.channels.statusChange(this.makePayload('status.recovered', session, `tmux server recovered. Session ${session.windowName} re-attached.`));
587
+ }
588
+ }
589
+ }
590
+ }
550
591
  /** Clean up tracking for a killed session. */
551
592
  removeSession(sessionId) {
552
593
  // Issue #84: Stop watching JSONL file for this session
package/dist/server.js CHANGED
@@ -230,16 +230,19 @@ const createSessionSchema = z.object({
230
230
  permissionMode: z.enum(['default', 'bypassPermissions', 'plan']).optional(),
231
231
  autoApprove: z.boolean().optional(),
232
232
  }).strict();
233
- // Health
233
+ // Health — Issue #397: includes tmux server health check; reports status 'degraded' instead of 'ok' when tmux.isServerHealthy() is not healthy
234
234
  app.get('/v1/health', async () => {
235
235
  const pkg = await import('../package.json', { with: { type: 'json' } });
236
236
  const activeCount = sessions.listSessions().length;
237
237
  const totalCount = metrics.getTotalSessionsCreated();
238
+ const tmuxHealth = await tmux.isServerHealthy();
239
+ const status = tmuxHealth.healthy ? 'ok' : 'degraded';
238
240
  return {
239
- status: 'ok',
241
+ status,
240
242
  version: pkg.default.version,
241
243
  uptime: process.uptime(),
242
244
  sessions: { active: activeCount, total: totalCount },
245
+ tmux: tmuxHealth,
243
246
  timestamp: new Date().toISOString(),
244
247
  };
245
248
  });
@@ -248,11 +251,14 @@ app.get('/health', async () => {
248
251
  const pkg = await import('../package.json', { with: { type: 'json' } });
249
252
  const activeCount = sessions.listSessions().length;
250
253
  const totalCount = metrics.getTotalSessionsCreated();
254
+ const tmuxHealth = await tmux.isServerHealthy();
255
+ const status = tmuxHealth.healthy ? 'ok' : 'degraded';
251
256
  return {
252
- status: 'ok',
257
+ status,
253
258
  version: pkg.default.version,
254
259
  uptime: process.uptime(),
255
260
  sessions: { active: activeCount, total: totalCount },
261
+ tmux: tmuxHealth,
256
262
  timestamp: new Date().toISOString(),
257
263
  };
258
264
  });
@@ -1427,6 +1433,8 @@ async function main() {
1427
1433
  await channels.init(handleInbound);
1428
1434
  // Wire SSE event bus (Issue #32)
1429
1435
  monitor.setEventBus(eventBus);
1436
+ // Issue #397: Wire TmuxManager for tmux health monitoring
1437
+ monitor.setTmuxManager(tmux);
1430
1438
  // Issue #84: Wire JSONL watcher for fs.watch-based message detection
1431
1439
  jsonlWatcher = new JsonlWatcher();
1432
1440
  monitor.setJsonlWatcher(jsonlWatcher);
package/dist/session.d.ts CHANGED
@@ -70,8 +70,16 @@ export declare class SessionManager {
70
70
  private cleanTmpFiles;
71
71
  /** Load state from disk. */
72
72
  load(): Promise<void>;
73
- /** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones. */
73
+ /** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
74
+ * Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
74
75
  private reconcile;
76
+ /** Issue #397: Reconcile after tmux server crash recovery.
77
+ * Called when the monitor detects tmux server came back after a crash.
78
+ * Returns counts for observability. */
79
+ reconcileTmuxCrash(): Promise<{
80
+ recovered: number;
81
+ orphaned: number;
82
+ }>;
75
83
  /** Save state to disk atomically (write to temp, then rename).
76
84
  * #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
77
85
  save(): Promise<void>;
package/dist/session.js CHANGED
@@ -143,15 +143,19 @@ export class SessionManager {
143
143
  // Reconcile: verify tmux windows still exist, clean up dead sessions
144
144
  await this.reconcile();
145
145
  }
146
- /** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones. */
146
+ /** Reconcile state with actual tmux windows. Remove dead sessions, restart discovery for live ones.
147
+ * Issue #397: Also handles re-attach by window name when windowId is stale after tmux restart. */
147
148
  async reconcile() {
148
149
  const windows = await this.tmux.listWindows();
149
150
  const windowIds = new Set(windows.map(w => w.windowId));
150
- const windowNames = new Set(windows.map(w => w.windowName));
151
+ const windowByName = new Map();
152
+ for (const w of windows)
153
+ windowByName.set(w.windowName, w);
151
154
  let changed = false;
152
155
  for (const [id, session] of Object.entries(this.state.sessions)) {
153
- const alive = windowIds.has(session.windowId) || windowNames.has(session.windowName);
154
- if (!alive) {
156
+ const windowIdAlive = windowIds.has(session.windowId);
157
+ const windowNameAlive = windowByName.has(session.windowName);
158
+ if (!windowIdAlive && !windowNameAlive) {
155
159
  console.log(`Reconcile: session ${session.windowName} (${id.slice(0, 8)}) — tmux window gone, removing`);
156
160
  // Restore patched settings before removing dead session
157
161
  if (session.settingsPatched) {
@@ -160,6 +164,19 @@ export class SessionManager {
160
164
  delete this.state.sessions[id];
161
165
  changed = true;
162
166
  }
167
+ else if (!windowIdAlive && windowNameAlive) {
168
+ // Issue #397: Window exists with same name but different ID (tmux restarted).
169
+ // Re-attach by updating the windowId to the new one.
170
+ const win = windowByName.get(session.windowName);
171
+ const oldWindowId = session.windowId;
172
+ session.windowId = win.windowId;
173
+ console.log(`Reconcile: session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
174
+ // Restart discovery if needed
175
+ if (!session.claudeSessionId || !session.jsonlPath) {
176
+ this.startSessionIdDiscovery(id);
177
+ }
178
+ changed = true;
179
+ }
163
180
  else {
164
181
  // Session is alive — restart discovery if needed
165
182
  if (!session.claudeSessionId || !session.jsonlPath) {
@@ -172,7 +189,9 @@ export class SessionManager {
172
189
  }
173
190
  }
174
191
  // P0 fix: On startup, purge session_map entries that don't correspond to active sessions.
175
- await this.purgeStaleSessionMapEntries(windowIds, windowNames);
192
+ const finalWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
193
+ const finalWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
194
+ await this.purgeStaleSessionMapEntries(finalWindowIds, finalWindowNames);
176
195
  // Issue #35: Adopt orphaned tmux windows (cc-* prefix) not in state
177
196
  const knownWindowIds = new Set(Object.values(this.state.sessions).map(s => s.windowId));
178
197
  const knownWindowNames = new Set(Object.values(this.state.sessions).map(s => s.windowName));
@@ -207,6 +226,56 @@ export class SessionManager {
207
226
  await this.save();
208
227
  }
209
228
  }
229
+ /** Issue #397: Reconcile after tmux server crash recovery.
230
+ * Called when the monitor detects tmux server came back after a crash. Sessions whose windowId is still listed are left untouched; sessions found only by window name take the new windowId (counted as recovered); all others are kept in state with status 'unknown' and lastDeadAt set (counted as orphaned).
231
+ * Returns the { recovered, orphaned } counts for observability; state is saved only when something changed. */
232
+ async reconcileTmuxCrash() {
233
+ console.log('Reconcile: tmux crash recovery — checking all sessions');
234
+ const windows = await this.tmux.listWindows();
235
+ const windowIds = new Set(windows.map(w => w.windowId));
236
+ const windowByName = new Map();
237
+ for (const w of windows)
238
+ windowByName.set(w.windowName, w);
239
+ let recovered = 0;
240
+ let orphaned = 0;
241
+ let changed = false;
242
+ for (const [id, session] of Object.entries(this.state.sessions)) {
243
+ const windowIdAlive = windowIds.has(session.windowId);
244
+ const windowNameAlive = windowByName.has(session.windowName);
245
+ if (windowIdAlive) {
246
+ // Window ID still matches — session survived the crash
247
+ continue;
248
+ }
249
+ if (windowNameAlive) {
250
+ // Window exists by name but ID changed — re-attach
251
+ const win = windowByName.get(session.windowName);
252
+ const oldWindowId = session.windowId;
253
+ session.windowId = win.windowId;
254
+ session.status = 'unknown';
255
+ session.lastActivity = Date.now();
256
+ console.log(`Reconcile (crash): session ${session.windowName} re-attached: ${oldWindowId} → ${win.windowId}`);
257
+ // Restart discovery only when the Claude session ID or JSONL path is still missing
258
+ if (!session.claudeSessionId || !session.jsonlPath) {
259
+ this.startSessionIdDiscovery(id);
260
+ this.startFilesystemDiscovery(id, session.workDir);
261
+ }
262
+ recovered++;
263
+ changed = true;
264
+ }
265
+ else {
266
+ // Window gone entirely — session is orphaned. Unlike reconcile(), the entry is kept in state rather than deleted.
267
+ console.log(`Reconcile (crash): session ${session.windowName} (${id.slice(0, 8)}) — window gone, marking orphaned`);
268
+ session.status = 'unknown';
269
+ session.lastDeadAt = Date.now();
270
+ orphaned++;
271
+ changed = true;
272
+ }
273
+ }
274
+ if (changed) {
275
+ await this.save();
276
+ }
277
+ return { recovered, orphaned };
278
+ }
210
279
  /** Save state to disk atomically (write to temp, then rename).
211
280
  * #218: Uses a write queue to serialize concurrent saves and prevent corruption. */
212
281
  async save() {
package/dist/tmux.d.ts CHANGED
@@ -154,6 +154,15 @@ export declare class TmuxManager {
154
154
  resizePane(windowId: string, cols: number, rows: number): Promise<void>;
155
155
  /** Kill a window. */
156
156
  killWindow(windowId: string): Promise<void>;
157
+ /** Issue #397: Check if the tmux server is reachable and healthy.
158
+ * Returns { healthy, error } — does not throw. */
159
+ isServerHealthy(): Promise<{
160
+ healthy: boolean;
161
+ error: string | null;
162
+ }>;
163
+ /** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
164
+ * Server crash errors contain specific patterns from tmux CLI. */
165
+ isTmuxServerError(error: unknown): boolean;
157
166
  /** Kill the entire tmux session. Used for cleanup on shutdown. */
158
167
  killSession(sessionName?: string): Promise<void>;
159
168
  /** #357: Poll until condition returns true or timeout elapses. */
package/dist/tmux.js CHANGED
@@ -697,6 +697,32 @@ export class TmuxManager {
697
697
  console.warn(`Tmux: killWindow failed for ${target}: ${e.message}`);
698
698
  }
699
699
  }
700
+ /** Issue #397: Check if the tmux server is reachable and healthy by running `list-sessions` through tmuxInternal.
701
+ * Returns { healthy, error } — does not throw: any rejection is reported as unhealthy, with error set to the error message (or the stringified value when it is not an Error). */
702
+ async isServerHealthy() {
703
+ try {
704
+ await this.tmuxInternal('list-sessions');
705
+ return { healthy: true, error: null };
706
+ }
707
+ catch (e) {
708
+ const msg = e instanceof Error ? e.message : String(e);
709
+ return { healthy: false, error: msg };
710
+ }
711
+ }
712
+ /** Issue #397: Check if a tmux error indicates the server crashed (vs window-not-found).
712
+ * Server crash errors contain specific patterns from tmux CLI; matching is a case-insensitive substring test on error.message. Values that are not Error instances always yield false. */
713
+ isTmuxServerError(error) {
714
+ if (!(error instanceof Error))
715
+ return false;
716
+ const msg = error.message.toLowerCase();
717
+ // "no server running" = tmux server not started
718
+ // "failed to connect" = socket/protocol error (also matches the longer "failed to connect to server" text)
719
+ // "connection refused" = server died mid-operation; "no tmux server" is likewise treated as a server-level failure
720
+ return msg.includes('no server running')
721
+ || msg.includes('failed to connect')
722
+ || msg.includes('connection refused')
723
+ || msg.includes('no tmux server');
724
+ }
700
726
  /** Kill the entire tmux session. Used for cleanup on shutdown. */
701
727
  async killSession(sessionName) {
702
728
  const target = sessionName ?? this.sessionName;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "aegis-bridge",
3
- "version": "2.3.0",
3
+ "version": "2.3.1",
4
4
  "type": "module",
5
5
  "description": "Orchestrate Claude Code sessions via API. Create, brief, monitor, refine, ship.",
6
6
  "main": "dist/server.js",