instar 0.7.51 → 0.7.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,6 @@ import { QuotaTracker } from '../monitoring/QuotaTracker.js';
36
36
  import { AccountSwitcher } from '../monitoring/AccountSwitcher.js';
37
37
  import { QuotaNotifier } from '../monitoring/QuotaNotifier.js';
38
38
  import { classifySessionDeath } from '../monitoring/QuotaExhaustionDetector.js';
39
- import { SessionWatchdog } from '../monitoring/SessionWatchdog.js';
40
39
  import { installAutoStart } from './setup.js';
41
40
  /**
42
41
  * Check if autostart is installed for this project.
@@ -647,31 +646,6 @@ export async function startServer(options) {
647
646
  scheduler.notifyJobComplete(session.id, session.tmuxSession);
648
647
  });
649
648
  }
650
- // Session Watchdog — auto-remediation for stuck commands
651
- let watchdog;
652
- if (config.monitoring.watchdog?.enabled) {
653
- watchdog = new SessionWatchdog(config, sessionManager, state);
654
- watchdog.on('intervention', (event) => {
655
- if (telegram) {
656
- const topicId = telegram.getTopicForSession(event.sessionName);
657
- if (topicId) {
658
- const levelNames = ['Monitoring', 'Ctrl+C', 'SIGTERM', 'SIGKILL', 'Kill Session'];
659
- const levelName = levelNames[event.level] || `Level ${event.level}`;
660
- telegram.sendToTopic(topicId, `🔧 Watchdog [${levelName}]: ${event.action}\nStuck: \`${event.stuckCommand.slice(0, 60)}\``).catch(() => { });
661
- }
662
- }
663
- });
664
- watchdog.on('recovery', (sessionName, fromLevel) => {
665
- if (telegram) {
666
- const topicId = telegram.getTopicForSession(sessionName);
667
- if (topicId) {
668
- telegram.sendToTopic(topicId, `✅ Watchdog: session recovered (was at escalation level ${fromLevel})`).catch(() => { });
669
- }
670
- }
671
- });
672
- watchdog.start();
673
- console.log(pc.green(' Session Watchdog enabled'));
674
- }
675
649
  // Set up feedback and update checking
676
650
  let feedback;
677
651
  if (config.feedback) {
@@ -826,7 +800,7 @@ export async function startServer(options) {
826
800
  }
827
801
  });
828
802
  sleepWakeDetector.start();
829
- const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, publisher, viewer, tunnel, evolution, watchdog });
803
+ const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, publisher, viewer, tunnel, evolution });
830
804
  await server.start();
831
805
  // Start tunnel AFTER server is listening
832
806
  if (tunnel) {
@@ -54,11 +54,17 @@ export class AutoUpdater {
54
54
  if (this.interval)
55
55
  return;
56
56
  const intervalMs = this.config.checkIntervalMinutes * 60 * 1000;
57
- // Warn if running from npx cache (auto-updates won't work properly)
57
+ // Detect npx cache — auto-apply and restart cause infinite loops when
58
+ // running from npx because the cache still resolves to the old version
59
+ // after npm installs the update. The restart finds the update again,
60
+ // applies it again, restarts again — forever, killing all sessions each time.
58
61
  const scriptPath = process.argv[1] || '';
59
- if (scriptPath.includes('.npm/_npx') || scriptPath.includes('/_npx/')) {
60
- console.warn('[AutoUpdater] WARNING: Running from npx cache. Auto-updates require a global install.\n' +
61
- '[AutoUpdater] Run: npm install -g instar');
62
+ const runningFromNpx = scriptPath.includes('.npm/_npx') || scriptPath.includes('/_npx/');
63
+ if (runningFromNpx) {
64
+ this.config.autoApply = false;
65
+ this.config.autoRestart = false;
66
+ console.warn('[AutoUpdater] Running from npx cache. Auto-apply and auto-restart disabled to prevent restart loops.\n' +
67
+ '[AutoUpdater] Run: npm install -g instar (then restart with: instar server start)');
62
68
  }
63
69
  console.log(`[AutoUpdater] Started (every ${this.config.checkIntervalMinutes}m, ` +
64
70
  `autoApply: ${this.config.autoApply}, autoRestart: ${this.config.autoRestart})`);
@@ -122,8 +128,7 @@ export class AutoUpdater {
122
128
  if (!this.config.autoApply) {
123
129
  // Just notify — don't apply
124
130
  await this.notify(`Update available: v${info.currentVersion} → v${info.latestVersion}\n\n` +
125
- (info.changeSummary ? `What changed:\n${info.changeSummary}\n\n` : '') +
126
- `Details: ${info.changelogUrl || 'https://github.com/SageMindAI/instar/releases'}\n\n` +
131
+ (info.changeSummary ? `Changes: ${info.changeSummary}\n\n` : '') +
127
132
  `Auto-apply is disabled. Apply manually:\n` +
128
133
  `curl -X POST http://localhost:${this.getPort()}/updates/apply`);
129
134
  return;
@@ -148,19 +153,13 @@ export class AutoUpdater {
148
153
  console.log(`[AutoUpdater] Updated: v${result.previousVersion} → v${result.newVersion}`);
149
154
  // Step 5: Notify via Telegram
150
155
  const restartNote = result.restartNeeded && this.config.autoRestart
151
- ? '\nServer is restarting now...'
156
+ ? 'Server is restarting now...'
152
157
  : result.restartNeeded
153
- ? '\nA server restart is needed to use the new version.'
158
+ ? 'A server restart is needed to use the new version.'
154
159
  : '';
155
- const changeSummary = info.changeSummary
156
- ? `What changed:\n${info.changeSummary}\n`
157
- : '';
158
- const detailsUrl = info.changelogUrl || 'https://github.com/SageMindAI/instar/releases';
159
160
  await this.notify(`Updated: v${result.previousVersion} → v${result.newVersion}\n\n` +
160
- changeSummary +
161
- `Details: ${detailsUrl}\n` +
162
- restartNote +
163
- `\n\nTo disable auto-updates, set "autoApply": false in .instar/config.json under "updates".`);
161
+ (info.changeSummary ? `What changed:\n${info.changeSummary}\n\n` : '') +
162
+ restartNote);
164
163
  // Step 6: Self-restart if needed and configured
165
164
  if (result.restartNeeded && this.config.autoRestart) {
166
165
  // Brief delay to let the Telegram notification send
@@ -68,12 +68,6 @@ export declare class SessionManager extends EventEmitter {
68
68
  * Send input to a running tmux session.
69
69
  */
70
70
  sendInput(tmuxSession: string, input: string): boolean;
71
- /**
72
- * Send a tmux key sequence (without -l literal flag).
73
- * Use for special keys like 'C-c' (Ctrl+C), 'Enter', 'Escape'.
74
- * Unlike sendInput() which uses -l (literal), this sends key names directly.
75
- */
76
- sendKey(tmuxSession: string, key: string): boolean;
77
71
  /**
78
72
  * List all sessions that are currently running.
79
73
  * Pure filter — does not mutate state. The monitor tick handles lifecycle transitions.
@@ -297,20 +297,6 @@ export class SessionManager extends EventEmitter {
297
297
  return false;
298
298
  }
299
299
  }
300
- /**
301
- * Send a tmux key sequence (without -l literal flag).
302
- * Use for special keys like 'C-c' (Ctrl+C), 'Enter', 'Escape'.
303
- * Unlike sendInput() which uses -l (literal), this sends key names directly.
304
- */
305
- sendKey(tmuxSession, key) {
306
- try {
307
- execFileSync(this.config.tmuxPath, ['send-keys', '-t', `=${tmuxSession}:`, key], { encoding: 'utf-8', timeout: 5000 });
308
- return true;
309
- }
310
- catch {
311
- return false;
312
- }
313
- }
314
300
  /**
315
301
  * List all sessions that are currently running.
316
302
  * Pure filter — does not mutate state. The monitor tick handles lifecycle transitions.
@@ -478,25 +464,35 @@ export class SessionManager extends EventEmitter {
478
464
  const exactTarget = `=${tmuxSession}:`;
479
465
  try {
480
466
  if (text.includes('\n')) {
481
- // Multi-line: pipe into tmux load-buffer via stdin, then paste into pane.
467
+ // Multi-line: write to temp file, load into tmux buffer, paste into pane.
482
468
  // This avoids newlines being treated as Enter keypresses which would
483
469
  // fragment the message into multiple Claude prompts.
484
- // Uses stdin pipe (load-buffer -) instead of temp files to avoid
485
- // macOS TCC "access data from other apps" permission prompts.
486
- execFileSync(this.config.tmuxPath, ['load-buffer', '-'], {
487
- encoding: 'utf-8', timeout: 5000, input: text,
488
- });
489
- execFileSync(this.config.tmuxPath, ['paste-buffer', '-t', exactTarget, '-p'], {
490
- encoding: 'utf-8', timeout: 5000,
491
- });
492
- // Brief delay to let the terminal process the paste before sending Enter.
493
- // Without this, the Enter arrives before paste processing completes and
494
- // the message sits in the input buffer without being submitted.
495
- execFileSync('/bin/sleep', ['0.3'], { timeout: 2000 });
496
- // Send Enter to submit
497
- execFileSync(this.config.tmuxPath, ['send-keys', '-t', exactTarget, 'Enter'], {
498
- encoding: 'utf-8', timeout: 5000,
499
- });
470
+ const tmpDir = path.join('/tmp', 'instar-inject');
471
+ fs.mkdirSync(tmpDir, { recursive: true });
472
+ const tmpPath = path.join(tmpDir, `msg-${Date.now()}-${process.pid}.txt`);
473
+ fs.writeFileSync(tmpPath, text);
474
+ try {
475
+ execFileSync(this.config.tmuxPath, ['load-buffer', tmpPath], {
476
+ encoding: 'utf-8', timeout: 5000,
477
+ });
478
+ execFileSync(this.config.tmuxPath, ['paste-buffer', '-t', exactTarget, '-p'], {
479
+ encoding: 'utf-8', timeout: 5000,
480
+ });
481
+ // Brief delay to let the terminal process the paste before sending Enter.
482
+ // Without this, the Enter arrives before paste processing completes and
483
+ // the message sits in the input buffer without being submitted.
484
+ execFileSync('/bin/sleep', ['0.3'], { timeout: 2000 });
485
+ // Send Enter to submit
486
+ execFileSync(this.config.tmuxPath, ['send-keys', '-t', exactTarget, 'Enter'], {
487
+ encoding: 'utf-8', timeout: 5000,
488
+ });
489
+ }
490
+ finally {
491
+ try {
492
+ fs.unlinkSync(tmpPath);
493
+ }
494
+ catch { /* ignore */ }
495
+ }
500
496
  }
501
497
  else {
502
498
  // Single-line: simple send-keys
@@ -60,8 +60,7 @@ export declare class UpdateChecker {
60
60
  updatedAt: string;
61
61
  } | null;
62
62
  /**
63
- * Fetch human-readable changelog from GitHub releases, falling back to
64
- * recent commit messages if no release exists for this version.
63
+ * Fetch human-readable changelog from GitHub releases.
65
64
  */
66
65
  fetchChangelog(version: string): Promise<string | undefined>;
67
66
  /**
@@ -226,11 +226,9 @@ export class UpdateChecker {
226
226
  }
227
227
  }
228
228
  /**
229
- * Fetch human-readable changelog from GitHub releases, falling back to
230
- * recent commit messages if no release exists for this version.
229
+ * Fetch human-readable changelog from GitHub releases.
231
230
  */
232
231
  async fetchChangelog(version) {
233
- // Try GitHub release first
234
232
  try {
235
233
  const tag = version.startsWith('v') ? version : `v${version}`;
236
234
  const response = await fetch(`${GITHUB_RELEASES_URL}/tags/${tag}`, {
@@ -240,41 +238,16 @@ export class UpdateChecker {
240
238
  },
241
239
  signal: AbortSignal.timeout(10000),
242
240
  });
243
- if (response.ok) {
244
- const release = await response.json();
245
- if (release.body) {
246
- const summary = release.body.slice(0, 500);
247
- return summary.length < release.body.length ? summary + '...' : summary;
248
- }
249
- if (release.name)
250
- return release.name;
251
- }
252
- }
253
- catch {
254
- // Non-critical — try commit fallback
255
- }
256
- // Fallback: fetch recent commits from GitHub
257
- try {
258
- const response = await fetch('https://api.github.com/repos/SageMindAI/instar/commits?per_page=5', {
259
- headers: {
260
- 'Accept': 'application/vnd.github.v3+json',
261
- 'User-Agent': 'instar-update-checker',
262
- },
263
- signal: AbortSignal.timeout(10000),
264
- });
265
- if (response.ok) {
266
- const commits = await response.json();
267
- if (commits.length > 0) {
268
- const lines = commits
269
- .map(c => {
270
- // Take first line of commit message only
271
- const firstLine = c.commit.message.split('\n')[0];
272
- return `• ${firstLine}`;
273
- })
274
- .join('\n');
275
- return `Recent changes:\n${lines}`;
276
- }
241
+ if (!response.ok)
242
+ return undefined;
243
+ const release = await response.json();
244
+ if (release.body) {
245
+ // Truncate to first 500 chars for concise summary
246
+ const summary = release.body.slice(0, 500);
247
+ return summary.length < release.body.length ? summary + '...' : summary;
277
248
  }
249
+ if (release.name)
250
+ return release.name;
278
251
  }
279
252
  catch {
280
253
  // Non-critical
@@ -712,14 +712,6 @@ export interface MonitoringConfig {
712
712
  memoryMonitoring: boolean;
713
713
  /** Health check interval in ms */
714
714
  healthCheckIntervalMs: number;
715
- /** Session watchdog — auto-remediation for stuck commands */
716
- watchdog?: {
717
- enabled: boolean;
718
- /** Seconds before a command is considered stuck (default: 180) */
719
- stuckCommandSec?: number;
720
- /** Poll interval in ms (default: 30000) */
721
- pollIntervalMs?: number;
722
- };
723
715
  }
724
716
  /** @deprecated Use InstarConfig instead */
725
717
  export type AgentKitConfig = InstarConfig;
@@ -7,15 +7,13 @@
7
7
  import type { SessionManager } from '../core/SessionManager.js';
8
8
  import type { JobScheduler } from '../scheduler/JobScheduler.js';
9
9
  import type { HealthStatus, InstarConfig } from '../core/types.js';
10
- import type { SessionWatchdog } from './SessionWatchdog.js';
11
10
  export declare class HealthChecker {
12
11
  private config;
13
12
  private sessionManager;
14
13
  private scheduler;
15
- private watchdog;
16
14
  private checkInterval;
17
15
  private lastStatus;
18
- constructor(config: InstarConfig, sessionManager: SessionManager, scheduler?: JobScheduler | null, watchdog?: SessionWatchdog | null);
16
+ constructor(config: InstarConfig, sessionManager: SessionManager, scheduler?: JobScheduler | null);
19
17
  /**
20
18
  * Run all health checks and return aggregated status.
21
19
  */
@@ -11,14 +11,12 @@ export class HealthChecker {
11
11
  config;
12
12
  sessionManager;
13
13
  scheduler;
14
- watchdog;
15
14
  checkInterval = null;
16
15
  lastStatus = null;
17
- constructor(config, sessionManager, scheduler = null, watchdog = null) {
16
+ constructor(config, sessionManager, scheduler = null) {
18
17
  this.config = config;
19
18
  this.sessionManager = sessionManager;
20
19
  this.scheduler = scheduler;
21
- this.watchdog = watchdog;
22
20
  }
23
21
  /**
24
22
  * Run all health checks and return aggregated status.
@@ -32,17 +30,6 @@ export class HealthChecker {
32
30
  if (this.scheduler) {
33
31
  components.scheduler = this.checkScheduler();
34
32
  }
35
- if (this.watchdog) {
36
- const wdStatus = this.watchdog.getStatus();
37
- const intervening = wdStatus.sessions.filter(s => s.escalation && s.escalation.level > 0);
38
- components.watchdog = {
39
- status: intervening.length > 0 ? 'degraded' : 'healthy',
40
- message: intervening.length > 0
41
- ? `Intervening on ${intervening.length} session(s)`
42
- : `Monitoring${wdStatus.enabled ? '' : ' (disabled)'}`,
43
- lastCheck: new Date().toISOString(),
44
- };
45
- }
46
33
  // Aggregate: worst component status becomes overall status
47
34
  const statuses = Object.values(components).map(c => c.status);
48
35
  let overall = 'healthy';
@@ -21,7 +21,6 @@ import type { TelegraphService } from '../publishing/TelegraphService.js';
21
21
  import type { PrivateViewer } from '../publishing/PrivateViewer.js';
22
22
  import type { TunnelManager } from '../tunnel/TunnelManager.js';
23
23
  import type { EvolutionManager } from '../core/EvolutionManager.js';
24
- import type { SessionWatchdog } from '../monitoring/SessionWatchdog.js';
25
24
  export declare class AgentServer {
26
25
  private app;
27
26
  private server;
@@ -44,7 +43,6 @@ export declare class AgentServer {
44
43
  viewer?: PrivateViewer;
45
44
  tunnel?: TunnelManager;
46
45
  evolution?: EvolutionManager;
47
- watchdog?: SessionWatchdog;
48
46
  });
49
47
  /**
50
48
  * Start the HTTP server.
@@ -39,7 +39,6 @@ export class AgentServer {
39
39
  viewer: options.viewer ?? null,
40
40
  tunnel: options.tunnel ?? null,
41
41
  evolution: options.evolution ?? null,
42
- watchdog: options.watchdog ?? null,
43
42
  startTime: this.startTime,
44
43
  });
45
44
  this.app.use(routes);
@@ -21,7 +21,6 @@ import type { TelegraphService } from '../publishing/TelegraphService.js';
21
21
  import type { PrivateViewer } from '../publishing/PrivateViewer.js';
22
22
  import type { TunnelManager } from '../tunnel/TunnelManager.js';
23
23
  import type { EvolutionManager } from '../core/EvolutionManager.js';
24
- import type { SessionWatchdog } from '../monitoring/SessionWatchdog.js';
25
24
  export interface RouteContext {
26
25
  config: InstarConfig;
27
26
  sessionManager: SessionManager;
@@ -39,7 +38,6 @@ export interface RouteContext {
39
38
  viewer: PrivateViewer | null;
40
39
  tunnel: TunnelManager | null;
41
40
  evolution: EvolutionManager | null;
42
- watchdog: SessionWatchdog | null;
43
41
  startTime: Date;
44
42
  }
45
43
  export declare function createRoutes(ctx: RouteContext): Router;
@@ -8,6 +8,7 @@ import { Router } from 'express';
8
8
  import { execFileSync } from 'node:child_process';
9
9
  import { createHash, timingSafeEqual } from 'node:crypto';
10
10
  import fs from 'node:fs';
11
+ import os from 'node:os';
11
12
  import path from 'node:path';
12
13
  import { rateLimiter, signViewPath } from './middleware.js';
13
14
  // Validation patterns for route parameters
@@ -64,7 +65,6 @@ export function createRoutes(ctx) {
64
65
  heapTotal: Math.round(mem.heapTotal / 1024 / 1024),
65
66
  };
66
67
  // System-wide memory state
67
- const os = require('node:os');
68
68
  const totalMem = os.totalmem();
69
69
  const freeMem = os.freemem();
70
70
  base.systemMemory = {
@@ -1695,27 +1695,6 @@ export function createRoutes(ctx) {
1695
1695
  }
1696
1696
  res.json({ ok: true, id: req.params.id, status });
1697
1697
  });
1698
- // ── Watchdog ──────────────────────────────────────────────────
1699
- router.get('/watchdog/status', (req, res) => {
1700
- if (!ctx.watchdog) {
1701
- res.json({ enabled: false, sessions: [], interventionHistory: [] });
1702
- return;
1703
- }
1704
- res.json(ctx.watchdog.getStatus());
1705
- });
1706
- router.post('/watchdog/toggle', (req, res) => {
1707
- if (!ctx.watchdog) {
1708
- res.status(404).json({ error: 'Watchdog not configured' });
1709
- return;
1710
- }
1711
- const { enabled } = req.body;
1712
- if (typeof enabled !== 'boolean') {
1713
- res.status(400).json({ error: 'enabled (boolean) required' });
1714
- return;
1715
- }
1716
- ctx.watchdog.setEnabled(enabled);
1717
- res.json({ enabled: ctx.watchdog.isEnabled() });
1718
- });
1719
1698
  return router;
1720
1699
  }
1721
1700
  export function formatUptime(ms) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "instar",
3
- "version": "0.7.51",
3
+ "version": "0.7.52",
4
4
  "description": "Persistent autonomy infrastructure for AI agents",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,11 +0,0 @@
1
- > Why do I have a folder named ".vercel" in my project?
2
- The ".vercel" folder is created when you link a directory to a Vercel project.
3
-
4
- > What does the "project.json" file contain?
5
- The "project.json" file contains:
6
- - The ID of the Vercel project that you linked ("projectId")
7
- - The ID of the user or team your Vercel project is owned by ("orgId")
8
-
9
- > Should I commit the ".vercel" folder?
10
- No, you should not share the ".vercel" folder with anyone.
11
- Upon creation, it will be automatically added to your ".gitignore" file.
@@ -1 +0,0 @@
1
- {"projectId":"prj_evM5LcItYL3IAmw8zNvEPGrHeaya","orgId":"team_dHctwIDcV3X9ydapQlCPHFGI","projectName":"claude-agent-kit"}
@@ -1,83 +0,0 @@
1
- /**
2
- * SessionWatchdog — Auto-remediation for stuck Claude sessions (Instar port).
3
- *
4
- * Detects when a Claude session has a long-running bash command and escalates
5
- * from gentle (Ctrl+C) to forceful (SIGKILL + session kill). Adapted from
6
- * Dawn Server's SessionWatchdog for Instar's self-contained architecture.
7
- *
8
- * Escalation pipeline:
9
- * Level 0: Monitoring (default)
10
- * Level 1: Ctrl+C via tmux send-keys
11
- * Level 2: SIGTERM the stuck child PID
12
- * Level 3: SIGKILL the stuck child PID
13
- * Level 4: Kill tmux session
14
- */
15
- import { EventEmitter } from 'node:events';
16
- import type { SessionManager } from '../core/SessionManager.js';
17
- import type { StateManager } from '../core/StateManager.js';
18
- import type { InstarConfig } from '../core/types.js';
19
- export declare enum EscalationLevel {
20
- Monitoring = 0,
21
- CtrlC = 1,
22
- SigTerm = 2,
23
- SigKill = 3,
24
- KillSession = 4
25
- }
26
- interface EscalationState {
27
- level: EscalationLevel;
28
- levelEnteredAt: number;
29
- stuckChildPid: number;
30
- stuckCommand: string;
31
- retryCount: number;
32
- }
33
- export interface InterventionEvent {
34
- sessionName: string;
35
- level: EscalationLevel;
36
- action: string;
37
- stuckCommand: string;
38
- stuckPid: number;
39
- timestamp: number;
40
- }
41
- export interface WatchdogEvents {
42
- intervention: [event: InterventionEvent];
43
- recovery: [sessionName: string, fromLevel: EscalationLevel];
44
- }
45
- export declare class SessionWatchdog extends EventEmitter {
46
- private config;
47
- private sessionManager;
48
- private state;
49
- private interval;
50
- private escalationState;
51
- private interventionHistory;
52
- private enabled;
53
- private running;
54
- private stuckThresholdMs;
55
- private pollIntervalMs;
56
- constructor(config: InstarConfig, sessionManager: SessionManager, state: StateManager);
57
- start(): void;
58
- stop(): void;
59
- setEnabled(enabled: boolean): void;
60
- isEnabled(): boolean;
61
- isManaging(sessionName: string): boolean;
62
- getStatus(): {
63
- enabled: boolean;
64
- sessions: Array<{
65
- name: string;
66
- escalation: EscalationState | null;
67
- }>;
68
- interventionHistory: InterventionEvent[];
69
- };
70
- private poll;
71
- private checkSession;
72
- private handleEscalation;
73
- private getClaudePid;
74
- private getChildProcesses;
75
- private isExcluded;
76
- private parseElapsed;
77
- private sendSignal;
78
- private isProcessAlive;
79
- private killTmuxSession;
80
- private recordIntervention;
81
- }
82
- export {};
83
- //# sourceMappingURL=SessionWatchdog.d.ts.map
@@ -1,326 +0,0 @@
1
- /**
2
- * SessionWatchdog — Auto-remediation for stuck Claude sessions (Instar port).
3
- *
4
- * Detects when a Claude session has a long-running bash command and escalates
5
- * from gentle (Ctrl+C) to forceful (SIGKILL + session kill). Adapted from
6
- * Dawn Server's SessionWatchdog for Instar's self-contained architecture.
7
- *
8
- * Escalation pipeline:
9
- * Level 0: Monitoring (default)
10
- * Level 1: Ctrl+C via tmux send-keys
11
- * Level 2: SIGTERM the stuck child PID
12
- * Level 3: SIGKILL the stuck child PID
13
- * Level 4: Kill tmux session
14
- */
15
- import { execSync } from 'node:child_process';
16
- import { EventEmitter } from 'node:events';
17
- export var EscalationLevel;
18
- (function (EscalationLevel) {
19
- EscalationLevel[EscalationLevel["Monitoring"] = 0] = "Monitoring";
20
- EscalationLevel[EscalationLevel["CtrlC"] = 1] = "CtrlC";
21
- EscalationLevel[EscalationLevel["SigTerm"] = 2] = "SigTerm";
22
- EscalationLevel[EscalationLevel["SigKill"] = 3] = "SigKill";
23
- EscalationLevel[EscalationLevel["KillSession"] = 4] = "KillSession";
24
- })(EscalationLevel || (EscalationLevel = {}));
25
- // Processes that are long-running by design
26
- const EXCLUDED_PATTERNS = [
27
- 'playwright-mcp', 'playwright-persistent', '@playwright/mcp',
28
- 'chrome-native-host', 'claude-in-chrome-mcp', 'payments-mcp',
29
- 'mcp-remote', '/mcp/', '.mcp/', 'caffeinate', 'exa-mcp-server',
30
- ];
31
- const EXCLUDED_PREFIXES = [
32
- '/bin/zsh -c -l source',
33
- '/bin/bash -c -l source',
34
- ];
35
- // Escalation delays (ms to wait before advancing to next level)
36
- const ESCALATION_DELAYS = {
37
- [EscalationLevel.Monitoring]: 0,
38
- [EscalationLevel.CtrlC]: 0,
39
- [EscalationLevel.SigTerm]: 15_000,
40
- [EscalationLevel.SigKill]: 10_000,
41
- [EscalationLevel.KillSession]: 5_000,
42
- };
43
- const DEFAULT_STUCK_THRESHOLD_MS = 180_000; // 3 minutes
44
- const DEFAULT_POLL_INTERVAL_MS = 30_000;
45
- const MAX_RETRIES = 2;
46
- export class SessionWatchdog extends EventEmitter {
47
- config;
48
- sessionManager;
49
- state;
50
- interval = null;
51
- escalationState = new Map();
52
- interventionHistory = [];
53
- enabled = true;
54
- running = false;
55
- stuckThresholdMs;
56
- pollIntervalMs;
57
- constructor(config, sessionManager, state) {
58
- super();
59
- this.config = config;
60
- this.sessionManager = sessionManager;
61
- this.state = state;
62
- const wdConfig = config.monitoring.watchdog;
63
- this.stuckThresholdMs = (wdConfig?.stuckCommandSec ?? 180) * 1000;
64
- this.pollIntervalMs = wdConfig?.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
65
- }
66
- start() {
67
- if (this.interval)
68
- return;
69
- console.log(`[Watchdog] Starting (poll: ${this.pollIntervalMs / 1000}s, threshold: ${this.stuckThresholdMs / 1000}s)`);
70
- this.interval = setInterval(() => this.poll(), this.pollIntervalMs);
71
- setTimeout(() => this.poll(), 5000);
72
- }
73
- stop() {
74
- if (this.interval) {
75
- clearInterval(this.interval);
76
- this.interval = null;
77
- }
78
- }
79
- setEnabled(enabled) {
80
- this.enabled = enabled;
81
- if (!enabled) {
82
- this.escalationState.clear();
83
- }
84
- }
85
- isEnabled() {
86
- return this.enabled;
87
- }
88
- isManaging(sessionName) {
89
- const s = this.escalationState.get(sessionName);
90
- return s !== undefined && s.level > EscalationLevel.Monitoring;
91
- }
92
- getStatus() {
93
- const runningSessions = this.sessionManager.listRunningSessions();
94
- const sessions = runningSessions.map(s => ({
95
- name: s.tmuxSession,
96
- escalation: this.escalationState.get(s.tmuxSession) ?? null,
97
- }));
98
- return {
99
- enabled: this.enabled,
100
- sessions,
101
- interventionHistory: this.interventionHistory.slice(-20),
102
- };
103
- }
104
- // --- Core polling ---
105
- async poll() {
106
- if (!this.enabled || this.running)
107
- return;
108
- this.running = true;
109
- try {
110
- const sessions = this.sessionManager.listRunningSessions();
111
- for (const session of sessions) {
112
- try {
113
- this.checkSession(session.tmuxSession);
114
- }
115
- catch (err) {
116
- console.error(`[Watchdog] Error checking "${session.tmuxSession}":`, err);
117
- }
118
- }
119
- }
120
- finally {
121
- this.running = false;
122
- }
123
- }
124
- checkSession(tmuxSession) {
125
- const existing = this.escalationState.get(tmuxSession);
126
- if (existing && existing.level > EscalationLevel.Monitoring) {
127
- this.handleEscalation(tmuxSession, existing);
128
- return;
129
- }
130
- // Find Claude PID in the tmux session
131
- const claudePid = this.getClaudePid(tmuxSession);
132
- if (!claudePid)
133
- return;
134
- const children = this.getChildProcesses(claudePid);
135
- const stuckChild = children.find(c => !this.isExcluded(c.command) && c.elapsedMs > this.stuckThresholdMs);
136
- if (stuckChild) {
137
- const state = {
138
- level: EscalationLevel.CtrlC,
139
- levelEnteredAt: Date.now(),
140
- stuckChildPid: stuckChild.pid,
141
- stuckCommand: stuckChild.command,
142
- retryCount: existing?.retryCount ?? 0,
143
- };
144
- this.escalationState.set(tmuxSession, state);
145
- console.log(`[Watchdog] "${tmuxSession}": stuck command (${Math.round(stuckChild.elapsedMs / 1000)}s): ` +
146
- `${stuckChild.command.slice(0, 80)} — sending Ctrl+C`);
147
- this.sessionManager.sendKey(tmuxSession, 'C-c');
148
- this.recordIntervention(tmuxSession, EscalationLevel.CtrlC, 'Sent Ctrl+C', stuckChild);
149
- }
150
- else if (existing) {
151
- this.escalationState.delete(tmuxSession);
152
- }
153
- }
154
- handleEscalation(tmuxSession, state) {
155
- const now = Date.now();
156
- if (!this.isProcessAlive(state.stuckChildPid)) {
157
- console.log(`[Watchdog] "${tmuxSession}": stuck process ${state.stuckChildPid} died — recovered`);
158
- this.emit('recovery', tmuxSession, state.level);
159
- this.escalationState.delete(tmuxSession);
160
- return;
161
- }
162
- const timeInLevel = now - state.levelEnteredAt;
163
- const nextLevel = state.level + 1;
164
- if (nextLevel > EscalationLevel.KillSession) {
165
- if (state.retryCount >= MAX_RETRIES) {
166
- console.log(`[Watchdog] "${tmuxSession}": max retries reached — giving up`);
167
- this.escalationState.delete(tmuxSession);
168
- return;
169
- }
170
- state.level = EscalationLevel.CtrlC;
171
- state.levelEnteredAt = now;
172
- state.retryCount++;
173
- this.sessionManager.sendKey(tmuxSession, 'C-c');
174
- this.recordIntervention(tmuxSession, EscalationLevel.CtrlC, `Retry ${state.retryCount}: Sent Ctrl+C`, {
175
- pid: state.stuckChildPid, command: state.stuckCommand, elapsedMs: 0,
176
- });
177
- return;
178
- }
179
- const delayForNext = ESCALATION_DELAYS[nextLevel] ?? 15_000;
180
- if (timeInLevel < delayForNext)
181
- return;
182
- state.level = nextLevel;
183
- state.levelEnteredAt = now;
184
- const child = { pid: state.stuckChildPid, command: state.stuckCommand, elapsedMs: 0 };
185
- switch (state.level) {
186
- case EscalationLevel.SigTerm:
187
- console.log(`[Watchdog] "${tmuxSession}": sending SIGTERM to ${state.stuckChildPid}`);
188
- this.sendSignal(state.stuckChildPid, 'SIGTERM');
189
- this.recordIntervention(tmuxSession, EscalationLevel.SigTerm, `SIGTERM ${state.stuckChildPid}`, child);
190
- break;
191
- case EscalationLevel.SigKill:
192
- console.log(`[Watchdog] "${tmuxSession}": sending SIGKILL to ${state.stuckChildPid}`);
193
- this.sendSignal(state.stuckChildPid, 'SIGKILL');
194
- this.recordIntervention(tmuxSession, EscalationLevel.SigKill, `SIGKILL ${state.stuckChildPid}`, child);
195
- break;
196
- case EscalationLevel.KillSession:
197
- console.log(`[Watchdog] "${tmuxSession}": killing tmux session`);
198
- this.killTmuxSession(tmuxSession);
199
- this.recordIntervention(tmuxSession, EscalationLevel.KillSession, 'Killed tmux session', child);
200
- this.escalationState.delete(tmuxSession);
201
- break;
202
- }
203
- }
204
- // --- Process utilities (self-contained, no shared module) ---
205
- getClaudePid(tmuxSession) {
206
- try {
207
- // Get pane PID
208
- const panePidStr = execSync(`${this.config.sessions.tmuxPath} list-panes -t "=${tmuxSession}" -F "#{pane_pid}" 2>/dev/null`, { encoding: 'utf-8', timeout: 5000 }).trim().split('\n')[0];
209
- if (!panePidStr)
210
- return null;
211
- const panePid = parseInt(panePidStr, 10);
212
- if (isNaN(panePid))
213
- return null;
214
- // Find claude child
215
- const claudePidStr = execSync(`pgrep -P ${panePid} -f claude 2>/dev/null | head -1`, { encoding: 'utf-8', timeout: 5000 }).trim();
216
- if (!claudePidStr)
217
- return null;
218
- const pid = parseInt(claudePidStr, 10);
219
- return isNaN(pid) ? null : pid;
220
- }
221
- catch {
222
- return null;
223
- }
224
- }
225
- getChildProcesses(pid) {
226
- try {
227
- const childPidsStr = execSync(`pgrep -P ${pid} 2>/dev/null`, { encoding: 'utf-8', timeout: 5000 }).trim();
228
- if (!childPidsStr)
229
- return [];
230
- const childPids = childPidsStr.split('\n').filter(Boolean).join(',');
231
- if (!childPids)
232
- return [];
233
- const output = execSync(`ps -o pid=,etime=,command= -p ${childPids} 2>/dev/null`, { encoding: 'utf-8', timeout: 5000 }).trim();
234
- if (!output)
235
- return [];
236
- const results = [];
237
- for (const line of output.split('\n')) {
238
- const match = line.trim().match(/^(\d+)\s+([\d:.-]+)\s+(.+)$/);
239
- if (!match)
240
- continue;
241
- const childPid = parseInt(match[1], 10);
242
- if (isNaN(childPid))
243
- continue;
244
- results.push({
245
- pid: childPid,
246
- command: match[3],
247
- elapsedMs: this.parseElapsed(match[2]),
248
- });
249
- }
250
- return results;
251
- }
252
- catch {
253
- return [];
254
- }
255
- }
256
- isExcluded(command) {
257
- for (const pattern of EXCLUDED_PATTERNS) {
258
- if (command.includes(pattern))
259
- return true;
260
- }
261
- for (const prefix of EXCLUDED_PREFIXES) {
262
- if (command.startsWith(prefix))
263
- return true;
264
- }
265
- return false;
266
- }
267
- parseElapsed(elapsed) {
268
- let days = 0;
269
- let timePart = elapsed;
270
- if (elapsed.includes('-')) {
271
- const [d, t] = elapsed.split('-');
272
- days = parseInt(d, 10);
273
- timePart = t;
274
- }
275
- const parts = timePart.split(':').map(Number);
276
- let seconds = 0;
277
- if (parts.length === 3)
278
- seconds = parts[0] * 3600 + parts[1] * 60 + parts[2];
279
- else if (parts.length === 2)
280
- seconds = parts[0] * 60 + parts[1];
281
- else
282
- seconds = parts[0];
283
- return (days * 86400 + seconds) * 1000;
284
- }
285
- sendSignal(pid, signal) {
286
- try {
287
- process.kill(pid, signal);
288
- }
289
- catch (err) {
290
- if (err.code !== 'ESRCH') {
291
- console.error(`[Watchdog] Failed to send ${signal} to ${pid}:`, err);
292
- }
293
- }
294
- }
295
- isProcessAlive(pid) {
296
- try {
297
- process.kill(pid, 0);
298
- return true;
299
- }
300
- catch {
301
- return false;
302
- }
303
- }
304
- killTmuxSession(tmuxSession) {
305
- try {
306
- execSync(`${this.config.sessions.tmuxPath} kill-session -t "=${tmuxSession}" 2>/dev/null`, { timeout: 5000, stdio: 'ignore' });
307
- }
308
- catch { }
309
- }
310
- recordIntervention(sessionName, level, action, child) {
311
- const event = {
312
- sessionName,
313
- level,
314
- action,
315
- stuckCommand: child.command.slice(0, 200),
316
- stuckPid: child.pid,
317
- timestamp: Date.now(),
318
- };
319
- this.interventionHistory.push(event);
320
- if (this.interventionHistory.length > 50) {
321
- this.interventionHistory = this.interventionHistory.slice(-50);
322
- }
323
- this.emit('intervention', event);
324
- }
325
- }
326
- //# sourceMappingURL=SessionWatchdog.js.map