mstro-app 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/PRIVACY.md +126 -0
  2. package/README.md +24 -23
  3. package/bin/commands/login.js +79 -49
  4. package/bin/mstro.js +240 -37
  5. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  6. package/dist/server/cli/headless/claude-invoker.js +133 -27
  7. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  8. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  9. package/dist/server/cli/headless/runner.js +23 -0
  10. package/dist/server/cli/headless/runner.js.map +1 -1
  11. package/dist/server/cli/headless/stall-assessor.d.ts +3 -1
  12. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  13. package/dist/server/cli/headless/stall-assessor.js +20 -1
  14. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  15. package/dist/server/cli/headless/tool-watchdog.d.ts +4 -1
  16. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -1
  17. package/dist/server/cli/headless/tool-watchdog.js +30 -24
  18. package/dist/server/cli/headless/tool-watchdog.js.map +1 -1
  19. package/dist/server/cli/headless/types.d.ts +19 -1
  20. package/dist/server/cli/headless/types.d.ts.map +1 -1
  21. package/dist/server/cli/improvisation-session-manager.d.ts +28 -1
  22. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  23. package/dist/server/cli/improvisation-session-manager.js +221 -29
  24. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  25. package/dist/server/index.js +0 -3
  26. package/dist/server/index.js.map +1 -1
  27. package/dist/server/services/analytics.d.ts.map +1 -1
  28. package/dist/server/services/analytics.js +13 -1
  29. package/dist/server/services/analytics.js.map +1 -1
  30. package/dist/server/services/platform.d.ts.map +1 -1
  31. package/dist/server/services/platform.js +13 -1
  32. package/dist/server/services/platform.js.map +1 -1
  33. package/dist/server/services/terminal/pty-manager.d.ts +2 -0
  34. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  35. package/dist/server/services/terminal/pty-manager.js +50 -3
  36. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  37. package/dist/server/services/websocket/file-explorer-handlers.d.ts +5 -0
  38. package/dist/server/services/websocket/file-explorer-handlers.d.ts.map +1 -0
  39. package/dist/server/services/websocket/file-explorer-handlers.js +518 -0
  40. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -0
  41. package/dist/server/services/websocket/git-handlers.d.ts +36 -0
  42. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -0
  43. package/dist/server/services/websocket/git-handlers.js +797 -0
  44. package/dist/server/services/websocket/git-handlers.js.map +1 -0
  45. package/dist/server/services/websocket/git-pr-handlers.d.ts +4 -0
  46. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -0
  47. package/dist/server/services/websocket/git-pr-handlers.js +299 -0
  48. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -0
  49. package/dist/server/services/websocket/git-worktree-handlers.d.ts +4 -0
  50. package/dist/server/services/websocket/git-worktree-handlers.d.ts.map +1 -0
  51. package/dist/server/services/websocket/git-worktree-handlers.js +353 -0
  52. package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -0
  53. package/dist/server/services/websocket/handler-context.d.ts +32 -0
  54. package/dist/server/services/websocket/handler-context.d.ts.map +1 -0
  55. package/dist/server/services/websocket/handler-context.js +4 -0
  56. package/dist/server/services/websocket/handler-context.js.map +1 -0
  57. package/dist/server/services/websocket/handler.d.ts +27 -359
  58. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  59. package/dist/server/services/websocket/handler.js +67 -2328
  60. package/dist/server/services/websocket/handler.js.map +1 -1
  61. package/dist/server/services/websocket/index.d.ts +1 -1
  62. package/dist/server/services/websocket/index.d.ts.map +1 -1
  63. package/dist/server/services/websocket/index.js.map +1 -1
  64. package/dist/server/services/websocket/session-handlers.d.ts +10 -0
  65. package/dist/server/services/websocket/session-handlers.d.ts.map +1 -0
  66. package/dist/server/services/websocket/session-handlers.js +507 -0
  67. package/dist/server/services/websocket/session-handlers.js.map +1 -0
  68. package/dist/server/services/websocket/settings-handlers.d.ts +6 -0
  69. package/dist/server/services/websocket/settings-handlers.d.ts.map +1 -0
  70. package/dist/server/services/websocket/settings-handlers.js +125 -0
  71. package/dist/server/services/websocket/settings-handlers.js.map +1 -0
  72. package/dist/server/services/websocket/tab-handlers.d.ts +10 -0
  73. package/dist/server/services/websocket/tab-handlers.d.ts.map +1 -0
  74. package/dist/server/services/websocket/tab-handlers.js +131 -0
  75. package/dist/server/services/websocket/tab-handlers.js.map +1 -0
  76. package/dist/server/services/websocket/terminal-handlers.d.ts +9 -0
  77. package/dist/server/services/websocket/terminal-handlers.d.ts.map +1 -0
  78. package/dist/server/services/websocket/terminal-handlers.js +220 -0
  79. package/dist/server/services/websocket/terminal-handlers.js.map +1 -0
  80. package/dist/server/services/websocket/types.d.ts +63 -2
  81. package/dist/server/services/websocket/types.d.ts.map +1 -1
  82. package/package.json +4 -2
  83. package/server/README.md +176 -159
  84. package/server/cli/headless/claude-invoker.ts +155 -31
  85. package/server/cli/headless/output-utils.test.ts +225 -0
  86. package/server/cli/headless/runner.ts +25 -0
  87. package/server/cli/headless/stall-assessor.test.ts +165 -0
  88. package/server/cli/headless/stall-assessor.ts +25 -0
  89. package/server/cli/headless/tool-watchdog.test.ts +429 -0
  90. package/server/cli/headless/tool-watchdog.ts +33 -25
  91. package/server/cli/headless/types.ts +10 -1
  92. package/server/cli/improvisation-session-manager.ts +277 -30
  93. package/server/index.ts +0 -4
  94. package/server/mcp/README.md +59 -67
  95. package/server/mcp/bouncer-integration.test.ts +161 -0
  96. package/server/mcp/security-patterns.test.ts +258 -0
  97. package/server/services/analytics.ts +13 -1
  98. package/server/services/platform.ts +12 -1
  99. package/server/services/terminal/pty-manager.ts +53 -3
  100. package/server/services/websocket/autocomplete.test.ts +194 -0
  101. package/server/services/websocket/file-explorer-handlers.ts +587 -0
  102. package/server/services/websocket/git-handlers.ts +924 -0
  103. package/server/services/websocket/git-pr-handlers.ts +363 -0
  104. package/server/services/websocket/git-worktree-handlers.ts +403 -0
  105. package/server/services/websocket/handler-context.ts +44 -0
  106. package/server/services/websocket/handler.test.ts +1 -1
  107. package/server/services/websocket/handler.ts +83 -2678
  108. package/server/services/websocket/index.ts +1 -1
  109. package/server/services/websocket/session-handlers.ts +574 -0
  110. package/server/services/websocket/settings-handlers.ts +150 -0
  111. package/server/services/websocket/tab-handlers.ts +150 -0
  112. package/server/services/websocket/terminal-handlers.ts +277 -0
  113. package/server/services/websocket/types.ts +135 -0
  114. package/bin/release.sh +0 -110
@@ -0,0 +1,165 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import type { StallContext } from './stall-assessor.js';
3
+
4
+ // quickHeuristic, parseAssessmentResponse, and parseVerdictResponse are not exported.
5
+ // We test them via assessStall (which calls quickHeuristic first) and by testing
6
+ // the parsing functions indirectly. Since quickHeuristic is the critical logic
7
+ // and assessStall calls it before Haiku, we can test the heuristic paths by
8
+ // providing contexts that match known patterns.
9
+ //
10
+ // To avoid spawning Haiku (which requires `claude` CLI), we only test contexts
11
+ // that trigger the heuristic fast-path (return non-null from quickHeuristic).
12
+
13
+ import { assessStall } from './stall-assessor.js';
14
+
15
+ function makeContext(overrides: Partial<StallContext> = {}): StallContext {
16
+ return {
17
+ originalPrompt: 'Fix the bug in auth.ts',
18
+ silenceMs: 120_000,
19
+ pendingToolCount: 0,
20
+ totalToolCalls: 5,
21
+ elapsedTotalMs: 300_000,
22
+ ...overrides,
23
+ };
24
+ }
25
+
26
+ describe('assessStall - quickHeuristic paths', () => {
27
+ it('extends when tokens are still flowing (tokenSilenceMs < 60s)', async () => {
28
+ const ctx = makeContext({ tokenSilenceMs: 30_000 });
29
+ const verdict = await assessStall(ctx, 'claude', false, false);
30
+ expect(verdict.action).toBe('extend');
31
+ expect(verdict.extensionMs).toBe(10 * 60_000);
32
+ expect(verdict.reason).toContain('Tokens still flowing');
33
+ });
34
+
35
+ it('extends when tokenSilenceMs is 0', async () => {
36
+ const ctx = makeContext({ tokenSilenceMs: 0 });
37
+ const verdict = await assessStall(ctx, 'claude', false, false);
38
+ expect(verdict.action).toBe('extend');
39
+ expect(verdict.reason).toContain('Tokens still flowing');
40
+ });
41
+
42
+ it('does not use token heuristic when tokenSilenceMs >= 60s', async () => {
43
+ const ctx = makeContext({
44
+ tokenSilenceMs: 60_000,
45
+ pendingToolCount: 3, // will trigger parallel tools heuristic
46
+ });
47
+ const verdict = await assessStall(ctx, 'claude', false, false);
48
+ // Should NOT hit the token heuristic, should hit the 3+ parallel tools one
49
+ expect(verdict.action).toBe('extend');
50
+ expect(verdict.reason).toContain('parallel tool calls');
51
+ });
52
+
53
+ it('defers to watchdog when active and tools are pending', async () => {
54
+ const ctx = makeContext({ pendingToolCount: 1, lastToolName: 'Bash' });
55
+ const verdict = await assessStall(ctx, 'claude', false, true);
56
+ expect(verdict.action).toBe('extend');
57
+ expect(verdict.extensionMs).toBe(15 * 60_000);
58
+ expect(verdict.reason).toContain('Watchdog active');
59
+ });
60
+
61
+ it('defers to watchdog and lists pending tool names', async () => {
62
+ const ctx = makeContext({
63
+ pendingToolCount: 2,
64
+ pendingToolNames: new Set(['WebFetch', 'Bash']),
65
+ });
66
+ const verdict = await assessStall(ctx, 'claude', false, true);
67
+ expect(verdict.action).toBe('extend');
68
+ expect(verdict.reason).toContain('WebFetch');
69
+ expect(verdict.reason).toContain('Bash');
70
+ });
71
+
72
+ it('extends for Task subagent via pendingToolNames', async () => {
73
+ const ctx = makeContext({
74
+ pendingToolCount: 1,
75
+ pendingToolNames: new Set(['Task']),
76
+ });
77
+ const verdict = await assessStall(ctx, 'claude', false, false);
78
+ expect(verdict.action).toBe('extend');
79
+ expect(verdict.reason).toContain('Task subagent');
80
+ });
81
+
82
+ it('extends for Task subagent via lastToolName fallback', async () => {
83
+ const ctx = makeContext({
84
+ pendingToolCount: 1,
85
+ lastToolName: 'Task',
86
+ });
87
+ const verdict = await assessStall(ctx, 'claude', false, false);
88
+ expect(verdict.action).toBe('extend');
89
+ expect(verdict.reason).toContain('Task subagent');
90
+ });
91
+
92
+ it('scales Task extension with pending count', async () => {
93
+ const ctx1 = makeContext({
94
+ pendingToolCount: 1,
95
+ pendingToolNames: new Set(['Task']),
96
+ });
97
+ const ctx3 = makeContext({
98
+ pendingToolCount: 3,
99
+ pendingToolNames: new Set(['Task']),
100
+ });
101
+ const v1 = await assessStall(ctx1, 'claude', false, false);
102
+ const v3 = await assessStall(ctx3, 'claude', false, false);
103
+ // More pending = more extension, capped at 30 min
104
+ expect(v3.extensionMs).toBeGreaterThanOrEqual(v1.extensionMs);
105
+ expect(v3.extensionMs).toBeLessThanOrEqual(30 * 60_000);
106
+ });
107
+
108
+ it('extends for 3+ parallel tool calls', async () => {
109
+ const ctx = makeContext({ pendingToolCount: 3 });
110
+ const verdict = await assessStall(ctx, 'claude', false, false);
111
+ expect(verdict.action).toBe('extend');
112
+ expect(verdict.extensionMs).toBe(15 * 60_000);
113
+ expect(verdict.reason).toContain('parallel tool calls');
114
+ });
115
+
116
+ it('extends for 5 parallel tool calls', async () => {
117
+ const ctx = makeContext({ pendingToolCount: 5 });
118
+ const verdict = await assessStall(ctx, 'claude', false, false);
119
+ expect(verdict.action).toBe('extend');
120
+ expect(verdict.reason).toContain('5 parallel tool calls');
121
+ });
122
+
123
+ it('extends for WebSearch without watchdog', async () => {
124
+ const ctx = makeContext({ lastToolName: 'WebSearch', pendingToolCount: 1 });
125
+ // pendingToolCount < 3, not Task, not watchdog active, but WebSearch
126
+ const verdict = await assessStall(ctx, 'claude', false, false);
127
+ expect(verdict.action).toBe('extend');
128
+ expect(verdict.extensionMs).toBe(5 * 60_000);
129
+ expect(verdict.reason).toContain('WebSearch');
130
+ });
131
+
132
+ it('extends for WebFetch without watchdog', async () => {
133
+ const ctx = makeContext({ lastToolName: 'WebFetch', pendingToolCount: 1 });
134
+ const verdict = await assessStall(ctx, 'claude', false, false);
135
+ expect(verdict.action).toBe('extend');
136
+ expect(verdict.extensionMs).toBe(5 * 60_000);
137
+ expect(verdict.reason).toContain('WebFetch');
138
+ });
139
+
140
+ it('does NOT extend for WebSearch when watchdog is active', async () => {
141
+ // When watchdog is active and tools are pending, the watchdog deferral
142
+ // takes priority over the WebSearch heuristic
143
+ const ctx = makeContext({
144
+ lastToolName: 'WebSearch',
145
+ pendingToolCount: 1,
146
+ });
147
+ const verdict = await assessStall(ctx, 'claude', false, true);
148
+ // Should defer to watchdog, not WebSearch heuristic
149
+ expect(verdict.action).toBe('extend');
150
+ expect(verdict.reason).toContain('Watchdog active');
151
+ });
152
+
153
+ it('falls back to extend when Haiku assessment fails', async () => {
154
+ // Context that doesn't match any heuristic → triggers Haiku →
155
+ // Haiku fails (no `claude` binary) → cautious extend
156
+ const ctx = makeContext({
157
+ pendingToolCount: 1,
158
+ lastToolName: 'Edit',
159
+ });
160
+ const verdict = await assessStall(ctx, 'nonexistent-claude-binary', false, false);
161
+ expect(verdict.action).toBe('extend');
162
+ expect(verdict.extensionMs).toBe(10 * 60_000);
163
+ expect(verdict.reason).toContain('unavailable');
164
+ });
165
+ });
@@ -35,6 +35,8 @@ export interface StallContext {
35
35
  totalToolCalls: number;
36
36
  /** Total wall-clock time since process started (ms) */
37
37
  elapsedTotalMs: number;
38
+ /** Time since the last token usage event (ms). Undefined if no token events yet. */
39
+ tokenSilenceMs?: number;
38
40
  }
39
41
 
40
42
  export interface StallVerdict {
@@ -57,6 +59,17 @@ function quickHeuristic(ctx: StallContext, toolWatchdogActive = false): StallVer
57
59
  const pendingNames = ctx.pendingToolNames ?? new Set<string>();
58
60
  const hasPendingTools = ctx.pendingToolCount > 0;
59
61
 
62
+ // Tokens still flowing = process is alive and actively processing.
63
+ // Extend generously when token activity is recent (< 60s), regardless
64
+ // of stdout silence. This covers silent thinking and tool result processing.
65
+ if (ctx.tokenSilenceMs !== undefined && ctx.tokenSilenceMs < 60_000) {
66
+ return {
67
+ action: 'extend',
68
+ extensionMs: 10 * 60_000,
69
+ reason: `Tokens still flowing (last activity ${Math.round(ctx.tokenSilenceMs / 1000)}s ago) — process is alive`,
70
+ };
71
+ }
72
+
60
73
  // When the watchdog is active and tools are pending, always defer.
61
74
  // The watchdog manages per-tool timeouts; the stall detector should only
62
75
  // fire when no tools are running and there's genuine silence.
@@ -156,6 +169,7 @@ export async function assessToolTimeout(
156
169
  elapsedMs: number,
157
170
  claudeCommand: string,
158
171
  verbose: boolean,
172
+ tokenSilenceMs?: number,
159
173
  ): Promise<StallVerdict> {
160
174
  const elapsedSec = Math.round(elapsedMs / 1000);
161
175
 
@@ -181,13 +195,19 @@ export async function assessToolTimeout(
181
195
  };
182
196
  const toolDesc = toolDescriptions[toolName] || `executes the ${toolName} tool`;
183
197
 
198
+ const tokenLine = tokenSilenceMs !== undefined
199
+ ? `Token activity: last token event ${Math.round(tokenSilenceMs / 1000)}s ago (recent tokens = process is alive and processing)`
200
+ : 'Token activity: no token events observed';
201
+
184
202
  const prompt = [
185
203
  `You are a process health monitor. A ${toolName} tool call has been running for ${elapsedSec}s.`,
186
204
  `${toolName} ${toolDesc}.`,
187
205
  `Tool input: ${inputSummary}`,
206
+ tokenLine,
188
207
  '',
189
208
  `Is this tool call likely still working, or is it hung/frozen?`,
190
209
  'Consider: network latency, server response times, anti-bot protections, large page sizes, complex operations.',
210
+ 'IMPORTANT: If tokens were active recently (< 60s ago), the process is likely still alive and processing — strongly favor WORKING.',
191
211
  '',
192
212
  'Respond in EXACTLY this format (3 lines, no extra text):',
193
213
  'VERDICT: WORKING or STALLED',
@@ -305,6 +325,10 @@ function buildAssessmentPrompt(ctx: StallContext): string {
305
325
  ? `${ctx.originalPrompt.slice(0, 500)}...`
306
326
  : ctx.originalPrompt;
307
327
 
328
+ const tokenLine = ctx.tokenSilenceMs !== undefined
329
+ ? `Token activity: last token event ${Math.round(ctx.tokenSilenceMs / 1000)}s ago (tokens flowing = process alive)`
330
+ : 'Token activity: no token events observed';
331
+
308
332
  return [
309
333
  'You are a process health monitor. A Claude Code subprocess has been silent (no stdout) and you must determine if it is working or stalled.',
310
334
  '',
@@ -314,6 +338,7 @@ function buildAssessmentPrompt(ctx: StallContext): string {
314
338
  ctx.lastToolInputSummary ? `Last tool input: ${ctx.lastToolInputSummary}` : '',
315
339
  `Pending tool calls: ${ctx.pendingToolCount}`,
316
340
  `Total tool calls this session: ${ctx.totalToolCalls}`,
341
+ tokenLine,
317
342
  `Task being executed: ${promptPreview}`,
318
343
  '',
319
344
  'Respond in EXACTLY this format (3 lines, no extra text):',
@@ -0,0 +1,429 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2
+ import { DEFAULT_TOOL_TIMEOUT_PROFILES, ToolWatchdog } from './tool-watchdog.js';
3
+
4
+ describe('ToolWatchdog', () => {
5
+ beforeEach(() => {
6
+ vi.useFakeTimers();
7
+ });
8
+
9
+ afterEach(() => {
10
+ vi.useRealTimers();
11
+ });
12
+
13
+ // ========== getProfile ==========
14
+
15
+ describe('getProfile', () => {
16
+ it('returns specific profile for known tools', () => {
17
+ const watchdog = new ToolWatchdog();
18
+ const webFetch = watchdog.getProfile('WebFetch');
19
+ expect(webFetch.coldStartMs).toBe(180_000);
20
+ expect(webFetch.floorMs).toBe(120_000);
21
+ expect(webFetch.ceilingMs).toBe(300_000);
22
+ expect(webFetch.useAdaptive).toBe(true);
23
+ expect(webFetch.useHaikuTiebreaker).toBe(true);
24
+ });
25
+
26
+ it('returns Task profile with long timeouts', () => {
27
+ const watchdog = new ToolWatchdog();
28
+ const task = watchdog.getProfile('Task');
29
+ expect(task.coldStartMs).toBe(900_000);
30
+ expect(task.floorMs).toBe(600_000);
31
+ expect(task.ceilingMs).toBe(2_700_000);
32
+ });
33
+
34
+ it('returns default profile for unknown tools', () => {
35
+ const watchdog = new ToolWatchdog();
36
+ const unknown = watchdog.getProfile('SomeNewTool');
37
+ expect(unknown.coldStartMs).toBe(300_000);
38
+ expect(unknown.floorMs).toBe(120_000);
39
+ expect(unknown.ceilingMs).toBe(600_000);
40
+ expect(unknown.useAdaptive).toBe(false);
41
+ });
42
+
43
+ it('merges custom profiles with defaults', () => {
44
+ const watchdog = new ToolWatchdog({
45
+ profiles: {
46
+ WebFetch: { coldStartMs: 60_000 },
47
+ },
48
+ });
49
+ const profile = watchdog.getProfile('WebFetch');
50
+ expect(profile.coldStartMs).toBe(60_000);
51
+ // Other fields should come from default WebFetch profile
52
+ expect(profile.floorMs).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
53
+ expect(profile.useAdaptive).toBe(true);
54
+ });
55
+
56
+ it('allows custom profiles for new tool names', () => {
57
+ const watchdog = new ToolWatchdog({
58
+ profiles: {
59
+ CustomTool: { coldStartMs: 10_000, floorMs: 5_000, ceilingMs: 30_000 },
60
+ },
61
+ });
62
+ const profile = watchdog.getProfile('CustomTool');
63
+ expect(profile.coldStartMs).toBe(10_000);
64
+ expect(profile.floorMs).toBe(5_000);
65
+ expect(profile.ceilingMs).toBe(30_000);
66
+ });
67
+ });
68
+
69
+ // ========== getTimeout ==========
70
+
71
+ describe('getTimeout', () => {
72
+ it('returns coldStart for non-adaptive tools', () => {
73
+ const watchdog = new ToolWatchdog();
74
+ // Bash is non-adaptive
75
+ expect(watchdog.getTimeout('Bash')).toBe(300_000);
76
+ });
77
+
78
+ it('returns coldStart when no samples recorded', () => {
79
+ const watchdog = new ToolWatchdog();
80
+ expect(watchdog.getTimeout('WebFetch')).toBe(180_000);
81
+ });
82
+
83
+ it('returns adaptive timeout after recording samples', () => {
84
+ const watchdog = new ToolWatchdog();
85
+ // Record a 10s completion for WebFetch
86
+ watchdog.recordCompletion('WebFetch', 10_000);
87
+
88
+ const timeout = watchdog.getTimeout('WebFetch');
89
+ // First sample: est = 10000, dev = 5000, timeout = 10000 + 4*5000 = 30000
90
+ // But floor is 120000, so should be clamped to floor
91
+ expect(timeout).toBe(120_000);
92
+ });
93
+
94
+ it('respects floor clamping', () => {
95
+ const watchdog = new ToolWatchdog();
96
+ // Record very fast completions
97
+ watchdog.recordCompletion('WebFetch', 100);
98
+ watchdog.recordCompletion('WebFetch', 100);
99
+ watchdog.recordCompletion('WebFetch', 100);
100
+
101
+ // Adaptive calculation would be very low, but floor prevents it
102
+ expect(watchdog.getTimeout('WebFetch')).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
103
+ });
104
+
105
+ it('respects ceiling clamping', () => {
106
+ const watchdog = new ToolWatchdog();
107
+ // Record very slow completions
108
+ watchdog.recordCompletion('WebSearch', 500_000);
109
+
110
+ const timeout = watchdog.getTimeout('WebSearch');
111
+ // Should not exceed ceiling
112
+ expect(timeout).toBeLessThanOrEqual(DEFAULT_TOOL_TIMEOUT_PROFILES.WebSearch.ceilingMs);
113
+ });
114
+
115
+ it('does not record completions for non-adaptive tools', () => {
116
+ const watchdog = new ToolWatchdog();
117
+ // Bash is non-adaptive (Read too)
118
+ watchdog.recordCompletion('Bash', 5_000);
119
+ // Should still return coldStart
120
+ expect(watchdog.getTimeout('Bash')).toBe(300_000);
121
+ });
122
+ });
123
+
124
+ // ========== recordCompletion ==========
125
+
126
+ describe('recordCompletion', () => {
127
+ it('initializes tracker on first sample', () => {
128
+ const watchdog = new ToolWatchdog();
129
+ watchdog.recordCompletion('WebFetch', 20_000);
130
+
131
+ // After first sample: timeout should differ from cold start if above floor
132
+ const timeout = watchdog.getTimeout('WebFetch');
133
+ // est=20000, dev=10000, adaptive=20000+4*10000=60000, floor=120000 → 120000
134
+ expect(timeout).toBe(120_000);
135
+ });
136
+
137
+ it('updates EMA on subsequent samples', () => {
138
+ const watchdog = new ToolWatchdog();
139
+ // First sample
140
+ watchdog.recordCompletion('Glob', 10_000);
141
+ const timeout1 = watchdog.getTimeout('Glob');
142
+
143
+ // Second sample - much longer
144
+ watchdog.recordCompletion('Glob', 50_000);
145
+ const timeout2 = watchdog.getTimeout('Glob');
146
+
147
+ // Timeout should increase after longer sample
148
+ expect(timeout2).toBeGreaterThanOrEqual(timeout1);
149
+ });
150
+
151
+ it('converges toward actual duration over many samples', () => {
152
+ const watchdog = new ToolWatchdog();
153
+ // Record many similar samples for Glob (adaptive, floor=30000, ceiling=180000)
154
+ for (let i = 0; i < 20; i++) {
155
+ watchdog.recordCompletion('Glob', 45_000);
156
+ }
157
+ const timeout = watchdog.getTimeout('Glob');
158
+ // Should converge near 45000, with deviation near 0
159
+ // adaptive ≈ 45000 + 4*~0 ≈ 45000, but floor is 30000, so should be ~45000
160
+ expect(timeout).toBeGreaterThanOrEqual(30_000);
161
+ expect(timeout).toBeLessThanOrEqual(60_000);
162
+ });
163
+ });
164
+
165
+ // ========== startWatch / clearWatch ==========
166
+
167
+ describe('startWatch / clearWatch', () => {
168
+ it('calls timeout callback when timer expires', async () => {
169
+ const watchdog = new ToolWatchdog();
170
+ const onTimeout = vi.fn();
171
+
172
+ watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://example.com' }, onTimeout);
173
+
174
+ // Advance past WebFetch cold start (180s) — async because internal handler is async
175
+ await vi.advanceTimersByTimeAsync(180_001);
176
+
177
+ // onTimeout should fire (no tiebreaker configured)
178
+ expect(onTimeout).toHaveBeenCalledOnce();
179
+ });
180
+
181
+ it('does not call timeout if cleared before expiry', async () => {
182
+ const watchdog = new ToolWatchdog();
183
+ const onTimeout = vi.fn();
184
+
185
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout);
186
+ watchdog.clearWatch('tool-1');
187
+
188
+ await vi.advanceTimersByTimeAsync(300_000);
189
+ expect(onTimeout).not.toHaveBeenCalled();
190
+ });
191
+
192
+ it('replaces existing watch for same ID', async () => {
193
+ const watchdog = new ToolWatchdog();
194
+ const onTimeout1 = vi.fn();
195
+ const onTimeout2 = vi.fn();
196
+
197
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
198
+ watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout2);
199
+
200
+ // Advance past WebSearch cold start (90s)
201
+ await vi.advanceTimersByTimeAsync(90_001);
202
+ expect(onTimeout2).toHaveBeenCalledOnce();
203
+ expect(onTimeout1).not.toHaveBeenCalled();
204
+ });
205
+
206
+ it('tracks multiple watches independently', async () => {
207
+ const watchdog = new ToolWatchdog();
208
+ const onTimeout1 = vi.fn();
209
+ const onTimeout2 = vi.fn();
210
+
211
+ watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout1); // 90s
212
+ watchdog.startWatch('tool-2', 'WebFetch', {}, onTimeout2); // 180s
213
+
214
+ await vi.advanceTimersByTimeAsync(90_001);
215
+ expect(onTimeout1).toHaveBeenCalledOnce();
216
+ expect(onTimeout2).not.toHaveBeenCalled();
217
+
218
+ await vi.advanceTimersByTimeAsync(90_000);
219
+ expect(onTimeout2).toHaveBeenCalledOnce();
220
+ });
221
+ });
222
+
223
+ // ========== clearAll ==========
224
+
225
+ describe('clearAll', () => {
226
+ it('clears all active watches', () => {
227
+ const watchdog = new ToolWatchdog();
228
+ const onTimeout1 = vi.fn();
229
+ const onTimeout2 = vi.fn();
230
+
231
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
232
+ watchdog.startWatch('tool-2', 'WebSearch', {}, onTimeout2);
233
+ watchdog.clearAll();
234
+
235
+ vi.advanceTimersByTime(300_000);
236
+ expect(onTimeout1).not.toHaveBeenCalled();
237
+ expect(onTimeout2).not.toHaveBeenCalled();
238
+ });
239
+
240
+ it('clears active watches map', () => {
241
+ const watchdog = new ToolWatchdog();
242
+ watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
243
+ watchdog.startWatch('tool-2', 'WebSearch', {}, vi.fn());
244
+
245
+ watchdog.clearAll();
246
+ expect(watchdog.getActiveWatches().size).toBe(0);
247
+ });
248
+ });
249
+
250
+ // ========== getActiveWatch / getActiveWatches ==========
251
+
252
+ describe('getActiveWatch', () => {
253
+ it('returns watch for active tool', () => {
254
+ const watchdog = new ToolWatchdog();
255
+ watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://test.com' }, vi.fn());
256
+
257
+ const watch = watchdog.getActiveWatch('tool-1');
258
+ expect(watch).toBeDefined();
259
+ expect(watch!.toolName).toBe('WebFetch');
260
+ expect(watch!.toolInput).toEqual({ url: 'http://test.com' });
261
+ });
262
+
263
+ it('returns undefined for cleared watch', () => {
264
+ const watchdog = new ToolWatchdog();
265
+ watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
266
+ watchdog.clearWatch('tool-1');
267
+
268
+ expect(watchdog.getActiveWatch('tool-1')).toBeUndefined();
269
+ });
270
+
271
+ it('returns undefined for unknown ID', () => {
272
+ const watchdog = new ToolWatchdog();
273
+ expect(watchdog.getActiveWatch('nonexistent')).toBeUndefined();
274
+ });
275
+ });
276
+
277
+ // ========== buildCheckpoint ==========
278
+
279
+ describe('buildCheckpoint', () => {
280
+ it('returns null when hung tool ID not found', () => {
281
+ const watchdog = new ToolWatchdog();
282
+ const checkpoint = watchdog.buildCheckpoint(
283
+ 'test prompt', '', '', [], 'missing-id', undefined, Date.now()
284
+ );
285
+ expect(checkpoint).toBeNull();
286
+ });
287
+
288
+ it('builds checkpoint with correct tool separation', () => {
289
+ const watchdog = new ToolWatchdog();
290
+ vi.setSystemTime(new Date('2025-01-01T00:00:00Z'));
291
+ const processStartTime = Date.now();
292
+
293
+ watchdog.startWatch('hung-tool', 'WebFetch', { url: 'http://slow.com' }, vi.fn());
294
+
295
+ const accumulatedTools = [
296
+ { toolId: 'tool-1', toolName: 'Read', toolInput: { path: 'a.ts' }, result: 'content', isError: false, duration: 100 },
297
+ { toolId: 'tool-2', toolName: 'Grep', toolInput: { pattern: 'foo' }, result: undefined, isError: false },
298
+ { toolId: 'hung-tool', toolName: 'WebFetch', toolInput: { url: 'http://slow.com' }, result: undefined, isError: false },
299
+ ];
300
+
301
+ const checkpoint = watchdog.buildCheckpoint(
302
+ 'find and fix',
303
+ 'assistant response text',
304
+ 'thinking about it',
305
+ accumulatedTools,
306
+ 'hung-tool',
307
+ 'session-123',
308
+ processStartTime,
309
+ );
310
+
311
+ expect(checkpoint).not.toBeNull();
312
+ expect(checkpoint!.originalPrompt).toBe('find and fix');
313
+ expect(checkpoint!.assistantText).toBe('assistant response text');
314
+ expect(checkpoint!.thinkingText).toBe('thinking about it');
315
+ expect(checkpoint!.claudeSessionId).toBe('session-123');
316
+
317
+ // Completed tools: only tool-1 (has result and is not hung)
318
+ expect(checkpoint!.completedTools).toHaveLength(1);
319
+ expect(checkpoint!.completedTools[0].toolId).toBe('tool-1');
320
+
321
+ // In-progress tools: tool-2 (no result, not hung)
322
+ expect(checkpoint!.inProgressTools).toHaveLength(1);
323
+ expect(checkpoint!.inProgressTools[0].toolId).toBe('tool-2');
324
+
325
+ // Hung tool
326
+ expect(checkpoint!.hungTool.toolName).toBe('WebFetch');
327
+ expect(checkpoint!.hungTool.toolId).toBe('hung-tool');
328
+ expect(checkpoint!.hungTool.url).toBe('http://slow.com');
329
+ });
330
+
331
+ it('extracts URL from tool input for WebFetch', () => {
332
+ const watchdog = new ToolWatchdog();
333
+ watchdog.startWatch('t1', 'WebFetch', { url: 'http://example.com' }, vi.fn());
334
+
335
+ const tools = [
336
+ { toolId: 't1', toolName: 'WebFetch', toolInput: { url: 'http://example.com' }, result: undefined, isError: false },
337
+ ];
338
+
339
+ const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
340
+ expect(cp!.hungTool.url).toBe('http://example.com');
341
+ });
342
+
343
+ it('extracts query from tool input for WebSearch', () => {
344
+ const watchdog = new ToolWatchdog();
345
+ watchdog.startWatch('t1', 'WebSearch', { query: 'test search' }, vi.fn());
346
+
347
+ const tools = [
348
+ { toolId: 't1', toolName: 'WebSearch', toolInput: { query: 'test search' }, result: undefined, isError: false },
349
+ ];
350
+
351
+ const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
352
+ expect(cp!.hungTool.url).toBe('test search');
353
+ });
354
+ });
355
+
356
+ // ========== tiebreaker integration ==========
357
+
358
+ describe('tiebreaker', () => {
359
+ it('extends when tiebreaker returns extend', async () => {
360
+ const onTiebreaker = vi.fn().mockResolvedValue({
361
+ action: 'extend',
362
+ extensionMs: 60_000,
363
+ reason: 'still working',
364
+ });
365
+ const watchdog = new ToolWatchdog({ onTiebreaker });
366
+ const onTimeout = vi.fn();
367
+
368
+ // Use a tool with useHaikuTiebreaker=true and short timeout
369
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
370
+
371
+ // Advance to trigger timeout
372
+ await vi.advanceTimersByTimeAsync(180_001);
373
+
374
+ // Tiebreaker should have been called
375
+ expect(onTiebreaker).toHaveBeenCalledOnce();
376
+ // onTimeout should NOT have fired (tiebreaker extended)
377
+ expect(onTimeout).not.toHaveBeenCalled();
378
+
379
+ // Now advance past extension
380
+ await vi.advanceTimersByTimeAsync(60_001);
381
+ // Should fire after extension
382
+ expect(onTimeout).toHaveBeenCalledOnce();
383
+ });
384
+
385
+ it('kills when tiebreaker returns kill', async () => {
386
+ const onTiebreaker = vi.fn().mockResolvedValue({
387
+ action: 'kill',
388
+ extensionMs: 0,
389
+ reason: 'process is hung',
390
+ });
391
+ const watchdog = new ToolWatchdog({ onTiebreaker });
392
+ const onTimeout = vi.fn();
393
+
394
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
395
+
396
+ await vi.advanceTimersByTimeAsync(180_001);
397
+
398
+ expect(onTiebreaker).toHaveBeenCalledOnce();
399
+ expect(onTimeout).toHaveBeenCalledOnce();
400
+ });
401
+
402
+ it('kills when tiebreaker throws', async () => {
403
+ const onTiebreaker = vi.fn().mockRejectedValue(new Error('haiku failed'));
404
+ const watchdog = new ToolWatchdog({ onTiebreaker });
405
+ const onTimeout = vi.fn();
406
+
407
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
408
+
409
+ await vi.advanceTimersByTimeAsync(180_001);
410
+
411
+ expect(onTiebreaker).toHaveBeenCalledOnce();
412
+ expect(onTimeout).toHaveBeenCalledOnce();
413
+ });
414
+
415
+ it('does not attempt tiebreaker for tools with useHaikuTiebreaker=false', async () => {
416
+ const onTiebreaker = vi.fn();
417
+ const watchdog = new ToolWatchdog({ onTiebreaker });
418
+ const onTimeout = vi.fn();
419
+
420
+ // WebSearch has useHaikuTiebreaker: false
421
+ watchdog.startWatch('t1', 'WebSearch', {}, onTimeout);
422
+
423
+ await vi.advanceTimersByTimeAsync(90_001);
424
+
425
+ expect(onTiebreaker).not.toHaveBeenCalled();
426
+ expect(onTimeout).toHaveBeenCalledOnce();
427
+ });
428
+ });
429
+ });