mstro-app 0.1.58 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/PRIVACY.md +126 -0
  2. package/README.md +24 -23
  3. package/bin/commands/login.js +85 -42
  4. package/bin/commands/logout.js +35 -1
  5. package/bin/commands/status.js +1 -1
  6. package/bin/mstro.js +231 -131
  7. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  8. package/dist/server/cli/headless/claude-invoker.js +550 -115
  9. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  10. package/dist/server/cli/headless/index.d.ts +2 -1
  11. package/dist/server/cli/headless/index.d.ts.map +1 -1
  12. package/dist/server/cli/headless/index.js +2 -0
  13. package/dist/server/cli/headless/index.js.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  15. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  16. package/dist/server/cli/headless/prompt-utils.js +40 -5
  17. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  18. package/dist/server/cli/headless/runner.d.ts +1 -1
  19. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  20. package/dist/server/cli/headless/runner.js +52 -7
  21. package/dist/server/cli/headless/runner.js.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.d.ts +79 -1
  23. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  24. package/dist/server/cli/headless/stall-assessor.js +355 -20
  25. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  26. package/dist/server/cli/headless/tool-watchdog.d.ts +70 -0
  27. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  28. package/dist/server/cli/headless/tool-watchdog.js +302 -0
  29. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  30. package/dist/server/cli/headless/types.d.ts +98 -1
  31. package/dist/server/cli/headless/types.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.d.ts +136 -2
  33. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  34. package/dist/server/cli/improvisation-session-manager.js +929 -132
  35. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  36. package/dist/server/index.js +5 -13
  37. package/dist/server/index.js.map +1 -1
  38. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  39. package/dist/server/mcp/bouncer-integration.js +18 -0
  40. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  41. package/dist/server/mcp/security-audit.d.ts +2 -2
  42. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  43. package/dist/server/mcp/security-audit.js +12 -8
  44. package/dist/server/mcp/security-audit.js.map +1 -1
  45. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  46. package/dist/server/mcp/security-patterns.js +9 -4
  47. package/dist/server/mcp/security-patterns.js.map +1 -1
  48. package/dist/server/routes/improvise.js +6 -6
  49. package/dist/server/routes/improvise.js.map +1 -1
  50. package/dist/server/services/analytics.d.ts +2 -0
  51. package/dist/server/services/analytics.d.ts.map +1 -1
  52. package/dist/server/services/analytics.js +26 -4
  53. package/dist/server/services/analytics.js.map +1 -1
  54. package/dist/server/services/platform.d.ts.map +1 -1
  55. package/dist/server/services/platform.js +17 -10
  56. package/dist/server/services/platform.js.map +1 -1
  57. package/dist/server/services/sandbox-utils.d.ts +6 -0
  58. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  59. package/dist/server/services/sandbox-utils.js +72 -0
  60. package/dist/server/services/sandbox-utils.js.map +1 -0
  61. package/dist/server/services/settings.d.ts +6 -0
  62. package/dist/server/services/settings.d.ts.map +1 -1
  63. package/dist/server/services/settings.js +21 -0
  64. package/dist/server/services/settings.js.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.d.ts +5 -51
  66. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  67. package/dist/server/services/terminal/pty-manager.js +63 -102
  68. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  69. package/dist/server/services/websocket/file-explorer-handlers.d.ts +5 -0
  70. package/dist/server/services/websocket/file-explorer-handlers.d.ts.map +1 -0
  71. package/dist/server/services/websocket/file-explorer-handlers.js +518 -0
  72. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -0
  73. package/dist/server/services/websocket/git-handlers.d.ts +36 -0
  74. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -0
  75. package/dist/server/services/websocket/git-handlers.js +797 -0
  76. package/dist/server/services/websocket/git-handlers.js.map +1 -0
  77. package/dist/server/services/websocket/git-pr-handlers.d.ts +4 -0
  78. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -0
  79. package/dist/server/services/websocket/git-pr-handlers.js +299 -0
  80. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -0
  81. package/dist/server/services/websocket/git-worktree-handlers.d.ts +4 -0
  82. package/dist/server/services/websocket/git-worktree-handlers.d.ts.map +1 -0
  83. package/dist/server/services/websocket/git-worktree-handlers.js +353 -0
  84. package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -0
  85. package/dist/server/services/websocket/handler-context.d.ts +32 -0
  86. package/dist/server/services/websocket/handler-context.d.ts.map +1 -0
  87. package/dist/server/services/websocket/handler-context.js +4 -0
  88. package/dist/server/services/websocket/handler-context.js.map +1 -0
  89. package/dist/server/services/websocket/handler.d.ts +27 -338
  90. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  91. package/dist/server/services/websocket/handler.js +74 -2106
  92. package/dist/server/services/websocket/handler.js.map +1 -1
  93. package/dist/server/services/websocket/index.d.ts +1 -1
  94. package/dist/server/services/websocket/index.d.ts.map +1 -1
  95. package/dist/server/services/websocket/index.js.map +1 -1
  96. package/dist/server/services/websocket/session-handlers.d.ts +10 -0
  97. package/dist/server/services/websocket/session-handlers.d.ts.map +1 -0
  98. package/dist/server/services/websocket/session-handlers.js +507 -0
  99. package/dist/server/services/websocket/session-handlers.js.map +1 -0
  100. package/dist/server/services/websocket/settings-handlers.d.ts +6 -0
  101. package/dist/server/services/websocket/settings-handlers.d.ts.map +1 -0
  102. package/dist/server/services/websocket/settings-handlers.js +125 -0
  103. package/dist/server/services/websocket/settings-handlers.js.map +1 -0
  104. package/dist/server/services/websocket/tab-handlers.d.ts +10 -0
  105. package/dist/server/services/websocket/tab-handlers.d.ts.map +1 -0
  106. package/dist/server/services/websocket/tab-handlers.js +131 -0
  107. package/dist/server/services/websocket/tab-handlers.js.map +1 -0
  108. package/dist/server/services/websocket/terminal-handlers.d.ts +9 -0
  109. package/dist/server/services/websocket/terminal-handlers.d.ts.map +1 -0
  110. package/dist/server/services/websocket/terminal-handlers.js +220 -0
  111. package/dist/server/services/websocket/terminal-handlers.js.map +1 -0
  112. package/dist/server/services/websocket/types.d.ts +67 -2
  113. package/dist/server/services/websocket/types.d.ts.map +1 -1
  114. package/hooks/bouncer.sh +11 -4
  115. package/package.json +7 -2
  116. package/server/README.md +176 -159
  117. package/server/cli/headless/claude-invoker.ts +740 -133
  118. package/server/cli/headless/index.ts +7 -1
  119. package/server/cli/headless/output-utils.test.ts +225 -0
  120. package/server/cli/headless/prompt-utils.ts +37 -5
  121. package/server/cli/headless/runner.ts +55 -8
  122. package/server/cli/headless/stall-assessor.test.ts +165 -0
  123. package/server/cli/headless/stall-assessor.ts +478 -22
  124. package/server/cli/headless/tool-watchdog.test.ts +429 -0
  125. package/server/cli/headless/tool-watchdog.ts +398 -0
  126. package/server/cli/headless/types.ts +93 -1
  127. package/server/cli/improvisation-session-manager.ts +1133 -145
  128. package/server/index.ts +5 -14
  129. package/server/mcp/README.md +59 -67
  130. package/server/mcp/bouncer-integration.test.ts +161 -0
  131. package/server/mcp/bouncer-integration.ts +28 -0
  132. package/server/mcp/security-audit.ts +12 -8
  133. package/server/mcp/security-patterns.test.ts +258 -0
  134. package/server/mcp/security-patterns.ts +8 -2
  135. package/server/routes/improvise.ts +6 -6
  136. package/server/services/analytics.ts +26 -4
  137. package/server/services/platform.test.ts +0 -10
  138. package/server/services/platform.ts +16 -11
  139. package/server/services/sandbox-utils.ts +78 -0
  140. package/server/services/settings.ts +25 -0
  141. package/server/services/terminal/pty-manager.ts +68 -129
  142. package/server/services/websocket/autocomplete.test.ts +194 -0
  143. package/server/services/websocket/file-explorer-handlers.ts +587 -0
  144. package/server/services/websocket/git-handlers.ts +924 -0
  145. package/server/services/websocket/git-pr-handlers.ts +363 -0
  146. package/server/services/websocket/git-worktree-handlers.ts +403 -0
  147. package/server/services/websocket/handler-context.ts +44 -0
  148. package/server/services/websocket/handler.test.ts +1 -1
  149. package/server/services/websocket/handler.ts +90 -2421
  150. package/server/services/websocket/index.ts +1 -1
  151. package/server/services/websocket/session-handlers.ts +574 -0
  152. package/server/services/websocket/settings-handlers.ts +150 -0
  153. package/server/services/websocket/tab-handlers.ts +150 -0
  154. package/server/services/websocket/terminal-handlers.ts +277 -0
  155. package/server/services/websocket/types.ts +145 -4
  156. package/bin/release.sh +0 -110
  157. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  158. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  159. package/dist/server/services/terminal/tmux-manager.js +0 -352
  160. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  161. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -0,0 +1,429 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2
+ import { DEFAULT_TOOL_TIMEOUT_PROFILES, ToolWatchdog } from './tool-watchdog.js';
3
+
4
+ describe('ToolWatchdog', () => {
5
+ beforeEach(() => { // Install Vitest fake timers before every test so watchdog timers can be advanced without real waiting
6
+ vi.useFakeTimers();
7
+ });
8
+
9
+ afterEach(() => { // Restore real timers after every test so fake-timer state does not leak into the next one
10
+ vi.useRealTimers();
11
+ });
12
+
13
+ // ========== getProfile ==========
14
+
15
+ describe('getProfile', () => { // getProfile(toolName): checks the timeout profile resolved for a tool name
16
+ it('returns specific profile for known tools', () => { // pins the built-in WebFetch values
17
+ const watchdog = new ToolWatchdog();
18
+ const webFetch = watchdog.getProfile('WebFetch');
19
+ expect(webFetch.coldStartMs).toBe(180_000);
20
+ expect(webFetch.floorMs).toBe(120_000);
21
+ expect(webFetch.ceilingMs).toBe(300_000);
22
+ expect(webFetch.useAdaptive).toBe(true);
23
+ expect(webFetch.useHaikuTiebreaker).toBe(true);
24
+ });
25
+
26
+ it('returns Task profile with long timeouts', () => { // Task: 15 min cold start, 10 min floor, 45 min ceiling
27
+ const watchdog = new ToolWatchdog();
28
+ const task = watchdog.getProfile('Task');
29
+ expect(task.coldStartMs).toBe(900_000);
30
+ expect(task.floorMs).toBe(600_000);
31
+ expect(task.ceilingMs).toBe(2_700_000);
32
+ });
33
+
34
+ it('returns default profile for unknown tools', () => { // a name with no dedicated profile gets the fallback values
35
+ const watchdog = new ToolWatchdog();
36
+ const unknown = watchdog.getProfile('SomeNewTool');
37
+ expect(unknown.coldStartMs).toBe(300_000);
38
+ expect(unknown.floorMs).toBe(120_000);
39
+ expect(unknown.ceilingMs).toBe(600_000);
40
+ expect(unknown.useAdaptive).toBe(false);
41
+ });
42
+
43
+ it('merges custom profiles with defaults', () => { // a partial override must not drop the remaining default fields
44
+ const watchdog = new ToolWatchdog({
45
+ profiles: {
46
+ WebFetch: { coldStartMs: 60_000 }, // only coldStartMs is overridden
47
+ },
48
+ });
49
+ const profile = watchdog.getProfile('WebFetch');
50
+ expect(profile.coldStartMs).toBe(60_000);
51
+ // Other fields should come from default WebFetch profile
52
+ expect(profile.floorMs).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
53
+ expect(profile.useAdaptive).toBe(true);
54
+ });
55
+
56
+ it('allows custom profiles for new tool names', () => { // constructor profiles may introduce tool names with no built-in entry
57
+ const watchdog = new ToolWatchdog({
58
+ profiles: {
59
+ CustomTool: { coldStartMs: 10_000, floorMs: 5_000, ceilingMs: 30_000 },
60
+ },
61
+ });
62
+ const profile = watchdog.getProfile('CustomTool');
63
+ expect(profile.coldStartMs).toBe(10_000);
64
+ expect(profile.floorMs).toBe(5_000);
65
+ expect(profile.ceilingMs).toBe(30_000);
66
+ });
67
+ });
68
+
69
+ // ========== getTimeout ==========
70
+
71
+ describe('getTimeout', () => { // getTimeout(toolName): checks the timeout in ms that would be applied to the next call of a tool
72
+ it('returns coldStart for non-adaptive tools', () => {
73
+ const watchdog = new ToolWatchdog();
74
+ // Bash is non-adaptive
75
+ expect(watchdog.getTimeout('Bash')).toBe(300_000);
76
+ });
77
+
78
+ it('returns coldStart when no samples recorded', () => { // WebFetch with no recorded completions falls back to its cold start
79
+ const watchdog = new ToolWatchdog();
80
+ expect(watchdog.getTimeout('WebFetch')).toBe(180_000);
81
+ });
82
+
83
+ it('returns adaptive timeout after recording samples', () => {
84
+ const watchdog = new ToolWatchdog();
85
+ // Record a 10s completion for WebFetch
86
+ watchdog.recordCompletion('WebFetch', 10_000);
87
+
88
+ const timeout = watchdog.getTimeout('WebFetch');
89
+ // First sample: est = 10000, dev = 5000, timeout = 10000 + 4*5000 = 30000
90
+ // But floor is 120000, so should be clamped to floor
91
+ expect(timeout).toBe(120_000);
92
+ });
93
+
94
+ it('respects floor clamping', () => {
95
+ const watchdog = new ToolWatchdog();
96
+ // Record very fast completions
97
+ watchdog.recordCompletion('WebFetch', 100);
98
+ watchdog.recordCompletion('WebFetch', 100);
99
+ watchdog.recordCompletion('WebFetch', 100);
100
+
101
+ // Adaptive calculation would be very low, but floor prevents it
102
+ expect(watchdog.getTimeout('WebFetch')).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
103
+ });
104
+
105
+ it('respects ceiling clamping', () => { // NOTE(review): only an upper bound is asserted; if WebSearch is non-adaptive this passes via coldStart alone — confirm against the profile table
106
+ const watchdog = new ToolWatchdog();
107
+ // Record very slow completions
108
+ watchdog.recordCompletion('WebSearch', 500_000);
109
+
110
+ const timeout = watchdog.getTimeout('WebSearch');
111
+ // Should not exceed ceiling
112
+ expect(timeout).toBeLessThanOrEqual(DEFAULT_TOOL_TIMEOUT_PROFILES.WebSearch.ceilingMs);
113
+ });
114
+
115
+ it('does not record completions for non-adaptive tools', () => { // observed indirectly: the timeout is unchanged after recordCompletion
116
+ const watchdog = new ToolWatchdog();
117
+ // Bash is non-adaptive (Read too)
118
+ watchdog.recordCompletion('Bash', 5_000);
119
+ // Should still return coldStart
120
+ expect(watchdog.getTimeout('Bash')).toBe(300_000);
121
+ });
122
+ });
123
+
124
+ // ========== recordCompletion ==========
125
+
126
+ describe('recordCompletion', () => { // recordCompletion(toolName, durationMs): effects are observed through getTimeout()
127
+ it('initializes tracker on first sample', () => {
128
+ const watchdog = new ToolWatchdog();
129
+ watchdog.recordCompletion('WebFetch', 20_000);
130
+
131
+ // After the first sample the timeout comes from the adaptive estimate (clamped to the floor), not the 180s cold start
132
+ const timeout = watchdog.getTimeout('WebFetch');
133
+ // est=20000, dev=10000, adaptive=20000+4*10000=60000, floor=120000 → 120000
134
+ expect(timeout).toBe(120_000);
135
+ });
136
+
137
+ it('updates EMA on subsequent samples', () => {
138
+ const watchdog = new ToolWatchdog();
139
+ // First sample
140
+ watchdog.recordCompletion('Glob', 10_000);
141
+ const timeout1 = watchdog.getTimeout('Glob');
142
+
143
+ // Second sample - much longer
144
+ watchdog.recordCompletion('Glob', 50_000);
145
+ const timeout2 = watchdog.getTimeout('Glob');
146
+
147
+ // Timeout must not decrease after a longer sample (the check is >=, so an unchanged timeout also passes)
148
+ expect(timeout2).toBeGreaterThanOrEqual(timeout1);
149
+ });
150
+
151
+ it('converges toward actual duration over many samples', () => {
152
+ const watchdog = new ToolWatchdog();
153
+ // Record many similar samples for Glob (adaptive, floor=30000, ceiling=180000)
154
+ for (let i = 0; i < 20; i++) {
155
+ watchdog.recordCompletion('Glob', 45_000);
156
+ }
157
+ const timeout = watchdog.getTimeout('Glob');
158
+ // Should converge near 45000, with deviation near 0
159
+ // adaptive ≈ 45000 + 4*~0 ≈ 45000, which is above the 30000 floor, so the result should be ~45000
160
+ expect(timeout).toBeGreaterThanOrEqual(30_000); // loose bounds: floor below, 60000 above
161
+ expect(timeout).toBeLessThanOrEqual(60_000);
162
+ });
163
+ });
164
+
165
+ // ========== startWatch / clearWatch ==========
166
+
167
+ describe('startWatch / clearWatch', () => { // startWatch(id, toolName, toolInput, onTimeout) arms a timer; clearWatch(id) cancels it
168
+ it('calls timeout callback when timer expires', async () => {
169
+ const watchdog = new ToolWatchdog();
170
+ const onTimeout = vi.fn();
171
+
172
+ watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://example.com' }, onTimeout);
173
+
174
+ // Advance past WebFetch cold start (180s) — async because internal handler is async
175
+ await vi.advanceTimersByTimeAsync(180_001);
176
+
177
+ // onTimeout should fire (no tiebreaker configured)
178
+ expect(onTimeout).toHaveBeenCalledOnce();
179
+ });
180
+
181
+ it('does not call timeout if cleared before expiry', async () => {
182
+ const watchdog = new ToolWatchdog();
183
+ const onTimeout = vi.fn();
184
+
185
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout);
186
+ watchdog.clearWatch('tool-1');
187
+
188
+ await vi.advanceTimersByTimeAsync(300_000); // well past the 180s WebFetch cold start
189
+ expect(onTimeout).not.toHaveBeenCalled();
190
+ });
191
+
192
+ it('replaces existing watch for same ID', async () => {
193
+ const watchdog = new ToolWatchdog();
194
+ const onTimeout1 = vi.fn();
195
+ const onTimeout2 = vi.fn();
196
+
197
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
198
+ watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout2); // same ID, different tool and callback
199
+
200
+ // Advance past WebSearch cold start (90s)
201
+ await vi.advanceTimersByTimeAsync(90_001);
202
+ expect(onTimeout2).toHaveBeenCalledOnce();
203
+ expect(onTimeout1).not.toHaveBeenCalled(); // NOTE(review): at 90_001 ms the replaced WebFetch timer (180s) would not be due anyway, so this does not prove cancellation — consider advancing past 180s
204
+ });
205
+
206
+ it('tracks multiple watches independently', async () => {
207
+ const watchdog = new ToolWatchdog();
208
+ const onTimeout1 = vi.fn();
209
+ const onTimeout2 = vi.fn();
210
+
211
+ watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout1); // 90s
212
+ watchdog.startWatch('tool-2', 'WebFetch', {}, onTimeout2); // 180s
213
+
214
+ await vi.advanceTimersByTimeAsync(90_001);
215
+ expect(onTimeout1).toHaveBeenCalledOnce();
216
+ expect(onTimeout2).not.toHaveBeenCalled();
217
+
218
+ await vi.advanceTimersByTimeAsync(90_000); // total elapsed is now 180_001 ms, just past the WebFetch cold start
219
+ expect(onTimeout2).toHaveBeenCalledOnce();
220
+ });
221
+ });
222
+
223
+ // ========== clearAll ==========
224
+
225
+ describe('clearAll', () => { // clearAll(): must cancel every pending timer and empty the active-watch map
226
+ it('clears all active watches', async () => {
227
+ const watchdog = new ToolWatchdog();
228
+ const onTimeout1 = vi.fn();
229
+ const onTimeout2 = vi.fn();
230
+
231
+ watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
232
+ watchdog.startWatch('tool-2', 'WebSearch', {}, onTimeout2);
233
+ watchdog.clearAll();
234
+ // Use the async advance: the timeout handler is async, so a sync advance could let these negative checks run before a leaked callback fires
235
+ await vi.advanceTimersByTimeAsync(300_000);
236
+ expect(onTimeout1).not.toHaveBeenCalled();
237
+ expect(onTimeout2).not.toHaveBeenCalled();
238
+ });
239
+
240
+ it('clears active watches map', () => { // purely synchronous: inspects getActiveWatches() right after clearAll()
241
+ const watchdog = new ToolWatchdog();
242
+ watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
243
+ watchdog.startWatch('tool-2', 'WebSearch', {}, vi.fn());
244
+
245
+ watchdog.clearAll();
246
+ expect(watchdog.getActiveWatches().size).toBe(0);
247
+ });
248
+ });
249
+
250
+ // ========== getActiveWatch / getActiveWatches ==========
251
+
252
+ describe('getActiveWatch', () => { // getActiveWatch(id): looks up the watch record for a tool-call ID
253
+ it('returns watch for active tool', () => { // the record exposes the toolName and toolInput passed to startWatch
254
+ const watchdog = new ToolWatchdog();
255
+ watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://test.com' }, vi.fn());
256
+
257
+ const watch = watchdog.getActiveWatch('tool-1');
258
+ expect(watch).toBeDefined();
259
+ expect(watch!.toolName).toBe('WebFetch');
260
+ expect(watch!.toolInput).toEqual({ url: 'http://test.com' });
261
+ });
262
+
263
+ it('returns undefined for cleared watch', () => { // clearWatch removes the record, not just the timer
264
+ const watchdog = new ToolWatchdog();
265
+ watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
266
+ watchdog.clearWatch('tool-1');
267
+
268
+ expect(watchdog.getActiveWatch('tool-1')).toBeUndefined();
269
+ });
270
+
271
+ it('returns undefined for unknown ID', () => { // no watch was ever started for this ID
272
+ const watchdog = new ToolWatchdog();
273
+ expect(watchdog.getActiveWatch('nonexistent')).toBeUndefined();
274
+ });
275
+ });
276
+
277
+ // ========== buildCheckpoint ==========
278
+
279
+ describe('buildCheckpoint', () => { // buildCheckpoint(prompt, assistantText, thinkingText, accumulatedTools, hungToolId, claudeSessionId, processStartTime)
280
+ it('returns null when hung tool ID not found', () => { // no watch was started and the tool list is empty, so 'missing-id' cannot be resolved
281
+ const watchdog = new ToolWatchdog();
282
+ const checkpoint = watchdog.buildCheckpoint(
283
+ 'test prompt', '', '', [], 'missing-id', undefined, Date.now()
284
+ );
285
+ expect(checkpoint).toBeNull();
286
+ });
287
+
288
+ it('builds checkpoint with correct tool separation', () => { // tools are split into completed / in-progress / hung
289
+ const watchdog = new ToolWatchdog();
290
+ vi.setSystemTime(new Date('2025-01-01T00:00:00Z')); // pin the fake clock so Date.now() is deterministic
291
+ const processStartTime = Date.now();
292
+
293
+ watchdog.startWatch('hung-tool', 'WebFetch', { url: 'http://slow.com' }, vi.fn());
294
+
295
+ const accumulatedTools = [ // one finished tool, one still running, and the hung tool itself
296
+ { toolId: 'tool-1', toolName: 'Read', toolInput: { path: 'a.ts' }, result: 'content', isError: false, duration: 100 },
297
+ { toolId: 'tool-2', toolName: 'Grep', toolInput: { pattern: 'foo' }, result: undefined, isError: false },
298
+ { toolId: 'hung-tool', toolName: 'WebFetch', toolInput: { url: 'http://slow.com' }, result: undefined, isError: false },
299
+ ];
300
+
301
+ const checkpoint = watchdog.buildCheckpoint(
302
+ 'find and fix',
303
+ 'assistant response text',
304
+ 'thinking about it',
305
+ accumulatedTools,
306
+ 'hung-tool',
307
+ 'session-123',
308
+ processStartTime,
309
+ );
310
+
311
+ expect(checkpoint).not.toBeNull();
312
+ expect(checkpoint!.originalPrompt).toBe('find and fix');
313
+ expect(checkpoint!.assistantText).toBe('assistant response text');
314
+ expect(checkpoint!.thinkingText).toBe('thinking about it');
315
+ expect(checkpoint!.claudeSessionId).toBe('session-123');
316
+
317
+ // Completed tools: only tool-1 (has result and is not hung)
318
+ expect(checkpoint!.completedTools).toHaveLength(1);
319
+ expect(checkpoint!.completedTools[0].toolId).toBe('tool-1');
320
+
321
+ // In-progress tools: tool-2 (no result, not hung)
322
+ expect(checkpoint!.inProgressTools).toHaveLength(1);
323
+ expect(checkpoint!.inProgressTools[0].toolId).toBe('tool-2');
324
+
325
+ // Hung tool
326
+ expect(checkpoint!.hungTool.toolName).toBe('WebFetch');
327
+ expect(checkpoint!.hungTool.toolId).toBe('hung-tool');
328
+ expect(checkpoint!.hungTool.url).toBe('http://slow.com');
329
+ });
330
+
331
+ it('extracts URL from tool input for WebFetch', () => { // hungTool.url mirrors toolInput.url
332
+ const watchdog = new ToolWatchdog();
333
+ watchdog.startWatch('t1', 'WebFetch', { url: 'http://example.com' }, vi.fn());
334
+
335
+ const tools = [
336
+ { toolId: 't1', toolName: 'WebFetch', toolInput: { url: 'http://example.com' }, result: undefined, isError: false },
337
+ ];
338
+
339
+ const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
340
+ expect(cp!.hungTool.url).toBe('http://example.com');
341
+ });
342
+
343
+ it('extracts query from tool input for WebSearch', () => { // for WebSearch the search query is surfaced in the same hungTool.url field
344
+ const watchdog = new ToolWatchdog();
345
+ watchdog.startWatch('t1', 'WebSearch', { query: 'test search' }, vi.fn());
346
+
347
+ const tools = [
348
+ { toolId: 't1', toolName: 'WebSearch', toolInput: { query: 'test search' }, result: undefined, isError: false },
349
+ ];
350
+
351
+ const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
352
+ expect(cp!.hungTool.url).toBe('test search');
353
+ });
354
+ });
355
+
356
+ // ========== tiebreaker integration ==========
357
+
358
+ describe('tiebreaker', () => { // onTiebreaker option: async callback consulted on timeout that resolves to { action, extensionMs, reason }
359
+ it('extends when tiebreaker returns extend', async () => {
360
+ const onTiebreaker = vi.fn().mockResolvedValue({
361
+ action: 'extend',
362
+ extensionMs: 60_000,
363
+ reason: 'still working',
364
+ });
365
+ const watchdog = new ToolWatchdog({ onTiebreaker });
366
+ const onTimeout = vi.fn();
367
+
368
+ // WebFetch has useHaikuTiebreaker=true; its cold-start timeout is 180s
369
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
370
+
371
+ // Advance to trigger timeout
372
+ await vi.advanceTimersByTimeAsync(180_001);
373
+
374
+ // Tiebreaker should have been called
375
+ expect(onTiebreaker).toHaveBeenCalledOnce();
376
+ // onTimeout should NOT have fired (tiebreaker extended)
377
+ expect(onTimeout).not.toHaveBeenCalled();
378
+
379
+ // Now advance past extension
380
+ await vi.advanceTimersByTimeAsync(60_001);
381
+ // Should fire after extension; NOTE(review): the mock would answer 'extend' again, so presumably the tiebreaker is consulted only once per watch — confirm in tool-watchdog.ts
382
+ expect(onTimeout).toHaveBeenCalledOnce();
383
+ });
384
+
385
+ it('kills when tiebreaker returns kill', async () => { // a 'kill' verdict fires onTimeout on the first expiry
386
+ const onTiebreaker = vi.fn().mockResolvedValue({
387
+ action: 'kill',
388
+ extensionMs: 0,
389
+ reason: 'process is hung',
390
+ });
391
+ const watchdog = new ToolWatchdog({ onTiebreaker });
392
+ const onTimeout = vi.fn();
393
+
394
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
395
+
396
+ await vi.advanceTimersByTimeAsync(180_001);
397
+
398
+ expect(onTiebreaker).toHaveBeenCalledOnce();
399
+ expect(onTimeout).toHaveBeenCalledOnce();
400
+ });
401
+
402
+ it('kills when tiebreaker throws', async () => { // a rejected tiebreaker promise is treated like a kill
403
+ const onTiebreaker = vi.fn().mockRejectedValue(new Error('haiku failed'));
404
+ const watchdog = new ToolWatchdog({ onTiebreaker });
405
+ const onTimeout = vi.fn();
406
+
407
+ watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
408
+
409
+ await vi.advanceTimersByTimeAsync(180_001);
410
+
411
+ expect(onTiebreaker).toHaveBeenCalledOnce();
412
+ expect(onTimeout).toHaveBeenCalledOnce();
413
+ });
414
+
415
+ it('does not attempt tiebreaker for tools with useHaikuTiebreaker=false', async () => {
416
+ const onTiebreaker = vi.fn();
417
+ const watchdog = new ToolWatchdog({ onTiebreaker });
418
+ const onTimeout = vi.fn();
419
+
420
+ // WebSearch has useHaikuTiebreaker: false
421
+ watchdog.startWatch('t1', 'WebSearch', {}, onTimeout);
422
+
423
+ await vi.advanceTimersByTimeAsync(90_001); // just past the 90s WebSearch cold start
424
+
425
+ expect(onTiebreaker).not.toHaveBeenCalled();
426
+ expect(onTimeout).toHaveBeenCalledOnce();
427
+ });
428
+ });
429
+ });