mstro-app 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/mstro.js +65 -2
- package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
- package/dist/server/cli/headless/claude-invoker.js +4 -3
- package/dist/server/cli/headless/claude-invoker.js.map +1 -1
- package/dist/server/cli/headless/mcp-config.js +2 -2
- package/dist/server/cli/headless/mcp-config.js.map +1 -1
- package/dist/server/cli/headless/runner.d.ts +6 -1
- package/dist/server/cli/headless/runner.d.ts.map +1 -1
- package/dist/server/cli/headless/runner.js +36 -4
- package/dist/server/cli/headless/runner.js.map +1 -1
- package/dist/server/cli/headless/types.d.ts +1 -1
- package/dist/server/cli/headless/types.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.d.ts +2 -2
- package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.js +3 -2
- package/dist/server/cli/improvisation-session-manager.js.map +1 -1
- package/dist/server/index.js +6 -1
- package/dist/server/index.js.map +1 -1
- package/dist/server/mcp/bouncer-cli.js +53 -14
- package/dist/server/mcp/bouncer-cli.js.map +1 -1
- package/dist/server/mcp/bouncer-integration.d.ts +1 -1
- package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
- package/dist/server/mcp/bouncer-integration.js +70 -7
- package/dist/server/mcp/bouncer-integration.js.map +1 -1
- package/dist/server/mcp/security-audit.d.ts +3 -3
- package/dist/server/mcp/security-audit.d.ts.map +1 -1
- package/dist/server/mcp/security-audit.js.map +1 -1
- package/dist/server/mcp/server.js +3 -2
- package/dist/server/mcp/server.js.map +1 -1
- package/dist/server/services/analytics.d.ts +2 -2
- package/dist/server/services/analytics.d.ts.map +1 -1
- package/dist/server/services/analytics.js.map +1 -1
- package/dist/server/services/files.js +7 -7
- package/dist/server/services/files.js.map +1 -1
- package/dist/server/services/pathUtils.js +1 -1
- package/dist/server/services/pathUtils.js.map +1 -1
- package/dist/server/services/platform.d.ts +2 -2
- package/dist/server/services/platform.d.ts.map +1 -1
- package/dist/server/services/platform.js.map +1 -1
- package/dist/server/services/sentry.d.ts +1 -1
- package/dist/server/services/sentry.d.ts.map +1 -1
- package/dist/server/services/sentry.js.map +1 -1
- package/dist/server/services/terminal/pty-manager.d.ts +10 -0
- package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
- package/dist/server/services/terminal/pty-manager.js +32 -4
- package/dist/server/services/terminal/pty-manager.js.map +1 -1
- package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
- package/dist/server/services/websocket/file-utils.d.ts +4 -0
- package/dist/server/services/websocket/file-utils.d.ts.map +1 -1
- package/dist/server/services/websocket/file-utils.js +27 -8
- package/dist/server/services/websocket/file-utils.js.map +1 -1
- package/dist/server/services/websocket/git-handlers.js +17 -17
- package/dist/server/services/websocket/git-handlers.js.map +1 -1
- package/dist/server/services/websocket/git-pr-handlers.js +3 -3
- package/dist/server/services/websocket/git-pr-handlers.js.map +1 -1
- package/dist/server/services/websocket/git-worktree-handlers.js +10 -10
- package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -1
- package/dist/server/services/websocket/handler.js +1 -1
- package/dist/server/services/websocket/handler.js.map +1 -1
- package/dist/server/services/websocket/session-handlers.d.ts +1 -1
- package/dist/server/services/websocket/session-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/session-handlers.js +12 -11
- package/dist/server/services/websocket/session-handlers.js.map +1 -1
- package/dist/server/services/websocket/tab-handlers.js.map +1 -1
- package/dist/server/services/websocket/terminal-handlers.js +1 -1
- package/dist/server/services/websocket/terminal-handlers.js.map +1 -1
- package/dist/server/services/websocket/types.d.ts.map +1 -1
- package/dist/server/utils/agent-manager.d.ts +22 -2
- package/dist/server/utils/agent-manager.d.ts.map +1 -1
- package/dist/server/utils/agent-manager.js +2 -2
- package/dist/server/utils/agent-manager.js.map +1 -1
- package/dist/server/utils/port-manager.js.map +1 -1
- package/hooks/bouncer.sh +17 -3
- package/package.json +4 -2
- package/server/cli/headless/claude-invoker.ts +21 -16
- package/server/cli/headless/mcp-config.ts +8 -8
- package/server/cli/headless/runner.ts +32 -4
- package/server/cli/headless/types.ts +1 -1
- package/server/cli/improvisation-session-manager.ts +8 -7
- package/server/index.ts +15 -9
- package/server/mcp/bouncer-cli.ts +73 -20
- package/server/mcp/bouncer-integration.ts +99 -16
- package/server/mcp/security-audit.ts +4 -4
- package/server/mcp/server.ts +6 -5
- package/server/services/analytics.ts +3 -3
- package/server/services/files.ts +13 -13
- package/server/services/pathUtils.ts +2 -2
- package/server/services/platform.ts +5 -5
- package/server/services/sentry.ts +1 -1
- package/server/services/terminal/pty-manager.ts +36 -9
- package/server/services/websocket/file-explorer-handlers.ts +1 -1
- package/server/services/websocket/file-utils.ts +28 -9
- package/server/services/websocket/git-handlers.ts +34 -34
- package/server/services/websocket/git-pr-handlers.ts +6 -6
- package/server/services/websocket/git-worktree-handlers.ts +20 -20
- package/server/services/websocket/handler.ts +2 -2
- package/server/services/websocket/session-handlers.ts +31 -30
- package/server/services/websocket/tab-handlers.ts +1 -1
- package/server/services/websocket/terminal-handlers.ts +2 -2
- package/server/services/websocket/types.ts +2 -0
- package/server/utils/agent-manager.ts +6 -6
- package/server/utils/port-manager.ts +1 -1
- package/server/cli/headless/output-utils.test.ts +0 -225
- package/server/cli/headless/stall-assessor.test.ts +0 -165
- package/server/cli/headless/tool-watchdog.test.ts +0 -429
- package/server/mcp/bouncer-integration.test.ts +0 -161
- package/server/mcp/security-patterns.test.ts +0 -258
- package/server/services/platform.test.ts +0 -1304
- package/server/services/websocket/autocomplete.test.ts +0 -194
- package/server/services/websocket/handler.test.ts +0 -20
|
@@ -1,429 +0,0 @@
|
|
|
1
|
-
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
-
import { DEFAULT_TOOL_TIMEOUT_PROFILES, ToolWatchdog } from './tool-watchdog.js';
|
|
3
|
-
|
|
4
|
-
describe('ToolWatchdog', () => {
|
|
5
|
-
beforeEach(() => {
|
|
6
|
-
vi.useFakeTimers();
|
|
7
|
-
});
|
|
8
|
-
|
|
9
|
-
afterEach(() => {
|
|
10
|
-
vi.useRealTimers();
|
|
11
|
-
});
|
|
12
|
-
|
|
13
|
-
// ========== getProfile ==========
|
|
14
|
-
|
|
15
|
-
describe('getProfile', () => {
|
|
16
|
-
it('returns specific profile for known tools', () => {
|
|
17
|
-
const watchdog = new ToolWatchdog();
|
|
18
|
-
const webFetch = watchdog.getProfile('WebFetch');
|
|
19
|
-
expect(webFetch.coldStartMs).toBe(180_000);
|
|
20
|
-
expect(webFetch.floorMs).toBe(120_000);
|
|
21
|
-
expect(webFetch.ceilingMs).toBe(300_000);
|
|
22
|
-
expect(webFetch.useAdaptive).toBe(true);
|
|
23
|
-
expect(webFetch.useHaikuTiebreaker).toBe(true);
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
it('returns Task profile with long timeouts', () => {
|
|
27
|
-
const watchdog = new ToolWatchdog();
|
|
28
|
-
const task = watchdog.getProfile('Task');
|
|
29
|
-
expect(task.coldStartMs).toBe(900_000);
|
|
30
|
-
expect(task.floorMs).toBe(600_000);
|
|
31
|
-
expect(task.ceilingMs).toBe(2_700_000);
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
it('returns default profile for unknown tools', () => {
|
|
35
|
-
const watchdog = new ToolWatchdog();
|
|
36
|
-
const unknown = watchdog.getProfile('SomeNewTool');
|
|
37
|
-
expect(unknown.coldStartMs).toBe(300_000);
|
|
38
|
-
expect(unknown.floorMs).toBe(120_000);
|
|
39
|
-
expect(unknown.ceilingMs).toBe(600_000);
|
|
40
|
-
expect(unknown.useAdaptive).toBe(false);
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
it('merges custom profiles with defaults', () => {
|
|
44
|
-
const watchdog = new ToolWatchdog({
|
|
45
|
-
profiles: {
|
|
46
|
-
WebFetch: { coldStartMs: 60_000 },
|
|
47
|
-
},
|
|
48
|
-
});
|
|
49
|
-
const profile = watchdog.getProfile('WebFetch');
|
|
50
|
-
expect(profile.coldStartMs).toBe(60_000);
|
|
51
|
-
// Other fields should come from default WebFetch profile
|
|
52
|
-
expect(profile.floorMs).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
|
|
53
|
-
expect(profile.useAdaptive).toBe(true);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
it('allows custom profiles for new tool names', () => {
|
|
57
|
-
const watchdog = new ToolWatchdog({
|
|
58
|
-
profiles: {
|
|
59
|
-
CustomTool: { coldStartMs: 10_000, floorMs: 5_000, ceilingMs: 30_000 },
|
|
60
|
-
},
|
|
61
|
-
});
|
|
62
|
-
const profile = watchdog.getProfile('CustomTool');
|
|
63
|
-
expect(profile.coldStartMs).toBe(10_000);
|
|
64
|
-
expect(profile.floorMs).toBe(5_000);
|
|
65
|
-
expect(profile.ceilingMs).toBe(30_000);
|
|
66
|
-
});
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
// ========== getTimeout ==========
|
|
70
|
-
|
|
71
|
-
describe('getTimeout', () => {
|
|
72
|
-
it('returns coldStart for non-adaptive tools', () => {
|
|
73
|
-
const watchdog = new ToolWatchdog();
|
|
74
|
-
// Bash is non-adaptive
|
|
75
|
-
expect(watchdog.getTimeout('Bash')).toBe(300_000);
|
|
76
|
-
});
|
|
77
|
-
|
|
78
|
-
it('returns coldStart when no samples recorded', () => {
|
|
79
|
-
const watchdog = new ToolWatchdog();
|
|
80
|
-
expect(watchdog.getTimeout('WebFetch')).toBe(180_000);
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
it('returns adaptive timeout after recording samples', () => {
|
|
84
|
-
const watchdog = new ToolWatchdog();
|
|
85
|
-
// Record a 10s completion for WebFetch
|
|
86
|
-
watchdog.recordCompletion('WebFetch', 10_000);
|
|
87
|
-
|
|
88
|
-
const timeout = watchdog.getTimeout('WebFetch');
|
|
89
|
-
// First sample: est = 10000, dev = 5000, timeout = 10000 + 4*5000 = 30000
|
|
90
|
-
// But floor is 120000, so should be clamped to floor
|
|
91
|
-
expect(timeout).toBe(120_000);
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
it('respects floor clamping', () => {
|
|
95
|
-
const watchdog = new ToolWatchdog();
|
|
96
|
-
// Record very fast completions
|
|
97
|
-
watchdog.recordCompletion('WebFetch', 100);
|
|
98
|
-
watchdog.recordCompletion('WebFetch', 100);
|
|
99
|
-
watchdog.recordCompletion('WebFetch', 100);
|
|
100
|
-
|
|
101
|
-
// Adaptive calculation would be very low, but floor prevents it
|
|
102
|
-
expect(watchdog.getTimeout('WebFetch')).toBe(DEFAULT_TOOL_TIMEOUT_PROFILES.WebFetch.floorMs);
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
it('respects ceiling clamping', () => {
|
|
106
|
-
const watchdog = new ToolWatchdog();
|
|
107
|
-
// Record very slow completions
|
|
108
|
-
watchdog.recordCompletion('WebSearch', 500_000);
|
|
109
|
-
|
|
110
|
-
const timeout = watchdog.getTimeout('WebSearch');
|
|
111
|
-
// Should not exceed ceiling
|
|
112
|
-
expect(timeout).toBeLessThanOrEqual(DEFAULT_TOOL_TIMEOUT_PROFILES.WebSearch.ceilingMs);
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
it('does not record completions for non-adaptive tools', () => {
|
|
116
|
-
const watchdog = new ToolWatchdog();
|
|
117
|
-
// Bash is non-adaptive (Read too)
|
|
118
|
-
watchdog.recordCompletion('Bash', 5_000);
|
|
119
|
-
// Should still return coldStart
|
|
120
|
-
expect(watchdog.getTimeout('Bash')).toBe(300_000);
|
|
121
|
-
});
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
// ========== recordCompletion ==========
|
|
125
|
-
|
|
126
|
-
describe('recordCompletion', () => {
|
|
127
|
-
it('initializes tracker on first sample', () => {
|
|
128
|
-
const watchdog = new ToolWatchdog();
|
|
129
|
-
watchdog.recordCompletion('WebFetch', 20_000);
|
|
130
|
-
|
|
131
|
-
// After first sample: timeout should differ from cold start if above floor
|
|
132
|
-
const timeout = watchdog.getTimeout('WebFetch');
|
|
133
|
-
// est=20000, dev=10000, adaptive=20000+4*10000=60000, floor=120000 → 120000
|
|
134
|
-
expect(timeout).toBe(120_000);
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
it('updates EMA on subsequent samples', () => {
|
|
138
|
-
const watchdog = new ToolWatchdog();
|
|
139
|
-
// First sample
|
|
140
|
-
watchdog.recordCompletion('Glob', 10_000);
|
|
141
|
-
const timeout1 = watchdog.getTimeout('Glob');
|
|
142
|
-
|
|
143
|
-
// Second sample - much longer
|
|
144
|
-
watchdog.recordCompletion('Glob', 50_000);
|
|
145
|
-
const timeout2 = watchdog.getTimeout('Glob');
|
|
146
|
-
|
|
147
|
-
// Timeout should increase after longer sample
|
|
148
|
-
expect(timeout2).toBeGreaterThanOrEqual(timeout1);
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
it('converges toward actual duration over many samples', () => {
|
|
152
|
-
const watchdog = new ToolWatchdog();
|
|
153
|
-
// Record many similar samples for Glob (adaptive, floor=30000, ceiling=180000)
|
|
154
|
-
for (let i = 0; i < 20; i++) {
|
|
155
|
-
watchdog.recordCompletion('Glob', 45_000);
|
|
156
|
-
}
|
|
157
|
-
const timeout = watchdog.getTimeout('Glob');
|
|
158
|
-
// Should converge near 45000, with deviation near 0
|
|
159
|
-
// adaptive ≈ 45000 + 4*~0 ≈ 45000, but floor is 30000, so should be ~45000
|
|
160
|
-
expect(timeout).toBeGreaterThanOrEqual(30_000);
|
|
161
|
-
expect(timeout).toBeLessThanOrEqual(60_000);
|
|
162
|
-
});
|
|
163
|
-
});
|
|
164
|
-
|
|
165
|
-
// ========== startWatch / clearWatch ==========
|
|
166
|
-
|
|
167
|
-
describe('startWatch / clearWatch', () => {
|
|
168
|
-
it('calls timeout callback when timer expires', async () => {
|
|
169
|
-
const watchdog = new ToolWatchdog();
|
|
170
|
-
const onTimeout = vi.fn();
|
|
171
|
-
|
|
172
|
-
watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://example.com' }, onTimeout);
|
|
173
|
-
|
|
174
|
-
// Advance past WebFetch cold start (180s) — async because internal handler is async
|
|
175
|
-
await vi.advanceTimersByTimeAsync(180_001);
|
|
176
|
-
|
|
177
|
-
// onTimeout should fire (no tiebreaker configured)
|
|
178
|
-
expect(onTimeout).toHaveBeenCalledOnce();
|
|
179
|
-
});
|
|
180
|
-
|
|
181
|
-
it('does not call timeout if cleared before expiry', async () => {
|
|
182
|
-
const watchdog = new ToolWatchdog();
|
|
183
|
-
const onTimeout = vi.fn();
|
|
184
|
-
|
|
185
|
-
watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout);
|
|
186
|
-
watchdog.clearWatch('tool-1');
|
|
187
|
-
|
|
188
|
-
await vi.advanceTimersByTimeAsync(300_000);
|
|
189
|
-
expect(onTimeout).not.toHaveBeenCalled();
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
it('replaces existing watch for same ID', async () => {
|
|
193
|
-
const watchdog = new ToolWatchdog();
|
|
194
|
-
const onTimeout1 = vi.fn();
|
|
195
|
-
const onTimeout2 = vi.fn();
|
|
196
|
-
|
|
197
|
-
watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
|
|
198
|
-
watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout2);
|
|
199
|
-
|
|
200
|
-
// Advance past WebSearch cold start (90s)
|
|
201
|
-
await vi.advanceTimersByTimeAsync(90_001);
|
|
202
|
-
expect(onTimeout2).toHaveBeenCalledOnce();
|
|
203
|
-
expect(onTimeout1).not.toHaveBeenCalled();
|
|
204
|
-
});
|
|
205
|
-
|
|
206
|
-
it('tracks multiple watches independently', async () => {
|
|
207
|
-
const watchdog = new ToolWatchdog();
|
|
208
|
-
const onTimeout1 = vi.fn();
|
|
209
|
-
const onTimeout2 = vi.fn();
|
|
210
|
-
|
|
211
|
-
watchdog.startWatch('tool-1', 'WebSearch', {}, onTimeout1); // 90s
|
|
212
|
-
watchdog.startWatch('tool-2', 'WebFetch', {}, onTimeout2); // 180s
|
|
213
|
-
|
|
214
|
-
await vi.advanceTimersByTimeAsync(90_001);
|
|
215
|
-
expect(onTimeout1).toHaveBeenCalledOnce();
|
|
216
|
-
expect(onTimeout2).not.toHaveBeenCalled();
|
|
217
|
-
|
|
218
|
-
await vi.advanceTimersByTimeAsync(90_000);
|
|
219
|
-
expect(onTimeout2).toHaveBeenCalledOnce();
|
|
220
|
-
});
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
// ========== clearAll ==========
|
|
224
|
-
|
|
225
|
-
describe('clearAll', () => {
|
|
226
|
-
it('clears all active watches', () => {
|
|
227
|
-
const watchdog = new ToolWatchdog();
|
|
228
|
-
const onTimeout1 = vi.fn();
|
|
229
|
-
const onTimeout2 = vi.fn();
|
|
230
|
-
|
|
231
|
-
watchdog.startWatch('tool-1', 'WebFetch', {}, onTimeout1);
|
|
232
|
-
watchdog.startWatch('tool-2', 'WebSearch', {}, onTimeout2);
|
|
233
|
-
watchdog.clearAll();
|
|
234
|
-
|
|
235
|
-
vi.advanceTimersByTime(300_000);
|
|
236
|
-
expect(onTimeout1).not.toHaveBeenCalled();
|
|
237
|
-
expect(onTimeout2).not.toHaveBeenCalled();
|
|
238
|
-
});
|
|
239
|
-
|
|
240
|
-
it('clears active watches map', () => {
|
|
241
|
-
const watchdog = new ToolWatchdog();
|
|
242
|
-
watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
|
|
243
|
-
watchdog.startWatch('tool-2', 'WebSearch', {}, vi.fn());
|
|
244
|
-
|
|
245
|
-
watchdog.clearAll();
|
|
246
|
-
expect(watchdog.getActiveWatches().size).toBe(0);
|
|
247
|
-
});
|
|
248
|
-
});
|
|
249
|
-
|
|
250
|
-
// ========== getActiveWatch / getActiveWatches ==========
|
|
251
|
-
|
|
252
|
-
describe('getActiveWatch', () => {
|
|
253
|
-
it('returns watch for active tool', () => {
|
|
254
|
-
const watchdog = new ToolWatchdog();
|
|
255
|
-
watchdog.startWatch('tool-1', 'WebFetch', { url: 'http://test.com' }, vi.fn());
|
|
256
|
-
|
|
257
|
-
const watch = watchdog.getActiveWatch('tool-1');
|
|
258
|
-
expect(watch).toBeDefined();
|
|
259
|
-
expect(watch!.toolName).toBe('WebFetch');
|
|
260
|
-
expect(watch!.toolInput).toEqual({ url: 'http://test.com' });
|
|
261
|
-
});
|
|
262
|
-
|
|
263
|
-
it('returns undefined for cleared watch', () => {
|
|
264
|
-
const watchdog = new ToolWatchdog();
|
|
265
|
-
watchdog.startWatch('tool-1', 'WebFetch', {}, vi.fn());
|
|
266
|
-
watchdog.clearWatch('tool-1');
|
|
267
|
-
|
|
268
|
-
expect(watchdog.getActiveWatch('tool-1')).toBeUndefined();
|
|
269
|
-
});
|
|
270
|
-
|
|
271
|
-
it('returns undefined for unknown ID', () => {
|
|
272
|
-
const watchdog = new ToolWatchdog();
|
|
273
|
-
expect(watchdog.getActiveWatch('nonexistent')).toBeUndefined();
|
|
274
|
-
});
|
|
275
|
-
});
|
|
276
|
-
|
|
277
|
-
// ========== buildCheckpoint ==========
|
|
278
|
-
|
|
279
|
-
describe('buildCheckpoint', () => {
|
|
280
|
-
it('returns null when hung tool ID not found', () => {
|
|
281
|
-
const watchdog = new ToolWatchdog();
|
|
282
|
-
const checkpoint = watchdog.buildCheckpoint(
|
|
283
|
-
'test prompt', '', '', [], 'missing-id', undefined, Date.now()
|
|
284
|
-
);
|
|
285
|
-
expect(checkpoint).toBeNull();
|
|
286
|
-
});
|
|
287
|
-
|
|
288
|
-
it('builds checkpoint with correct tool separation', () => {
|
|
289
|
-
const watchdog = new ToolWatchdog();
|
|
290
|
-
vi.setSystemTime(new Date('2025-01-01T00:00:00Z'));
|
|
291
|
-
const processStartTime = Date.now();
|
|
292
|
-
|
|
293
|
-
watchdog.startWatch('hung-tool', 'WebFetch', { url: 'http://slow.com' }, vi.fn());
|
|
294
|
-
|
|
295
|
-
const accumulatedTools = [
|
|
296
|
-
{ toolId: 'tool-1', toolName: 'Read', toolInput: { path: 'a.ts' }, result: 'content', isError: false, duration: 100 },
|
|
297
|
-
{ toolId: 'tool-2', toolName: 'Grep', toolInput: { pattern: 'foo' }, result: undefined, isError: false },
|
|
298
|
-
{ toolId: 'hung-tool', toolName: 'WebFetch', toolInput: { url: 'http://slow.com' }, result: undefined, isError: false },
|
|
299
|
-
];
|
|
300
|
-
|
|
301
|
-
const checkpoint = watchdog.buildCheckpoint(
|
|
302
|
-
'find and fix',
|
|
303
|
-
'assistant response text',
|
|
304
|
-
'thinking about it',
|
|
305
|
-
accumulatedTools,
|
|
306
|
-
'hung-tool',
|
|
307
|
-
'session-123',
|
|
308
|
-
processStartTime,
|
|
309
|
-
);
|
|
310
|
-
|
|
311
|
-
expect(checkpoint).not.toBeNull();
|
|
312
|
-
expect(checkpoint!.originalPrompt).toBe('find and fix');
|
|
313
|
-
expect(checkpoint!.assistantText).toBe('assistant response text');
|
|
314
|
-
expect(checkpoint!.thinkingText).toBe('thinking about it');
|
|
315
|
-
expect(checkpoint!.claudeSessionId).toBe('session-123');
|
|
316
|
-
|
|
317
|
-
// Completed tools: only tool-1 (has result and is not hung)
|
|
318
|
-
expect(checkpoint!.completedTools).toHaveLength(1);
|
|
319
|
-
expect(checkpoint!.completedTools[0].toolId).toBe('tool-1');
|
|
320
|
-
|
|
321
|
-
// In-progress tools: tool-2 (no result, not hung)
|
|
322
|
-
expect(checkpoint!.inProgressTools).toHaveLength(1);
|
|
323
|
-
expect(checkpoint!.inProgressTools[0].toolId).toBe('tool-2');
|
|
324
|
-
|
|
325
|
-
// Hung tool
|
|
326
|
-
expect(checkpoint!.hungTool.toolName).toBe('WebFetch');
|
|
327
|
-
expect(checkpoint!.hungTool.toolId).toBe('hung-tool');
|
|
328
|
-
expect(checkpoint!.hungTool.url).toBe('http://slow.com');
|
|
329
|
-
});
|
|
330
|
-
|
|
331
|
-
it('extracts URL from tool input for WebFetch', () => {
|
|
332
|
-
const watchdog = new ToolWatchdog();
|
|
333
|
-
watchdog.startWatch('t1', 'WebFetch', { url: 'http://example.com' }, vi.fn());
|
|
334
|
-
|
|
335
|
-
const tools = [
|
|
336
|
-
{ toolId: 't1', toolName: 'WebFetch', toolInput: { url: 'http://example.com' }, result: undefined, isError: false },
|
|
337
|
-
];
|
|
338
|
-
|
|
339
|
-
const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
|
|
340
|
-
expect(cp!.hungTool.url).toBe('http://example.com');
|
|
341
|
-
});
|
|
342
|
-
|
|
343
|
-
it('extracts query from tool input for WebSearch', () => {
|
|
344
|
-
const watchdog = new ToolWatchdog();
|
|
345
|
-
watchdog.startWatch('t1', 'WebSearch', { query: 'test search' }, vi.fn());
|
|
346
|
-
|
|
347
|
-
const tools = [
|
|
348
|
-
{ toolId: 't1', toolName: 'WebSearch', toolInput: { query: 'test search' }, result: undefined, isError: false },
|
|
349
|
-
];
|
|
350
|
-
|
|
351
|
-
const cp = watchdog.buildCheckpoint('prompt', '', '', tools, 't1', undefined, Date.now());
|
|
352
|
-
expect(cp!.hungTool.url).toBe('test search');
|
|
353
|
-
});
|
|
354
|
-
});
|
|
355
|
-
|
|
356
|
-
// ========== tiebreaker integration ==========
|
|
357
|
-
|
|
358
|
-
describe('tiebreaker', () => {
|
|
359
|
-
it('extends when tiebreaker returns extend', async () => {
|
|
360
|
-
const onTiebreaker = vi.fn().mockResolvedValue({
|
|
361
|
-
action: 'extend',
|
|
362
|
-
extensionMs: 60_000,
|
|
363
|
-
reason: 'still working',
|
|
364
|
-
});
|
|
365
|
-
const watchdog = new ToolWatchdog({ onTiebreaker });
|
|
366
|
-
const onTimeout = vi.fn();
|
|
367
|
-
|
|
368
|
-
// Use a tool with useHaikuTiebreaker=true and short timeout
|
|
369
|
-
watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
|
|
370
|
-
|
|
371
|
-
// Advance to trigger timeout
|
|
372
|
-
await vi.advanceTimersByTimeAsync(180_001);
|
|
373
|
-
|
|
374
|
-
// Tiebreaker should have been called
|
|
375
|
-
expect(onTiebreaker).toHaveBeenCalledOnce();
|
|
376
|
-
// onTimeout should NOT have fired (tiebreaker extended)
|
|
377
|
-
expect(onTimeout).not.toHaveBeenCalled();
|
|
378
|
-
|
|
379
|
-
// Now advance past extension
|
|
380
|
-
await vi.advanceTimersByTimeAsync(60_001);
|
|
381
|
-
// Should fire after extension
|
|
382
|
-
expect(onTimeout).toHaveBeenCalledOnce();
|
|
383
|
-
});
|
|
384
|
-
|
|
385
|
-
it('kills when tiebreaker returns kill', async () => {
|
|
386
|
-
const onTiebreaker = vi.fn().mockResolvedValue({
|
|
387
|
-
action: 'kill',
|
|
388
|
-
extensionMs: 0,
|
|
389
|
-
reason: 'process is hung',
|
|
390
|
-
});
|
|
391
|
-
const watchdog = new ToolWatchdog({ onTiebreaker });
|
|
392
|
-
const onTimeout = vi.fn();
|
|
393
|
-
|
|
394
|
-
watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
|
|
395
|
-
|
|
396
|
-
await vi.advanceTimersByTimeAsync(180_001);
|
|
397
|
-
|
|
398
|
-
expect(onTiebreaker).toHaveBeenCalledOnce();
|
|
399
|
-
expect(onTimeout).toHaveBeenCalledOnce();
|
|
400
|
-
});
|
|
401
|
-
|
|
402
|
-
it('kills when tiebreaker throws', async () => {
|
|
403
|
-
const onTiebreaker = vi.fn().mockRejectedValue(new Error('haiku failed'));
|
|
404
|
-
const watchdog = new ToolWatchdog({ onTiebreaker });
|
|
405
|
-
const onTimeout = vi.fn();
|
|
406
|
-
|
|
407
|
-
watchdog.startWatch('t1', 'WebFetch', {}, onTimeout);
|
|
408
|
-
|
|
409
|
-
await vi.advanceTimersByTimeAsync(180_001);
|
|
410
|
-
|
|
411
|
-
expect(onTiebreaker).toHaveBeenCalledOnce();
|
|
412
|
-
expect(onTimeout).toHaveBeenCalledOnce();
|
|
413
|
-
});
|
|
414
|
-
|
|
415
|
-
it('does not attempt tiebreaker for tools with useHaikuTiebreaker=false', async () => {
|
|
416
|
-
const onTiebreaker = vi.fn();
|
|
417
|
-
const watchdog = new ToolWatchdog({ onTiebreaker });
|
|
418
|
-
const onTimeout = vi.fn();
|
|
419
|
-
|
|
420
|
-
// WebSearch has useHaikuTiebreaker: false
|
|
421
|
-
watchdog.startWatch('t1', 'WebSearch', {}, onTimeout);
|
|
422
|
-
|
|
423
|
-
await vi.advanceTimersByTimeAsync(90_001);
|
|
424
|
-
|
|
425
|
-
expect(onTiebreaker).not.toHaveBeenCalled();
|
|
426
|
-
expect(onTimeout).toHaveBeenCalledOnce();
|
|
427
|
-
});
|
|
428
|
-
});
|
|
429
|
-
});
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
-
import type { BouncerReviewRequest } from './bouncer-integration.js';
|
|
3
|
-
import { reviewOperation } from './bouncer-integration.js';
|
|
4
|
-
|
|
5
|
-
// ========== Internal function tests via reviewOperation fast paths ==========
|
|
6
|
-
// The parsing helpers (tryExtractFromWrapper, tryExtractJsonBlock, validateDecision,
|
|
7
|
-
// parseHaikuResponse) are not exported, so we test them indirectly through reviewOperation
|
|
8
|
-
// for pattern-based fast paths, and directly test the parsing logic below.
|
|
9
|
-
|
|
10
|
-
describe('reviewOperation - pattern fast paths', () => {
|
|
11
|
-
beforeEach(() => {
|
|
12
|
-
// Suppress console.error from bouncer logging
|
|
13
|
-
vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
afterEach(() => {
|
|
17
|
-
vi.restoreAllMocks();
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it('allows safe read operations immediately', async () => {
|
|
21
|
-
const result = await reviewOperation({ operation: 'Read: /home/user/file.ts' });
|
|
22
|
-
expect(result.decision).toBe('allow');
|
|
23
|
-
expect(result.confidence).toBe(95);
|
|
24
|
-
expect(result.threatLevel).toBe('low');
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
it('allows safe bash commands immediately', async () => {
|
|
28
|
-
const result = await reviewOperation({ operation: 'Bash: npm test' });
|
|
29
|
-
expect(result.decision).toBe('allow');
|
|
30
|
-
expect(result.confidence).toBe(95);
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
it('allows Glob operations immediately', async () => {
|
|
34
|
-
const result = await reviewOperation({ operation: 'Glob: **/*.ts' });
|
|
35
|
-
expect(result.decision).toBe('allow');
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
it('allows Grep operations immediately', async () => {
|
|
39
|
-
const result = await reviewOperation({ operation: 'Grep: function' });
|
|
40
|
-
expect(result.decision).toBe('allow');
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
it('allows safe rm of build artifacts', async () => {
|
|
44
|
-
const result = await reviewOperation({ operation: 'Bash: rm -rf node_modules' });
|
|
45
|
-
expect(result.decision).toBe('allow');
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
it('denies critical threats with enforceable flag', async () => {
|
|
49
|
-
const result = await reviewOperation({ operation: 'rm -rf /' });
|
|
50
|
-
expect(result.decision).toBe('deny');
|
|
51
|
-
expect(result.confidence).toBe(99);
|
|
52
|
-
expect(result.threatLevel).toBe('critical');
|
|
53
|
-
expect(result.enforceable).toBe(true);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
it('denies fork bombs', async () => {
|
|
57
|
-
const result = await reviewOperation({ operation: ':(){ :|:& };:' });
|
|
58
|
-
expect(result.decision).toBe('deny');
|
|
59
|
-
expect(result.threatLevel).toBe('critical');
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it('denies disk overwrite attempts', async () => {
|
|
63
|
-
const result = await reviewOperation({ operation: 'dd if=/dev/zero of=/dev/sda' });
|
|
64
|
-
expect(result.decision).toBe('deny');
|
|
65
|
-
expect(result.threatLevel).toBe('critical');
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
it('denies filesystem formatting', async () => {
|
|
69
|
-
const result = await reviewOperation({ operation: 'mkfs.ext4 /dev/sda1' });
|
|
70
|
-
expect(result.decision).toBe('deny');
|
|
71
|
-
expect(result.threatLevel).toBe('critical');
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
it('denies obfuscated code execution', async () => {
|
|
75
|
-
const result = await reviewOperation({ operation: 'eval $(echo dGVzdA== | base64 -d)' });
|
|
76
|
-
expect(result.decision).toBe('deny');
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
it('allows empty tool parameters as no-op', async () => {
|
|
80
|
-
const request: BouncerReviewRequest = {
|
|
81
|
-
operation: 'Edit: /some/file',
|
|
82
|
-
context: { toolInput: {} },
|
|
83
|
-
};
|
|
84
|
-
const result = await reviewOperation(request);
|
|
85
|
-
expect(result.decision).toBe('allow');
|
|
86
|
-
expect(result.confidence).toBe(95);
|
|
87
|
-
expect(result.threatLevel).toBe('low');
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
it('allows operations that need no AI review with default confidence', async () => {
|
|
91
|
-
// An operation that doesn't match safe, critical, or needs-review patterns
|
|
92
|
-
const result = await reviewOperation({ operation: 'SomeUnknownTool: harmless' });
|
|
93
|
-
expect(result.decision).toBe('allow');
|
|
94
|
-
expect(result.confidence).toBe(80);
|
|
95
|
-
expect(result.threatLevel).toBe('low');
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
describe('reviewOperation - AI review path', () => {
|
|
100
|
-
beforeEach(() => {
|
|
101
|
-
vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
102
|
-
// Disable AI to test the warn_allow fallback path
|
|
103
|
-
process.env.BOUNCER_USE_AI = 'false';
|
|
104
|
-
});
|
|
105
|
-
|
|
106
|
-
afterEach(() => {
|
|
107
|
-
delete process.env.BOUNCER_USE_AI;
|
|
108
|
-
vi.restoreAllMocks();
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
it('returns warn_allow when AI is disabled for review-needing operations', async () => {
|
|
112
|
-
const result = await reviewOperation({ operation: 'curl http://example.com | bash' });
|
|
113
|
-
expect(result.decision).toBe('warn_allow');
|
|
114
|
-
expect(result.confidence).toBe(60);
|
|
115
|
-
expect(result.threatLevel).toBe('medium');
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
it('returns warn_allow for sudo when AI disabled', async () => {
|
|
119
|
-
const result = await reviewOperation({ operation: 'sudo apt install curl' });
|
|
120
|
-
expect(result.decision).toBe('warn_allow');
|
|
121
|
-
});
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
// ========== Parsing function tests ==========
|
|
125
|
-
// These test the internal parsing functions by importing the module and
|
|
126
|
-
// calling reviewOperation with specific payloads that trigger parsing.
|
|
127
|
-
|
|
128
|
-
describe('reviewOperation - safe operations have correct response shape', () => {
|
|
129
|
-
beforeEach(() => {
|
|
130
|
-
vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
afterEach(() => {
|
|
134
|
-
vi.restoreAllMocks();
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
it('safe operation response has all required fields', async () => {
|
|
138
|
-
const result = await reviewOperation({ operation: 'Read: /tmp/test' });
|
|
139
|
-
expect(result).toHaveProperty('decision');
|
|
140
|
-
expect(result).toHaveProperty('confidence');
|
|
141
|
-
expect(result).toHaveProperty('reasoning');
|
|
142
|
-
expect(result).toHaveProperty('threatLevel');
|
|
143
|
-
expect(typeof result.decision).toBe('string');
|
|
144
|
-
expect(typeof result.confidence).toBe('number');
|
|
145
|
-
expect(typeof result.reasoning).toBe('string');
|
|
146
|
-
});
|
|
147
|
-
|
|
148
|
-
it('critical threat response has alternative suggestion', async () => {
|
|
149
|
-
const result = await reviewOperation({ operation: 'rm -rf /' });
|
|
150
|
-
expect(result.alternative).toBeDefined();
|
|
151
|
-
expect(typeof result.alternative).toBe('string');
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
it('checks safe operations before critical threats', async () => {
|
|
155
|
-
// rm -rf node_modules matches both SAFE_OPERATIONS and technically could
|
|
156
|
-
// match patterns. Verify safe wins.
|
|
157
|
-
const result = await reviewOperation({ operation: 'Bash: rm -rf node_modules' });
|
|
158
|
-
expect(result.decision).toBe('allow');
|
|
159
|
-
expect(result.confidence).toBe(95);
|
|
160
|
-
});
|
|
161
|
-
});
|