@vellumai/assistant 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +17 -3
- package/Dockerfile +1 -1
- package/README.md +2 -0
- package/docs/architecture/scheduling.md +81 -0
- package/package.json +1 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +22 -0
- package/src/__tests__/channel-policy.test.ts +19 -0
- package/src/__tests__/guardian-control-plane-policy.test.ts +582 -0
- package/src/__tests__/guardian-outbound-http.test.ts +8 -8
- package/src/__tests__/intent-routing.test.ts +22 -0
- package/src/__tests__/ipc-snapshot.test.ts +10 -0
- package/src/__tests__/notification-routing-intent.test.ts +185 -0
- package/src/__tests__/recording-handler.test.ts +191 -31
- package/src/__tests__/recording-intent-fallback.test.ts +180 -0
- package/src/__tests__/recording-intent-handler.test.ts +597 -74
- package/src/__tests__/recording-intent.test.ts +738 -342
- package/src/__tests__/recording-state-machine.test.ts +1109 -0
- package/src/__tests__/reminder-store.test.ts +20 -18
- package/src/__tests__/reminder.test.ts +2 -1
- package/src/channels/config.ts +1 -1
- package/src/config/bundled-skills/phone-calls/SKILL.md +1 -11
- package/src/config/bundled-skills/screen-recording/SKILL.md +91 -12
- package/src/config/system-prompt.ts +5 -0
- package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
- package/src/daemon/handlers/config-channels.ts +6 -6
- package/src/daemon/handlers/index.ts +1 -1
- package/src/daemon/handlers/misc.ts +258 -102
- package/src/daemon/handlers/recording.ts +417 -5
- package/src/daemon/handlers/sessions.ts +142 -68
- package/src/daemon/ipc-contract/computer-use.ts +23 -3
- package/src/daemon/ipc-contract/messages.ts +3 -1
- package/src/daemon/ipc-contract/shared.ts +6 -0
- package/src/daemon/ipc-contract-inventory.json +2 -0
- package/src/daemon/lifecycle.ts +2 -0
- package/src/daemon/recording-executor.ts +180 -0
- package/src/daemon/recording-intent-fallback.ts +132 -0
- package/src/daemon/recording-intent.ts +306 -15
- package/src/daemon/session-tool-setup.ts +4 -0
- package/src/memory/conversation-attention-store.ts +5 -5
- package/src/notifications/README.md +69 -1
- package/src/notifications/adapters/sms.ts +80 -0
- package/src/notifications/broadcaster.ts +1 -0
- package/src/notifications/copy-composer.ts +3 -3
- package/src/notifications/decision-engine.ts +70 -1
- package/src/notifications/decisions-store.ts +24 -0
- package/src/notifications/destination-resolver.ts +2 -1
- package/src/notifications/emit-signal.ts +35 -3
- package/src/notifications/signal.ts +6 -0
- package/src/notifications/types.ts +3 -0
- package/src/runtime/guardian-outbound-actions.ts +9 -9
- package/src/runtime/http-server.ts +7 -7
- package/src/runtime/routes/conversation-attention-routes.ts +3 -3
- package/src/runtime/routes/integration-routes.ts +5 -5
- package/src/schedule/scheduler.ts +15 -3
- package/src/tools/executor.ts +29 -0
- package/src/tools/guardian-control-plane-policy.ts +141 -0
- package/src/tools/types.ts +2 -0
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
2
|
+
|
|
3
|
+
import type { ToolExecutionResult, ToolLifecycleEvent, ToolPermissionDeniedEvent } from '../tools/types.js';
|
|
4
|
+
|
|
5
|
+
// ── Module mocks (must precede real imports) ─────────────────────────
|
|
6
|
+
|
|
7
|
+
const mockConfig = {
|
|
8
|
+
provider: 'anthropic',
|
|
9
|
+
model: 'test',
|
|
10
|
+
apiKeys: {},
|
|
11
|
+
maxTokens: 4096,
|
|
12
|
+
dataDir: '/tmp',
|
|
13
|
+
timeouts: { shellDefaultTimeoutSec: 120, shellMaxTimeoutSec: 600, permissionTimeoutSec: 300 },
|
|
14
|
+
sandbox: { enabled: false, backend: 'native' as const, docker: { image: 'vellum-sandbox:latest', cpus: 1, memoryMb: 512, pidsLimit: 256, network: 'none' as const } },
|
|
15
|
+
rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
|
|
16
|
+
secretDetection: { enabled: false, action: 'warn' as const, entropyThreshold: 4.0 },
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
let fakeToolResult: ToolExecutionResult = { content: 'ok', isError: false };
|
|
20
|
+
|
|
21
|
+
mock.module('../config/loader.js', () => ({
|
|
22
|
+
getConfig: () => mockConfig,
|
|
23
|
+
loadConfig: () => mockConfig,
|
|
24
|
+
invalidateConfigCache: () => {},
|
|
25
|
+
saveConfig: () => {},
|
|
26
|
+
loadRawConfig: () => ({}),
|
|
27
|
+
saveRawConfig: () => {},
|
|
28
|
+
getNestedValue: () => undefined,
|
|
29
|
+
setNestedValue: () => {},
|
|
30
|
+
}));
|
|
31
|
+
|
|
32
|
+
mock.module('../util/logger.js', () => ({
|
|
33
|
+
getLogger: () => new Proxy({} as Record<string, unknown>, {
|
|
34
|
+
get: () => () => {},
|
|
35
|
+
}),
|
|
36
|
+
isDebug: () => false,
|
|
37
|
+
truncateForLog: (value: string) => value,
|
|
38
|
+
}));
|
|
39
|
+
|
|
40
|
+
mock.module('../permissions/checker.js', () => ({
|
|
41
|
+
classifyRisk: async () => 'low',
|
|
42
|
+
check: async () => ({ decision: 'allow', reason: 'allowed' }),
|
|
43
|
+
generateAllowlistOptions: () => [],
|
|
44
|
+
generateScopeOptions: () => [],
|
|
45
|
+
}));
|
|
46
|
+
|
|
47
|
+
mock.module('../memory/tool-usage-store.js', () => ({
|
|
48
|
+
recordToolInvocation: () => {},
|
|
49
|
+
}));
|
|
50
|
+
|
|
51
|
+
mock.module('../tools/registry.js', () => ({
|
|
52
|
+
getTool: (name: string) => {
|
|
53
|
+
if (name === 'unknown_tool') return undefined;
|
|
54
|
+
return {
|
|
55
|
+
name,
|
|
56
|
+
description: 'test tool',
|
|
57
|
+
category: 'test',
|
|
58
|
+
defaultRiskLevel: 'low',
|
|
59
|
+
getDefinition: () => ({}),
|
|
60
|
+
execute: async () => fakeToolResult,
|
|
61
|
+
};
|
|
62
|
+
},
|
|
63
|
+
getAllTools: () => [],
|
|
64
|
+
}));
|
|
65
|
+
|
|
66
|
+
mock.module('../tools/shared/filesystem/path-policy.js', () => ({
|
|
67
|
+
sandboxPolicy: () => ({ ok: false }),
|
|
68
|
+
hostPolicy: () => ({ ok: false }),
|
|
69
|
+
}));
|
|
70
|
+
|
|
71
|
+
mock.module('../tools/terminal/sandbox.js', () => ({
|
|
72
|
+
wrapCommand: () => ({ command: '', sandboxed: false }),
|
|
73
|
+
}));
|
|
74
|
+
|
|
75
|
+
// ── Real imports ─────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
import { PermissionPrompter } from '../permissions/prompter.js';
|
|
78
|
+
import { ToolExecutor } from '../tools/executor.js';
|
|
79
|
+
import {
|
|
80
|
+
enforceGuardianOnlyPolicy,
|
|
81
|
+
isGuardianControlPlaneInvocation,
|
|
82
|
+
} from '../tools/guardian-control-plane-policy.js';
|
|
83
|
+
import type { ToolContext } from '../tools/types.js';
|
|
84
|
+
|
|
85
|
+
function makeContext(overrides?: Partial<ToolContext>): ToolContext {
|
|
86
|
+
return {
|
|
87
|
+
workingDir: '/tmp/project',
|
|
88
|
+
sessionId: 'session-1',
|
|
89
|
+
conversationId: 'conversation-1',
|
|
90
|
+
...overrides,
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
function makePrompter(): PermissionPrompter {
|
|
95
|
+
return {
|
|
96
|
+
prompt: async () => ({ decision: 'allow' as const }),
|
|
97
|
+
resolveConfirmation: () => {},
|
|
98
|
+
updateSender: () => {},
|
|
99
|
+
dispose: () => {},
|
|
100
|
+
} as unknown as PermissionPrompter;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
afterAll(() => { mock.restore(); });
|
|
104
|
+
|
|
105
|
+
// =====================================================================
|
|
106
|
+
// Unit tests: isGuardianControlPlaneInvocation
|
|
107
|
+
// =====================================================================
|
|
108
|
+
|
|
109
|
+
describe('isGuardianControlPlaneInvocation', () => {
|
|
110
|
+
const guardianPaths = [
|
|
111
|
+
'/v1/integrations/guardian/challenge',
|
|
112
|
+
'/v1/integrations/guardian/status',
|
|
113
|
+
'/v1/integrations/guardian/outbound/start',
|
|
114
|
+
'/v1/integrations/guardian/outbound/resend',
|
|
115
|
+
'/v1/integrations/guardian/outbound/cancel',
|
|
116
|
+
];
|
|
117
|
+
|
|
118
|
+
describe('bash tool with guardian endpoint in command', () => {
|
|
119
|
+
for (const path of guardianPaths) {
|
|
120
|
+
test(`detects curl to ${path}`, () => {
|
|
121
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
122
|
+
command: `curl -X POST http://localhost:3000${path}`,
|
|
123
|
+
})).toBe(true);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test(`detects wget to ${path}`, () => {
|
|
127
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
128
|
+
command: `wget https://api.example.com${path}`,
|
|
129
|
+
})).toBe(true);
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
test('does not match unrelated commands', () => {
|
|
134
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
135
|
+
command: 'git status',
|
|
136
|
+
})).toBe(false);
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
test('matches partial path prefix via fragment detection (fail-closed for shell tools)', () => {
|
|
140
|
+
// Even without a trailing sub-path, the presence of both /v1/integrations and guardian
|
|
141
|
+
// in a bash command triggers the conservative fragment detector.
|
|
142
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
143
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian',
|
|
144
|
+
})).toBe(true);
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
test('matches unknown sub-path under guardian control-plane (broad pattern)', () => {
|
|
148
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
149
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian/other',
|
|
150
|
+
})).toBe(true);
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
test('handles missing command field gracefully', () => {
|
|
154
|
+
expect(isGuardianControlPlaneInvocation('bash', {})).toBe(false);
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
test('handles non-string command field gracefully', () => {
|
|
158
|
+
expect(isGuardianControlPlaneInvocation('bash', { command: 42 })).toBe(false);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
describe('host_bash tool with guardian endpoint in command', () => {
|
|
163
|
+
test('detects guardian endpoint', () => {
|
|
164
|
+
expect(isGuardianControlPlaneInvocation('host_bash', {
|
|
165
|
+
command: 'curl -H "Authorization: Bearer token" https://internal:8080/v1/integrations/guardian/outbound/start',
|
|
166
|
+
})).toBe(true);
|
|
167
|
+
});
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
describe('network_request tool with guardian endpoint in url', () => {
|
|
171
|
+
for (const path of guardianPaths) {
|
|
172
|
+
test(`detects ${path}`, () => {
|
|
173
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
174
|
+
url: `https://api.vellum.ai${path}`,
|
|
175
|
+
})).toBe(true);
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
test('detects proxied local URL', () => {
|
|
180
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
181
|
+
url: 'http://127.0.0.1:3000/v1/integrations/guardian/challenge',
|
|
182
|
+
})).toBe(true);
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
test('does not match unrelated URLs', () => {
|
|
186
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
187
|
+
url: 'https://api.example.com/v1/messages',
|
|
188
|
+
})).toBe(false);
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
test('handles missing url field gracefully', () => {
|
|
192
|
+
expect(isGuardianControlPlaneInvocation('network_request', {})).toBe(false);
|
|
193
|
+
});
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
describe('web_fetch tool with guardian endpoint in url', () => {
|
|
197
|
+
test('detects guardian endpoint', () => {
|
|
198
|
+
expect(isGuardianControlPlaneInvocation('web_fetch', {
|
|
199
|
+
url: 'https://api.example.com/v1/integrations/guardian/outbound/cancel',
|
|
200
|
+
})).toBe(true);
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
test('does not match unrelated URL', () => {
|
|
204
|
+
expect(isGuardianControlPlaneInvocation('web_fetch', {
|
|
205
|
+
url: 'https://docs.example.com/api/v1/help',
|
|
206
|
+
})).toBe(false);
|
|
207
|
+
});
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
describe('browser_navigate tool with guardian endpoint in url', () => {
|
|
211
|
+
test('detects guardian endpoint', () => {
|
|
212
|
+
expect(isGuardianControlPlaneInvocation('browser_navigate', {
|
|
213
|
+
url: 'http://localhost:3000/v1/integrations/guardian/status',
|
|
214
|
+
})).toBe(true);
|
|
215
|
+
});
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
describe('unrelated tools are not flagged', () => {
|
|
219
|
+
test('file_read is never a guardian invocation', () => {
|
|
220
|
+
expect(isGuardianControlPlaneInvocation('file_read', {
|
|
221
|
+
path: '/v1/integrations/guardian/challenge',
|
|
222
|
+
})).toBe(false);
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
test('file_write is never a guardian invocation', () => {
|
|
226
|
+
expect(isGuardianControlPlaneInvocation('file_write', {
|
|
227
|
+
path: '/tmp/test.txt',
|
|
228
|
+
content: 'curl /v1/integrations/guardian/outbound/start',
|
|
229
|
+
})).toBe(false);
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
test('web_search is never a guardian invocation', () => {
|
|
233
|
+
expect(isGuardianControlPlaneInvocation('web_search', {
|
|
234
|
+
query: '/v1/integrations/guardian/status',
|
|
235
|
+
})).toBe(false);
|
|
236
|
+
});
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
describe('path matching covers proxied and local variants', () => {
|
|
240
|
+
test('matches endpoint with query string', () => {
|
|
241
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
242
|
+
url: 'https://api.example.com/v1/integrations/guardian/challenge?token=abc',
|
|
243
|
+
})).toBe(true);
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
test('matches endpoint with trailing slash', () => {
|
|
247
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
248
|
+
url: 'https://api.example.com/v1/integrations/guardian/outbound/start/',
|
|
249
|
+
})).toBe(true);
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
test('matches endpoint in piped bash command', () => {
|
|
253
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
254
|
+
command: 'echo \'{"phone":"+1234567890"}\' | curl -X POST -d @- http://localhost:3000/v1/integrations/guardian/outbound/resend',
|
|
255
|
+
})).toBe(true);
|
|
256
|
+
});
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
describe('obfuscation resistance', () => {
|
|
260
|
+
test('detects URL-encoded path (%2F encoding)', () => {
|
|
261
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
262
|
+
command: 'curl http://localhost:3000/v1/integrations%2Fguardian%2Foutbound%2Fstart',
|
|
263
|
+
})).toBe(true);
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
test('detects double-encoded path (%252F encoding)', () => {
|
|
267
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
268
|
+
url: 'http://localhost:3000/v1/integrations%252Fguardian%252Fchallenge',
|
|
269
|
+
})).toBe(true);
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
test('detects double slashes in path', () => {
|
|
273
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
274
|
+
command: 'curl http://localhost:3000/v1/integrations//guardian/outbound/start',
|
|
275
|
+
})).toBe(true);
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
test('detects triple slashes in path', () => {
|
|
279
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
280
|
+
url: 'http://localhost:3000/v1///integrations///guardian///status',
|
|
281
|
+
})).toBe(true);
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
test('detects mixed case path', () => {
|
|
285
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
286
|
+
command: 'curl http://localhost:3000/V1/Integrations/Guardian/Outbound/Start',
|
|
287
|
+
})).toBe(true);
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
test('detects ALL CAPS path', () => {
|
|
291
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
292
|
+
url: 'http://localhost:3000/V1/INTEGRATIONS/GUARDIAN/CHALLENGE',
|
|
293
|
+
})).toBe(true);
|
|
294
|
+
});
|
|
295
|
+
|
|
296
|
+
test('detects combined obfuscation: URL-encoding + mixed case', () => {
|
|
297
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
298
|
+
command: 'curl http://localhost:3000/V1/Integrations%2FGuardian%2FOutbound%2FCancel',
|
|
299
|
+
})).toBe(true);
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
test('detects combined obfuscation: double slashes + URL-encoding', () => {
|
|
303
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
304
|
+
url: 'http://localhost:3000/v1//integrations%2Fguardian%2Fstatus',
|
|
305
|
+
})).toBe(true);
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
test('detects URL-encoded path in web_fetch tool', () => {
|
|
309
|
+
expect(isGuardianControlPlaneInvocation('web_fetch', {
|
|
310
|
+
url: 'http://localhost:3000/v1/integrations%2Fguardian%2Foutbound%2Fresend',
|
|
311
|
+
})).toBe(true);
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
test('does not false-positive on unrelated encoded paths', () => {
|
|
315
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
316
|
+
command: 'curl http://localhost:3000/v1/integrations%2Fother%2Fservice',
|
|
317
|
+
})).toBe(false);
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
test('detects guardian endpoint despite malformed percent-encoding elsewhere in command', () => {
|
|
321
|
+
const result = isGuardianControlPlaneInvocation('bash', {
|
|
322
|
+
command: 'curl -H "X: %ZZ" http://localhost:3000/v1/integrations%2Fguardian%2Foutbound%2Fstart -d \'{"channel":"sms"}\'',
|
|
323
|
+
});
|
|
324
|
+
expect(result).toBe(true);
|
|
325
|
+
});
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
describe('shell expansion resistance', () => {
|
|
329
|
+
test('detects guardian endpoint constructed via shell variable concatenation', () => {
|
|
330
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
331
|
+
command: 'base=http://localhost:7821/v1/integrations; seg=guardian; curl "$base/$seg/status"',
|
|
332
|
+
})).toBe(true);
|
|
333
|
+
});
|
|
334
|
+
|
|
335
|
+
test('detects guardian endpoint with split variable assignment', () => {
|
|
336
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
337
|
+
command: 'API=/v1/integrations; curl "http://localhost:3000${API}/guardian/outbound/start"',
|
|
338
|
+
})).toBe(true);
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
test('detects guardian endpoint with path built across multiple variables', () => {
|
|
342
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
343
|
+
command: 'HOST=http://localhost:7821; PATH_PREFIX=/v1/integrations; SVC=guardian; curl "$HOST$PATH_PREFIX/$SVC/challenge"',
|
|
344
|
+
})).toBe(true);
|
|
345
|
+
});
|
|
346
|
+
|
|
347
|
+
test('detects guardian endpoint via heredoc-style construction', () => {
|
|
348
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
349
|
+
command: 'url="http://localhost:3000/v1/integrations"; curl "${url}/guardian/outbound/resend"',
|
|
350
|
+
})).toBe(true);
|
|
351
|
+
});
|
|
352
|
+
|
|
353
|
+
test('does not false-positive when only /v1/integrations is present without guardian', () => {
|
|
354
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
355
|
+
command: 'curl http://localhost:3000/v1/integrations/other/service',
|
|
356
|
+
})).toBe(false);
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
test('does not false-positive when only guardian is present without /v1/integrations', () => {
|
|
360
|
+
expect(isGuardianControlPlaneInvocation('bash', {
|
|
361
|
+
command: 'echo "guardian notification sent"',
|
|
362
|
+
})).toBe(false);
|
|
363
|
+
});
|
|
364
|
+
|
|
365
|
+
test('shell fragment detection does not apply to URL tools', () => {
|
|
366
|
+
// URL tools pass structured URLs, not shell commands. The fragment detector
|
|
367
|
+
// is bash/host_bash only. For URL tools, we rely on exact/normalized matching.
|
|
368
|
+
expect(isGuardianControlPlaneInvocation('network_request', {
|
|
369
|
+
url: 'https://api.example.com/v1/messages',
|
|
370
|
+
})).toBe(false);
|
|
371
|
+
});
|
|
372
|
+
});
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
// =====================================================================
|
|
376
|
+
// Unit tests: enforceGuardianOnlyPolicy
|
|
377
|
+
// =====================================================================
|
|
378
|
+
|
|
379
|
+
describe('enforceGuardianOnlyPolicy', () => {
|
|
380
|
+
test('non-guardian actor denied for guardian endpoint', () => {
|
|
381
|
+
const result = enforceGuardianOnlyPolicy('bash', {
|
|
382
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian/outbound/start',
|
|
383
|
+
}, 'non-guardian');
|
|
384
|
+
expect(result.denied).toBe(true);
|
|
385
|
+
expect(result.reason).toContain('restricted to guardian users');
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
test('unverified_channel actor denied for guardian endpoint', () => {
|
|
389
|
+
const result = enforceGuardianOnlyPolicy('network_request', {
|
|
390
|
+
url: 'https://api.example.com/v1/integrations/guardian/challenge',
|
|
391
|
+
}, 'unverified_channel');
|
|
392
|
+
expect(result.denied).toBe(true);
|
|
393
|
+
expect(result.reason).toContain('restricted to guardian users');
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
test('guardian actor is NOT denied for guardian endpoint', () => {
|
|
397
|
+
const result = enforceGuardianOnlyPolicy('bash', {
|
|
398
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian/outbound/start',
|
|
399
|
+
}, 'guardian');
|
|
400
|
+
expect(result.denied).toBe(false);
|
|
401
|
+
expect(result.reason).toBeUndefined();
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
test('undefined actor role is NOT denied for guardian endpoint', () => {
|
|
405
|
+
const result = enforceGuardianOnlyPolicy('bash', {
|
|
406
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian/outbound/start',
|
|
407
|
+
}, undefined);
|
|
408
|
+
expect(result.denied).toBe(false);
|
|
409
|
+
});
|
|
410
|
+
|
|
411
|
+
test('unknown actor role is denied for guardian endpoint (allowlist, not denylist)', () => {
|
|
412
|
+
const result = enforceGuardianOnlyPolicy('bash', {
|
|
413
|
+
command: 'curl http://localhost:3000/v1/integrations/guardian/outbound/start',
|
|
414
|
+
}, 'some_future_role');
|
|
415
|
+
expect(result.denied).toBe(true);
|
|
416
|
+
expect(result.reason).toContain('restricted to guardian users');
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
test('non-guardian actor is NOT denied for unrelated endpoint', () => {
|
|
420
|
+
const result = enforceGuardianOnlyPolicy('bash', {
|
|
421
|
+
command: 'curl http://localhost:3000/v1/messages',
|
|
422
|
+
}, 'non-guardian');
|
|
423
|
+
expect(result.denied).toBe(false);
|
|
424
|
+
});
|
|
425
|
+
|
|
426
|
+
test('non-guardian actor is NOT denied for unrelated tool', () => {
|
|
427
|
+
const result = enforceGuardianOnlyPolicy('file_read', {
|
|
428
|
+
path: 'README.md',
|
|
429
|
+
}, 'non-guardian');
|
|
430
|
+
expect(result.denied).toBe(false);
|
|
431
|
+
});
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
// =====================================================================
|
|
435
|
+
// Integration tests: ToolExecutor guardian-only policy gate
|
|
436
|
+
// =====================================================================
|
|
437
|
+
|
|
438
|
+
describe('ToolExecutor guardian-only policy gate', () => {
|
|
439
|
+
beforeEach(() => {
|
|
440
|
+
fakeToolResult = { content: 'ok', isError: false };
|
|
441
|
+
});
|
|
442
|
+
|
|
443
|
+
test('non-guardian actor blocked from bash curl to guardian outbound/start', async () => {
|
|
444
|
+
const executor = new ToolExecutor(makePrompter());
|
|
445
|
+
const result = await executor.execute(
|
|
446
|
+
'bash',
|
|
447
|
+
{ command: 'curl -X POST http://localhost:3000/v1/integrations/guardian/outbound/start' },
|
|
448
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
449
|
+
);
|
|
450
|
+
expect(result.isError).toBe(true);
|
|
451
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
test('unverified_channel actor blocked from network_request to guardian endpoint', async () => {
|
|
455
|
+
const executor = new ToolExecutor(makePrompter());
|
|
456
|
+
const result = await executor.execute(
|
|
457
|
+
'network_request',
|
|
458
|
+
{ url: 'https://api.example.com/v1/integrations/guardian/challenge' },
|
|
459
|
+
makeContext({ guardianActorRole: 'unverified_channel' }),
|
|
460
|
+
);
|
|
461
|
+
expect(result.isError).toBe(true);
|
|
462
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
test('guardian actor is NOT blocked from the same invocation', async () => {
|
|
466
|
+
const executor = new ToolExecutor(makePrompter());
|
|
467
|
+
const result = await executor.execute(
|
|
468
|
+
'bash',
|
|
469
|
+
{ command: 'curl -X POST http://localhost:3000/v1/integrations/guardian/outbound/start' },
|
|
470
|
+
makeContext({ guardianActorRole: 'guardian' }),
|
|
471
|
+
);
|
|
472
|
+
expect(result.isError).toBe(false);
|
|
473
|
+
expect(result.content).toBe('ok');
|
|
474
|
+
});
|
|
475
|
+
|
|
476
|
+
test('undefined guardianActorRole is NOT blocked from guardian endpoint', async () => {
|
|
477
|
+
const executor = new ToolExecutor(makePrompter());
|
|
478
|
+
const result = await executor.execute(
|
|
479
|
+
'bash',
|
|
480
|
+
{ command: 'curl http://localhost:3000/v1/integrations/guardian/status' },
|
|
481
|
+
makeContext(), // no guardianActorRole set
|
|
482
|
+
);
|
|
483
|
+
expect(result.isError).toBe(false);
|
|
484
|
+
expect(result.content).toBe('ok');
|
|
485
|
+
});
|
|
486
|
+
|
|
487
|
+
test('non-guardian invocation of unrelated endpoint is unaffected', async () => {
|
|
488
|
+
const executor = new ToolExecutor(makePrompter());
|
|
489
|
+
const result = await executor.execute(
|
|
490
|
+
'bash',
|
|
491
|
+
{ command: 'curl http://localhost:3000/v1/messages' },
|
|
492
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
493
|
+
);
|
|
494
|
+
expect(result.isError).toBe(false);
|
|
495
|
+
expect(result.content).toBe('ok');
|
|
496
|
+
});
|
|
497
|
+
|
|
498
|
+
test('non-guardian invocation of unrelated tool is unaffected', async () => {
|
|
499
|
+
const executor = new ToolExecutor(makePrompter());
|
|
500
|
+
const result = await executor.execute(
|
|
501
|
+
'file_read',
|
|
502
|
+
{ path: 'README.md' },
|
|
503
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
504
|
+
);
|
|
505
|
+
expect(result.isError).toBe(false);
|
|
506
|
+
expect(result.content).toBe('ok');
|
|
507
|
+
});
|
|
508
|
+
|
|
509
|
+
test('permission_denied lifecycle event is emitted on guardian policy block', async () => {
|
|
510
|
+
let capturedEvent: ToolPermissionDeniedEvent | undefined;
|
|
511
|
+
const executor = new ToolExecutor(makePrompter());
|
|
512
|
+
await executor.execute(
|
|
513
|
+
'bash',
|
|
514
|
+
{ command: 'curl http://localhost:3000/v1/integrations/guardian/outbound/cancel' },
|
|
515
|
+
makeContext({
|
|
516
|
+
guardianActorRole: 'non-guardian',
|
|
517
|
+
onToolLifecycleEvent: (event: ToolLifecycleEvent) => {
|
|
518
|
+
if (event.type === 'permission_denied') {
|
|
519
|
+
capturedEvent = event as ToolPermissionDeniedEvent;
|
|
520
|
+
}
|
|
521
|
+
},
|
|
522
|
+
}),
|
|
523
|
+
);
|
|
524
|
+
expect(capturedEvent).toBeDefined();
|
|
525
|
+
expect(capturedEvent!.decision).toBe('deny');
|
|
526
|
+
expect(capturedEvent!.reason).toContain('restricted to guardian users');
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
test('non-guardian blocked from web_fetch to guardian endpoint', async () => {
|
|
530
|
+
const executor = new ToolExecutor(makePrompter());
|
|
531
|
+
const result = await executor.execute(
|
|
532
|
+
'web_fetch',
|
|
533
|
+
{ url: 'http://localhost:3000/v1/integrations/guardian/outbound/resend' },
|
|
534
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
535
|
+
);
|
|
536
|
+
expect(result.isError).toBe(true);
|
|
537
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
538
|
+
});
|
|
539
|
+
|
|
540
|
+
test('non-guardian blocked from browser_navigate to guardian endpoint', async () => {
|
|
541
|
+
const executor = new ToolExecutor(makePrompter());
|
|
542
|
+
const result = await executor.execute(
|
|
543
|
+
'browser_navigate',
|
|
544
|
+
{ url: 'http://localhost:3000/v1/integrations/guardian/status' },
|
|
545
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
546
|
+
);
|
|
547
|
+
expect(result.isError).toBe(true);
|
|
548
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
549
|
+
});
|
|
550
|
+
|
|
551
|
+
test('non-guardian blocked from host_bash with guardian endpoint', async () => {
|
|
552
|
+
const executor = new ToolExecutor(makePrompter());
|
|
553
|
+
const result = await executor.execute(
|
|
554
|
+
'host_bash',
|
|
555
|
+
{ command: 'curl -X POST https://internal:8080/v1/integrations/guardian/challenge' },
|
|
556
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
557
|
+
);
|
|
558
|
+
expect(result.isError).toBe(true);
|
|
559
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
560
|
+
});
|
|
561
|
+
|
|
562
|
+
test('all five guardian endpoints are blocked for non-guardian via network_request', async () => {
|
|
563
|
+
const endpoints = [
|
|
564
|
+
'/v1/integrations/guardian/challenge',
|
|
565
|
+
'/v1/integrations/guardian/status',
|
|
566
|
+
'/v1/integrations/guardian/outbound/start',
|
|
567
|
+
'/v1/integrations/guardian/outbound/resend',
|
|
568
|
+
'/v1/integrations/guardian/outbound/cancel',
|
|
569
|
+
];
|
|
570
|
+
|
|
571
|
+
for (const path of endpoints) {
|
|
572
|
+
const executor = new ToolExecutor(makePrompter());
|
|
573
|
+
const result = await executor.execute(
|
|
574
|
+
'network_request',
|
|
575
|
+
{ url: `https://api.example.com${path}` },
|
|
576
|
+
makeContext({ guardianActorRole: 'non-guardian' }),
|
|
577
|
+
);
|
|
578
|
+
expect(result.isError).toBe(true);
|
|
579
|
+
expect(result.content).toContain('restricted to guardian users');
|
|
580
|
+
}
|
|
581
|
+
});
|
|
582
|
+
});
|
|
@@ -92,20 +92,20 @@ globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) =>
|
|
|
92
92
|
// Now import modules under test (after mocks are in place)
|
|
93
93
|
// ---------------------------------------------------------------------------
|
|
94
94
|
|
|
95
|
+
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
96
|
+
import {
|
|
97
|
+
updateSessionDelivery,
|
|
98
|
+
} from '../runtime/channel-guardian-service.js';
|
|
95
99
|
import {
|
|
96
|
-
startOutbound,
|
|
97
|
-
resendOutbound,
|
|
98
100
|
cancelOutbound,
|
|
101
|
+
resendOutbound,
|
|
102
|
+
startOutbound,
|
|
99
103
|
} from '../runtime/guardian-outbound-actions.js';
|
|
100
104
|
import {
|
|
101
|
-
handleStartOutbound,
|
|
102
|
-
handleResendOutbound,
|
|
103
105
|
handleCancelOutbound,
|
|
106
|
+
handleResendOutbound,
|
|
107
|
+
handleStartOutbound,
|
|
104
108
|
} from '../runtime/routes/integration-routes.js';
|
|
105
|
-
import {
|
|
106
|
-
updateSessionDelivery,
|
|
107
|
-
} from '../runtime/channel-guardian-service.js';
|
|
108
|
-
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
109
109
|
|
|
110
110
|
// Initialize the database (creates all tables)
|
|
111
111
|
initializeDb();
|
|
@@ -290,4 +290,26 @@ describe('Guardian verification routing section in system prompt', () => {
|
|
|
290
290
|
expect(routingSection).toContain('voice');
|
|
291
291
|
expect(routingSection).toContain('telegram');
|
|
292
292
|
});
|
|
293
|
+
|
|
294
|
+
test('routing section contains exclusivity wording', () => {
|
|
295
|
+
const prompt = buildSystemPrompt();
|
|
296
|
+
const lower = prompt.toLowerCase();
|
|
297
|
+
// Must contain "exclusively" or "must only" to enforce exclusive handling
|
|
298
|
+
expect(lower.includes('exclusively') || lower.includes('must only')).toBe(true);
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
test('routing section prohibits loading phone-calls for guardian verification', () => {
|
|
302
|
+
const prompt = buildSystemPrompt();
|
|
303
|
+
const lower = prompt.toLowerCase();
|
|
304
|
+
// Must explicitly prohibit phone-calls for guardian verification intents
|
|
305
|
+
expect(lower).toContain('do not load');
|
|
306
|
+
expect(lower).toContain('phone-calls');
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
test('routing section includes channel-preservation guidance', () => {
|
|
310
|
+
const prompt = buildSystemPrompt();
|
|
311
|
+
const lower = prompt.toLowerCase();
|
|
312
|
+
// Must advise not to re-ask channel if already specified
|
|
313
|
+
expect(lower.includes('do not re-ask') || lower.includes('already specified')).toBe(true);
|
|
314
|
+
});
|
|
293
315
|
});
|
|
@@ -27,6 +27,7 @@ const clientMessages: Record<ClientMessageType, ClientMessage> = {
|
|
|
27
27
|
sessionId: 'sess-001',
|
|
28
28
|
content: 'Hello, assistant!',
|
|
29
29
|
interface: 'cli',
|
|
30
|
+
commandIntent: { domain: 'screen_recording', action: 'start' },
|
|
30
31
|
},
|
|
31
32
|
confirmation_response: {
|
|
32
33
|
type: 'confirmation_response',
|
|
@@ -153,6 +154,7 @@ const clientMessages: Record<ClientMessageType, ClientMessage> = {
|
|
|
153
154
|
task: 'Open Safari and search for weather',
|
|
154
155
|
screenWidth: 1920,
|
|
155
156
|
screenHeight: 1080,
|
|
157
|
+
commandIntent: { domain: 'screen_recording', action: 'start' },
|
|
156
158
|
},
|
|
157
159
|
ui_surface_action: {
|
|
158
160
|
type: 'ui_surface_action',
|
|
@@ -1952,6 +1954,14 @@ const serverMessages: Record<ServerMessageType, ServerMessage> = {
|
|
|
1952
1954
|
type: 'approved_device_remove_response',
|
|
1953
1955
|
success: true,
|
|
1954
1956
|
},
|
|
1957
|
+
recording_pause: {
|
|
1958
|
+
type: 'recording_pause',
|
|
1959
|
+
recordingId: 'rec-001',
|
|
1960
|
+
},
|
|
1961
|
+
recording_resume: {
|
|
1962
|
+
type: 'recording_resume',
|
|
1963
|
+
recordingId: 'rec-001',
|
|
1964
|
+
},
|
|
1955
1965
|
recording_start: {
|
|
1956
1966
|
type: 'recording_start',
|
|
1957
1967
|
recordingId: 'rec-001',
|