@vellumai/assistant 0.3.26 → 0.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +48 -1
- package/Dockerfile +2 -2
- package/package.json +1 -1
- package/scripts/ipc/generate-swift.ts +6 -2
- package/src/__tests__/agent-loop.test.ts +119 -0
- package/src/__tests__/bundled-asset.test.ts +107 -0
- package/src/__tests__/canonical-guardian-store.test.ts +636 -0
- package/src/__tests__/channel-approval-routes.test.ts +174 -1
- package/src/__tests__/emit-signal-routing-intent.test.ts +43 -1
- package/src/__tests__/guardian-actions-endpoint.test.ts +205 -345
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +599 -0
- package/src/__tests__/guardian-dispatch.test.ts +19 -19
- package/src/__tests__/guardian-routing-invariants.test.ts +954 -0
- package/src/__tests__/mcp-cli.test.ts +77 -0
- package/src/__tests__/non-member-access-request.test.ts +31 -29
- package/src/__tests__/notification-decision-fallback.test.ts +61 -3
- package/src/__tests__/notification-decision-strategy.test.ts +17 -0
- package/src/__tests__/notification-guardian-path.test.ts +13 -15
- package/src/__tests__/onboarding-template-contract.test.ts +116 -21
- package/src/__tests__/secret-scanner-executor.test.ts +59 -0
- package/src/__tests__/secret-scanner.test.ts +8 -0
- package/src/__tests__/sensitive-output-placeholders.test.ts +208 -0
- package/src/__tests__/session-runtime-assembly.test.ts +76 -47
- package/src/__tests__/tool-grant-request-escalation.test.ts +497 -0
- package/src/agent/loop.ts +46 -3
- package/src/approvals/guardian-decision-primitive.ts +285 -0
- package/src/approvals/guardian-request-resolvers.ts +539 -0
- package/src/calls/guardian-dispatch.ts +46 -40
- package/src/calls/relay-server.ts +147 -2
- package/src/calls/types.ts +1 -1
- package/src/config/system-prompt.ts +2 -1
- package/src/config/templates/BOOTSTRAP.md +47 -31
- package/src/config/templates/USER.md +5 -0
- package/src/config/update-bulletin-template-path.ts +4 -1
- package/src/config/vellum-skills/trusted-contacts/SKILL.md +22 -17
- package/src/daemon/handlers/guardian-actions.ts +45 -66
- package/src/daemon/ipc-contract/guardian-actions.ts +7 -0
- package/src/daemon/lifecycle.ts +3 -16
- package/src/daemon/server.ts +18 -0
- package/src/daemon/session-agent-loop-handlers.ts +5 -4
- package/src/daemon/session-agent-loop.ts +32 -5
- package/src/daemon/session-process.ts +68 -307
- package/src/daemon/session-runtime-assembly.ts +112 -24
- package/src/daemon/session-tool-setup.ts +1 -0
- package/src/daemon/session.ts +1 -0
- package/src/home-base/prebuilt/seed.ts +2 -1
- package/src/hooks/templates.ts +2 -1
- package/src/memory/canonical-guardian-store.ts +524 -0
- package/src/memory/channel-guardian-store.ts +1 -0
- package/src/memory/db-init.ts +16 -0
- package/src/memory/guardian-action-store.ts +7 -60
- package/src/memory/guardian-approvals.ts +9 -4
- package/src/memory/migrations/036-normalize-phone-identities.ts +289 -0
- package/src/memory/migrations/118-reminder-routing-intent.ts +3 -3
- package/src/memory/migrations/121-canonical-guardian-requests.ts +59 -0
- package/src/memory/migrations/122-canonical-guardian-requester-chat-id.ts +15 -0
- package/src/memory/migrations/123-canonical-guardian-deliveries-destination-index.ts +15 -0
- package/src/memory/migrations/index.ts +4 -0
- package/src/memory/migrations/registry.ts +5 -0
- package/src/memory/schema-migration.ts +1 -0
- package/src/memory/schema.ts +52 -0
- package/src/notifications/copy-composer.ts +16 -4
- package/src/notifications/decision-engine.ts +57 -0
- package/src/permissions/defaults.ts +2 -0
- package/src/runtime/access-request-helper.ts +137 -0
- package/src/runtime/actor-trust-resolver.ts +225 -0
- package/src/runtime/channel-guardian-service.ts +12 -4
- package/src/runtime/guardian-context-resolver.ts +32 -7
- package/src/runtime/guardian-decision-types.ts +6 -0
- package/src/runtime/guardian-reply-router.ts +687 -0
- package/src/runtime/http-server.ts +8 -0
- package/src/runtime/routes/canonical-guardian-expiry-sweep.ts +116 -0
- package/src/runtime/routes/conversation-routes.ts +18 -0
- package/src/runtime/routes/guardian-action-routes.ts +100 -109
- package/src/runtime/routes/inbound-message-handler.ts +170 -525
- package/src/runtime/tool-grant-request-helper.ts +195 -0
- package/src/tools/executor.ts +13 -1
- package/src/tools/sensitive-output-placeholders.ts +203 -0
- package/src/tools/tool-approval-handler.ts +44 -1
- package/src/tools/types.ts +11 -0
- package/src/util/bundled-asset.ts +31 -0
- package/src/util/canonicalize-identity.ts +52 -0
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the non-guardian tool grant escalation path:
|
|
3
|
+
*
|
|
4
|
+
* 1. ToolApprovalHandler grant-miss escalation behavior
|
|
5
|
+
* 2. tool_grant_request resolver registration and behavior
|
|
6
|
+
* 3. Canonical decision primitive grant minting for tool_grant_request kind
|
|
7
|
+
* 4. End-to-end: deny -> approve -> consume grant flow
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
11
|
+
import { tmpdir } from 'node:os';
|
|
12
|
+
import { join } from 'node:path';
|
|
13
|
+
|
|
14
|
+
import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
|
|
15
|
+
|
|
16
|
+
const testDir = mkdtempSync(join(tmpdir(), 'tool-grant-escalation-test-'));
|
|
17
|
+
|
|
18
|
+
mock.module('../util/platform.js', () => ({
|
|
19
|
+
getDataDir: () => testDir,
|
|
20
|
+
isMacOS: () => process.platform === 'darwin',
|
|
21
|
+
isLinux: () => process.platform === 'linux',
|
|
22
|
+
isWindows: () => process.platform === 'win32',
|
|
23
|
+
getSocketPath: () => join(testDir, 'test.sock'),
|
|
24
|
+
getPidPath: () => join(testDir, 'test.pid'),
|
|
25
|
+
getDbPath: () => join(testDir, 'test.db'),
|
|
26
|
+
getLogPath: () => join(testDir, 'test.log'),
|
|
27
|
+
ensureDataDir: () => {},
|
|
28
|
+
migrateToDataLayout: () => {},
|
|
29
|
+
migrateToWorkspaceLayout: () => {},
|
|
30
|
+
}));
|
|
31
|
+
|
|
32
|
+
mock.module('../util/logger.js', () => ({
|
|
33
|
+
getLogger: () =>
|
|
34
|
+
new Proxy({} as Record<string, unknown>, {
|
|
35
|
+
get: () => () => {},
|
|
36
|
+
}),
|
|
37
|
+
isDebug: () => false,
|
|
38
|
+
truncateForLog: (value: string) => value,
|
|
39
|
+
}));
|
|
40
|
+
|
|
41
|
+
// Mock guardian control-plane policy — not targeting control-plane by default
|
|
42
|
+
mock.module('../tools/guardian-control-plane-policy.js', () => ({
|
|
43
|
+
enforceGuardianOnlyPolicy: () => ({ denied: false }),
|
|
44
|
+
}));
|
|
45
|
+
|
|
46
|
+
// Mock task run rules — no task run rules by default
|
|
47
|
+
mock.module('../tasks/ephemeral-permissions.js', () => ({
|
|
48
|
+
getTaskRunRules: () => [],
|
|
49
|
+
}));
|
|
50
|
+
|
|
51
|
+
// Mock tool registry — return a fake tool for 'bash'
|
|
52
|
+
const fakeTool = {
|
|
53
|
+
name: 'bash',
|
|
54
|
+
description: 'Run a shell command',
|
|
55
|
+
category: 'shell',
|
|
56
|
+
defaultRiskLevel: 'high',
|
|
57
|
+
getDefinition: () => ({ name: 'bash', description: 'Run a shell command', input_schema: {} }),
|
|
58
|
+
execute: async () => ({ content: 'ok', isError: false }),
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
mock.module('../tools/registry.js', () => ({
|
|
62
|
+
getTool: (name: string) => (name === 'bash' ? fakeTool : undefined),
|
|
63
|
+
getAllTools: () => [fakeTool],
|
|
64
|
+
}));
|
|
65
|
+
|
|
66
|
+
// Mock notification emission — capture calls without running the full pipeline
|
|
67
|
+
const emittedSignals: Array<Record<string, unknown>> = [];
|
|
68
|
+
mock.module('../notifications/emit-signal.js', () => ({
|
|
69
|
+
emitNotificationSignal: async (params: Record<string, unknown>) => {
|
|
70
|
+
emittedSignals.push(params);
|
|
71
|
+
return { signalId: 'test-signal', deduplicated: false, dispatched: true, reason: 'ok', deliveryResults: [] };
|
|
72
|
+
},
|
|
73
|
+
registerBroadcastFn: () => {},
|
|
74
|
+
}));
|
|
75
|
+
|
|
76
|
+
// Mock channel guardian service — provide a guardian binding for 'self' + 'telegram'
|
|
77
|
+
mock.module('../runtime/channel-guardian-service.js', () => ({
|
|
78
|
+
getGuardianBinding: (assistantId: string, channel: string) => {
|
|
79
|
+
if (assistantId === 'self' && channel === 'telegram') {
|
|
80
|
+
return {
|
|
81
|
+
id: 'binding-1',
|
|
82
|
+
assistantId: 'self',
|
|
83
|
+
channel: 'telegram',
|
|
84
|
+
guardianExternalUserId: 'guardian-1',
|
|
85
|
+
guardianDeliveryChatId: 'guardian-chat-1',
|
|
86
|
+
status: 'active',
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
return null;
|
|
90
|
+
},
|
|
91
|
+
createOutboundSession: () => ({
|
|
92
|
+
sessionId: 'test-session',
|
|
93
|
+
secret: '123456',
|
|
94
|
+
}),
|
|
95
|
+
}));
|
|
96
|
+
|
|
97
|
+
// Mock gateway client — capture delivery calls
|
|
98
|
+
const deliveredReplies: Array<{ chatId: string; text: string }> = [];
|
|
99
|
+
mock.module('../runtime/gateway-client.js', () => ({
|
|
100
|
+
deliverChannelReply: async (_url: string, payload: { chatId: string; text: string }) => {
|
|
101
|
+
deliveredReplies.push(payload);
|
|
102
|
+
},
|
|
103
|
+
}));
|
|
104
|
+
|
|
105
|
+
import {
|
|
106
|
+
applyCanonicalGuardianDecision,
|
|
107
|
+
} from '../approvals/guardian-decision-primitive.js';
|
|
108
|
+
import type { ActorContext } from '../approvals/guardian-request-resolvers.js';
|
|
109
|
+
import { getRegisteredKinds, getResolver } from '../approvals/guardian-request-resolvers.js';
|
|
110
|
+
import {
|
|
111
|
+
createCanonicalGuardianRequest,
|
|
112
|
+
getCanonicalGuardianRequest,
|
|
113
|
+
listCanonicalGuardianRequests,
|
|
114
|
+
} from '../memory/canonical-guardian-store.js';
|
|
115
|
+
import { getDb, initializeDb, resetDb } from '../memory/db.js';
|
|
116
|
+
import { scopedApprovalGrants } from '../memory/schema.js';
|
|
117
|
+
import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
|
|
118
|
+
import { ToolApprovalHandler } from '../tools/tool-approval-handler.js';
|
|
119
|
+
import type { ToolContext, ToolLifecycleEvent } from '../tools/types.js';
|
|
120
|
+
|
|
121
|
+
initializeDb();
|
|
122
|
+
|
|
123
|
+
function resetTables(): void {
|
|
124
|
+
const db = getDb();
|
|
125
|
+
db.delete(scopedApprovalGrants).run();
|
|
126
|
+
db.run('DELETE FROM canonical_guardian_deliveries');
|
|
127
|
+
db.run('DELETE FROM canonical_guardian_requests');
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
afterAll(() => {
|
|
131
|
+
resetDb();
|
|
132
|
+
try {
|
|
133
|
+
rmSync(testDir, { recursive: true });
|
|
134
|
+
} catch {
|
|
135
|
+
/* best effort */
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// Helpers
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
|
|
144
|
+
return {
|
|
145
|
+
workingDir: testDir,
|
|
146
|
+
sessionId: 'session-1',
|
|
147
|
+
conversationId: 'conv-1',
|
|
148
|
+
assistantId: 'self',
|
|
149
|
+
requestId: 'req-1',
|
|
150
|
+
guardianActorRole: 'non-guardian',
|
|
151
|
+
executionChannel: 'telegram',
|
|
152
|
+
requesterExternalUserId: 'requester-1',
|
|
153
|
+
...overrides,
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
function guardianActor(overrides: Partial<ActorContext> = {}): ActorContext {
|
|
158
|
+
return {
|
|
159
|
+
externalUserId: 'guardian-1',
|
|
160
|
+
channel: 'telegram',
|
|
161
|
+
isTrusted: false,
|
|
162
|
+
...overrides,
|
|
163
|
+
};
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// ===========================================================================
|
|
167
|
+
// TESTS
|
|
168
|
+
// ===========================================================================
|
|
169
|
+
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// 1. tool_grant_request resolver registration
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
describe('tool_grant_request resolver registration', () => {
|
|
175
|
+
test('tool_grant_request resolver is registered', () => {
|
|
176
|
+
const kinds = getRegisteredKinds();
|
|
177
|
+
expect(kinds).toContain('tool_grant_request');
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test('getResolver returns resolver for tool_grant_request', () => {
|
|
181
|
+
const resolver = getResolver('tool_grant_request');
|
|
182
|
+
expect(resolver).toBeDefined();
|
|
183
|
+
expect(resolver!.kind).toBe('tool_grant_request');
|
|
184
|
+
});
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
// 2. Grant-miss escalation behavior in ToolApprovalHandler
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
describe('ToolApprovalHandler / grant-miss escalation', () => {
|
|
192
|
+
const handler = new ToolApprovalHandler();
|
|
193
|
+
const events: ToolLifecycleEvent[] = [];
|
|
194
|
+
const emitLifecycleEvent = (event: ToolLifecycleEvent) => { events.push(event); };
|
|
195
|
+
|
|
196
|
+
beforeEach(() => {
|
|
197
|
+
resetTables();
|
|
198
|
+
events.length = 0;
|
|
199
|
+
emittedSignals.length = 0;
|
|
200
|
+
deliveredReplies.length = 0;
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
test('non-guardian + grant miss + host tool creates canonical tool_grant_request', async () => {
|
|
204
|
+
const toolName = 'bash';
|
|
205
|
+
const input = { command: 'cat /etc/passwd' };
|
|
206
|
+
|
|
207
|
+
const context = makeContext({ guardianActorRole: 'non-guardian' });
|
|
208
|
+
const result = await handler.checkPreExecutionGates(
|
|
209
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
210
|
+
);
|
|
211
|
+
|
|
212
|
+
expect(result.allowed).toBe(false);
|
|
213
|
+
if (result.allowed) return;
|
|
214
|
+
|
|
215
|
+
// A canonical tool_grant_request should have been created
|
|
216
|
+
const requests = listCanonicalGuardianRequests({
|
|
217
|
+
kind: 'tool_grant_request',
|
|
218
|
+
status: 'pending',
|
|
219
|
+
});
|
|
220
|
+
expect(requests.length).toBe(1);
|
|
221
|
+
expect(requests[0].toolName).toBe('bash');
|
|
222
|
+
expect(requests[0].requesterExternalUserId).toBe('requester-1');
|
|
223
|
+
expect(requests[0].guardianExternalUserId).toBe('guardian-1');
|
|
224
|
+
|
|
225
|
+
// Notification signal should have been emitted
|
|
226
|
+
expect(emittedSignals.length).toBe(1);
|
|
227
|
+
expect(emittedSignals[0].sourceEventName).toBe('guardian.question');
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
test('non-guardian grant-miss response includes request code', async () => {
|
|
231
|
+
const toolName = 'bash';
|
|
232
|
+
const input = { command: 'deploy' };
|
|
233
|
+
|
|
234
|
+
const context = makeContext({ guardianActorRole: 'non-guardian' });
|
|
235
|
+
const result = await handler.checkPreExecutionGates(
|
|
236
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
expect(result.allowed).toBe(false);
|
|
240
|
+
if (result.allowed) return;
|
|
241
|
+
expect(result.result.content).toContain('request has been sent to the guardian');
|
|
242
|
+
expect(result.result.content).toContain('request code:');
|
|
243
|
+
expect(result.result.content).toContain('Please retry after the guardian approves');
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
test('non-guardian duplicate grant-miss deduplicates the request', async () => {
|
|
247
|
+
const toolName = 'bash';
|
|
248
|
+
const input = { command: 'rm -rf /' };
|
|
249
|
+
|
|
250
|
+
const context = makeContext({ guardianActorRole: 'non-guardian' });
|
|
251
|
+
|
|
252
|
+
// First invocation creates the request
|
|
253
|
+
await handler.checkPreExecutionGates(
|
|
254
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
255
|
+
);
|
|
256
|
+
|
|
257
|
+
const firstRequests = listCanonicalGuardianRequests({
|
|
258
|
+
kind: 'tool_grant_request',
|
|
259
|
+
status: 'pending',
|
|
260
|
+
});
|
|
261
|
+
expect(firstRequests.length).toBe(1);
|
|
262
|
+
|
|
263
|
+
// Reset notification tracking
|
|
264
|
+
emittedSignals.length = 0;
|
|
265
|
+
|
|
266
|
+
// Second invocation with same tool+input deduplicates
|
|
267
|
+
const result = await handler.checkPreExecutionGates(
|
|
268
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
269
|
+
);
|
|
270
|
+
|
|
271
|
+
expect(result.allowed).toBe(false);
|
|
272
|
+
if (result.allowed) return;
|
|
273
|
+
expect(result.result.content).toContain('already pending');
|
|
274
|
+
|
|
275
|
+
// Still only one canonical request
|
|
276
|
+
const requests = listCanonicalGuardianRequests({
|
|
277
|
+
kind: 'tool_grant_request',
|
|
278
|
+
status: 'pending',
|
|
279
|
+
});
|
|
280
|
+
expect(requests.length).toBe(1);
|
|
281
|
+
|
|
282
|
+
// No duplicate notification
|
|
283
|
+
expect(emittedSignals.length).toBe(0);
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
test('unverified_channel does NOT create escalation request', async () => {
|
|
287
|
+
const toolName = 'bash';
|
|
288
|
+
const input = { command: 'ls' };
|
|
289
|
+
|
|
290
|
+
const context = makeContext({
|
|
291
|
+
guardianActorRole: 'unverified_channel',
|
|
292
|
+
executionChannel: 'telegram',
|
|
293
|
+
requesterExternalUserId: 'unknown-user',
|
|
294
|
+
});
|
|
295
|
+
const result = await handler.checkPreExecutionGates(
|
|
296
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
297
|
+
);
|
|
298
|
+
|
|
299
|
+
expect(result.allowed).toBe(false);
|
|
300
|
+
if (result.allowed) return;
|
|
301
|
+
// Should get the generic denial message, not escalation
|
|
302
|
+
expect(result.result.content).toContain('verified channel identity');
|
|
303
|
+
|
|
304
|
+
// No canonical request should have been created
|
|
305
|
+
const requests = listCanonicalGuardianRequests({
|
|
306
|
+
kind: 'tool_grant_request',
|
|
307
|
+
status: 'pending',
|
|
308
|
+
});
|
|
309
|
+
expect(requests.length).toBe(0);
|
|
310
|
+
});
|
|
311
|
+
|
|
312
|
+
test('non-guardian without executionChannel falls back to generic denial', async () => {
|
|
313
|
+
const toolName = 'bash';
|
|
314
|
+
const input = { command: 'deploy' };
|
|
315
|
+
|
|
316
|
+
const context = makeContext({
|
|
317
|
+
guardianActorRole: 'non-guardian',
|
|
318
|
+
executionChannel: undefined, // no channel info
|
|
319
|
+
});
|
|
320
|
+
const result = await handler.checkPreExecutionGates(
|
|
321
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
322
|
+
);
|
|
323
|
+
|
|
324
|
+
expect(result.allowed).toBe(false);
|
|
325
|
+
if (result.allowed) return;
|
|
326
|
+
// Generic denial, no escalation attempted
|
|
327
|
+
expect(result.result.content).toContain('guardian approval');
|
|
328
|
+
expect(result.result.content).not.toContain('request has been sent');
|
|
329
|
+
|
|
330
|
+
const requests = listCanonicalGuardianRequests({
|
|
331
|
+
kind: 'tool_grant_request',
|
|
332
|
+
status: 'pending',
|
|
333
|
+
});
|
|
334
|
+
expect(requests.length).toBe(0);
|
|
335
|
+
});
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
// 3. Canonical decision and grant minting for tool_grant_request kind
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
|
|
342
|
+
describe('applyCanonicalGuardianDecision / tool_grant_request', () => {
|
|
343
|
+
beforeEach(() => {
|
|
344
|
+
resetTables();
|
|
345
|
+
deliveredReplies.length = 0;
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
test('approving tool_grant_request with tool metadata mints a grant', async () => {
|
|
349
|
+
const req = createCanonicalGuardianRequest({
|
|
350
|
+
kind: 'tool_grant_request',
|
|
351
|
+
sourceType: 'channel',
|
|
352
|
+
sourceChannel: 'telegram',
|
|
353
|
+
conversationId: 'conv-1',
|
|
354
|
+
requesterExternalUserId: 'requester-1',
|
|
355
|
+
guardianExternalUserId: 'guardian-1',
|
|
356
|
+
toolName: 'bash',
|
|
357
|
+
inputDigest: 'sha256:testdigest',
|
|
358
|
+
expiresAt: new Date(Date.now() + 60_000).toISOString(),
|
|
359
|
+
});
|
|
360
|
+
|
|
361
|
+
const result = await applyCanonicalGuardianDecision({
|
|
362
|
+
requestId: req.id,
|
|
363
|
+
action: 'approve_once',
|
|
364
|
+
actorContext: guardianActor(),
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
expect(result.applied).toBe(true);
|
|
368
|
+
if (!result.applied) return;
|
|
369
|
+
expect(result.grantMinted).toBe(true);
|
|
370
|
+
|
|
371
|
+
// Verify canonical request is approved
|
|
372
|
+
const resolved = getCanonicalGuardianRequest(req.id);
|
|
373
|
+
expect(resolved!.status).toBe('approved');
|
|
374
|
+
expect(resolved!.decidedByExternalUserId).toBe('guardian-1');
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
test('rejecting tool_grant_request does NOT mint a grant', async () => {
|
|
378
|
+
const req = createCanonicalGuardianRequest({
|
|
379
|
+
kind: 'tool_grant_request',
|
|
380
|
+
sourceType: 'channel',
|
|
381
|
+
sourceChannel: 'telegram',
|
|
382
|
+
conversationId: 'conv-1',
|
|
383
|
+
requesterExternalUserId: 'requester-1',
|
|
384
|
+
guardianExternalUserId: 'guardian-1',
|
|
385
|
+
toolName: 'bash',
|
|
386
|
+
inputDigest: 'sha256:testdigest',
|
|
387
|
+
expiresAt: new Date(Date.now() + 60_000).toISOString(),
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
const result = await applyCanonicalGuardianDecision({
|
|
391
|
+
requestId: req.id,
|
|
392
|
+
action: 'reject',
|
|
393
|
+
actorContext: guardianActor(),
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
expect(result.applied).toBe(true);
|
|
397
|
+
if (!result.applied) return;
|
|
398
|
+
expect(result.grantMinted).toBe(false);
|
|
399
|
+
|
|
400
|
+
const resolved = getCanonicalGuardianRequest(req.id);
|
|
401
|
+
expect(resolved!.status).toBe('denied');
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
test('identity mismatch blocks tool_grant_request approval', async () => {
|
|
405
|
+
const req = createCanonicalGuardianRequest({
|
|
406
|
+
kind: 'tool_grant_request',
|
|
407
|
+
sourceType: 'channel',
|
|
408
|
+
sourceChannel: 'telegram',
|
|
409
|
+
conversationId: 'conv-1',
|
|
410
|
+
requesterExternalUserId: 'requester-1',
|
|
411
|
+
guardianExternalUserId: 'guardian-1',
|
|
412
|
+
toolName: 'bash',
|
|
413
|
+
inputDigest: 'sha256:testdigest',
|
|
414
|
+
expiresAt: new Date(Date.now() + 60_000).toISOString(),
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
const result = await applyCanonicalGuardianDecision({
|
|
418
|
+
requestId: req.id,
|
|
419
|
+
action: 'approve_once',
|
|
420
|
+
actorContext: guardianActor({ externalUserId: 'imposter-99' }),
|
|
421
|
+
});
|
|
422
|
+
|
|
423
|
+
expect(result.applied).toBe(false);
|
|
424
|
+
if (result.applied) return;
|
|
425
|
+
expect(result.reason).toBe('identity_mismatch');
|
|
426
|
+
|
|
427
|
+
const unchanged = getCanonicalGuardianRequest(req.id);
|
|
428
|
+
expect(unchanged!.status).toBe('pending');
|
|
429
|
+
});
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
// ---------------------------------------------------------------------------
|
|
433
|
+
// 4. End-to-end: deny -> approve -> consume grant flow
|
|
434
|
+
// ---------------------------------------------------------------------------
|
|
435
|
+
|
|
436
|
+
describe('end-to-end: tool grant escalation -> approval -> consume', () => {
|
|
437
|
+
const handler = new ToolApprovalHandler();
|
|
438
|
+
const events: ToolLifecycleEvent[] = [];
|
|
439
|
+
const emitLifecycleEvent = (event: ToolLifecycleEvent) => { events.push(event); };
|
|
440
|
+
|
|
441
|
+
beforeEach(() => {
|
|
442
|
+
resetTables();
|
|
443
|
+
events.length = 0;
|
|
444
|
+
emittedSignals.length = 0;
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
test('first invocation denied + request created; guardian approves; second invocation succeeds; replay denied', async () => {
|
|
448
|
+
const toolName = 'bash';
|
|
449
|
+
const input = { command: 'echo secret' };
|
|
450
|
+
const _inputDigest = computeToolApprovalDigest(toolName, input);
|
|
451
|
+
|
|
452
|
+
const context = makeContext({ guardianActorRole: 'non-guardian' });
|
|
453
|
+
|
|
454
|
+
// Step 1: First invocation is denied, but a tool_grant_request is created
|
|
455
|
+
const firstResult = await handler.checkPreExecutionGates(
|
|
456
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
457
|
+
);
|
|
458
|
+
expect(firstResult.allowed).toBe(false);
|
|
459
|
+
|
|
460
|
+
// Verify the canonical request was created
|
|
461
|
+
const pendingRequests = listCanonicalGuardianRequests({
|
|
462
|
+
kind: 'tool_grant_request',
|
|
463
|
+
status: 'pending',
|
|
464
|
+
toolName: 'bash',
|
|
465
|
+
});
|
|
466
|
+
expect(pendingRequests.length).toBe(1);
|
|
467
|
+
const canonicalRequestId = pendingRequests[0].id;
|
|
468
|
+
|
|
469
|
+
// Step 2: Guardian approves the canonical request -> grant is minted
|
|
470
|
+
const approvalResult = await applyCanonicalGuardianDecision({
|
|
471
|
+
requestId: canonicalRequestId,
|
|
472
|
+
action: 'approve_once',
|
|
473
|
+
actorContext: guardianActor(),
|
|
474
|
+
});
|
|
475
|
+
expect(approvalResult.applied).toBe(true);
|
|
476
|
+
if (!approvalResult.applied) return;
|
|
477
|
+
expect(approvalResult.grantMinted).toBe(true);
|
|
478
|
+
|
|
479
|
+
// Verify request is now approved
|
|
480
|
+
const resolvedRequest = getCanonicalGuardianRequest(canonicalRequestId);
|
|
481
|
+
expect(resolvedRequest!.status).toBe('approved');
|
|
482
|
+
|
|
483
|
+
// Step 3: Second identical invocation consumes the grant and succeeds
|
|
484
|
+
const secondResult = await handler.checkPreExecutionGates(
|
|
485
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
486
|
+
);
|
|
487
|
+
expect(secondResult.allowed).toBe(true);
|
|
488
|
+
if (!secondResult.allowed) return;
|
|
489
|
+
expect(secondResult.grantConsumed).toBe(true);
|
|
490
|
+
|
|
491
|
+
// Step 4: Replay is denied (one-time grant semantics)
|
|
492
|
+
const replayResult = await handler.checkPreExecutionGates(
|
|
493
|
+
toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
|
|
494
|
+
);
|
|
495
|
+
expect(replayResult.allowed).toBe(false);
|
|
496
|
+
});
|
|
497
|
+
});
|
package/src/agent/loop.ts
CHANGED
|
@@ -4,6 +4,8 @@ import { truncateOversizedToolResults } from '../context/tool-result-truncation.
|
|
|
4
4
|
import { getHookManager } from '../hooks/manager.js';
|
|
5
5
|
import type { ContentBlock,Message, Provider, ToolDefinition } from '../providers/types.js';
|
|
6
6
|
import type { ToolResultContent } from '../providers/types.js';
|
|
7
|
+
import type { SensitiveOutputBinding } from '../tools/sensitive-output-placeholders.js';
|
|
8
|
+
import { applyStreamingSubstitution, applySubstitutions } from '../tools/sensitive-output-placeholders.js';
|
|
7
9
|
import { getLogger, isDebug, truncateForLog } from '../util/logger.js';
|
|
8
10
|
|
|
9
11
|
const log = getLogger('agent-loop');
|
|
@@ -63,14 +65,14 @@ export class AgentLoop {
|
|
|
63
65
|
private tools: ToolDefinition[];
|
|
64
66
|
private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
|
|
65
67
|
private resolveSystemPrompt: ((history: Message[]) => ResolvedSystemPrompt) | null;
|
|
66
|
-
private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>) | null;
|
|
68
|
+
private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>) | null;
|
|
67
69
|
|
|
68
70
|
constructor(
|
|
69
71
|
provider: Provider,
|
|
70
72
|
systemPrompt: string,
|
|
71
73
|
config?: Partial<AgentLoopConfig>,
|
|
72
74
|
tools?: ToolDefinition[],
|
|
73
|
-
toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>,
|
|
75
|
+
toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>,
|
|
74
76
|
resolveTools?: (history: Message[]) => ToolDefinition[],
|
|
75
77
|
resolveSystemPrompt?: (history: Message[]) => ResolvedSystemPrompt,
|
|
76
78
|
) {
|
|
@@ -97,6 +99,12 @@ export class AgentLoop {
|
|
|
97
99
|
const debug = isDebug();
|
|
98
100
|
const rlog = requestId ? log.child({ requestId }) : log;
|
|
99
101
|
|
|
102
|
+
// Per-run substitution map for sensitive output placeholders.
|
|
103
|
+
// Bindings are accumulated from tool results; placeholders are
|
|
104
|
+
// resolved in streamed deltas and final assistant message text.
|
|
105
|
+
const substitutionMap = new Map<string, string>();
|
|
106
|
+
let streamingPending = '';
|
|
107
|
+
|
|
100
108
|
while (true) {
|
|
101
109
|
if (signal?.aborted) break;
|
|
102
110
|
|
|
@@ -188,7 +196,17 @@ export class AgentLoop {
|
|
|
188
196
|
config: providerConfig,
|
|
189
197
|
onEvent: (event) => {
|
|
190
198
|
if (event.type === 'text_delta') {
|
|
191
|
-
|
|
199
|
+
// Apply sensitive-output placeholder substitution (chunk-safe)
|
|
200
|
+
if (substitutionMap.size > 0) {
|
|
201
|
+
const combined = streamingPending + event.text;
|
|
202
|
+
const { emit, pending } = applyStreamingSubstitution(combined, substitutionMap);
|
|
203
|
+
streamingPending = pending;
|
|
204
|
+
if (emit.length > 0) {
|
|
205
|
+
onEvent({ type: 'text_delta', text: emit });
|
|
206
|
+
}
|
|
207
|
+
} else {
|
|
208
|
+
onEvent({ type: 'text_delta', text: event.text });
|
|
209
|
+
}
|
|
192
210
|
} else if (event.type === 'thinking_delta') {
|
|
193
211
|
onEvent({ type: 'thinking_delta', thinking: event.thinking });
|
|
194
212
|
} else if (event.type === 'input_json_delta') {
|
|
@@ -238,6 +256,20 @@ export class AgentLoop {
|
|
|
238
256
|
durationMs: providerDurationMs,
|
|
239
257
|
});
|
|
240
258
|
|
|
259
|
+
// Flush any buffered streaming text from the substitution pipeline
|
|
260
|
+
if (streamingPending.length > 0) {
|
|
261
|
+
const flushed = applySubstitutions(streamingPending, substitutionMap);
|
|
262
|
+
if (flushed.length > 0) {
|
|
263
|
+
onEvent({ type: 'text_delta', text: flushed });
|
|
264
|
+
}
|
|
265
|
+
streamingPending = '';
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Build the assistant message with placeholder-only text.
|
|
269
|
+
// Both provider history and persisted conversation store must retain
|
|
270
|
+
// placeholders so the model never sees real sensitive values — neither
|
|
271
|
+
// on subsequent loop turns nor on session reload from the database.
|
|
272
|
+
// Substitution to real values happens only in streamed text_delta events.
|
|
241
273
|
const assistantMessage: Message = {
|
|
242
274
|
role: 'assistant',
|
|
243
275
|
content: response.content,
|
|
@@ -391,6 +423,17 @@ export class AgentLoop {
|
|
|
391
423
|
toolResults = await toolExecutionPromise;
|
|
392
424
|
}
|
|
393
425
|
|
|
426
|
+
// Merge sensitive output bindings from tool results into the
|
|
427
|
+
// per-run substitution map. Bindings carry placeholder->value pairs
|
|
428
|
+
// that are resolved in streamed text deltas and final message text.
|
|
429
|
+
for (const { result } of toolResults) {
|
|
430
|
+
if (result.sensitiveBindings) {
|
|
431
|
+
for (const binding of result.sensitiveBindings) {
|
|
432
|
+
substitutionMap.set(binding.placeholder, binding.value);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
394
437
|
// Collect result blocks preserving tool_use order (Promise.all maintains order)
|
|
395
438
|
const rawResultBlocks: ContentBlock[] = toolResults.map(({ toolUse, result }) => ({
|
|
396
439
|
type: 'tool_result' as const,
|