@vellumai/assistant 0.3.26 → 0.3.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/ARCHITECTURE.md +48 -1
  2. package/Dockerfile +2 -2
  3. package/package.json +1 -1
  4. package/scripts/ipc/generate-swift.ts +6 -2
  5. package/src/__tests__/agent-loop.test.ts +119 -0
  6. package/src/__tests__/bundled-asset.test.ts +107 -0
  7. package/src/__tests__/canonical-guardian-store.test.ts +636 -0
  8. package/src/__tests__/channel-approval-routes.test.ts +174 -1
  9. package/src/__tests__/emit-signal-routing-intent.test.ts +43 -1
  10. package/src/__tests__/guardian-actions-endpoint.test.ts +205 -345
  11. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +599 -0
  12. package/src/__tests__/guardian-dispatch.test.ts +19 -19
  13. package/src/__tests__/guardian-routing-invariants.test.ts +954 -0
  14. package/src/__tests__/mcp-cli.test.ts +77 -0
  15. package/src/__tests__/non-member-access-request.test.ts +31 -29
  16. package/src/__tests__/notification-decision-fallback.test.ts +61 -3
  17. package/src/__tests__/notification-decision-strategy.test.ts +17 -0
  18. package/src/__tests__/notification-guardian-path.test.ts +13 -15
  19. package/src/__tests__/onboarding-template-contract.test.ts +116 -21
  20. package/src/__tests__/secret-scanner-executor.test.ts +59 -0
  21. package/src/__tests__/secret-scanner.test.ts +8 -0
  22. package/src/__tests__/sensitive-output-placeholders.test.ts +208 -0
  23. package/src/__tests__/session-runtime-assembly.test.ts +76 -47
  24. package/src/__tests__/tool-grant-request-escalation.test.ts +497 -0
  25. package/src/agent/loop.ts +46 -3
  26. package/src/approvals/guardian-decision-primitive.ts +285 -0
  27. package/src/approvals/guardian-request-resolvers.ts +539 -0
  28. package/src/calls/guardian-dispatch.ts +46 -40
  29. package/src/calls/relay-server.ts +147 -2
  30. package/src/calls/types.ts +1 -1
  31. package/src/config/system-prompt.ts +2 -1
  32. package/src/config/templates/BOOTSTRAP.md +47 -31
  33. package/src/config/templates/USER.md +5 -0
  34. package/src/config/update-bulletin-template-path.ts +4 -1
  35. package/src/config/vellum-skills/trusted-contacts/SKILL.md +22 -17
  36. package/src/daemon/handlers/guardian-actions.ts +45 -66
  37. package/src/daemon/ipc-contract/guardian-actions.ts +7 -0
  38. package/src/daemon/lifecycle.ts +3 -16
  39. package/src/daemon/server.ts +18 -0
  40. package/src/daemon/session-agent-loop-handlers.ts +5 -4
  41. package/src/daemon/session-agent-loop.ts +32 -5
  42. package/src/daemon/session-process.ts +68 -307
  43. package/src/daemon/session-runtime-assembly.ts +112 -24
  44. package/src/daemon/session-tool-setup.ts +1 -0
  45. package/src/daemon/session.ts +1 -0
  46. package/src/home-base/prebuilt/seed.ts +2 -1
  47. package/src/hooks/templates.ts +2 -1
  48. package/src/memory/canonical-guardian-store.ts +524 -0
  49. package/src/memory/channel-guardian-store.ts +1 -0
  50. package/src/memory/db-init.ts +16 -0
  51. package/src/memory/guardian-action-store.ts +7 -60
  52. package/src/memory/guardian-approvals.ts +9 -4
  53. package/src/memory/migrations/036-normalize-phone-identities.ts +289 -0
  54. package/src/memory/migrations/118-reminder-routing-intent.ts +3 -3
  55. package/src/memory/migrations/121-canonical-guardian-requests.ts +59 -0
  56. package/src/memory/migrations/122-canonical-guardian-requester-chat-id.ts +15 -0
  57. package/src/memory/migrations/123-canonical-guardian-deliveries-destination-index.ts +15 -0
  58. package/src/memory/migrations/index.ts +4 -0
  59. package/src/memory/migrations/registry.ts +5 -0
  60. package/src/memory/schema-migration.ts +1 -0
  61. package/src/memory/schema.ts +52 -0
  62. package/src/notifications/copy-composer.ts +16 -4
  63. package/src/notifications/decision-engine.ts +57 -0
  64. package/src/permissions/defaults.ts +2 -0
  65. package/src/runtime/access-request-helper.ts +137 -0
  66. package/src/runtime/actor-trust-resolver.ts +225 -0
  67. package/src/runtime/channel-guardian-service.ts +12 -4
  68. package/src/runtime/guardian-context-resolver.ts +32 -7
  69. package/src/runtime/guardian-decision-types.ts +6 -0
  70. package/src/runtime/guardian-reply-router.ts +687 -0
  71. package/src/runtime/http-server.ts +8 -0
  72. package/src/runtime/routes/canonical-guardian-expiry-sweep.ts +116 -0
  73. package/src/runtime/routes/conversation-routes.ts +18 -0
  74. package/src/runtime/routes/guardian-action-routes.ts +100 -109
  75. package/src/runtime/routes/inbound-message-handler.ts +170 -525
  76. package/src/runtime/tool-grant-request-helper.ts +195 -0
  77. package/src/tools/executor.ts +13 -1
  78. package/src/tools/sensitive-output-placeholders.ts +203 -0
  79. package/src/tools/tool-approval-handler.ts +44 -1
  80. package/src/tools/types.ts +11 -0
  81. package/src/util/bundled-asset.ts +31 -0
  82. package/src/util/canonicalize-identity.ts +52 -0
@@ -0,0 +1,497 @@
1
+ /**
2
+ * Tests for the non-guardian tool grant escalation path:
3
+ *
4
+ * 1. ToolApprovalHandler grant-miss escalation behavior
5
+ * 2. tool_grant_request resolver registration and behavior
6
+ * 3. Canonical decision primitive grant minting for tool_grant_request kind
7
+ * 4. End-to-end: deny -> approve -> consume grant flow
8
+ */
9
+
10
+ import { mkdtempSync, rmSync } from 'node:fs';
11
+ import { tmpdir } from 'node:os';
12
+ import { join } from 'node:path';
13
+
14
+ import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
15
+
16
+ const testDir = mkdtempSync(join(tmpdir(), 'tool-grant-escalation-test-'));
17
+
18
// Replace the platform helpers so every path the code under test asks for
// resolves inside the temporary test directory, and the directory
// setup/migration hooks do nothing.
mock.module('../util/platform.js', () => {
  const insideTestDir = (fileName: string) => (): string => join(testDir, fileName);
  const runningOn = (platform: typeof process.platform) => (): boolean =>
    process.platform === platform;
  const doNothing = (): void => {};

  return {
    getDataDir: () => testDir,
    isMacOS: runningOn('darwin'),
    isLinux: runningOn('linux'),
    isWindows: runningOn('win32'),
    getSocketPath: insideTestDir('test.sock'),
    getPidPath: insideTestDir('test.pid'),
    getDbPath: insideTestDir('test.db'),
    getLogPath: insideTestDir('test.log'),
    ensureDataDir: doNothing,
    migrateToDataLayout: doNothing,
    migrateToWorkspaceLayout: doNothing,
  };
});
31
+
32
// Silence logging: every property read on a logger yields a no-op function,
// debug mode is reported as off, and log truncation returns its input as-is.
mock.module('../util/logger.js', () => {
  const createSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    });

  return {
    getLogger: () => createSilentLogger(),
    isDebug: () => false,
    truncateForLog: (value: string) => value,
  };
});
40
+
41
// Guardian control-plane policy stub: the policy check never denies, since
// these tests do not target the control plane by default.
mock.module('../tools/guardian-control-plane-policy.js', () => {
  const enforceGuardianOnlyPolicy = () => ({ denied: false });
  return { enforceGuardianOnlyPolicy };
});
45
+
46
// Task run rules stub: always reports an empty rule list.
mock.module('../tasks/ephemeral-permissions.js', () => {
  const getTaskRunRules = () => [];
  return { getTaskRunRules };
});
50
+
51
// Tool registry stub. The only tool it knows about is a fake 'bash' tool
// whose execution always reports success.
const fakeTool = {
  name: 'bash',
  description: 'Run a shell command',
  category: 'shell',
  defaultRiskLevel: 'high',
  getDefinition() {
    return { name: 'bash', description: 'Run a shell command', input_schema: {} };
  },
  async execute() {
    return { content: 'ok', isError: false };
  },
};

mock.module('../tools/registry.js', () => {
  // Any name other than 'bash' is reported as unknown.
  function getTool(name: string) {
    if (name === 'bash') {
      return fakeTool;
    }
    return undefined;
  }

  return {
    getTool,
    getAllTools: () => [fakeTool],
  };
});
65
+
66
// Notification emission stub: each emitted signal is recorded in
// `emittedSignals` and a canned "dispatched" result is returned, so the real
// notification pipeline never runs.
const emittedSignals: Array<Record<string, unknown>> = [];
mock.module('../notifications/emit-signal.js', () => {
  async function emitNotificationSignal(params: Record<string, unknown>) {
    emittedSignals.push(params);
    return {
      signalId: 'test-signal',
      deduplicated: false,
      dispatched: true,
      reason: 'ok',
      deliveryResults: [],
    };
  }

  return {
    emitNotificationSignal,
    registerBroadcastFn: () => {},
  };
});
75
+
76
// Channel guardian service stub: a guardian binding exists only for the
// ('self', 'telegram') pair; every other lookup returns null.
mock.module('../runtime/channel-guardian-service.js', () => {
  function getGuardianBinding(assistantId: string, channel: string) {
    const isBoundPair = assistantId === 'self' && channel === 'telegram';
    if (!isBoundPair) {
      return null;
    }
    return {
      id: 'binding-1',
      assistantId: 'self',
      channel: 'telegram',
      guardianExternalUserId: 'guardian-1',
      guardianDeliveryChatId: 'guardian-chat-1',
      status: 'active',
    };
  }

  function createOutboundSession() {
    return {
      sessionId: 'test-session',
      secret: '123456',
    };
  }

  return { getGuardianBinding, createOutboundSession };
});
96
+
97
// Gateway client stub: channel replies are not sent anywhere; their payloads
// are collected in `deliveredReplies` for inspection.
const deliveredReplies: Array<{ chatId: string; text: string }> = [];
mock.module('../runtime/gateway-client.js', () => {
  async function deliverChannelReply(
    _url: string,
    payload: { chatId: string; text: string },
  ): Promise<void> {
    deliveredReplies.push(payload);
  }

  return { deliverChannelReply };
});
104
+
105
+ import {
106
+ applyCanonicalGuardianDecision,
107
+ } from '../approvals/guardian-decision-primitive.js';
108
+ import type { ActorContext } from '../approvals/guardian-request-resolvers.js';
109
+ import { getRegisteredKinds, getResolver } from '../approvals/guardian-request-resolvers.js';
110
+ import {
111
+ createCanonicalGuardianRequest,
112
+ getCanonicalGuardianRequest,
113
+ listCanonicalGuardianRequests,
114
+ } from '../memory/canonical-guardian-store.js';
115
+ import { getDb, initializeDb, resetDb } from '../memory/db.js';
116
+ import { scopedApprovalGrants } from '../memory/schema.js';
117
+ import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
118
+ import { ToolApprovalHandler } from '../tools/tool-approval-handler.js';
119
+ import type { ToolContext, ToolLifecycleEvent } from '../tools/types.js';
120
+
121
initializeDb();

/**
 * Empties the tables this suite writes to: scoped approval grants, then
 * canonical guardian deliveries, then canonical guardian requests.
 */
function resetTables(): void {
  const db = getDb();
  db.delete(scopedApprovalGrants).run();

  // Deliveries are cleared before requests, in the same order as before.
  const clearStatements = [
    'DELETE FROM canonical_guardian_deliveries',
    'DELETE FROM canonical_guardian_requests',
  ];
  for (const statement of clearStatements) {
    db.run(statement);
  }
}
129
+
130
// Suite teardown: reset the database handle, then remove the temp directory.
afterAll(function tearDownSuite() {
  resetDb();

  const removeTestDir = (): void => rmSync(testDir, { recursive: true });
  try {
    removeTestDir();
  } catch {
    // Best effort: a leftover temp directory must not fail the suite.
  }
});
138
+
139
+ // ---------------------------------------------------------------------------
140
+ // Helpers
141
+ // ---------------------------------------------------------------------------
142
+
143
/**
 * Builds a ToolContext for a non-guardian requester on the telegram channel.
 *
 * @param overrides - Fields that replace the defaults; an explicit
 *   `undefined` also replaces the default.
 * @returns The default context merged with `overrides`.
 */
function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
  const defaults: ToolContext = {
    workingDir: testDir,
    sessionId: 'session-1',
    conversationId: 'conv-1',
    assistantId: 'self',
    requestId: 'req-1',
    guardianActorRole: 'non-guardian',
    executionChannel: 'telegram',
    requesterExternalUserId: 'requester-1',
  };
  return { ...defaults, ...overrides };
}
156
+
157
/**
 * Builds the ActorContext of the guardian ('guardian-1' on telegram, not
 * flagged as trusted).
 *
 * @param overrides - Fields that replace the defaults, e.g. a different
 *   `externalUserId` to simulate another actor.
 * @returns The default actor merged with `overrides`.
 */
function guardianActor(overrides: Partial<ActorContext> = {}): ActorContext {
  const defaults: ActorContext = {
    externalUserId: 'guardian-1',
    channel: 'telegram',
    isTrusted: false,
  };
  return { ...defaults, ...overrides };
}
165
+
166
+ // ===========================================================================
167
+ // TESTS
168
+ // ===========================================================================
169
+
170
+ // ---------------------------------------------------------------------------
171
+ // 1. tool_grant_request resolver registration
172
+ // ---------------------------------------------------------------------------
173
+
174
describe('tool_grant_request resolver registration', () => {
  const KIND = 'tool_grant_request';

  test('tool_grant_request resolver is registered', () => {
    expect(getRegisteredKinds()).toContain(KIND);
  });

  test('getResolver returns resolver for tool_grant_request', () => {
    const registered = getResolver(KIND);
    expect(registered).toBeDefined();
    expect(registered?.kind).toBe(KIND);
  });
});
186
+
187
+ // ---------------------------------------------------------------------------
188
+ // 2. Grant-miss escalation behavior in ToolApprovalHandler
189
+ // ---------------------------------------------------------------------------
190
+
191
describe('ToolApprovalHandler / grant-miss escalation', () => {
  const handler = new ToolApprovalHandler();
  const events: ToolLifecycleEvent[] = [];
  const emitLifecycleEvent = (event: ToolLifecycleEvent) => {
    events.push(event);
  };

  /** Runs the pre-execution gates for a 'bash' call with the 'host' / 'high' arguments. */
  const runBashGates = (input: Record<string, unknown>, context: ToolContext) =>
    handler.checkPreExecutionGates(
      'bash', input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
    );

  /** Lists the tool_grant_request rows whose status is still 'pending'. */
  const listPendingGrantRequests = () =>
    listCanonicalGuardianRequests({
      kind: 'tool_grant_request',
      status: 'pending',
    });

  beforeEach(() => {
    resetTables();
    // Drop everything captured by the previous test.
    for (const captured of [events, emittedSignals, deliveredReplies]) {
      captured.length = 0;
    }
  });

  test('non-guardian + grant miss + host tool creates canonical tool_grant_request', async () => {
    const outcome = await runBashGates(
      { command: 'cat /etc/passwd' },
      makeContext({ guardianActorRole: 'non-guardian' }),
    );

    expect(outcome.allowed).toBe(false);
    if (outcome.allowed) return;

    // Exactly one canonical request, attributed to requester and guardian.
    const pending = listPendingGrantRequests();
    expect(pending).toHaveLength(1);
    const created = pending[0];
    expect(created.toolName).toBe('bash');
    expect(created.requesterExternalUserId).toBe('requester-1');
    expect(created.guardianExternalUserId).toBe('guardian-1');

    // Exactly one notification signal, of the guardian question kind.
    expect(emittedSignals).toHaveLength(1);
    expect(emittedSignals[0].sourceEventName).toBe('guardian.question');
  });

  test('non-guardian grant-miss response includes request code', async () => {
    const outcome = await runBashGates(
      { command: 'deploy' },
      makeContext({ guardianActorRole: 'non-guardian' }),
    );

    expect(outcome.allowed).toBe(false);
    if (outcome.allowed) return;

    const message = outcome.result.content;
    expect(message).toContain('request has been sent to the guardian');
    expect(message).toContain('request code:');
    expect(message).toContain('Please retry after the guardian approves');
  });

  test('non-guardian duplicate grant-miss deduplicates the request', async () => {
    const input = { command: 'rm -rf /' };
    const context = makeContext({ guardianActorRole: 'non-guardian' });

    // The first attempt is what creates the request.
    await runBashGates(input, context);
    expect(listPendingGrantRequests()).toHaveLength(1);

    // Forget the first notification so only the retry is observed.
    emittedSignals.length = 0;

    // Retrying with the same tool and input must not create a second request.
    const retry = await runBashGates(input, context);

    expect(retry.allowed).toBe(false);
    if (retry.allowed) return;
    expect(retry.result.content).toContain('already pending');

    expect(listPendingGrantRequests()).toHaveLength(1);
    expect(emittedSignals).toHaveLength(0);
  });

  test('unverified_channel does NOT create escalation request', async () => {
    const outcome = await runBashGates(
      { command: 'ls' },
      makeContext({
        guardianActorRole: 'unverified_channel',
        executionChannel: 'telegram',
        requesterExternalUserId: 'unknown-user',
      }),
    );

    expect(outcome.allowed).toBe(false);
    if (outcome.allowed) return;

    // The generic denial wording is expected, not the escalation wording.
    expect(outcome.result.content).toContain('verified channel identity');

    // And nothing was written to the canonical request store.
    expect(listPendingGrantRequests()).toHaveLength(0);
  });

  test('non-guardian without executionChannel falls back to generic denial', async () => {
    const outcome = await runBashGates(
      { command: 'deploy' },
      makeContext({
        guardianActorRole: 'non-guardian',
        executionChannel: undefined, // no channel info
      }),
    );

    expect(outcome.allowed).toBe(false);
    if (outcome.allowed) return;

    // Generic denial only; no escalation wording and no stored request.
    const message = outcome.result.content;
    expect(message).toContain('guardian approval');
    expect(message).not.toContain('request has been sent');

    expect(listPendingGrantRequests()).toHaveLength(0);
  });
});
337
+
338
+ // ---------------------------------------------------------------------------
339
+ // 3. Canonical decision and grant minting for tool_grant_request kind
340
+ // ---------------------------------------------------------------------------
341
+
342
describe('applyCanonicalGuardianDecision / tool_grant_request', () => {
  /**
   * Stores a tool_grant_request for 'bash' from 'requester-1', addressed to
   * 'guardian-1', that expires one minute after it is created.
   */
  const seedToolGrantRequest = () =>
    createCanonicalGuardianRequest({
      kind: 'tool_grant_request',
      sourceType: 'channel',
      sourceChannel: 'telegram',
      conversationId: 'conv-1',
      requesterExternalUserId: 'requester-1',
      guardianExternalUserId: 'guardian-1',
      toolName: 'bash',
      inputDigest: 'sha256:testdigest',
      expiresAt: new Date(Date.now() + 60_000).toISOString(),
    });

  beforeEach(() => {
    deliveredReplies.length = 0;
    resetTables();
  });

  test('approving tool_grant_request with tool metadata mints a grant', async () => {
    const { id: requestId } = seedToolGrantRequest();

    const decision = await applyCanonicalGuardianDecision({
      requestId,
      action: 'approve_once',
      actorContext: guardianActor(),
    });

    expect(decision.applied).toBe(true);
    if (!decision.applied) return;
    expect(decision.grantMinted).toBe(true);

    // The stored request records the approval and who decided it.
    const stored = getCanonicalGuardianRequest(requestId);
    expect(stored?.status).toBe('approved');
    expect(stored?.decidedByExternalUserId).toBe('guardian-1');
  });

  test('rejecting tool_grant_request does NOT mint a grant', async () => {
    const { id: requestId } = seedToolGrantRequest();

    const decision = await applyCanonicalGuardianDecision({
      requestId,
      action: 'reject',
      actorContext: guardianActor(),
    });

    expect(decision.applied).toBe(true);
    if (!decision.applied) return;
    expect(decision.grantMinted).toBe(false);

    const stored = getCanonicalGuardianRequest(requestId);
    expect(stored?.status).toBe('denied');
  });

  test('identity mismatch blocks tool_grant_request approval', async () => {
    const { id: requestId } = seedToolGrantRequest();

    // The deciding actor is not the guardian the request was addressed to.
    const decision = await applyCanonicalGuardianDecision({
      requestId,
      action: 'approve_once',
      actorContext: guardianActor({ externalUserId: 'imposter-99' }),
    });

    expect(decision.applied).toBe(false);
    if (decision.applied) return;
    expect(decision.reason).toBe('identity_mismatch');

    // A blocked decision leaves the request pending.
    const stored = getCanonicalGuardianRequest(requestId);
    expect(stored?.status).toBe('pending');
  });
});
431
+
432
+ // ---------------------------------------------------------------------------
433
+ // 4. End-to-end: deny -> approve -> consume grant flow
434
+ // ---------------------------------------------------------------------------
435
+
436
describe('end-to-end: tool grant escalation -> approval -> consume', () => {
  const handler = new ToolApprovalHandler();
  const events: ToolLifecycleEvent[] = [];
  const emitLifecycleEvent = (event: ToolLifecycleEvent) => {
    events.push(event);
  };

  beforeEach(() => {
    resetTables();
    events.length = 0;
    emittedSignals.length = 0;
    // This suite runs the approval path too, so clear the shared reply
    // capture as the sibling suites do; otherwise replies captured by an
    // earlier test would leak into this one.
    deliveredReplies.length = 0;
  });

  test('first invocation denied + request created; guardian approves; second invocation succeeds; replay denied', async () => {
    const toolName = 'bash';
    const input = { command: 'echo secret' };
    // Computed but not asserted against; kept so the digest helper is still
    // invoked with this tool/input pair.
    const _inputDigest = computeToolApprovalDigest(toolName, input);

    const context = makeContext({ guardianActorRole: 'non-guardian' });

    /** Runs the pre-execution gates for the same tool, input and context. */
    const invoke = () =>
      handler.checkPreExecutionGates(
        toolName, input, context, 'host', 'high', Date.now(), emitLifecycleEvent,
      );

    // Step 1: the first invocation is denied, but a tool_grant_request is created.
    const firstResult = await invoke();
    expect(firstResult.allowed).toBe(false);

    const pendingRequests = listCanonicalGuardianRequests({
      kind: 'tool_grant_request',
      status: 'pending',
      toolName: 'bash',
    });
    expect(pendingRequests.length).toBe(1);
    const canonicalRequestId = pendingRequests[0].id;

    // Step 2: the guardian approves the canonical request -> a grant is minted.
    const approvalResult = await applyCanonicalGuardianDecision({
      requestId: canonicalRequestId,
      action: 'approve_once',
      actorContext: guardianActor(),
    });
    expect(approvalResult.applied).toBe(true);
    if (!approvalResult.applied) return;
    expect(approvalResult.grantMinted).toBe(true);

    const resolvedRequest = getCanonicalGuardianRequest(canonicalRequestId);
    expect(resolvedRequest!.status).toBe('approved');

    // Step 3: the second identical invocation consumes the grant and succeeds.
    const secondResult = await invoke();
    expect(secondResult.allowed).toBe(true);
    if (!secondResult.allowed) return;
    expect(secondResult.grantConsumed).toBe(true);

    // Step 4: a replay is denied (one-time grant semantics).
    const replayResult = await invoke();
    expect(replayResult.allowed).toBe(false);
  });
});
package/src/agent/loop.ts CHANGED
@@ -4,6 +4,8 @@ import { truncateOversizedToolResults } from '../context/tool-result-truncation.
4
4
  import { getHookManager } from '../hooks/manager.js';
5
5
  import type { ContentBlock,Message, Provider, ToolDefinition } from '../providers/types.js';
6
6
  import type { ToolResultContent } from '../providers/types.js';
7
+ import type { SensitiveOutputBinding } from '../tools/sensitive-output-placeholders.js';
8
+ import { applyStreamingSubstitution, applySubstitutions } from '../tools/sensitive-output-placeholders.js';
7
9
  import { getLogger, isDebug, truncateForLog } from '../util/logger.js';
8
10
 
9
11
  const log = getLogger('agent-loop');
@@ -63,14 +65,14 @@ export class AgentLoop {
63
65
  private tools: ToolDefinition[];
64
66
  private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
65
67
  private resolveSystemPrompt: ((history: Message[]) => ResolvedSystemPrompt) | null;
66
- private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>) | null;
68
+ private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>) | null;
67
69
 
68
70
  constructor(
69
71
  provider: Provider,
70
72
  systemPrompt: string,
71
73
  config?: Partial<AgentLoopConfig>,
72
74
  tools?: ToolDefinition[],
73
- toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>,
75
+ toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>,
74
76
  resolveTools?: (history: Message[]) => ToolDefinition[],
75
77
  resolveSystemPrompt?: (history: Message[]) => ResolvedSystemPrompt,
76
78
  ) {
@@ -97,6 +99,12 @@ export class AgentLoop {
97
99
  const debug = isDebug();
98
100
  const rlog = requestId ? log.child({ requestId }) : log;
99
101
 
102
+ // Per-run substitution map for sensitive output placeholders.
103
+ // Bindings are accumulated from tool results; placeholders are
104
+ // resolved in streamed deltas and final assistant message text.
105
+ const substitutionMap = new Map<string, string>();
106
+ let streamingPending = '';
107
+
100
108
  while (true) {
101
109
  if (signal?.aborted) break;
102
110
 
@@ -188,7 +196,17 @@ export class AgentLoop {
188
196
  config: providerConfig,
189
197
  onEvent: (event) => {
190
198
  if (event.type === 'text_delta') {
191
- onEvent({ type: 'text_delta', text: event.text });
199
+ // Apply sensitive-output placeholder substitution (chunk-safe)
200
+ if (substitutionMap.size > 0) {
201
+ const combined = streamingPending + event.text;
202
+ const { emit, pending } = applyStreamingSubstitution(combined, substitutionMap);
203
+ streamingPending = pending;
204
+ if (emit.length > 0) {
205
+ onEvent({ type: 'text_delta', text: emit });
206
+ }
207
+ } else {
208
+ onEvent({ type: 'text_delta', text: event.text });
209
+ }
192
210
  } else if (event.type === 'thinking_delta') {
193
211
  onEvent({ type: 'thinking_delta', thinking: event.thinking });
194
212
  } else if (event.type === 'input_json_delta') {
@@ -238,6 +256,20 @@ export class AgentLoop {
238
256
  durationMs: providerDurationMs,
239
257
  });
240
258
 
259
+ // Flush any buffered streaming text from the substitution pipeline
260
+ if (streamingPending.length > 0) {
261
+ const flushed = applySubstitutions(streamingPending, substitutionMap);
262
+ if (flushed.length > 0) {
263
+ onEvent({ type: 'text_delta', text: flushed });
264
+ }
265
+ streamingPending = '';
266
+ }
267
+
268
+ // Build the assistant message with placeholder-only text.
269
+ // Both provider history and persisted conversation store must retain
270
+ // placeholders so the model never sees real sensitive values — neither
271
+ // on subsequent loop turns nor on session reload from the database.
272
+ // Substitution to real values happens only in streamed text_delta events.
241
273
  const assistantMessage: Message = {
242
274
  role: 'assistant',
243
275
  content: response.content,
@@ -391,6 +423,17 @@ export class AgentLoop {
391
423
  toolResults = await toolExecutionPromise;
392
424
  }
393
425
 
426
+ // Merge sensitive output bindings from tool results into the
427
+ // per-run substitution map. Bindings carry placeholder->value pairs
428
+ // that are resolved in streamed text deltas and final message text.
429
+ for (const { result } of toolResults) {
430
+ if (result.sensitiveBindings) {
431
+ for (const binding of result.sensitiveBindings) {
432
+ substitutionMap.set(binding.placeholder, binding.value);
433
+ }
434
+ }
435
+ }
436
+
394
437
  // Collect result blocks preserving tool_use order (Promise.all maintains order)
395
438
  const rawResultBlocks: ContentBlock[] = toolResults.map(({ toolUse, result }) => ({
396
439
  type: 'tool_result' as const,