npm - @vellumai/assistant - Versions diffs - 0.3.26 → 0.3.28 - Mend

@vellumai/assistant 0.3.26 → 0.3.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/ARCHITECTURE.md +48 -1
package/Dockerfile +2 -2
package/package.json +1 -1
package/scripts/ipc/generate-swift.ts +6 -2
package/src/__tests__/agent-loop.test.ts +119 -0
package/src/__tests__/bundled-asset.test.ts +107 -0
package/src/__tests__/canonical-guardian-store.test.ts +636 -0
package/src/__tests__/channel-approval-routes.test.ts +174 -1
package/src/__tests__/emit-signal-routing-intent.test.ts +43 -1
package/src/__tests__/guardian-actions-endpoint.test.ts +205 -345
package/src/__tests__/guardian-decision-primitive-canonical.test.ts +599 -0
package/src/__tests__/guardian-dispatch.test.ts +19 -19
package/src/__tests__/guardian-routing-invariants.test.ts +954 -0
package/src/__tests__/mcp-cli.test.ts +77 -0
package/src/__tests__/non-member-access-request.test.ts +31 -29
package/src/__tests__/notification-decision-fallback.test.ts +61 -3
package/src/__tests__/notification-decision-strategy.test.ts +17 -0
package/src/__tests__/notification-guardian-path.test.ts +13 -15
package/src/__tests__/onboarding-template-contract.test.ts +116 -21
package/src/__tests__/secret-scanner-executor.test.ts +59 -0
package/src/__tests__/secret-scanner.test.ts +8 -0
package/src/__tests__/sensitive-output-placeholders.test.ts +208 -0
package/src/__tests__/session-runtime-assembly.test.ts +76 -47
package/src/__tests__/tool-grant-request-escalation.test.ts +497 -0
package/src/agent/loop.ts +46 -3
package/src/approvals/guardian-decision-primitive.ts +285 -0
package/src/approvals/guardian-request-resolvers.ts +539 -0
package/src/calls/guardian-dispatch.ts +46 -40
package/src/calls/relay-server.ts +147 -2
package/src/calls/types.ts +1 -1
package/src/config/system-prompt.ts +2 -1
package/src/config/templates/BOOTSTRAP.md +47 -31
package/src/config/templates/USER.md +5 -0
package/src/config/update-bulletin-template-path.ts +4 -1
package/src/config/vellum-skills/trusted-contacts/SKILL.md +22 -17
package/src/daemon/handlers/guardian-actions.ts +45 -66
package/src/daemon/ipc-contract/guardian-actions.ts +7 -0
package/src/daemon/lifecycle.ts +3 -16
package/src/daemon/server.ts +18 -0
package/src/daemon/session-agent-loop-handlers.ts +5 -4
package/src/daemon/session-agent-loop.ts +32 -5
package/src/daemon/session-process.ts +68 -307
package/src/daemon/session-runtime-assembly.ts +112 -24
package/src/daemon/session-tool-setup.ts +1 -0
package/src/daemon/session.ts +1 -0
package/src/home-base/prebuilt/seed.ts +2 -1
package/src/hooks/templates.ts +2 -1
package/src/memory/canonical-guardian-store.ts +524 -0
package/src/memory/channel-guardian-store.ts +1 -0
package/src/memory/db-init.ts +16 -0
package/src/memory/guardian-action-store.ts +7 -60
package/src/memory/guardian-approvals.ts +9 -4
package/src/memory/migrations/036-normalize-phone-identities.ts +289 -0
package/src/memory/migrations/118-reminder-routing-intent.ts +3 -3
package/src/memory/migrations/121-canonical-guardian-requests.ts +59 -0
package/src/memory/migrations/122-canonical-guardian-requester-chat-id.ts +15 -0
package/src/memory/migrations/123-canonical-guardian-deliveries-destination-index.ts +15 -0
package/src/memory/migrations/index.ts +4 -0
package/src/memory/migrations/registry.ts +5 -0
package/src/memory/schema-migration.ts +1 -0
package/src/memory/schema.ts +52 -0
package/src/notifications/copy-composer.ts +16 -4
package/src/notifications/decision-engine.ts +57 -0
package/src/permissions/defaults.ts +2 -0
package/src/runtime/access-request-helper.ts +137 -0
package/src/runtime/actor-trust-resolver.ts +225 -0
package/src/runtime/channel-guardian-service.ts +12 -4
package/src/runtime/guardian-context-resolver.ts +32 -7
package/src/runtime/guardian-decision-types.ts +6 -0
package/src/runtime/guardian-reply-router.ts +687 -0
package/src/runtime/http-server.ts +8 -0
package/src/runtime/routes/canonical-guardian-expiry-sweep.ts +116 -0
package/src/runtime/routes/conversation-routes.ts +18 -0
package/src/runtime/routes/guardian-action-routes.ts +100 -109
package/src/runtime/routes/inbound-message-handler.ts +170 -525
package/src/runtime/tool-grant-request-helper.ts +195 -0
package/src/tools/executor.ts +13 -1
package/src/tools/sensitive-output-placeholders.ts +203 -0
package/src/tools/tool-approval-handler.ts +44 -1
package/src/tools/types.ts +11 -0
package/src/util/bundled-asset.ts +31 -0
package/src/util/canonicalize-identity.ts +52 -0

package/src/__tests__/tool-grant-request-escalation.test.ts ADDED Viewed

@@ -0,0 +1,497 @@
+/**
+ * Tests for the non-guardian tool grant escalation path:
+ *
+ * 1. ToolApprovalHandler grant-miss escalation behavior
+ * 2. tool_grant_request resolver registration and behavior
+ * 3. Canonical decision primitive grant minting for tool_grant_request kind
+ * 4. End-to-end: deny -> approve -> consume grant flow
+ */
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeEach, describe, expect, mock, test } from 'bun:test';
+const testDir = mkdtempSync(join(tmpdir(), 'tool-grant-escalation-test-')); // Fresh temp directory; every mocked platform path below lives inside it
+mock.module('../util/platform.js', () => ({ // Replace the platform helpers so data, socket, pid, db and log paths resolve under testDir
+  getDataDir: () => testDir,
+  isMacOS: () => process.platform === 'darwin',
+  isLinux: () => process.platform === 'linux',
+  isWindows: () => process.platform === 'win32',
+  getSocketPath: () => join(testDir, 'test.sock'),
+  getPidPath: () => join(testDir, 'test.pid'),
+  getDbPath: () => join(testDir, 'test.db'),
+  getLogPath: () => join(testDir, 'test.log'),
+  ensureDataDir: () => {}, // The directory setup and layout migration helpers are stubbed as no-ops
+  migrateToDataLayout: () => {},
+  migrateToWorkspaceLayout: () => {},
+}));
+mock.module('../util/logger.js', () => ({ // Replace the logger with one that discards everything
+  getLogger: () =>
+    new Proxy({} as Record<string, unknown>, {
+      get: () => () => {}, // Any property read on the logger yields a no-op function
+    }),
+  isDebug: () => false,
+  truncateForLog: (value: string) => value, // Identity: the value is returned untruncated
+}));
+// Mock guardian control-plane policy: the stub always reports { denied: false }
+mock.module('../tools/guardian-control-plane-policy.js', () => ({
+  enforceGuardianOnlyPolicy: () => ({ denied: false }),
+}));
+// Mock task run rules: the stub always returns an empty rule list
+mock.module('../tasks/ephemeral-permissions.js', () => ({
+  getTaskRunRules: () => [],
+}));
+// Mock tool registry: 'bash' is the only tool it knows about
+const fakeTool = {
+  name: 'bash',
+  description: 'Run a shell command',
+  category: 'shell',
+  defaultRiskLevel: 'high',
+  getDefinition: () => ({ name: 'bash', description: 'Run a shell command', input_schema: {} }),
+  execute: async () => ({ content: 'ok', isError: false }),
+};
+mock.module('../tools/registry.js', () => ({
+  getTool: (name: string) => (name === 'bash' ? fakeTool : undefined), // Any other tool name resolves to undefined
+  getAllTools: () => [fakeTool],
+}));
+// Mock notification emission: record each call's params in emittedSignals and return a canned result
+const emittedSignals: Array<Record<string, unknown>> = [];
+mock.module('../notifications/emit-signal.js', () => ({
+  emitNotificationSignal: async (params: Record<string, unknown>) => {
+    emittedSignals.push(params);
+    return { signalId: 'test-signal', deduplicated: false, dispatched: true, reason: 'ok', deliveryResults: [] };
+  },
+  registerBroadcastFn: () => {},
+}));
+// Mock channel guardian service: a guardian binding exists only for assistant 'self' on channel 'telegram'
+mock.module('../runtime/channel-guardian-service.js', () => ({
+  getGuardianBinding: (assistantId: string, channel: string) => {
+    if (assistantId === 'self' && channel === 'telegram') {
+      return {
+        id: 'binding-1',
+        assistantId: 'self',
+        channel: 'telegram',
+        guardianExternalUserId: 'guardian-1',
+        guardianDeliveryChatId: 'guardian-chat-1',
+        status: 'active',
+      };
+    }
+    return null; // Every other assistant/channel pair has no binding
+  },
+  createOutboundSession: () => ({
+    sessionId: 'test-session',
+    secret: '123456',
+  }),
+}));
+// Mock gateway client: record each delivery payload in deliveredReplies instead of sending it
+const deliveredReplies: Array<{ chatId: string; text: string }> = [];
+mock.module('../runtime/gateway-client.js', () => ({
+  deliverChannelReply: async (_url: string, payload: { chatId: string; text: string }) => {
+    deliveredReplies.push(payload);
+  },
+}));
+import {
+  applyCanonicalGuardianDecision,
+} from '../approvals/guardian-decision-primitive.js';
+import type { ActorContext } from '../approvals/guardian-request-resolvers.js';
+import { getRegisteredKinds, getResolver } from '../approvals/guardian-request-resolvers.js';
+import {
+  createCanonicalGuardianRequest,
+  getCanonicalGuardianRequest,
+  listCanonicalGuardianRequests,
+} from '../memory/canonical-guardian-store.js';
+import { getDb, initializeDb, resetDb } from '../memory/db.js';
+import { scopedApprovalGrants } from '../memory/schema.js';
+import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
+import { ToolApprovalHandler } from '../tools/tool-approval-handler.js';
+import type { ToolContext, ToolLifecycleEvent } from '../tools/types.js';
+initializeDb();
+/**
+ * Deletes every row referenced by `scopedApprovalGrants` and empties both
+ * canonical guardian tables so each test starts from a clean database.
+ */
+function resetTables(): void {
+  const database = getDb();
+  database.delete(scopedApprovalGrants).run();
+  // Deliveries are cleared before requests, in the same order as before.
+  const rawDeletes = [
+    'DELETE FROM canonical_guardian_deliveries',
+    'DELETE FROM canonical_guardian_requests',
+  ];
+  for (const statement of rawDeletes) {
+    database.run(statement);
+  }
+}
+/** Removes the temporary test directory; any failure is deliberately ignored. */
+function removeTestDir(): void {
+  try {
+    rmSync(testDir, { recursive: true });
+  } catch {
+    /* best effort */
+  }
+}
+// Once every test has run: call resetDb(), then remove the temp directory.
+afterAll(() => {
+  resetDb();
+  removeTestDir();
+});
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Builds the ToolContext used by the tests: a 'non-guardian' requester
+ * ('requester-1') on the 'telegram' channel, working inside the temp dir.
+ * Any field supplied in `overrides` replaces the corresponding default.
+ */
+function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
+  const defaults: ToolContext = {
+    workingDir: testDir,
+    sessionId: 'session-1',
+    conversationId: 'conv-1',
+    assistantId: 'self',
+    requestId: 'req-1',
+    guardianActorRole: 'non-guardian',
+    executionChannel: 'telegram',
+    requesterExternalUserId: 'requester-1',
+  };
+  return { ...defaults, ...overrides };
+}
+/**
+ * Builds the ActorContext for the deciding actor: external user 'guardian-1'
+ * on 'telegram' with `isTrusted` false. Fields in `overrides` win over defaults.
+ */
+function guardianActor(overrides: Partial<ActorContext> = {}): ActorContext {
+  const defaults: ActorContext = {
+    externalUserId: 'guardian-1',
+    channel: 'telegram',
+    isTrusted: false,
+  };
+  return { ...defaults, ...overrides };
+}
+// ===========================================================================
+// TESTS
+// ===========================================================================
+// ---------------------------------------------------------------------------
+// 1. tool_grant_request resolver registration
+// ---------------------------------------------------------------------------
+describe('tool_grant_request resolver registration', () => {
+  // The kind must appear in the list of registered resolver kinds.
+  test('tool_grant_request resolver is registered', () => {
+    expect(getRegisteredKinds()).toContain('tool_grant_request');
+  });
+  // Looking the kind up must yield a resolver that reports the same kind.
+  test('getResolver returns resolver for tool_grant_request', () => {
+    const found = getResolver('tool_grant_request');
+    expect(found).toBeDefined();
+    expect(found?.kind).toBe('tool_grant_request');
+  });
+});
+// ---------------------------------------------------------------------------
+// 2. Grant-miss escalation behavior in ToolApprovalHandler
+// ---------------------------------------------------------------------------
+describe('ToolApprovalHandler / grant-miss escalation', () => {
+  const approvalHandler = new ToolApprovalHandler();
+  const lifecycleEvents: ToolLifecycleEvent[] = [];
+  const recordEvent = (event: ToolLifecycleEvent) => {
+    lifecycleEvents.push(event);
+  };
+  type GateInput = Parameters<ToolApprovalHandler['checkPreExecutionGates']>[1];
+  // Every test calls the gate for the 'bash' tool with the same 'host' / 'high' arguments.
+  const runBashGate = (input: GateInput, context: ToolContext) =>
+    approvalHandler.checkPreExecutionGates('bash', input, context, 'host', 'high', Date.now(), recordEvent);
+  // Canonical requests of kind tool_grant_request that are still pending.
+  const pendingGrantRequests = () =>
+    listCanonicalGuardianRequests({ kind: 'tool_grant_request', status: 'pending' });
+  beforeEach(() => {
+    resetTables();
+    lifecycleEvents.length = 0;
+    emittedSignals.length = 0;
+    deliveredReplies.length = 0;
+  });
+  test('non-guardian + grant miss + host tool creates canonical tool_grant_request', async () => {
+    const context = makeContext({ guardianActorRole: 'non-guardian' });
+    const outcome = await runBashGate({ command: 'cat /etc/passwd' }, context);
+    expect(outcome.allowed).toBe(false);
+    // Exactly one pending request, attributed to the requester and the bound guardian
+    const pending = pendingGrantRequests();
+    expect(pending.length).toBe(1);
+    const [created] = pending;
+    expect(created.toolName).toBe('bash');
+    expect(created.requesterExternalUserId).toBe('requester-1');
+    expect(created.guardianExternalUserId).toBe('guardian-1');
+    // Exactly one notification signal, emitted as a guardian question
+    expect(emittedSignals.length).toBe(1);
+    expect(emittedSignals[0].sourceEventName).toBe('guardian.question');
+  });
+  test('non-guardian grant-miss response includes request code', async () => {
+    const context = makeContext({ guardianActorRole: 'non-guardian' });
+    const outcome = await runBashGate({ command: 'deploy' }, context);
+    expect(outcome.allowed).toBe(false);
+    if (outcome.allowed) return;
+    const message = outcome.result.content;
+    expect(message).toContain('request has been sent to the guardian');
+    expect(message).toContain('request code:');
+    expect(message).toContain('Please retry after the guardian approves');
+  });
+  test('non-guardian duplicate grant-miss deduplicates the request', async () => {
+    const input = { command: 'rm -rf /' };
+    const context = makeContext({ guardianActorRole: 'non-guardian' });
+    // The first call creates the request
+    await runBashGate(input, context);
+    expect(pendingGrantRequests().length).toBe(1);
+    // Forget the first notification so only the second call is observed
+    emittedSignals.length = 0;
+    // The second call, same tool and input, must reuse the pending request
+    const repeat = await runBashGate(input, context);
+    expect(repeat.allowed).toBe(false);
+    if (repeat.allowed) return;
+    expect(repeat.result.content).toContain('already pending');
+    // Still a single canonical request, and no second notification
+    expect(pendingGrantRequests().length).toBe(1);
+    expect(emittedSignals.length).toBe(0);
+  });
+  test('unverified_channel does NOT create escalation request', async () => {
+    const context = makeContext({
+      guardianActorRole: 'unverified_channel',
+      executionChannel: 'telegram',
+      requesterExternalUserId: 'unknown-user',
+    });
+    const outcome = await runBashGate({ command: 'ls' }, context);
+    expect(outcome.allowed).toBe(false);
+    if (outcome.allowed) return;
+    // The generic denial is returned rather than an escalation message
+    expect(outcome.result.content).toContain('verified channel identity');
+    // And nothing was queued for the guardian
+    expect(pendingGrantRequests().length).toBe(0);
+  });
+  test('non-guardian without executionChannel falls back to generic denial', async () => {
+    const context = makeContext({
+      guardianActorRole: 'non-guardian',
+      executionChannel: undefined, // no channel info
+    });
+    const outcome = await runBashGate({ command: 'deploy' }, context);
+    expect(outcome.allowed).toBe(false);
+    if (outcome.allowed) return;
+    // Generic denial text, with no sign that an escalation was attempted
+    const message = outcome.result.content;
+    expect(message).toContain('guardian approval');
+    expect(message).not.toContain('request has been sent');
+    expect(pendingGrantRequests().length).toBe(0);
+  });
+});
+// ---------------------------------------------------------------------------
+// 3. Canonical decision and grant minting for tool_grant_request kind
+// ---------------------------------------------------------------------------
+describe('applyCanonicalGuardianDecision / tool_grant_request', () => {
+  // Each test starts from the same fixture: a telegram tool_grant_request for
+  // 'bash', addressed to 'guardian-1', that expires one minute from creation.
+  const seedGrantRequest = () =>
+    createCanonicalGuardianRequest({
+      kind: 'tool_grant_request',
+      sourceType: 'channel',
+      sourceChannel: 'telegram',
+      conversationId: 'conv-1',
+      requesterExternalUserId: 'requester-1',
+      guardianExternalUserId: 'guardian-1',
+      toolName: 'bash',
+      inputDigest: 'sha256:testdigest',
+      expiresAt: new Date(Date.now() + 60_000).toISOString(),
+    });
+  beforeEach(() => {
+    resetTables();
+    deliveredReplies.length = 0;
+  });
+  test('approving tool_grant_request with tool metadata mints a grant', async () => {
+    const seeded = seedGrantRequest();
+    const decision = await applyCanonicalGuardianDecision({
+      requestId: seeded.id,
+      action: 'approve_once',
+      actorContext: guardianActor(),
+    });
+    expect(decision.applied).toBe(true);
+    if (!decision.applied) return;
+    expect(decision.grantMinted).toBe(true);
+    // The stored request now records the approval and who decided it
+    const stored = getCanonicalGuardianRequest(seeded.id);
+    expect(stored!.status).toBe('approved');
+    expect(stored!.decidedByExternalUserId).toBe('guardian-1');
+  });
+  test('rejecting tool_grant_request does NOT mint a grant', async () => {
+    const seeded = seedGrantRequest();
+    const decision = await applyCanonicalGuardianDecision({
+      requestId: seeded.id,
+      action: 'reject',
+      actorContext: guardianActor(),
+    });
+    expect(decision.applied).toBe(true);
+    if (!decision.applied) return;
+    expect(decision.grantMinted).toBe(false);
+    const stored = getCanonicalGuardianRequest(seeded.id);
+    expect(stored!.status).toBe('denied');
+  });
+  test('identity mismatch blocks tool_grant_request approval', async () => {
+    const seeded = seedGrantRequest();
+    // The deciding actor is not the guardian named on the request
+    const decision = await applyCanonicalGuardianDecision({
+      requestId: seeded.id,
+      action: 'approve_once',
+      actorContext: guardianActor({ externalUserId: 'imposter-99' }),
+    });
+    expect(decision.applied).toBe(false);
+    if (decision.applied) return;
+    expect(decision.reason).toBe('identity_mismatch');
+    // The request must be left untouched
+    const stored = getCanonicalGuardianRequest(seeded.id);
+    expect(stored!.status).toBe('pending');
+  });
+});
+// ---------------------------------------------------------------------------
+// 4. End-to-end: deny -> approve -> consume grant flow
+// ---------------------------------------------------------------------------
+describe('end-to-end: tool grant escalation -> approval -> consume', () => {
+  const approvalHandler = new ToolApprovalHandler();
+  const lifecycleEvents: ToolLifecycleEvent[] = [];
+  const recordEvent = (event: ToolLifecycleEvent) => {
+    lifecycleEvents.push(event);
+  };
+  beforeEach(() => {
+    resetTables();
+    lifecycleEvents.length = 0;
+    emittedSignals.length = 0;
+  });
+  test('first invocation denied + request created; guardian approves; second invocation succeeds; replay denied', async () => {
+    const toolName = 'bash';
+    const input = { command: 'echo secret' };
+    // The digest is computed but never asserted on; the call is kept as in the original.
+    const _inputDigest = computeToolApprovalDigest(toolName, input);
+    const context = makeContext({ guardianActorRole: 'non-guardian' });
+    // The identical gate call is issued three times over the course of the flow.
+    const invokeGate = () =>
+      approvalHandler.checkPreExecutionGates(toolName, input, context, 'host', 'high', Date.now(), recordEvent);
+    // Step 1: the first attempt is denied and leaves a pending tool_grant_request behind
+    const denied = await invokeGate();
+    expect(denied.allowed).toBe(false);
+    const pending = listCanonicalGuardianRequests({
+      kind: 'tool_grant_request',
+      status: 'pending',
+      toolName: 'bash',
+    });
+    expect(pending.length).toBe(1);
+    const pendingId = pending[0].id;
+    // Step 2: the guardian approves that request, which mints a grant
+    const decision = await applyCanonicalGuardianDecision({
+      requestId: pendingId,
+      action: 'approve_once',
+      actorContext: guardianActor(),
+    });
+    expect(decision.applied).toBe(true);
+    if (!decision.applied) return;
+    expect(decision.grantMinted).toBe(true);
+    const stored = getCanonicalGuardianRequest(pendingId);
+    expect(stored!.status).toBe('approved');
+    // Step 3: the same invocation now consumes the grant and is allowed
+    const granted = await invokeGate();
+    expect(granted.allowed).toBe(true);
+    if (!granted.allowed) return;
+    expect(granted.grantConsumed).toBe(true);
+    // Step 4: the grant was one-time, so replaying the invocation is denied again
+    const replay = await invokeGate();
+    expect(replay.allowed).toBe(false);
+  });
+});

package/src/agent/loop.ts CHANGED Viewed

@@ -4,6 +4,8 @@ import { truncateOversizedToolResults } from '../context/tool-result-truncation.
 import { getHookManager } from '../hooks/manager.js';
 import type { ContentBlock,Message, Provider, ToolDefinition } from '../providers/types.js';
 import type { ToolResultContent } from '../providers/types.js';
+import type { SensitiveOutputBinding } from '../tools/sensitive-output-placeholders.js';
+import { applyStreamingSubstitution, applySubstitutions } from '../tools/sensitive-output-placeholders.js';
 import { getLogger, isDebug, truncateForLog } from '../util/logger.js';
 const log = getLogger('agent-loop');
@@ -63,14 +65,14 @@ export class AgentLoop {
   private tools: ToolDefinition[];
   private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
   private resolveSystemPrompt: ((history: Message[]) => ResolvedSystemPrompt) | null;
-  private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>) | null;
+  private toolExecutor: ((name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>) | null;
   constructor(
     provider: Provider,
     systemPrompt: string,
     config?: Partial<AgentLoopConfig>,
     tools?: ToolDefinition[],
-    toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[] }>,
+    toolExecutor?: (name: string, input: Record<string, unknown>, onOutput?: (chunk: string) => void) => Promise<{ content: string; isError: boolean; diff?: { filePath: string; oldContent: string; newContent: string; isNewFile: boolean }; status?: string; contentBlocks?: ContentBlock[]; sensitiveBindings?: SensitiveOutputBinding[] }>,
     resolveTools?: (history: Message[]) => ToolDefinition[],
     resolveSystemPrompt?: (history: Message[]) => ResolvedSystemPrompt,
   ) {
@@ -97,6 +99,12 @@ export class AgentLoop {
     const debug = isDebug();
     const rlog = requestId ? log.child({ requestId }) : log;
+    // Per-run substitution map for sensitive output placeholders.
+    // Bindings are accumulated from tool results; placeholders are
+    // resolved in streamed deltas and final assistant message text.
+    const substitutionMap = new Map<string, string>();
+    let streamingPending = '';
     while (true) {
       if (signal?.aborted) break;
@@ -188,7 +196,17 @@ export class AgentLoop {
             config: providerConfig,
             onEvent: (event) => {
               if (event.type === 'text_delta') {
-                onEvent({ type: 'text_delta', text: event.text });
+                // Apply sensitive-output placeholder substitution (chunk-safe)
+                if (substitutionMap.size > 0) {
+                  const combined = streamingPending + event.text;
+                  const { emit, pending } = applyStreamingSubstitution(combined, substitutionMap);
+                  streamingPending = pending;
+                  if (emit.length > 0) {
+                    onEvent({ type: 'text_delta', text: emit });
+                  }
+                } else {
+                  onEvent({ type: 'text_delta', text: event.text });
+                }
               } else if (event.type === 'thinking_delta') {
                 onEvent({ type: 'thinking_delta', thinking: event.thinking });
               } else if (event.type === 'input_json_delta') {
@@ -238,6 +256,20 @@ export class AgentLoop {
           durationMs: providerDurationMs,
         });
+        // Flush any buffered streaming text from the substitution pipeline
+        if (streamingPending.length > 0) {
+          const flushed = applySubstitutions(streamingPending, substitutionMap);
+          if (flushed.length > 0) {
+            onEvent({ type: 'text_delta', text: flushed });
+          }
+          streamingPending = '';
+        }
+        // Build the assistant message with placeholder-only text.
+        // Both provider history and persisted conversation store must retain
+        // placeholders so the model never sees real sensitive values — neither
+        // on subsequent loop turns nor on session reload from the database.
+        // Substitution to real values happens only in streamed text_delta events.
         const assistantMessage: Message = {
           role: 'assistant',
           content: response.content,
@@ -391,6 +423,17 @@ export class AgentLoop {
           toolResults = await toolExecutionPromise;
         }
+        // Merge sensitive output bindings from tool results into the
+        // per-run substitution map. Bindings carry placeholder->value pairs
+        // that are resolved in streamed text deltas and final message text.
+        for (const { result } of toolResults) {
+          if (result.sensitiveBindings) {
+            for (const binding of result.sensitiveBindings) {
+              substitutionMap.set(binding.placeholder, binding.value);
+            }
+          }
+        }
         // Collect result blocks preserving tool_use order (Promise.all maintains order)
         const rawResultBlocks: ContentBlock[] = toolResults.map(({ toolUse, result }) => ({
           type: 'tool_result' as const,