@vellumai/assistant 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/bun.lock +139 -2
  5. package/docs/architecture/integrations.md +7 -11
  6. package/package.json +2 -1
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  8. package/src/__tests__/approval-primitive.test.ts +540 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  10. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  12. package/src/__tests__/call-controller.test.ts +439 -108
  13. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  14. package/src/__tests__/cli.test.ts +42 -1
  15. package/src/__tests__/config-schema.test.ts +11 -127
  16. package/src/__tests__/config-watcher.test.ts +0 -8
  17. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  18. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  19. package/src/__tests__/diff.test.ts +22 -0
  20. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  21. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  22. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  23. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  24. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  25. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  26. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  27. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  28. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  29. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  30. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  31. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  32. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  33. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  34. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  35. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  36. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  37. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  38. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  39. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  40. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  41. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  42. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  43. package/src/__tests__/system-prompt.test.ts +1 -1
  44. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  45. package/src/__tests__/terminal-tools.test.ts +2 -93
  46. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  47. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  48. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  49. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  50. package/src/agent/loop.ts +36 -1
  51. package/src/approvals/approval-primitive.ts +381 -0
  52. package/src/approvals/guardian-decision-primitive.ts +191 -0
  53. package/src/calls/call-controller.ts +252 -209
  54. package/src/calls/call-domain.ts +44 -6
  55. package/src/calls/guardian-dispatch.ts +48 -0
  56. package/src/calls/types.ts +1 -1
  57. package/src/calls/voice-session-bridge.ts +46 -30
  58. package/src/cli/core-commands.ts +0 -4
  59. package/src/cli/mcp.ts +58 -0
  60. package/src/cli.ts +76 -34
  61. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  62. package/src/config/assistant-feature-flags.ts +162 -0
  63. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  64. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  65. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  66. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  67. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  68. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  69. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  70. package/src/config/core-schema.ts +1 -1
  71. package/src/config/env-registry.ts +10 -0
  72. package/src/config/feature-flag-registry.json +61 -0
  73. package/src/config/loader.ts +22 -1
  74. package/src/config/mcp-schema.ts +46 -0
  75. package/src/config/sandbox-schema.ts +0 -39
  76. package/src/config/schema.ts +18 -2
  77. package/src/config/skill-state.ts +34 -0
  78. package/src/config/skills-schema.ts +0 -1
  79. package/src/config/skills.ts +9 -0
  80. package/src/config/system-prompt.ts +110 -46
  81. package/src/config/templates/SOUL.md +1 -1
  82. package/src/config/types.ts +19 -1
  83. package/src/config/vellum-skills/catalog.json +1 -1
  84. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  85. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  86. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
  87. package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
  88. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  89. package/src/daemon/config-watcher.ts +0 -1
  90. package/src/daemon/daemon-control.ts +1 -1
  91. package/src/daemon/guardian-invite-intent.ts +124 -0
  92. package/src/daemon/handlers/avatar.ts +68 -0
  93. package/src/daemon/handlers/browser.ts +2 -2
  94. package/src/daemon/handlers/guardian-actions.ts +120 -0
  95. package/src/daemon/handlers/index.ts +4 -0
  96. package/src/daemon/handlers/sessions.ts +19 -0
  97. package/src/daemon/handlers/shared.ts +3 -1
  98. package/src/daemon/install-cli-launchers.ts +58 -13
  99. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  100. package/src/daemon/ipc-contract/sessions.ts +8 -2
  101. package/src/daemon/ipc-contract/settings.ts +25 -2
  102. package/src/daemon/ipc-contract-inventory.json +10 -0
  103. package/src/daemon/ipc-contract.ts +4 -0
  104. package/src/daemon/lifecycle.ts +14 -2
  105. package/src/daemon/main.ts +1 -0
  106. package/src/daemon/providers-setup.ts +26 -1
  107. package/src/daemon/server.ts +1 -0
  108. package/src/daemon/session-lifecycle.ts +52 -7
  109. package/src/daemon/session-memory.ts +45 -0
  110. package/src/daemon/session-process.ts +258 -432
  111. package/src/daemon/session-runtime-assembly.ts +12 -0
  112. package/src/daemon/session-skill-tools.ts +14 -1
  113. package/src/daemon/session-tool-setup.ts +5 -0
  114. package/src/daemon/session.ts +11 -0
  115. package/src/daemon/shutdown-handlers.ts +11 -0
  116. package/src/daemon/tool-side-effects.ts +35 -9
  117. package/src/index.ts +2 -2
  118. package/src/mcp/client.ts +152 -0
  119. package/src/mcp/manager.ts +139 -0
  120. package/src/memory/conversation-display-order-migration.ts +44 -0
  121. package/src/memory/conversation-queries.ts +2 -0
  122. package/src/memory/conversation-store.ts +91 -0
  123. package/src/memory/db-init.ts +5 -1
  124. package/src/memory/embedding-local.ts +13 -8
  125. package/src/memory/guardian-action-store.ts +125 -2
  126. package/src/memory/ingress-invite-store.ts +95 -1
  127. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  128. package/src/memory/migrations/index.ts +2 -1
  129. package/src/memory/schema.ts +5 -1
  130. package/src/memory/scoped-approval-grants.ts +14 -5
  131. package/src/messaging/providers/slack/client.ts +12 -0
  132. package/src/messaging/providers/slack/types.ts +5 -0
  133. package/src/notifications/decision-engine.ts +49 -12
  134. package/src/notifications/emit-signal.ts +7 -0
  135. package/src/notifications/signal.ts +7 -0
  136. package/src/notifications/thread-seed-composer.ts +2 -1
  137. package/src/runtime/channel-approval-types.ts +16 -6
  138. package/src/runtime/channel-approvals.ts +19 -15
  139. package/src/runtime/channel-invite-transport.ts +85 -0
  140. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  141. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  142. package/src/runtime/guardian-action-message-composer.ts +30 -0
  143. package/src/runtime/guardian-decision-types.ts +91 -0
  144. package/src/runtime/http-server.ts +23 -1
  145. package/src/runtime/ingress-service.ts +22 -0
  146. package/src/runtime/invite-redemption-service.ts +181 -0
  147. package/src/runtime/invite-redemption-templates.ts +39 -0
  148. package/src/runtime/routes/call-routes.ts +2 -1
  149. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  150. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  151. package/src/runtime/routes/identity-routes.ts +73 -0
  152. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  153. package/src/runtime/routes/pairing-routes.ts +4 -0
  154. package/src/security/encrypted-store.ts +31 -17
  155. package/src/security/keychain.ts +176 -2
  156. package/src/security/secure-keys.ts +97 -0
  157. package/src/security/tool-approval-digest.ts +1 -1
  158. package/src/tools/browser/browser-execution.ts +2 -2
  159. package/src/tools/browser/browser-manager.ts +46 -32
  160. package/src/tools/browser/browser-screencast.ts +2 -2
  161. package/src/tools/calls/call-start.ts +1 -1
  162. package/src/tools/executor.ts +22 -17
  163. package/src/tools/mcp/mcp-tool-factory.ts +100 -0
  164. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  165. package/src/tools/registry.ts +64 -1
  166. package/src/tools/skills/load.ts +22 -8
  167. package/src/tools/system/avatar-generator.ts +119 -0
  168. package/src/tools/system/navigate-settings.ts +65 -0
  169. package/src/tools/system/open-system-settings.ts +75 -0
  170. package/src/tools/system/voice-config.ts +121 -32
  171. package/src/tools/terminal/backends/native.ts +40 -19
  172. package/src/tools/terminal/backends/types.ts +3 -3
  173. package/src/tools/terminal/parser.ts +1 -1
  174. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  175. package/src/tools/terminal/sandbox.ts +1 -12
  176. package/src/tools/terminal/shell.ts +3 -31
  177. package/src/tools/tool-approval-handler.ts +141 -3
  178. package/src/tools/tool-manifest.ts +6 -0
  179. package/src/tools/types.ts +10 -2
  180. package/src/util/diff.ts +36 -13
  181. package/Dockerfile.sandbox +0 -5
  182. package/src/__tests__/doordash-client.test.ts +0 -187
  183. package/src/__tests__/doordash-session.test.ts +0 -154
  184. package/src/__tests__/signup-e2e.test.ts +0 -354
  185. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  186. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  187. package/src/cli/doordash.ts +0 -1057
  188. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  189. package/src/config/templates/LOOKS.md +0 -25
  190. package/src/doordash/cart-queries.ts +0 -787
  191. package/src/doordash/client.ts +0 -1016
  192. package/src/doordash/order-queries.ts +0 -85
  193. package/src/doordash/queries.ts +0 -13
  194. package/src/doordash/query-extractor.ts +0 -94
  195. package/src/doordash/search-queries.ts +0 -203
  196. package/src/doordash/session.ts +0 -84
  197. package/src/doordash/store-queries.ts +0 -246
  198. package/src/doordash/types.ts +0 -367
  199. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -3,14 +3,10 @@ import * as realChildProcess from 'node:child_process';
3
3
  import { beforeEach, describe, expect, mock, test } from 'bun:test';
4
4
 
5
5
  const execSyncMock = mock((_command: string, _opts?: unknown): unknown => undefined);
6
- const execFileSyncMock = mock(
7
- (_file: string, _args?: readonly string[], _opts?: unknown): unknown => undefined,
8
- );
9
6
 
10
7
  mock.module('node:child_process', () => ({
11
8
  ...realChildProcess,
12
9
  execSync: execSyncMock,
13
- execFileSync: execFileSyncMock,
14
10
  }));
15
11
 
16
12
  // Mock platform detection — default to macOS
@@ -36,18 +32,8 @@ mock.module('../util/platform.js', () => ({
36
32
  // Mock config loader — return a config with sandbox settings
37
33
  let mockSandboxConfig: {
38
34
  enabled: boolean;
39
- backend: 'native' | 'docker';
40
- docker: { image: string; cpus: number; memoryMb: number; pidsLimit: number; network: 'none' | 'bridge' };
41
35
  } = {
42
36
  enabled: true,
43
- backend: 'native',
44
- docker: {
45
- image: 'vellum-sandbox:latest',
46
- cpus: 1,
47
- memoryMb: 512,
48
- pidsLimit: 256,
49
- network: 'none',
50
- },
51
37
  };
52
38
 
53
39
  mock.module('../config/loader.js', () => ({
@@ -72,24 +58,13 @@ const { runSandboxDiagnostics } = await import(
72
58
 
73
59
  beforeEach(() => {
74
60
  execSyncMock.mockReset();
75
- execFileSyncMock.mockReset();
76
61
  mockIsMacOS = true;
77
62
  mockIsLinux = false;
78
63
  mockSandboxConfig = {
79
64
  enabled: true,
80
- backend: 'native',
81
- docker: {
82
- image: 'vellum-sandbox:latest',
83
- cpus: 1,
84
- memoryMb: 512,
85
- pidsLimit: 256,
86
- network: 'none',
87
- },
88
65
  };
89
- // Default: all commands succeed. execSync with encoding returns a string,
90
- // so we must return a string to avoid .trim() throwing on undefined.
91
- execSyncMock.mockImplementation(() => 'Docker version 24.0.7, build afdd53b');
92
- execFileSyncMock.mockImplementation(() => 'ok\n');
66
+ // Default: all commands succeed.
67
+ execSyncMock.mockImplementation(() => undefined);
93
68
  });
94
69
 
95
70
  describe('runSandboxDiagnostics — config reporting', () => {
@@ -103,22 +78,6 @@ describe('runSandboxDiagnostics — config reporting', () => {
103
78
  const result = runSandboxDiagnostics();
104
79
  expect(result.config.enabled).toBe(false);
105
80
  });
106
-
107
- test('reports configured backend', () => {
108
- const result = runSandboxDiagnostics();
109
- expect(result.config.backend).toBe('native');
110
- });
111
-
112
- test('reports docker backend when configured', () => {
113
- mockSandboxConfig.backend = 'docker';
114
- const result = runSandboxDiagnostics();
115
- expect(result.config.backend).toBe('docker');
116
- });
117
-
118
- test('reports docker image', () => {
119
- const result = runSandboxDiagnostics();
120
- expect(result.config.dockerImage).toBe('vellum-sandbox:latest');
121
- });
122
81
  });
123
82
 
124
83
  describe('runSandboxDiagnostics — active backend reason', () => {
@@ -127,12 +86,6 @@ describe('runSandboxDiagnostics — active backend reason', () => {
127
86
  expect(result.activeBackendReason).toContain('Native backend');
128
87
  });
129
88
 
130
- test('explains docker backend selection', () => {
131
- mockSandboxConfig.backend = 'docker';
132
- const result = runSandboxDiagnostics();
133
- expect(result.activeBackendReason).toContain('Docker backend');
134
- });
135
-
136
89
  test('explains when sandbox is disabled', () => {
137
90
  mockSandboxConfig.enabled = false;
138
91
  const result = runSandboxDiagnostics();
@@ -203,207 +156,11 @@ describe('runSandboxDiagnostics — native backend check (unsupported OS)', () =
203
156
  });
204
157
  });
205
158
 
206
- describe('runSandboxDiagnostics — Docker CLI check', () => {
207
- test('passes when docker CLI is available', () => {
208
- execSyncMock.mockImplementation((cmd: string) => {
209
- if (typeof cmd === 'string' && cmd === 'docker --version') {
210
- return 'Docker version 24.0.7, build afdd53b';
211
- }
212
- return undefined;
213
- });
214
- const result = runSandboxDiagnostics();
215
- const cliCheck = result.checks.find((c) => c.label === 'Docker CLI installed');
216
- expect(cliCheck).toBeDefined();
217
- expect(cliCheck!.ok).toBe(true);
218
- expect(cliCheck!.detail).toContain('Docker version');
219
- });
220
-
221
- test('fails when docker CLI is not found', () => {
222
- execSyncMock.mockImplementation((cmd: string) => {
223
- if (typeof cmd === 'string' && cmd === 'docker --version') {
224
- throw new Error('command not found: docker');
225
- }
226
- return undefined;
227
- });
228
- const result = runSandboxDiagnostics();
229
- const cliCheck = result.checks.find((c) => c.label === 'Docker CLI installed');
230
- expect(cliCheck).toBeDefined();
231
- expect(cliCheck!.ok).toBe(false);
232
- expect(cliCheck!.detail).toContain('not found');
233
- });
234
- });
235
-
236
- describe('runSandboxDiagnostics — Docker daemon check', () => {
237
- test('passes when daemon is reachable', () => {
238
- const result = runSandboxDiagnostics();
239
- const daemonCheck = result.checks.find((c) => c.label === 'Docker daemon running');
240
- expect(daemonCheck).toBeDefined();
241
- expect(daemonCheck!.ok).toBe(true);
242
- });
243
-
244
- test('fails when daemon is not running', () => {
245
- execSyncMock.mockImplementation((cmd: string) => {
246
- if (typeof cmd === 'string' && cmd === 'docker info') {
247
- throw new Error('Cannot connect to the Docker daemon');
248
- }
249
- return 'Docker version 24.0.7';
250
- });
251
- const result = runSandboxDiagnostics();
252
- const daemonCheck = result.checks.find((c) => c.label === 'Docker daemon running');
253
- expect(daemonCheck).toBeDefined();
254
- expect(daemonCheck!.ok).toBe(false);
255
- });
256
-
257
- test('skipped when CLI is not available', () => {
258
- execSyncMock.mockImplementation((cmd: string) => {
259
- if (typeof cmd === 'string' && cmd.includes('docker')) {
260
- throw new Error('command not found');
261
- }
262
- return undefined;
263
- });
264
- const result = runSandboxDiagnostics();
265
- const daemonCheck = result.checks.find((c) => c.label === 'Docker daemon running');
266
- expect(daemonCheck).toBeUndefined();
267
- });
268
- });
269
-
270
- describe('runSandboxDiagnostics — Docker image check', () => {
271
- test('passes when image is available locally', () => {
272
- const result = runSandboxDiagnostics();
273
- const imageCheck = result.checks.find((c) => c.label.includes('Docker image available'));
274
- expect(imageCheck).toBeDefined();
275
- expect(imageCheck!.ok).toBe(true);
276
- });
277
-
278
- test('fails when image is not available', () => {
279
- execFileSyncMock.mockImplementation(
280
- (file: string, args?: readonly string[]) => {
281
- if (file === 'docker' && Array.isArray(args) && args.includes('inspect')) {
282
- throw new Error('No such image');
283
- }
284
- return 'ok\n';
285
- },
286
- );
287
- const result = runSandboxDiagnostics();
288
- const imageCheck = result.checks.find((c) => c.label.includes('Docker image available'));
289
- expect(imageCheck).toBeDefined();
290
- expect(imageCheck!.ok).toBe(false);
291
- expect(imageCheck!.detail).toContain('docker build');
292
- });
293
-
294
- test('includes configured image name in label', () => {
295
- mockSandboxConfig.docker.image = 'alpine:3.19';
296
- const result = runSandboxDiagnostics();
297
- const imageCheck = result.checks.find((c) => c.label.includes('Docker image available'));
298
- expect(imageCheck).toBeDefined();
299
- expect(imageCheck!.label).toContain('alpine:3.19');
300
- });
301
-
302
- test('skipped when daemon is not running', () => {
303
- execSyncMock.mockImplementation((cmd: string) => {
304
- if (typeof cmd === 'string' && cmd === 'docker info') {
305
- throw new Error('Cannot connect');
306
- }
307
- return 'Docker version 24.0.7';
308
- });
309
- const result = runSandboxDiagnostics();
310
- const imageCheck = result.checks.find((c) => c.label.includes('Docker image available'));
311
- expect(imageCheck).toBeUndefined();
312
- });
313
- });
314
-
315
- describe('runSandboxDiagnostics — Docker mount writable check', () => {
316
- test('passes when mount probe succeeds', () => {
317
- const result = runSandboxDiagnostics();
318
- const mountCheck = result.checks.find((c) => c.label === 'Docker mount writable');
319
- expect(mountCheck).toBeDefined();
320
- expect(mountCheck!.ok).toBe(true);
321
- });
322
-
323
- test('uses configured image and sandbox working dir for mount probe', () => {
324
- mockSandboxConfig.docker.image = 'alpine:3.19';
325
- runSandboxDiagnostics();
326
- const runCall = execFileSyncMock.mock.calls.find(
327
- (call: unknown[]) => call[0] === 'docker' && Array.isArray(call[1]) && call[1].includes('run'),
328
- );
329
- expect(runCall).toBeDefined();
330
- const args = runCall![1] as string[];
331
- expect(args).toContain('alpine:3.19');
332
- // Mount source should be the sandbox working dir (getSandboxWorkingDir)
333
- const mountArg = args.find((a: string) => a.startsWith('type=bind'));
334
- expect(mountArg).toContain('/tmp/vellum-test/workspace');
335
- // Probe command should be 'test -w /workspace' matching runtime preflight
336
- expect(args).toContain('test');
337
- expect(args).toContain('-w');
338
- expect(args).toContain('/workspace');
339
- });
340
-
341
- test('fails when mount probe errors', () => {
342
- execFileSyncMock.mockImplementation(
343
- (file: string, args?: readonly string[]) => {
344
- if (file === 'docker' && Array.isArray(args) && args.includes('run')) {
345
- throw new Error('mount failed');
346
- }
347
- return undefined;
348
- },
349
- );
350
- const result = runSandboxDiagnostics();
351
- const mountCheck = result.checks.find((c) => c.label === 'Docker mount writable');
352
- expect(mountCheck).toBeDefined();
353
- expect(mountCheck!.ok).toBe(false);
354
- expect(mountCheck!.detail).toContain('File Sharing');
355
- });
356
-
357
- test('skipped when daemon is not running', () => {
358
- execSyncMock.mockImplementation((cmd: string) => {
359
- if (typeof cmd === 'string' && cmd === 'docker info') {
360
- throw new Error('Cannot connect');
361
- }
362
- return 'Docker version 24.0.7';
363
- });
364
- const result = runSandboxDiagnostics();
365
- const mountCheck = result.checks.find((c) => c.label === 'Docker mount writable');
366
- expect(mountCheck).toBeUndefined();
367
- });
368
- });
369
-
370
- describe('runSandboxDiagnostics — check cascade', () => {
371
- test('Docker daemon, image, and run checks are skipped when CLI is missing', () => {
372
- execSyncMock.mockImplementation((cmd: string) => {
373
- if (typeof cmd === 'string' && cmd.includes('docker')) {
374
- throw new Error('not found');
375
- }
376
- return undefined;
377
- });
378
- const result = runSandboxDiagnostics();
379
- const labels = result.checks.map((c) => c.label);
380
- expect(labels).toContain('Docker CLI installed');
381
- expect(labels).not.toContain('Docker daemon running');
382
- expect(labels.find((l) => l.includes('Docker image'))).toBeUndefined();
383
- expect(labels).not.toContain('Docker mount writable');
384
- });
385
-
386
- test('image and run checks are skipped when daemon is down', () => {
387
- execSyncMock.mockImplementation((cmd: string) => {
388
- if (typeof cmd === 'string' && cmd === 'docker info') {
389
- throw new Error('Cannot connect');
390
- }
391
- return 'Docker version 24.0.7';
392
- });
393
- const result = runSandboxDiagnostics();
394
- const labels = result.checks.map((c) => c.label);
395
- expect(labels).toContain('Docker CLI installed');
396
- expect(labels).toContain('Docker daemon running');
397
- expect(labels.find((l) => l.includes('Docker image'))).toBeUndefined();
398
- expect(labels).not.toContain('Docker mount writable');
399
- });
400
-
401
- test('all Docker checks run when everything works', () => {
159
+ describe('runSandboxDiagnostics — only native checks', () => {
160
+ test('only includes native backend check', () => {
402
161
  const result = runSandboxDiagnostics();
403
162
  const labels = result.checks.map((c) => c.label);
404
- expect(labels).toContain('Docker CLI installed');
405
- expect(labels).toContain('Docker daemon running');
406
- expect(labels.find((l) => l.includes('Docker image'))).toBeDefined();
407
- expect(labels).toContain('Docker mount writable');
163
+ expect(labels).toHaveLength(1);
164
+ expect(labels[0]).toContain('Native sandbox');
408
165
  });
409
166
  });
@@ -50,7 +50,6 @@ import { formatShellOutput, MAX_OUTPUT_LENGTH } from '../tools/shared/shell-outp
50
50
 
51
51
  // Dynamically import modules that depend on the mocked logger
52
52
  const { NativeBackend } = await import('../tools/terminal/backends/native.js');
53
- const { DockerBackend, _resetDockerChecks } = await import('../tools/terminal/backends/docker.js');
54
53
  const { wrapCommand } = await import('../tools/terminal/sandbox.js');
55
54
  const { ToolError } = await import('../util/errors.js');
56
55
 
@@ -589,7 +588,7 @@ describe('SandboxResult shape consistency across backends', () => {
589
588
  });
590
589
 
591
590
  test('wrapCommand disabled returns bash with sandboxed=false', () => {
592
- const result = wrapCommand('echo hi', '/tmp', { enabled: false, backend: 'native', docker: { image: 'vellum-sandbox:latest', shell: 'bash', cpus: 1, memoryMb: 512, pidsLimit: 256, network: 'none' } });
591
+ const result = wrapCommand('echo hi', '/tmp', { enabled: false });
593
592
 
594
593
  expect(result.command).toBe('bash');
595
594
  expect(result.args).toEqual(['-c', '--', 'echo hi']);
@@ -597,7 +596,7 @@ describe('SandboxResult shape consistency across backends', () => {
597
596
  });
598
597
 
599
598
  test('wrapCommand disabled result has same shape as enabled result', () => {
600
- const disabled = wrapCommand('echo hi', '/tmp', { enabled: false, backend: 'native', docker: { image: 'vellum-sandbox:latest', shell: 'bash', cpus: 1, memoryMb: 512, pidsLimit: 256, network: 'none' } });
599
+ const disabled = wrapCommand('echo hi', '/tmp', { enabled: false });
601
600
 
602
601
  // Both must have: command (string), args (string[]), sandboxed (boolean)
603
602
  expect(typeof disabled.command).toBe('string');
@@ -859,26 +858,20 @@ describe('Regression: edge cases in shared FileSystemOps', () => {
859
858
  });
860
859
 
861
860
  // ===========================================================================
862
- // 9. Docker backend shape parity with native backend
861
+ // 9. NativeBackend shape verification
863
862
  // ===========================================================================
864
863
 
865
- describe('DockerBackend vs NativeBackend: SandboxResult shape parity', () => {
866
- test('both backends produce results with command, args, sandboxed fields', () => {
867
- // Verify both classes have a wrap method that returns SandboxResult
864
+ describe('NativeBackend: SandboxResult shape', () => {
865
+ test('NativeBackend has a wrap method', () => {
868
866
  const native = new NativeBackend();
869
867
  expect(typeof native.wrap).toBe('function');
870
-
871
- _resetDockerChecks();
872
- // DockerBackend requires a real sandbox root for construction
873
- const docker = new DockerBackend(realpathSync('/tmp'), undefined, 1000, 1000);
874
- expect(typeof docker.wrap).toBe('function');
875
868
  });
876
869
 
877
870
  test('disabled sandbox returns consistent bash -c -- invocation', () => {
878
871
  // Various commands should all be wrapped consistently when disabled
879
872
  const commands = ['echo hello', 'ls -la', 'cat /etc/hosts', 'true && false'];
880
873
  for (const cmd of commands) {
881
- const result = wrapCommand(cmd, '/tmp', { enabled: false, backend: 'native', docker: { image: 'vellum-sandbox:latest', shell: 'bash', cpus: 1, memoryMb: 512, pidsLimit: 256, network: 'none' } });
874
+ const result = wrapCommand(cmd, '/tmp', { enabled: false });
882
875
  expect(result.command).toBe('bash');
883
876
  expect(result.args[0]).toBe('-c');
884
877
  expect(result.args[1]).toBe('--');
@@ -29,16 +29,16 @@ mock.module('../util/logger.js', () => ({
29
29
  truncateForLog: (value: string) => value,
30
30
  }));
31
31
 
32
+ import { getDb, initializeDb, resetDb } from '../memory/db.js';
33
+ import { scopedApprovalGrants } from '../memory/schema.js';
32
34
  import {
35
+ _internal,
33
36
  type CreateScopedApprovalGrantParams,
34
- consumeScopedApprovalGrantByRequestId,
35
- consumeScopedApprovalGrantByToolSignature,
36
- createScopedApprovalGrant,
37
37
  expireScopedApprovalGrants,
38
38
  revokeScopedApprovalGrantsForContext,
39
39
  } from '../memory/scoped-approval-grants.js';
40
- import { getDb, initializeDb, resetDb } from '../memory/db.js';
41
- import { scopedApprovalGrants } from '../memory/schema.js';
40
+
41
+ const { consumeScopedApprovalGrantByRequestId, consumeScopedApprovalGrantByToolSignature, createScopedApprovalGrant } = _internal;
42
42
  import {
43
43
  canonicalJsonSerialize,
44
44
  computeToolApprovalDigest,
@@ -389,7 +389,7 @@ describe('scoped-approval-grants / expiry', () => {
389
389
  });
390
390
 
391
391
  test('already-consumed grants are not affected by expiry sweep', () => {
392
- const pastExpiry = new Date(Date.now() - 1_000).toISOString();
392
+ const _pastExpiry = new Date(Date.now() - 1_000).toISOString();
393
393
  createScopedApprovalGrant(
394
394
  grantParams({ scopeMode: 'request_id', requestId: 'req-consumed', expiresAt: new Date(Date.now() + 60_000).toISOString() }),
395
395
  );
@@ -54,13 +54,14 @@ mock.module('../util/logger.js', () => ({
54
54
  truncateForLog: (value: string) => value,
55
55
  }));
56
56
 
57
+ import { getDb, initializeDb, resetDb } from '../memory/db.js';
58
+ import { scopedApprovalGrants } from '../memory/schema.js';
57
59
  import {
60
+ _internal,
58
61
  type CreateScopedApprovalGrantParams,
59
- consumeScopedApprovalGrantByToolSignature,
60
- createScopedApprovalGrant,
61
62
  } from '../memory/scoped-approval-grants.js';
62
- import { getDb, initializeDb, resetDb } from '../memory/db.js';
63
- import { scopedApprovalGrants } from '../memory/schema.js';
63
+
64
+ const { consumeScopedApprovalGrantByToolSignature, createScopedApprovalGrant } = _internal;
64
65
  import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
65
66
 
66
67
  initializeDb();
@@ -169,25 +169,7 @@ describe('session-manager', () => {
169
169
  expect(() => getSessionEnv(session.id)).toThrow(/not active/);
170
170
  });
171
171
 
172
- test('returns host.docker.internal URL when dockerMode is true', async () => {
173
- const session = createSession(CONV_ID, CRED_IDS);
174
- const started = await startSession(session.id);
175
- const env = getSessionEnv(session.id, { dockerMode: true });
176
-
177
- expect(env.HTTP_PROXY).toBe(`http://host.docker.internal:${started.port}`);
178
- expect(env.HTTPS_PROXY).toBe(`http://host.docker.internal:${started.port}`);
179
- });
180
-
181
- test('returns 127.0.0.1 URL when dockerMode is false', async () => {
182
- const session = createSession(CONV_ID, CRED_IDS);
183
- const started = await startSession(session.id);
184
- const env = getSessionEnv(session.id, { dockerMode: false });
185
-
186
- expect(env.HTTP_PROXY).toBe(`http://127.0.0.1:${started.port}`);
187
- expect(env.HTTPS_PROXY).toBe(`http://127.0.0.1:${started.port}`);
188
- });
189
-
190
- test('returns 127.0.0.1 URL when no options are passed', async () => {
172
+ test('returns 127.0.0.1 URL for active session', async () => {
191
173
  const session = createSession(CONV_ID, CRED_IDS);
192
174
  const started = await startSession(session.id);
193
175
  const env = getSessionEnv(session.id);
@@ -1,4 +1,4 @@
1
- import { describe, expect, mock, test } from 'bun:test';
1
+ import { beforeEach, describe, expect, mock, test } from 'bun:test';
2
2
 
3
3
  import type { Message } from '../providers/types.js';
4
4
 
@@ -49,14 +49,27 @@ mock.module('../security/secret-allowlist.js', () => ({
49
49
  }));
50
50
 
51
51
  // Mutable store so each test can configure its own messages
52
- let mockDbMessages: Array<{ id: string; role: string; content: string }> = [];
52
+ let mockDbMessages: Array<{ id: string; role: string; content: string; metadata?: string | null }> = [];
53
53
  let mockConversation: Record<string, unknown> | null = null;
54
+ let nextMockMessageId = 1;
54
55
 
55
56
  mock.module('../memory/conversation-store.js', () => ({
56
57
  getMessages: () => mockDbMessages,
57
58
  getConversation: () => mockConversation,
58
59
  createConversation: () => ({ id: 'conv-1' }),
59
60
  listConversations: () => [],
61
+ addMessage: async (_conversationId: string, role: string, content: string, metadata?: Record<string, unknown>) => {
62
+ const id = `persisted-${nextMockMessageId++}`;
63
+ mockDbMessages.push({
64
+ id,
65
+ role,
66
+ content,
67
+ metadata: metadata ? JSON.stringify(metadata) : null,
68
+ });
69
+ return { id };
70
+ },
71
+ setConversationOriginChannelIfUnset: () => {},
72
+ setConversationOriginInterfaceIfUnset: () => {},
60
73
  }));
61
74
 
62
75
  import { Session } from '../daemon/session.js';
@@ -67,6 +80,10 @@ function makeSession(): Session {
67
80
  }
68
81
 
69
82
  describe('loadFromDb history repair', () => {
83
+ beforeEach(() => {
84
+ nextMockMessageId = 1;
85
+ });
86
+
70
87
  test('repairs corrupt persisted history: missing tool_result inserted', async () => {
71
88
  mockConversation = {
72
89
  id: 'conv-1',
@@ -220,4 +237,154 @@ describe('loadFromDb history repair', () => {
220
237
  expect(messages).toHaveLength(2);
221
238
  expect(messages[1].content).toEqual([{ type: 'text', text: 'Sure' }]);
222
239
  });
240
+
241
+ test('untrusted actor load hides guardian-provenance history and context summary', async () => {
242
+ mockConversation = {
243
+ id: 'conv-1',
244
+ contextSummary: 'Sensitive guardian summary',
245
+ contextCompactedMessageCount: 3,
246
+ totalInputTokens: 0,
247
+ totalOutputTokens: 0,
248
+ totalEstimatedCost: 0,
249
+ };
250
+ mockDbMessages = [
251
+ {
252
+ id: 'm1',
253
+ role: 'user',
254
+ content: JSON.stringify([{ type: 'text', text: 'Guardian secret question' }]),
255
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
256
+ },
257
+ {
258
+ id: 'm2',
259
+ role: 'assistant',
260
+ content: JSON.stringify([{ type: 'text', text: 'Guardian-only answer' }]),
261
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
262
+ },
263
+ {
264
+ id: 'm3',
265
+ role: 'user',
266
+ content: JSON.stringify([{ type: 'text', text: 'Untrusted follow-up' }]),
267
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
268
+ },
269
+ {
270
+ id: 'm4',
271
+ role: 'assistant',
272
+ content: JSON.stringify([{ type: 'text', text: 'Untrusted-safe reply' }]),
273
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
274
+ },
275
+ ];
276
+
277
+ const session = makeSession();
278
+ session.setGuardianContext({ actorRole: 'unverified_channel', sourceChannel: 'telegram' });
279
+ await session.loadFromDb();
280
+ const messages = session.getMessages();
281
+
282
+ expect(messages).toHaveLength(2);
283
+ expect(messages[0].role).toBe('user');
284
+ expect(messages[0].content).toEqual([{ type: 'text', text: 'Untrusted follow-up' }]);
285
+ expect(messages[1].role).toBe('assistant');
286
+ expect(messages[1].content).toEqual([{ type: 'text', text: 'Untrusted-safe reply' }]);
287
+ });
288
+
289
+ test('ensureActorScopedHistory reloads when actor role changes', async () => {
290
+ mockConversation = {
291
+ id: 'conv-1',
292
+ contextSummary: null,
293
+ contextCompactedMessageCount: 0,
294
+ totalInputTokens: 0,
295
+ totalOutputTokens: 0,
296
+ totalEstimatedCost: 0,
297
+ };
298
+ mockDbMessages = [
299
+ {
300
+ id: 'm1',
301
+ role: 'user',
302
+ content: JSON.stringify([{ type: 'text', text: 'Guardian question' }]),
303
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
304
+ },
305
+ {
306
+ id: 'm2',
307
+ role: 'assistant',
308
+ content: JSON.stringify([{ type: 'text', text: 'Guardian answer' }]),
309
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
310
+ },
311
+ {
312
+ id: 'm3',
313
+ role: 'user',
314
+ content: JSON.stringify([{ type: 'text', text: 'Unverified ping' }]),
315
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
316
+ },
317
+ {
318
+ id: 'm4',
319
+ role: 'assistant',
320
+ content: JSON.stringify([{ type: 'text', text: 'Unverified reply' }]),
321
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
322
+ },
323
+ ];
324
+
325
+ const session = makeSession();
326
+
327
+ session.setGuardianContext({ actorRole: 'guardian', sourceChannel: 'telegram' });
328
+ await session.ensureActorScopedHistory();
329
+ expect(session.getMessages()).toHaveLength(4);
330
+
331
+ session.setGuardianContext({ actorRole: 'unverified_channel', sourceChannel: 'telegram' });
332
+ await session.ensureActorScopedHistory();
333
+ const downgradedMessages = session.getMessages();
334
+ expect(downgradedMessages).toHaveLength(2);
335
+ expect(downgradedMessages[0].content).toEqual([{ type: 'text', text: 'Unverified ping' }]);
336
+ expect(downgradedMessages[1].content).toEqual([{ type: 'text', text: 'Unverified reply' }]);
337
+ });
338
+
339
+ test('persistUserMessage reloads actor-scoped history before persisting on role switch', async () => {
340
+ mockConversation = {
341
+ id: 'conv-1',
342
+ contextSummary: null,
343
+ contextCompactedMessageCount: 0,
344
+ totalInputTokens: 0,
345
+ totalOutputTokens: 0,
346
+ totalEstimatedCost: 0,
347
+ };
348
+ mockDbMessages = [
349
+ {
350
+ id: 'm1',
351
+ role: 'user',
352
+ content: JSON.stringify([{ type: 'text', text: 'Guardian-only question' }]),
353
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
354
+ },
355
+ {
356
+ id: 'm2',
357
+ role: 'assistant',
358
+ content: JSON.stringify([{ type: 'text', text: 'Guardian-only answer' }]),
359
+ metadata: JSON.stringify({ provenanceActorRole: 'guardian', provenanceSourceChannel: 'telegram' }),
360
+ },
361
+ {
362
+ id: 'm3',
363
+ role: 'user',
364
+ content: JSON.stringify([{ type: 'text', text: 'Unverified ping' }]),
365
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
366
+ },
367
+ {
368
+ id: 'm4',
369
+ role: 'assistant',
370
+ content: JSON.stringify([{ type: 'text', text: 'Unverified reply' }]),
371
+ metadata: JSON.stringify({ provenanceActorRole: 'unverified_channel', provenanceSourceChannel: 'telegram' }),
372
+ },
373
+ ];
374
+
375
+ const session = makeSession();
376
+
377
+ session.setGuardianContext({ actorRole: 'unverified_channel', sourceChannel: 'telegram' });
378
+ await session.ensureActorScopedHistory();
379
+ expect(session.getMessages()).toHaveLength(2);
380
+
381
+ session.setGuardianContext({ actorRole: 'guardian', sourceChannel: 'telegram' });
382
+ await session.persistUserMessage('Guardian follow-up', []);
383
+ const messagesAfterPersist = session.getMessages();
384
+
385
+ expect(messagesAfterPersist).toHaveLength(5);
386
+ expect(messagesAfterPersist[0].content).toEqual([{ type: 'text', text: 'Guardian-only question' }]);
387
+ expect(messagesAfterPersist[1].content).toEqual([{ type: 'text', text: 'Guardian-only answer' }]);
388
+ expect(messagesAfterPersist[4].content).toEqual([{ type: 'text', text: 'Guardian follow-up' }]);
389
+ });
223
390
  });