npm - @vellumai/assistant - Versions diffs - 0.3.19 → 0.3.21 - Mend

@vellumai/assistant 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (199) hide show

package/ARCHITECTURE.md +151 -15
package/Dockerfile +1 -0
package/README.md +40 -4
package/bun.lock +139 -2
package/docs/architecture/integrations.md +7 -11
package/package.json +2 -1
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
package/src/__tests__/approval-primitive.test.ts +540 -0
package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
package/src/__tests__/call-controller.test.ts +439 -108
package/src/__tests__/channel-invite-transport.test.ts +264 -0
package/src/__tests__/cli.test.ts +42 -1
package/src/__tests__/config-schema.test.ts +11 -127
package/src/__tests__/config-watcher.test.ts +0 -8
package/src/__tests__/daemon-lifecycle.test.ts +1 -0
package/src/__tests__/daemon-server-session-init.test.ts +8 -2
package/src/__tests__/diff.test.ts +22 -0
package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
package/src/__tests__/guardian-dispatch.test.ts +124 -0
package/src/__tests__/guardian-grant-minting.test.ts +6 -17
package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
package/src/__tests__/invite-redemption-service.test.ts +306 -0
package/src/__tests__/ipc-snapshot.test.ts +57 -0
package/src/__tests__/notification-decision-fallback.test.ts +88 -0
package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
package/src/__tests__/sandbox-host-parity.test.ts +6 -13
package/src/__tests__/scoped-approval-grants.test.ts +6 -6
package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
package/src/__tests__/session-load-history-repair.test.ts +169 -2
package/src/__tests__/session-runtime-assembly.test.ts +33 -5
package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
package/src/__tests__/skill-feature-flags.test.ts +188 -0
package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
package/src/__tests__/skill-mirror-parity.test.ts +1 -0
package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
package/src/__tests__/system-prompt.test.ts +1 -1
package/src/__tests__/terminal-sandbox.test.ts +142 -9
package/src/__tests__/terminal-tools.test.ts +2 -93
package/src/__tests__/thread-seed-composer.test.ts +18 -0
package/src/__tests__/tool-approval-handler.test.ts +350 -0
package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
package/src/agent/loop.ts +36 -1
package/src/approvals/approval-primitive.ts +381 -0
package/src/approvals/guardian-decision-primitive.ts +191 -0
package/src/calls/call-controller.ts +252 -209
package/src/calls/call-domain.ts +44 -6
package/src/calls/guardian-dispatch.ts +48 -0
package/src/calls/types.ts +1 -1
package/src/calls/voice-session-bridge.ts +46 -30
package/src/cli/core-commands.ts +0 -4
package/src/cli/mcp.ts +58 -0
package/src/cli.ts +76 -34
package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
package/src/config/assistant-feature-flags.ts +162 -0
package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
package/src/config/bundled-skills/notifications/SKILL.md +1 -1
package/src/config/bundled-skills/reminder/SKILL.md +49 -2
package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
package/src/config/core-schema.ts +1 -1
package/src/config/env-registry.ts +10 -0
package/src/config/feature-flag-registry.json +61 -0
package/src/config/loader.ts +22 -1
package/src/config/mcp-schema.ts +46 -0
package/src/config/sandbox-schema.ts +0 -39
package/src/config/schema.ts +18 -2
package/src/config/skill-state.ts +34 -0
package/src/config/skills-schema.ts +0 -1
package/src/config/skills.ts +9 -0
package/src/config/system-prompt.ts +110 -46
package/src/config/templates/SOUL.md +1 -1
package/src/config/types.ts +19 -1
package/src/config/vellum-skills/catalog.json +1 -1
package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
package/src/daemon/config-watcher.ts +0 -1
package/src/daemon/daemon-control.ts +1 -1
package/src/daemon/guardian-invite-intent.ts +124 -0
package/src/daemon/handlers/avatar.ts +68 -0
package/src/daemon/handlers/browser.ts +2 -2
package/src/daemon/handlers/guardian-actions.ts +120 -0
package/src/daemon/handlers/index.ts +4 -0
package/src/daemon/handlers/sessions.ts +19 -0
package/src/daemon/handlers/shared.ts +3 -1
package/src/daemon/install-cli-launchers.ts +58 -13
package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
package/src/daemon/ipc-contract/sessions.ts +8 -2
package/src/daemon/ipc-contract/settings.ts +25 -2
package/src/daemon/ipc-contract-inventory.json +10 -0
package/src/daemon/ipc-contract.ts +4 -0
package/src/daemon/lifecycle.ts +14 -2
package/src/daemon/main.ts +1 -0
package/src/daemon/providers-setup.ts +26 -1
package/src/daemon/server.ts +1 -0
package/src/daemon/session-lifecycle.ts +52 -7
package/src/daemon/session-memory.ts +45 -0
package/src/daemon/session-process.ts +258 -432
package/src/daemon/session-runtime-assembly.ts +12 -0
package/src/daemon/session-skill-tools.ts +14 -1
package/src/daemon/session-tool-setup.ts +5 -0
package/src/daemon/session.ts +11 -0
package/src/daemon/shutdown-handlers.ts +11 -0
package/src/daemon/tool-side-effects.ts +35 -9
package/src/index.ts +2 -2
package/src/mcp/client.ts +152 -0
package/src/mcp/manager.ts +139 -0
package/src/memory/conversation-display-order-migration.ts +44 -0
package/src/memory/conversation-queries.ts +2 -0
package/src/memory/conversation-store.ts +91 -0
package/src/memory/db-init.ts +5 -1
package/src/memory/embedding-local.ts +13 -8
package/src/memory/guardian-action-store.ts +125 -2
package/src/memory/ingress-invite-store.ts +95 -1
package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
package/src/memory/migrations/index.ts +2 -1
package/src/memory/schema.ts +5 -1
package/src/memory/scoped-approval-grants.ts +14 -5
package/src/messaging/providers/slack/client.ts +12 -0
package/src/messaging/providers/slack/types.ts +5 -0
package/src/notifications/decision-engine.ts +49 -12
package/src/notifications/emit-signal.ts +7 -0
package/src/notifications/signal.ts +7 -0
package/src/notifications/thread-seed-composer.ts +2 -1
package/src/runtime/channel-approval-types.ts +16 -6
package/src/runtime/channel-approvals.ts +19 -15
package/src/runtime/channel-invite-transport.ts +85 -0
package/src/runtime/channel-invite-transports/telegram.ts +105 -0
package/src/runtime/guardian-action-grant-minter.ts +92 -35
package/src/runtime/guardian-action-message-composer.ts +30 -0
package/src/runtime/guardian-decision-types.ts +91 -0
package/src/runtime/http-server.ts +23 -1
package/src/runtime/ingress-service.ts +22 -0
package/src/runtime/invite-redemption-service.ts +181 -0
package/src/runtime/invite-redemption-templates.ts +39 -0
package/src/runtime/routes/call-routes.ts +2 -1
package/src/runtime/routes/guardian-action-routes.ts +206 -0
package/src/runtime/routes/guardian-approval-interception.ts +66 -190
package/src/runtime/routes/identity-routes.ts +73 -0
package/src/runtime/routes/inbound-message-handler.ts +486 -394
package/src/runtime/routes/pairing-routes.ts +4 -0
package/src/security/encrypted-store.ts +31 -17
package/src/security/keychain.ts +176 -2
package/src/security/secure-keys.ts +97 -0
package/src/security/tool-approval-digest.ts +1 -1
package/src/tools/browser/browser-execution.ts +2 -2
package/src/tools/browser/browser-manager.ts +46 -32
package/src/tools/browser/browser-screencast.ts +2 -2
package/src/tools/calls/call-start.ts +1 -1
package/src/tools/executor.ts +22 -17
package/src/tools/mcp/mcp-tool-factory.ts +100 -0
package/src/tools/network/script-proxy/session-manager.ts +1 -5
package/src/tools/registry.ts +64 -1
package/src/tools/skills/load.ts +22 -8
package/src/tools/system/avatar-generator.ts +119 -0
package/src/tools/system/navigate-settings.ts +65 -0
package/src/tools/system/open-system-settings.ts +75 -0
package/src/tools/system/voice-config.ts +121 -32
package/src/tools/terminal/backends/native.ts +40 -19
package/src/tools/terminal/backends/types.ts +3 -3
package/src/tools/terminal/parser.ts +1 -1
package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
package/src/tools/terminal/sandbox.ts +1 -12
package/src/tools/terminal/shell.ts +3 -31
package/src/tools/tool-approval-handler.ts +141 -3
package/src/tools/tool-manifest.ts +6 -0
package/src/tools/types.ts +10 -2
package/src/util/diff.ts +36 -13
package/Dockerfile.sandbox +0 -5
package/src/__tests__/doordash-client.test.ts +0 -187
package/src/__tests__/doordash-session.test.ts +0 -154
package/src/__tests__/signup-e2e.test.ts +0 -354
package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
package/src/cli/doordash.ts +0 -1057
package/src/config/bundled-skills/doordash/SKILL.md +0 -163
package/src/config/templates/LOOKS.md +0 -25
package/src/doordash/cart-queries.ts +0 -787
package/src/doordash/client.ts +0 -1016
package/src/doordash/order-queries.ts +0 -85
package/src/doordash/queries.ts +0 -13
package/src/doordash/query-extractor.ts +0 -94
package/src/doordash/search-queries.ts +0 -203
package/src/doordash/session.ts +0 -84
package/src/doordash/store-queries.ts +0 -246
package/src/doordash/types.ts +0 -367
package/src/tools/terminal/backends/docker.ts +0 -379

package/src/__tests__/skill-projection-feature-flag.test.ts ADDED Viewed

@@ -0,0 +1,363 @@
+/**
+ * Tests that projectSkillTools drops flag-OFF active skills from projected
+ * tools, even when conversation history contains old markers for those skills.
+ */
+import * as realFs from 'node:fs';
+import { beforeEach, describe, expect, mock, test } from 'bun:test';
+import type { SkillSummary, SkillToolManifest } from '../config/skills.js';
+import { RiskLevel } from '../permissions/types.js';
+import type { Message } from '../providers/types.js';
+import type { Tool } from '../tools/types.js';
+// ---------------------------------------------------------------------------
+// Mock state
+// ---------------------------------------------------------------------------
+let mockCatalog: SkillSummary[] = [];
+let mockManifests: Record<string, SkillToolManifest | null> = {};
+let mockRegisteredTools: Map<string, Tool[]> = new Map();
+let mockUnregisteredSkillIds: string[] = [];
+let mockSkillRefCount: Map<string, number> = new Map();
+let currentConfig: Record<string, unknown> = { featureFlags: {} };
+const DECLARED_SKILL_ID = 'hatch-new-assistant';
+const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
+// ---------------------------------------------------------------------------
+// Mocks
+// ---------------------------------------------------------------------------
+mock.module('../config/skills.js', () => ({
+  loadSkillCatalog: () => mockCatalog,
+}));
+mock.module('../config/loader.js', () => ({
+  getConfig: () => currentConfig,
+}));
+mock.module('../skills/active-skill-tools.js', () => {
+  const parseMarkers = (messages: Message[]) => {
+    const skillLoadUseIds = new Set<string>();
+    for (const msg of messages) {
+      for (const block of msg.content) {
+        if (block.type === 'tool_use' && block.name === 'skill_load') {
+          skillLoadUseIds.add(block.id);
+        }
+      }
+    }
+    const re = /<loaded_skill\s+id="([^"]+)"(?:\s+version="([^"]+)")?\s*\/>/g;
+    const seen = new Set<string>();
+    const entries: Array<{ id: string; version?: string }> = [];
+    for (const msg of messages) {
+      for (const block of msg.content) {
+        if (block.type !== 'tool_result') continue;
+        if (!skillLoadUseIds.has(block.tool_use_id)) continue;
+        const text = block.content;
+        if (!text) continue;
+        for (const m of text.matchAll(re)) {
+          if (!seen.has(m[1])) {
+            seen.add(m[1]);
+            const entry: { id: string; version?: string } = { id: m[1] };
+            if (m[2]) entry.version = m[2];
+            entries.push(entry);
+          }
+        }
+      }
+    }
+    return entries;
+  };
+  return {
+    deriveActiveSkills: (messages: Message[]) => parseMarkers(messages),
+    deriveActiveSkillIds: (messages: Message[]) => parseMarkers(messages).map((e) => e.id),
+  };
+});
+mock.module('../skills/tool-manifest.js', () => ({
+  parseToolManifestFile: (filePath: string) => {
+    const parts = filePath.split('/');
+    const skillId = parts[parts.length - 2];
+    const manifest = mockManifests[skillId];
+    if (!manifest) throw new Error(`Mock: no manifest for skill "${skillId}"`);
+    return manifest;
+  },
+}));
+mock.module('../tools/skills/skill-tool-factory.js', () => ({
+  createSkillToolsFromManifest: (
+    entries: SkillToolManifest['tools'],
+    skillId: string,
+    _skillDir: string,
+    versionHash: string,
+    bundled?: boolean,
+  ): Tool[] => {
+    return entries.map((entry) => ({
+      name: entry.name,
+      description: entry.description,
+      category: entry.category,
+      defaultRiskLevel: RiskLevel.Medium,
+      origin: 'skill' as const,
+      ownerSkillId: skillId,
+      ownerSkillVersionHash: versionHash,
+      ownerSkillBundled: bundled ?? undefined,
+      getDefinition: () => ({
+        name: entry.name,
+        description: entry.description,
+        input_schema: entry.input_schema as object,
+      }),
+      execute: async () => ({ content: '', isError: false }),
+    }));
+  },
+}));
+mock.module('../tools/registry.js', () => ({
+  registerSkillTools: (tools: Tool[]) => {
+    const skillIds = new Set<string>();
+    for (const tool of tools) {
+      const skillId = tool.ownerSkillId!;
+      skillIds.add(skillId);
+      const existing = mockRegisteredTools.get(skillId) ?? [];
+      existing.push(tool);
+      mockRegisteredTools.set(skillId, existing);
+    }
+    for (const id of skillIds) {
+      mockSkillRefCount.set(id, (mockSkillRefCount.get(id) ?? 0) + 1);
+    }
+    return tools;
+  },
+  unregisterSkillTools: (skillId: string) => {
+    mockUnregisteredSkillIds.push(skillId);
+    const current = mockSkillRefCount.get(skillId) ?? 0;
+    if (current > 1) {
+      mockSkillRefCount.set(skillId, current - 1);
+      return;
+    }
+    mockSkillRefCount.delete(skillId);
+    mockRegisteredTools.delete(skillId);
+  },
+  getTool: (name: string): Tool | undefined => {
+    let found: Tool | undefined;
+    for (const tools of mockRegisteredTools.values()) {
+      for (const tool of tools) {
+        if (tool.name === name) found = tool;
+      }
+    }
+    return found;
+  },
+  getSkillToolNames: () => {
+    const names: string[] = [];
+    for (const tools of mockRegisteredTools.values()) {
+      for (const tool of tools) {
+        names.push(tool.name);
+      }
+    }
+    return names;
+  },
+}));
+mock.module('node:fs', () => ({
+  ...realFs,
+  existsSync: (p: string) => {
+    if (typeof p === 'string' && p.endsWith('TOOLS.json')) {
+      const parts = p.split('/');
+      const skillId = parts[parts.length - 2];
+      return skillId in mockManifests;
+    }
+    return realFs.existsSync(p);
+  },
+}));
+mock.module('../skills/version-hash.js', () => ({
+  computeSkillVersionHash: (skillDir: string) => {
+    const parts = skillDir.split('/');
+    const skillId = parts[parts.length - 1];
+    return `v1:default-hash-${skillId}`;
+  },
+}));
+mock.module('../util/logger.js', () => ({
+  getLogger: () => ({
+    info: () => {},
+    warn: () => {},
+    debug: () => {},
+    error: () => {},
+  }),
+}));
+// ---------------------------------------------------------------------------
+// Import module under test (after mocks)
+// ---------------------------------------------------------------------------
+const { projectSkillTools, resetSkillToolProjection } = await import(
+  '../daemon/session-skill-tools.js'
+);
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function makeSkill(id: string): SkillSummary {
+  return {
+    id,
+    name: id,
+    description: `Skill ${id}`,
+    directoryPath: `/skills/${id}`,
+    skillFilePath: `/skills/${id}/SKILL.md`,
+    userInvocable: true,
+    disableModelInvocation: false,
+    source: 'managed',
+  };
+}
+function makeManifest(toolNames: string[]): SkillToolManifest {
+  return {
+    version: 1,
+    tools: toolNames.map((name) => ({
+      name,
+      description: `Tool ${name}`,
+      category: 'test',
+      risk: 'medium' as const,
+      input_schema: { type: 'object', properties: {} },
+      executor: 'run.ts',
+      execution_target: 'host' as const,
+    })),
+  };
+}
+/** Build conversation history with a loaded_skill marker. */
+function buildHistoryWithMarker(skillId: string): Message[] {
+  return [
+    {
+      role: 'assistant',
+      content: [{ type: 'tool_use', id: 'tu-1', name: 'skill_load', input: { skill: skillId } }],
+    },
+    {
+      role: 'user',
+      content: [{
+        type: 'tool_result',
+        tool_use_id: 'tu-1',
+        content: `Loaded.\n\n<loaded_skill id="${skillId}" version="v1:default-hash-${skillId}" />`,
+      }],
+    },
+  ];
+}
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+describe('projectSkillTools feature flag enforcement', () => {
+  beforeEach(() => {
+    mockCatalog = [];
+    mockManifests = {};
+    mockRegisteredTools = new Map();
+    mockUnregisteredSkillIds = [];
+    mockSkillRefCount = new Map();
+    currentConfig = { featureFlags: {} };
+    resetSkillToolProjection();
+  });
+  test('no skill tools projected for flag OFF skill even with old markers', () => {
+    mockCatalog = [makeSkill(DECLARED_SKILL_ID)];
+    mockManifests = { [DECLARED_SKILL_ID]: makeManifest(['browser_navigate', 'browser_click']) };
+    // History contains a marker from before the flag was turned off
+    const history = buildHistoryWithMarker(DECLARED_SKILL_ID);
+    const prevActive = new Map<string, string>();
+    // Feature flag is OFF
+    currentConfig = { featureFlags: { [DECLARED_LEGACY_KEY]: false } };
+    const result = projectSkillTools(history, { previouslyActiveSkillIds: prevActive });
+    // No tools should be projected
+    expect(result.toolDefinitions).toHaveLength(0);
+    expect(result.allowedToolNames.size).toBe(0);
+  });
+  test('skill tools projected normally when flag is ON', () => {
+    mockCatalog = [makeSkill(DECLARED_SKILL_ID)];
+    mockManifests = { [DECLARED_SKILL_ID]: makeManifest(['browser_navigate', 'browser_click']) };
+    const history = buildHistoryWithMarker(DECLARED_SKILL_ID);
+    const prevActive = new Map<string, string>();
+    // Feature flag is ON
+    currentConfig = { featureFlags: { [DECLARED_LEGACY_KEY]: true } };
+    const result = projectSkillTools(history, { previouslyActiveSkillIds: prevActive });
+    expect(result.toolDefinitions).toHaveLength(2);
+    expect(result.allowedToolNames.has('browser_navigate')).toBe(true);
+    expect(result.allowedToolNames.has('browser_click')).toBe(true);
+  });
+  test('skill tools projected normally when flag key is absent (defaults to enabled)', () => {
+    mockCatalog = [makeSkill(DECLARED_SKILL_ID)];
+    mockManifests = { [DECLARED_SKILL_ID]: makeManifest(['browser_navigate']) };
+    const history = buildHistoryWithMarker(DECLARED_SKILL_ID);
+    const prevActive = new Map<string, string>();
+    // featureFlags is empty — should default to enabled
+    currentConfig = { featureFlags: {} };
+    const result = projectSkillTools(history, { previouslyActiveSkillIds: prevActive });
+    expect(result.toolDefinitions).toHaveLength(1);
+    expect(result.allowedToolNames.has('browser_navigate')).toBe(true);
+  });
+  test('mixed flag-on and flag-off skills — only flag-on tools projected', () => {
+    mockCatalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
+    mockManifests = {
+      [DECLARED_SKILL_ID]: makeManifest(['browser_navigate']),
+      twitter: makeManifest(['twitter_post']),
+    };
+    const history: Message[] = [
+      {
+        role: 'assistant',
+        content: [
+          { type: 'tool_use', id: 'tu-1', name: 'skill_load', input: { skill: DECLARED_SKILL_ID } },
+        ],
+      },
+      {
+        role: 'user',
+        content: [{
+          type: 'tool_result',
+          tool_use_id: 'tu-1',
+          content: `<loaded_skill id="${DECLARED_SKILL_ID}" version="v1:default-hash-${DECLARED_SKILL_ID}" />`,
+        }],
+      },
+      {
+        role: 'assistant',
+        content: [
+          { type: 'tool_use', id: 'tu-2', name: 'skill_load', input: { skill: 'twitter' } },
+        ],
+      },
+      {
+        role: 'user',
+        content: [{
+          type: 'tool_result',
+          tool_use_id: 'tu-2',
+          content: '<loaded_skill id="twitter" version="v1:default-hash-twitter" />',
+        }],
+      },
+    ];
+    const prevActive = new Map<string, string>();
+    // Declared skill is OFF, twitter is undeclared with no persisted override so remains ON.
+    currentConfig = {
+      featureFlags: { [DECLARED_LEGACY_KEY]: false },
+    };
+    const result = projectSkillTools(history, { previouslyActiveSkillIds: prevActive });
+    const toolNames = result.toolDefinitions.map((t) => t.name);
+    expect(toolNames).toContain('twitter_post');
+    expect(toolNames).not.toContain('browser_navigate');
+  });
+});

package/src/__tests__/system-prompt.test.ts CHANGED Viewed

@@ -49,7 +49,7 @@ mock.module('../util/logger.js', () => ({
 mock.module('../config/loader.js', () => ({
   getConfig: () => ({
-    sandbox: { enabled: true, backend: 'docker' },
+    sandbox: { enabled: true },
   }),
 }));

package/src/__tests__/terminal-sandbox.test.ts CHANGED Viewed

@@ -45,14 +45,12 @@ mock.module('node:fs', () => ({
 const { wrapCommand } = await import('../tools/terminal/sandbox.js');
 const { ToolError } = await import('../util/errors.js');
-const defaultDocker = { image: 'vellum-sandbox:latest', shell: 'bash', cpus: 1, memoryMb: 512, pidsLimit: 256, network: 'none' as const };
 function disabledConfig(): SandboxConfig {
-  return { enabled: false, backend: 'native', docker: defaultDocker };
+  return { enabled: false };
 }
 function nativeConfig(): SandboxConfig {
-  return { enabled: true, backend: 'native', docker: defaultDocker };
+  return { enabled: true };
 }
 describe('terminal sandbox — disabled behavior', () => {
@@ -101,14 +99,20 @@ describe('terminal sandbox — enabled fail-closed behavior', () => {
   });
   test('returns bwrap wrapper when bwrap is available on linux', () => {
+    // GIVEN bwrap is available on a linux platform
     execSyncMock.mockImplementation(() => undefined);
+    // WHEN wrapping a command with the native sandbox config
     const result = wrapCommand('echo hello', '/home/user/project', nativeConfig());
+    // THEN the result uses bwrap with network isolation
     expect(result.command).toBe('bwrap');
     expect(result.sandboxed).toBe(true);
     expect(result.args).toContain('--ro-bind');
     expect(result.args).toContain('--unshare-net');
     expect(result.args).toContain('--unshare-pid');
-    // The user command runs via bash inside the sandbox
+    // AND the user command runs via bash inside the sandbox
     const bashIdx = result.args.indexOf('bash');
     expect(bashIdx).toBeGreaterThan(0);
     expect(result.args.slice(bashIdx)).toEqual(['bash', '-c', '--', 'echo hello']);
@@ -155,13 +159,21 @@ describe('terminal sandbox — macOS sandbox-exec behavior', () => {
   });
   test('returns sandbox-exec wrapper on macOS when enabled', () => {
+    // GIVEN the platform is macOS
+    // (set in beforeEach)
+    // WHEN wrapping a command with the native sandbox config
     const result = wrapCommand('echo hello', '/tmp/project', nativeConfig());
+    // THEN the result uses sandbox-exec
     expect(result.command).toBe('sandbox-exec');
     expect(result.sandboxed).toBe(true);
     expect(result.args[0]).toBe('-f');
-    // Profile path is the second arg
+    // AND the profile path is the second arg
     expect(result.args[1]).toContain('sandbox-profile-');
-    // bash -c -- command follows the profile
+    // AND bash -c -- command follows the profile
     expect(result.args.slice(2)).toEqual(['bash', '-c', '--', 'echo hello']);
   });
@@ -195,10 +207,131 @@ describe('terminal sandbox — backend selection', () => {
     expect(result.sandboxed).toBe(true);
   });
-  test('disabled config ignores backend setting', () => {
-    const config: SandboxConfig = { enabled: false, backend: 'docker', docker: defaultDocker };
+  test('disabled config returns unsandboxed wrapper', () => {
+    const config: SandboxConfig = { enabled: false };
     const result = wrapCommand('echo hello', '/tmp/project', config);
     expect(result.command).toBe('bash');
     expect(result.sandboxed).toBe(false);
   });
 });
+describe('terminal sandbox — proxied network mode on Linux', () => {
+  beforeEach(() => {
+    platform = 'linux';
+    execSyncMock.mockImplementation(() => undefined);
+  });
+  test('omits --unshare-net when networkMode is proxied', () => {
+    /**
+     * Tests that bwrap args omit --unshare-net in proxied mode so the process
+     * can reach the local credential proxy on 127.0.0.1.
+     */
+    // GIVEN bwrap is available on linux
+    // (set in beforeEach)
+    // WHEN wrapping a command with proxied network mode
+    const result = wrapCommand('curl https://example.com', '/home/user/project', nativeConfig(), { networkMode: 'proxied' });
+    // THEN the result uses bwrap
+    expect(result.command).toBe('bwrap');
+    expect(result.sandboxed).toBe(true);
+    // AND --unshare-net is NOT present (network is allowed)
+    expect(result.args).not.toContain('--unshare-net');
+    // AND --unshare-pid is still present (PID isolation remains)
+    expect(result.args).toContain('--unshare-pid');
+  });
+  test('includes --unshare-net when networkMode is off', () => {
+    /**
+     * Tests that bwrap args include --unshare-net when network is off (default).
+     */
+    // GIVEN bwrap is available on linux
+    // (set in beforeEach)
+    // WHEN wrapping a command with network mode off
+    const result = wrapCommand('echo hello', '/home/user/project', nativeConfig(), { networkMode: 'off' });
+    // THEN --unshare-net is present (network is blocked)
+    expect(result.args).toContain('--unshare-net');
+  });
+  test('includes --unshare-net when no options are provided', () => {
+    /**
+     * Tests that the default behavior (no options) blocks network access.
+     */
+    // GIVEN bwrap is available on linux
+    // (set in beforeEach)
+    // WHEN wrapping a command without any options
+    const result = wrapCommand('echo hello', '/home/user/project', nativeConfig());
+    // THEN --unshare-net is present (network is blocked by default)
+    expect(result.args).toContain('--unshare-net');
+  });
+});
+describe('terminal sandbox — proxied network mode on macOS', () => {
+  beforeEach(() => {
+    platform = 'darwin';
+    writeFileSyncMock.mockClear();
+    existsSyncMock.mockImplementation(() => true);
+  });
+  test('writes SBPL profile with allow network when networkMode is proxied', () => {
+    /**
+     * Tests that the macOS sandbox profile allows network access in proxied mode
+     * so the process can reach the local credential proxy.
+     */
+    // GIVEN the platform is macOS
+    // (set in beforeEach)
+    // WHEN wrapping a command with proxied network mode
+    wrapCommand('curl https://example.com', '/tmp/project', nativeConfig(), { networkMode: 'proxied' });
+    // THEN the written profile contains (allow network*) instead of (deny network*)
+    const profileContent = writeFileSyncMock.mock.calls[0]?.[1] as string;
+    expect(profileContent).toContain('(allow network*)');
+    expect(profileContent).not.toContain('(deny network*)');
+  });
+  test('writes SBPL profile with deny network when networkMode is off', () => {
+    /**
+     * Tests that the macOS sandbox profile blocks network access when network
+     * mode is off (the default behavior).
+     */
+    // GIVEN the platform is macOS
+    // (set in beforeEach)
+    // WHEN wrapping a command with network mode off
+    wrapCommand('echo hello', '/tmp/project', nativeConfig(), { networkMode: 'off' });
+    // THEN the written profile contains (deny network*)
+    const profileContent = writeFileSyncMock.mock.calls[0]?.[1] as string;
+    expect(profileContent).toContain('(deny network*)');
+    expect(profileContent).not.toContain('(allow network*)');
+  });
+  test('writes SBPL profile with deny network when no options are provided', () => {
+    /**
+     * Tests that the default behavior (no options) blocks network access on macOS.
+     */
+    // GIVEN the platform is macOS
+    // (set in beforeEach)
+    // WHEN wrapping a command without any options
+    wrapCommand('echo hello', '/tmp/project', nativeConfig());
+    // THEN the written profile contains (deny network*)
+    const profileContent = writeFileSyncMock.mock.calls[0]?.[1] as string;
+    expect(profileContent).toContain('(deny network*)');
+    expect(profileContent).not.toContain('(allow network*)');
+  });
+});

package/src/__tests__/terminal-tools.test.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { mkdirSync, mkdtempSync, rmSync, symlinkSync } from 'node:fs';
+import { mkdtempSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
@@ -467,15 +467,6 @@ describe('buildSanitizedEnv', () => {
 describe('wrapCommand', () => {
   const disabledConfig: SandboxConfig = {
     enabled: false,
-    backend: 'native',
-    docker: {
-      image: 'vellum-sandbox:latest',
-      shell: 'bash',
-      cpus: 1,
-      memoryMb: 512,
-      pidsLimit: 256,
-      network: 'none',
-    },
   };
   test('disabled sandbox returns plain bash invocation', () => {
@@ -546,89 +537,7 @@ describe('Native sandbox backend', () => {
 });
 // ═══════════════════════════════════════════════════════════════════════════
-//  5. Docker sandbox backend
-// ═══════════════════════════════════════════════════════════════════════════
-describe('Docker sandbox backend', () => {
-  let DockerBackend: new (sandboxRoot: string, config?: Record<string, unknown>, uid?: number, gid?: number) => SandboxBackend;
-  let _resetDockerChecks: () => void;
-  const sandboxDir = join(testTmpDir, 'docker-sandbox');
-  beforeEach(async () => {
-    mkdirSync(sandboxDir, { recursive: true });
-    const mod = await import('../tools/terminal/backends/docker.js');
-    DockerBackend = mod.DockerBackend;
-    _resetDockerChecks = mod._resetDockerChecks;
-    _resetDockerChecks();
-  });
-  afterEach(() => {
-    try { rmSync(sandboxDir, { recursive: true, force: true }); } catch {}
-  });
-  test('constructor resolves symlinks in sandbox root', () => {
-    const realDir = join(testTmpDir, 'docker-real');
-    const linkDir = join(testTmpDir, 'docker-link');
-    mkdirSync(realDir, { recursive: true });
-    try {
-      symlinkSync(realDir, linkDir);
-      // Construct backend with the symlink — it should resolve to the real path.
-      const backend = new DockerBackend(linkDir, undefined, 1000, 1000);
-      // We can't inspect private fields directly, but wrapping will fail at
-      // preflight checks (Docker not available) — this tests that constructor
-      // does not throw on a valid symlinked path.
-      expect(backend).toBeDefined();
-    } finally {
-      try { rmSync(linkDir); } catch {}
-      try { rmSync(realDir, { recursive: true, force: true }); } catch {}
-    }
-  });
-  test('constructor rejects sandbox root with null bytes', () => {
-    // realpathSync throws TypeError before validatePathSafety can run
-    expect(() => new DockerBackend('/tmp/foo\0bar', undefined, 1000, 1000)).toThrow();
-  });
-  test('constructor rejects sandbox root with newlines', () => {
-    // Create a real directory with a newline in its name so realpathSync
-    // succeeds and the rejection comes from validatePathSafety, not ENOENT.
-    const nlDir = join(testTmpDir, 'has\nnewline');
-    mkdirSync(nlDir, { recursive: true });
-    try {
-      expect(() => new DockerBackend(nlDir, undefined, 1000, 1000)).toThrow(ToolError);
-    } finally {
-      try { rmSync(nlDir, { recursive: true, force: true }); } catch {}
-    }
-  });
-  test('constructor rejects sandbox root with carriage returns', () => {
-    // Create a real directory with a carriage return in its name so
-    // realpathSync succeeds and validatePathSafety is what rejects it.
-    const crDir = join(testTmpDir, 'has\rreturn');
-    mkdirSync(crDir, { recursive: true });
-    try {
-      expect(() => new DockerBackend(crDir, undefined, 1000, 1000)).toThrow(ToolError);
-    } finally {
-      try { rmSync(crDir, { recursive: true, force: true }); } catch {}
-    }
-  });
-  test('validates path safety after resolving symlinks', () => {
-    // Create a directory with a comma in the name to test validatePathSafety.
-    // On most filesystems this is allowed, so validatePathSafety should catch it.
-    const commaDir = join(testTmpDir, 'has,comma');
-    mkdirSync(commaDir, { recursive: true });
-    try {
-      expect(() => new DockerBackend(commaDir, undefined, 1000, 1000)).toThrow(ToolError);
-    } finally {
-      try { rmSync(commaDir, { recursive: true, force: true }); } catch {}
-    }
-  });
-});
-// ═══════════════════════════════════════════════════════════════════════════
-//  6. Shell tool — input validation
+//  5. Shell tool — input validation
 // ═══════════════════════════════════════════════════════════════════════════
 describe('Shell tool input validation', () => {