npm - vellum - Versions diffs - 0.2.13 → 0.2.14 - Mend

vellum 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (207)

package/README.md +32 -0
package/bun.lock +2 -2
package/docs/skills.md +4 -4
package/package.json +2 -2
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +213 -3
package/src/__tests__/app-git-history.test.ts +176 -0
package/src/__tests__/app-git-service.test.ts +169 -0
package/src/__tests__/assistant-events-sse-hardening.test.ts +315 -0
package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +8 -8
package/src/__tests__/browser-skill-endstate.test.ts +6 -6
package/src/__tests__/call-bridge.test.ts +105 -13
package/src/__tests__/call-domain.test.ts +163 -0
package/src/__tests__/call-orchestrator.test.ts +113 -0
package/src/__tests__/call-routes-http.test.ts +246 -6
package/src/__tests__/channel-approval-routes.test.ts +438 -0
package/src/__tests__/channel-approval.test.ts +266 -0
package/src/__tests__/channel-approvals.test.ts +393 -0
package/src/__tests__/channel-delivery-store.test.ts +447 -0
package/src/__tests__/checker.test.ts +607 -1048
package/src/__tests__/cli.test.ts +1 -56
package/src/__tests__/config-schema.test.ts +137 -18
package/src/__tests__/conflict-intent-tokenization.test.ts +141 -0
package/src/__tests__/conflict-policy.test.ts +121 -0
package/src/__tests__/conflict-store.test.ts +2 -0
package/src/__tests__/contacts-tools.test.ts +3 -3
package/src/__tests__/contradiction-checker.test.ts +99 -1
package/src/__tests__/credential-security-invariants.test.ts +22 -6
package/src/__tests__/credential-vault-unit.test.ts +780 -0
package/src/__tests__/elevenlabs-client.test.ts +62 -0
package/src/__tests__/ephemeral-permissions.test.ts +73 -23
package/src/__tests__/filesystem-tools.test.ts +579 -0
package/src/__tests__/gateway-only-enforcement.test.ts +114 -4
package/src/__tests__/handlers-add-trust-rule-metadata.test.ts +202 -0
package/src/__tests__/handlers-cu-observation-blob.test.ts +2 -1
package/src/__tests__/handlers-ipc-blob-probe.test.ts +2 -1
package/src/__tests__/handlers-slack-config.test.ts +2 -1
package/src/__tests__/handlers-telegram-config.test.ts +855 -0
package/src/__tests__/handlers-twitter-config.test.ts +141 -1
package/src/__tests__/hooks-runner.test.ts +6 -2
package/src/__tests__/host-file-edit-tool.test.ts +124 -0
package/src/__tests__/host-file-read-tool.test.ts +62 -0
package/src/__tests__/host-file-write-tool.test.ts +59 -0
package/src/__tests__/host-shell-tool.test.ts +251 -0
package/src/__tests__/ingress-reconcile.test.ts +581 -0
package/src/__tests__/ipc-snapshot.test.ts +100 -41
package/src/__tests__/ipc-validate.test.ts +50 -0
package/src/__tests__/key-migration.test.ts +23 -0
package/src/__tests__/memory-regressions.test.ts +99 -0
package/src/__tests__/memory-retrieval.benchmark.test.ts +1 -1
package/src/__tests__/oauth-callback-registry.test.ts +11 -4
package/src/__tests__/playbook-execution.test.ts +502 -0
package/src/__tests__/playbook-tools.test.ts +4 -6
package/src/__tests__/public-ingress-urls.test.ts +34 -0
package/src/__tests__/qdrant-manager.test.ts +267 -0
package/src/__tests__/recurrence-engine-rruleset.test.ts +97 -0
package/src/__tests__/recurrence-engine.test.ts +9 -0
package/src/__tests__/recurrence-types.test.ts +8 -0
package/src/__tests__/registry.test.ts +1 -1
package/src/__tests__/runtime-runs.test.ts +1 -25
package/src/__tests__/schedule-store.test.ts +16 -14
package/src/__tests__/schedule-tools.test.ts +83 -0
package/src/__tests__/scheduler-recurrence.test.ts +111 -10
package/src/__tests__/secret-allowlist.test.ts +18 -17
package/src/__tests__/secret-ingress-handler.test.ts +11 -0
package/src/__tests__/secret-scanner.test.ts +43 -0
package/src/__tests__/session-conflict-gate.test.ts +442 -6
package/src/__tests__/session-init.benchmark.test.ts +3 -0
package/src/__tests__/session-process-bridge.test.ts +242 -0
package/src/__tests__/session-skill-tools.test.ts +1 -1
package/src/__tests__/shell-identity.test.ts +256 -0
package/src/__tests__/skill-projection.benchmark.test.ts +11 -1
package/src/__tests__/subagent-tools.test.ts +637 -54
package/src/__tests__/task-management-tools.test.ts +936 -0
package/src/__tests__/task-runner.test.ts +2 -2
package/src/__tests__/terminal-tools.test.ts +840 -0
package/src/__tests__/tool-executor-shell-integration.test.ts +301 -0
package/src/__tests__/tool-executor.test.ts +85 -151
package/src/__tests__/tool-permission-simulate-handler.test.ts +336 -0
package/src/__tests__/trust-store.test.ts +27 -453
package/src/__tests__/twilio-provider.test.ts +153 -3
package/src/__tests__/twilio-routes-elevenlabs.test.ts +375 -0
package/src/__tests__/twilio-routes-twiml.test.ts +4 -4
package/src/__tests__/twilio-routes.test.ts +17 -262
package/src/__tests__/twitter-auth-handler.test.ts +2 -1
package/src/__tests__/twitter-cli-error-shaping.test.ts +208 -0
package/src/__tests__/twitter-cli-routing.test.ts +252 -0
package/src/__tests__/twitter-oauth-client.test.ts +209 -0
package/src/__tests__/workspace-policy.test.ts +213 -0
package/src/calls/call-bridge.ts +92 -19
package/src/calls/call-domain.ts +157 -5
package/src/calls/call-orchestrator.ts +93 -7
package/src/calls/call-store.ts +6 -0
package/src/calls/elevenlabs-client.ts +8 -0
package/src/calls/elevenlabs-config.ts +7 -5
package/src/calls/twilio-provider.ts +91 -0
package/src/calls/twilio-routes.ts +32 -37
package/src/calls/types.ts +3 -1
package/src/calls/voice-quality.ts +29 -7
package/src/cli/twitter.ts +200 -21
package/src/cli.ts +1 -20
package/src/config/bundled-skills/contacts/tools/contact-merge.ts +52 -4
package/src/config/bundled-skills/contacts/tools/contact-search.ts +55 -4
package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +61 -4
package/src/config/bundled-skills/messaging/SKILL.md +17 -2
package/src/config/bundled-skills/messaging/tools/messaging-reply.ts +4 -1
package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
package/src/config/bundled-skills/messaging/tools/shared.ts +5 -0
package/src/config/bundled-skills/phone-calls/SKILL.md +142 -34
package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +95 -6
package/src/config/bundled-skills/playbooks/tools/playbook-delete.ts +51 -6
package/src/config/bundled-skills/playbooks/tools/playbook-list.ts +73 -6
package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +110 -6
package/src/config/bundled-skills/public-ingress/SKILL.md +22 -5
package/src/config/bundled-skills/twitter/SKILL.md +103 -17
package/src/config/defaults.ts +10 -4
package/src/config/schema.ts +80 -21
package/src/config/types.ts +1 -0
package/src/config/vellum-skills/telegram-setup/SKILL.md +56 -61
package/src/daemon/assistant-attachments.ts +4 -2
package/src/daemon/handlers/apps.ts +69 -0
package/src/daemon/handlers/config.ts +543 -24
package/src/daemon/handlers/index.ts +1 -0
package/src/daemon/handlers/sessions.ts +22 -6
package/src/daemon/handlers/shared.ts +2 -1
package/src/daemon/handlers/skills.ts +5 -20
package/src/daemon/ipc-contract-inventory.json +28 -0
package/src/daemon/ipc-contract.ts +168 -10
package/src/daemon/ipc-validate.ts +17 -0
package/src/daemon/lifecycle.ts +2 -0
package/src/daemon/server.ts +78 -72
package/src/daemon/session-attachments.ts +1 -1
package/src/daemon/session-conflict-gate.ts +62 -6
package/src/daemon/session-notifiers.ts +1 -1
package/src/daemon/session-process.ts +62 -3
package/src/daemon/session-tool-setup.ts +1 -2
package/src/daemon/tls-certs.ts +189 -0
package/src/daemon/video-thumbnail.ts +5 -3
package/src/hooks/manager.ts +5 -9
package/src/memory/app-git-service.ts +295 -0
package/src/memory/app-store.ts +21 -0
package/src/memory/conflict-intent.ts +47 -4
package/src/memory/conflict-policy.ts +73 -0
package/src/memory/conflict-store.ts +9 -1
package/src/memory/contradiction-checker.ts +28 -0
package/src/memory/conversation-key-store.ts +15 -0
package/src/memory/db.ts +81 -0
package/src/memory/embedding-local.ts +3 -13
package/src/memory/external-conversation-store.ts +234 -0
package/src/memory/job-handlers/conflict.ts +22 -2
package/src/memory/jobs-worker.ts +67 -28
package/src/memory/runs-store.ts +54 -7
package/src/memory/schema.ts +20 -0
package/src/messaging/provider.ts +9 -0
package/src/messaging/providers/telegram-bot/adapter.ts +162 -0
package/src/messaging/providers/telegram-bot/client.ts +104 -0
package/src/messaging/providers/telegram-bot/types.ts +15 -0
package/src/messaging/registry.ts +1 -0
package/src/permissions/checker.ts +48 -44
package/src/permissions/prompter.ts +0 -4
package/src/permissions/shell-identity.ts +227 -0
package/src/permissions/trust-store.ts +76 -53
package/src/permissions/types.ts +0 -19
package/src/permissions/workspace-policy.ts +114 -0
package/src/providers/retry.ts +12 -37
package/src/runtime/assistant-event-hub.ts +41 -4
package/src/runtime/channel-approval-parser.ts +60 -0
package/src/runtime/channel-approval-types.ts +71 -0
package/src/runtime/channel-approvals.ts +145 -0
package/src/runtime/gateway-client.ts +16 -0
package/src/runtime/http-server.ts +29 -9
package/src/runtime/routes/call-routes.ts +52 -2
package/src/runtime/routes/channel-routes.ts +296 -16
package/src/runtime/routes/events-routes.ts +97 -28
package/src/runtime/routes/run-routes.ts +2 -7
package/src/runtime/run-orchestrator.ts +0 -3
package/src/schedule/recurrence-engine.ts +26 -2
package/src/schedule/recurrence-types.ts +1 -1
package/src/schedule/schedule-store.ts +12 -3
package/src/security/secret-scanner.ts +7 -0
package/src/tasks/ephemeral-permissions.ts +0 -2
package/src/tasks/task-scheduler.ts +2 -1
package/src/tools/calls/call-start.ts +8 -0
package/src/tools/execution-target.ts +21 -0
package/src/tools/execution-timeout.ts +49 -0
package/src/tools/executor.ts +6 -135
package/src/tools/network/web-search.ts +9 -32
package/src/tools/policy-context.ts +29 -0
package/src/tools/schedule/update.ts +8 -1
package/src/tools/terminal/parser.ts +16 -18
package/src/tools/types.ts +4 -11
package/src/twitter/oauth-client.ts +102 -0
package/src/twitter/router.ts +101 -0
package/src/util/debounce.ts +88 -0
package/src/util/network-info.ts +47 -0
package/src/util/platform.ts +29 -4
package/src/util/promise-guard.ts +37 -0
package/src/util/retry.ts +98 -0
package/src/util/truncate.ts +1 -1
package/src/workspace/git-service.ts +129 -112
package/src/tools/contacts/contact-merge.ts +0 -55
package/src/tools/contacts/contact-search.ts +0 -58
package/src/tools/contacts/contact-upsert.ts +0 -64
package/src/tools/playbooks/index.ts +0 -4
package/src/tools/playbooks/playbook-create.ts +0 -96
package/src/tools/playbooks/playbook-delete.ts +0 -52
package/src/tools/playbooks/playbook-list.ts +0 -74
package/src/tools/playbooks/playbook-update.ts +0 -111

package/src/__tests__/terminal-tools.test.ts ADDED Viewed

@@ -0,0 +1,840 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test';
+import { mkdtempSync, mkdirSync, rmSync, symlinkSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+// ── Mock modules ────────────────────────────────────────────────────────────
+mock.module('../util/logger.js', () => ({ // Stub the logger module for the code under test
+  getLogger: () => new Proxy({} as Record<string, unknown>, {
+    get: (_target: Record<string, unknown>, _prop: string) => () => {}, // every property read yields a no-op function
+  }),
+}));
+const testTmpDir = mkdtempSync(join(tmpdir(), 'terminal-test-')); // fresh temp directory; every mocked path below lives under it
+mock.module('../util/platform.js', () => ({ // Stub the platform helpers so all paths resolve inside testTmpDir
+  getRootDir: () => testTmpDir,
+  getDataDir: () => join(testTmpDir, 'data'),
+  getSandboxWorkingDir: () => join(testTmpDir, 'sandbox'),
+  isMacOS: () => process.platform === 'darwin', // platform predicates still report the real host platform
+  isLinux: () => process.platform === 'linux',
+  isWindows: () => process.platform === 'win32',
+  getSocketPath: () => join(testTmpDir, 'test.sock'),
+  getPidPath: () => join(testTmpDir, 'test.pid'),
+  getDbPath: () => join(testTmpDir, 'test.db'),
+  getLogPath: () => join(testTmpDir, 'test.log'),
+  ensureDataDir: () => {}, // no-op stub
+}));
+mock.module('../config/loader.js', () => ({ // Stub the config loader with a fixed configuration
+  getConfig: () => ({
+    timeouts: { shellDefaultTimeoutSec: 120, shellMaxTimeoutSec: 600 },
+    sandbox: {
+      enabled: false, // sandboxing is switched off in the mocked config
+      backend: 'native',
+      docker: {
+        image: 'vellum-sandbox:latest',
+        shell: 'bash',
+        cpus: 1,
+        memoryMb: 512,
+        pidsLimit: 256,
+        network: 'none',
+      },
+    },
+  }),
+  loadConfig: () => ({}), // returns an empty object
+}));
+const proxyGetOrStartSession = mock(() => Promise.resolve({ // mock that resolves to a fixed session object
+  session: { id: 'mock-session' },
+}));
+const proxyGetSessionEnv = mock(() => ({ // mock that returns fixed HTTP(S) proxy variables
+  HTTP_PROXY: 'http://localhost:9999',
+  HTTPS_PROXY: 'http://localhost:9999',
+}));
+mock.module('../tools/network/script-proxy/index.js', () => ({ // Stub the script-proxy module: two tracked mocks plus inert stubs
+  getOrStartSession: proxyGetOrStartSession,
+  getSessionEnv: proxyGetSessionEnv,
+  createSession: () => {}, // the remaining exports are inert stubs
+  startSession: () => {},
+  stopSession: () => {},
+  getActiveSession: () => null,
+  getSessionsForConversation: () => [],
+  stopAllSessions: () => {},
+  ensureLocalCA: () => {},
+  ensureCombinedCABundle: () => {},
+  issueLeafCert: () => {},
+  getCAPath: () => '',
+  getCombinedCAPath: () => '',
+}));
+// ── Imports (after mocks) ───────────────────────────────────────────────────
+import { parse } from '../tools/terminal/parser.js';
+import { buildSanitizedEnv } from '../tools/terminal/safe-env.js';
+import { wrapCommand } from '../tools/terminal/sandbox.js';
+import type { SandboxConfig } from '../config/schema.js';
+import { ToolError } from '../util/errors.js';
+// ═══════════════════════════════════════════════════════════════════════════
+//  1. Shell Parser — parse()
+// ═══════════════════════════════════════════════════════════════════════════
+describe('Shell parser', () => {
+  // ── Basic segment extraction ──────────────────────────────────────────
+  describe('segment extraction', () => { // parse() output is checked via result.segments (program, args, operator)
+    test('simple command', async () => {
+      const result = await parse('ls -la');
+      expect(result.segments.length).toBe(1);
+      expect(result.segments[0].program).toBe('ls');
+      expect(result.segments[0].args).toContain('-la');
+      expect(result.segments[0].operator).toBe(''); // the first segment has an empty operator
+    });
+    test('command with multiple arguments', async () => {
+      const result = await parse('git commit -m "initial commit"');
+      expect(result.segments.length).toBe(1);
+      expect(result.segments[0].program).toBe('git');
+      expect(result.segments[0].args).toContain('commit');
+      expect(result.segments[0].args).toContain('-m');
+    });
+    test('compound command with &&', async () => {
+      const result = await parse('mkdir foo && cd foo');
+      expect(result.segments.length).toBe(2);
+      expect(result.segments[0].program).toBe('mkdir');
+      expect(result.segments[0].operator).toBe('');
+      expect(result.segments[1].program).toBe('cd');
+      expect(result.segments[1].operator).toBe('&&'); // operator is the connector that precedes the segment
+    });
+    test('compound command with ||', async () => {
+      const result = await parse('test -f foo || echo missing');
+      expect(result.segments.length).toBe(2);
+      expect(result.segments[1].operator).toBe('||');
+      expect(result.segments[1].program).toBe('echo');
+    });
+    test('compound command with semicolons', async () => {
+      const result = await parse('echo a; echo b; echo c');
+      expect(result.segments.length).toBe(3);
+      // tree-sitter parses semicolons as list separators; the parser resets
+      // operator to '' after each child, so the second/third segments may
+      // carry ';' or '' depending on the tree-sitter-bash grammar version.
+      // The key invariant is that we get 3 segments, each with program 'echo'.
+      const programs = result.segments.map(s => s.program);
+      expect(programs).toEqual(['echo', 'echo', 'echo']);
+    });
+    test('pipeline', async () => {
+      const result = await parse('cat file.txt | grep pattern | wc -l');
+      expect(result.segments.length).toBe(3);
+      expect(result.segments[0].program).toBe('cat');
+      expect(result.segments[0].operator).toBe('');
+      expect(result.segments[1].program).toBe('grep');
+      expect(result.segments[1].operator).toBe('|');
+      expect(result.segments[2].program).toBe('wc');
+      expect(result.segments[2].operator).toBe('|');
+    });
+    test('pipeline combined with &&', async () => {
+      const result = await parse('ls | wc -l && echo done');
+      expect(result.segments.length).toBe(3);
+      expect(result.segments[0].operator).toBe('');
+      expect(result.segments[1].operator).toBe('|');
+      expect(result.segments[2].operator).toBe('&&');
+    });
+    test('redirected statement extracts command from inside', async () => {
+      const result = await parse('echo hello > output.txt');
+      expect(result.segments.length).toBe(1);
+      expect(result.segments[0].program).toBe('echo');
+    });
+    test('subshell extracts inner commands', async () => {
+      const result = await parse('(echo hello && echo world)');
+      expect(result.segments.length).toBe(2);
+      expect(result.segments[0].program).toBe('echo');
+      expect(result.segments[1].program).toBe('echo');
+    });
+    test('empty command produces no segments', async () => {
+      const result = await parse('');
+      expect(result.segments.length).toBe(0);
+    });
+    test('for loop extracts body commands', async () => {
+      const result = await parse('for i in a b c; do echo $i; done');
+      expect(result.segments.length).toBeGreaterThanOrEqual(1); // exact count is left open; only the body's echo is required
+      const programs = result.segments.map(s => s.program);
+      expect(programs).toContain('echo');
+    });
+    test('if statement extracts body commands', async () => {
+      const result = await parse('if true; then echo yes; fi');
+      expect(result.segments.length).toBeGreaterThanOrEqual(1); // exact count is left open; only the body's echo is required
+      const programs = result.segments.map(s => s.program);
+      expect(programs).toContain('echo');
+    });
+    test('command with string arguments', async () => {
+      const result = await parse("echo 'single quoted' \"double quoted\"");
+      expect(result.segments.length).toBe(1);
+      expect(result.segments[0].program).toBe('echo');
+    });
+  });
+  // ── Dangerous pattern detection ───────────────────────────────────────
+  describe('dangerous patterns', () => { // each case inspects the `type` tags in result.dangerousPatterns
+    test('pipe to bash detected', async () => {
+      const result = await parse('curl http://example.com | bash');
+      expect(result.dangerousPatterns.length).toBeGreaterThanOrEqual(1);
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('pipe_to_shell');
+    });
+    test('pipe to sh detected', async () => {
+      const result = await parse('cat script.sh | sh');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('pipe_to_shell');
+    });
+    test('pipe to zsh detected', async () => {
+      const result = await parse('echo "code" | zsh');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('pipe_to_shell');
+    });
+    test('pipe to eval detected', async () => {
+      const result = await parse('echo "echo hi" | eval');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('pipe_to_shell');
+    });
+    test('pipe to xargs detected', async () => {
+      const result = await parse('find . -name "*.tmp" | xargs rm');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('pipe_to_shell'); // xargs is reported under the same pipe_to_shell type
+    });
+    test('pipe to grep is not flagged as pipe_to_shell', async () => {
+      const result = await parse('cat file | grep pattern');
+      const pipeToShell = result.dangerousPatterns.filter(p => p.type === 'pipe_to_shell');
+      expect(pipeToShell.length).toBe(0); // negative case: other pattern types are not constrained here
+    });
+    test('base64 decode piped to bash detected', async () => {
+      const result = await parse('echo payload | base64 -d | bash');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('base64_execute');
+    });
+    test('redirect to sensitive path ~/.ssh/ detected', async () => {
+      const result = await parse('echo key > ~/.ssh/authorized_keys');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('sensitive_redirect');
+    });
+    test('redirect to sensitive path ~/.bashrc detected', async () => {
+      const result = await parse('echo "export FOO=bar" >> ~/.bashrc');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('sensitive_redirect');
+    });
+    test('redirect to /etc/ detected', async () => {
+      const result = await parse('echo "nameserver 8.8.8.8" > /etc/resolv.conf');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('sensitive_redirect');
+    });
+    test('redirect to normal path is not flagged', async () => {
+      const result = await parse('echo hello > /tmp/output.txt');
+      const sensitive = result.dangerousPatterns.filter(p => p.type === 'sensitive_redirect');
+      expect(sensitive.length).toBe(0); // negative case: only sensitive_redirect is checked
+    });
+    test('command substitution as argument to rm detected', async () => {
+      const result = await parse('rm $(find . -name "*.tmp")');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('dangerous_substitution');
+    });
+    test('command substitution as argument to chmod detected', async () => {
+      const result = await parse('chmod $(cat perms) file');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('dangerous_substitution');
+    });
+    test('assignment to PATH detected as env_injection', async () => {
+      const result = await parse('PATH=/evil:$PATH ls');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('env_injection');
+    });
+    test('assignment to LD_PRELOAD detected as env_injection', async () => {
+      const result = await parse('LD_PRELOAD=/evil/lib.so cmd');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('env_injection');
+    });
+    test('assignment to NODE_OPTIONS detected as env_injection', async () => {
+      const result = await parse('NODE_OPTIONS="--require=evil" node');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('env_injection');
+    });
+    test('assignment to harmless variable is not flagged', async () => {
+      const result = await parse('FOO=bar echo hello');
+      const envInjection = result.dangerousPatterns.filter(p => p.type === 'env_injection');
+      expect(envInjection.length).toBe(0); // negative case: only env_injection is checked
+    });
+    test('process substitution detected', async () => {
+      const result = await parse('diff <(sort a.txt) <(sort b.txt)');
+      const types = result.dangerousPatterns.map(p => p.type);
+      expect(types).toContain('process_substitution');
+    });
+  });
+  // ── Opaque construct detection ────────────────────────────────────────
+  describe('opaque constructs', () => { // each case checks the boolean result.hasOpaqueConstructs
+    test('eval is opaque', async () => {
+      const result = await parse('eval "echo hello"');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('source is opaque', async () => {
+      const result = await parse('source ~/.bashrc');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('dot-source is opaque', async () => {
+      const result = await parse('. ~/.profile');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('bash -c is opaque', async () => {
+      const result = await parse('bash -c "echo hello"');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('sh -c is opaque', async () => {
+      const result = await parse('sh -c "rm -rf /"');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('zsh -c is opaque', async () => {
+      const result = await parse('zsh -c "echo hi"');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('bash -ec is opaque', async () => {
+      const result = await parse('bash -ec "echo careful"'); // -c combined with another flag in one cluster
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('heredoc is opaque', async () => {
+      const result = await parse('cat <<EOF\nhello world\nEOF');
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('variable expansion as command is opaque', async () => {
+      const result = await parse('$CMD arg1 arg2'); // the program name itself comes from a variable
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('command substitution as command is opaque', async () => {
+      const result = await parse('$(get_cmd) arg1 arg2'); // the program name itself comes from a substitution
+      expect(result.hasOpaqueConstructs).toBe(true);
+    });
+    test('simple command is not opaque', async () => {
+      const result = await parse('ls -la /tmp');
+      expect(result.hasOpaqueConstructs).toBe(false);
+    });
+    test('pipeline of safe commands is not opaque', async () => {
+      const result = await parse('cat file | grep pattern | wc -l');
+      expect(result.hasOpaqueConstructs).toBe(false);
+    });
+    test('compound safe commands are not opaque', async () => {
+      const result = await parse('mkdir foo && cd foo && touch bar');
+      expect(result.hasOpaqueConstructs).toBe(false);
+    });
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  2. Safe Environment — buildSanitizedEnv()
+// ═══════════════════════════════════════════════════════════════════════════
+describe('buildSanitizedEnv', () => { // these tests mutate process.env directly, then inspect the object buildSanitizedEnv() returns
+  const originalEnv = { ...process.env }; // snapshot taken once, when the suite is defined
+  afterEach(() => {
+    // Restore env: drop keys a test added, then copy the original values back
+    for (const key of Object.keys(process.env)) {
+      if (!(key in originalEnv)) {
+        delete process.env[key];
+      }
+    }
+    Object.assign(process.env, originalEnv); // also re-creates keys a test deleted
+  });
+  test('passes through safe variables when present', () => {
+    process.env.HOME = '/home/testuser';
+    process.env.PATH = '/usr/bin';
+    process.env.TERM = 'xterm-256color';
+    const env = buildSanitizedEnv();
+    expect(env.HOME).toBe('/home/testuser');
+    expect(env.PATH).toBe('/usr/bin');
+    expect(env.TERM).toBe('xterm-256color');
+  });
+  test('strips non-allowlisted variables', () => {
+    // Set some variables that are NOT on the safe list
+    const unsafeKeys = ['MY_CUSTOM_KEY', 'SOME_TOKEN', 'DB_CONNECTION'];
+    for (const key of unsafeKeys) {
+      process.env[key] = 'test-value';
+    }
+    const env = buildSanitizedEnv();
+    for (const key of unsafeKeys) {
+      expect(key in env).toBe(false);
+      delete process.env[key]; // eager cleanup; afterEach also removes keys added by a test
+    }
+  });
+  test('omits undefined safe variables', () => {
+    delete process.env.GPG_TTY;
+    delete process.env.SSH_AGENT_PID;
+    delete process.env.DISPLAY;
+    const env = buildSanitizedEnv();
+    expect('GPG_TTY' in env).toBe(false); // the key must be absent, not present with an undefined value
+    expect('SSH_AGENT_PID' in env).toBe(false);
+    expect('DISPLAY' in env).toBe(false);
+  });
+  test('includes SSH_AUTH_SOCK when present', () => {
+    process.env.SSH_AUTH_SOCK = '/tmp/ssh-agent.sock';
+    const env = buildSanitizedEnv();
+    expect(env.SSH_AUTH_SOCK).toBe('/tmp/ssh-agent.sock');
+  });
+  test('includes locale variables', () => {
+    process.env.LANG = 'en_US.UTF-8';
+    process.env.LC_ALL = 'C';
+    process.env.LC_CTYPE = 'UTF-8';
+    const env = buildSanitizedEnv();
+    expect(env.LANG).toBe('en_US.UTF-8');
+    expect(env.LC_ALL).toBe('C');
+    expect(env.LC_CTYPE).toBe('UTF-8');
+  });
+  test('result is a plain object with no prototype-inherited secrets', () => {
+    const env = buildSanitizedEnv();
+    const keys = Object.keys(env); // own enumerable keys of the result
+    const safeKeys = [
+      'PATH', 'HOME', 'TERM', 'LANG', 'EDITOR', 'SHELL', 'USER', 'TMPDIR',
+      'LC_ALL', 'LC_CTYPE', 'XDG_RUNTIME_DIR', 'DISPLAY', 'COLORTERM',
+      'TERM_PROGRAM', 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', 'GPG_TTY', 'GNUPGHOME',
+    ];
+    for (const key of keys) {
+      expect(safeKeys).toContain(key); // every returned key must appear in the list above
+    }
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  3. Sandbox wrapCommand
+// ═══════════════════════════════════════════════════════════════════════════
+describe('wrapCommand', () => { // only the sandbox-disabled path of wrapCommand() is covered here
+  const disabledConfig: SandboxConfig = {
+    enabled: false, // every test below passes this disabled configuration
+    backend: 'native',
+    docker: {
+      image: 'vellum-sandbox:latest',
+      shell: 'bash',
+      cpus: 1,
+      memoryMb: 512,
+      pidsLimit: 256,
+      network: 'none',
+    },
+  };
+  test('disabled sandbox returns plain bash invocation', () => {
+    const result = wrapCommand('echo hello', '/tmp', disabledConfig);
+    expect(result.command).toBe('bash');
+    expect(result.args).toEqual(['-c', '--', 'echo hello']); // the command string is the third argument
+    expect(result.sandboxed).toBe(false);
+  });
+  test('disabled sandbox preserves command verbatim', () => {
+    const cmd = 'ls -la /foo && echo "done"';
+    const result = wrapCommand(cmd, '/tmp', disabledConfig);
+    expect(result.args[2]).toBe(cmd); // args[2] is the command slot after '-c' and '--'
+  });
+  test('disabled sandbox works with special characters in command', () => {
+    const cmd = "echo 'hello world' | grep 'hello'";
+    const result = wrapCommand(cmd, '/tmp', disabledConfig);
+    expect(result.args[2]).toBe(cmd);
+    expect(result.sandboxed).toBe(false);
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  4. Native sandbox backend — path safety
+// ═══════════════════════════════════════════════════════════════════════════
+describe('Native sandbox backend', () => {
+  // We test NativeBackend directly rather than through wrapCommand to avoid
+  // platform-dependent sandbox-exec/bwrap availability.
+  let NativeBackend: any;
+  beforeEach(async () => {
+    const mod = await import('../tools/terminal/backends/native.js'); // imported dynamically before each test
+    NativeBackend = mod.NativeBackend;
+  });
+  if (process.platform === 'darwin') { // the tests below are only registered on macOS
+    test('wraps command with sandbox-exec on macOS', () => {
+      const backend = new NativeBackend();
+      const result = backend.wrap('echo hello', '/tmp');
+      expect(result.command).toBe('sandbox-exec');
+      expect(result.args[0]).toBe('-f');
+      // Profile path is the second arg
+      expect(result.args[1]).toMatch(/sandbox-profile-.*\.sb$/);
+      expect(result.args).toContain('bash');
+      expect(result.args).toContain('-c');
+      expect(result.args).toContain('--');
+      expect(result.args[result.args.length - 1]).toBe('echo hello'); // the original command is the final argument
+      expect(result.sandboxed).toBe(true);
+    });
+    test('rejects working dir with SBPL metacharacters', () => {
+      const backend = new NativeBackend();
+      expect(() => backend.wrap('echo hi', '/tmp/foo"bar')).toThrow(ToolError); // double quote
+      expect(() => backend.wrap('echo hi', '/tmp/foo(bar')).toThrow(ToolError); // open parenthesis
+      expect(() => backend.wrap('echo hi', '/tmp/foo;bar')).toThrow(ToolError); // semicolon
+      expect(() => backend.wrap('echo hi', '/tmp/foo\\bar')).toThrow(ToolError); // backslash
+    });
+    test('accepts working dir with safe special characters', () => {
+      // Dots, hyphens, and underscores are accepted
+      const backend = new NativeBackend();
+      const result = backend.wrap('ls', '/tmp/my-dir_name.2024');
+      expect(result.sandboxed).toBe(true);
+    });
+  }
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  5. Docker sandbox backend
+// ═══════════════════════════════════════════════════════════════════════════
+describe('Docker sandbox backend', () => {
+  let DockerBackend: any;
+  let _resetDockerChecks: any;
+  const sandboxDir = join(testTmpDir, 'docker-sandbox');
+  // Creates a real directory named `dirName` under the test temp dir, asserts
+  // that constructing a DockerBackend rooted there throws ToolError, and then
+  // removes the directory again (best effort). Because the directory exists,
+  // realpathSync succeeds and the rejection comes from validatePathSafety
+  // rather than from ENOENT.
+  const expectRootRejected = (dirName: string): void => {
+    const rootDir = join(testTmpDir, dirName);
+    mkdirSync(rootDir, { recursive: true });
+    try {
+      expect(() => new DockerBackend(rootDir, undefined, 1000, 1000)).toThrow(ToolError);
+    } finally {
+      try {
+        rmSync(rootDir, { recursive: true, force: true });
+      } catch {}
+    }
+  };
+  beforeEach(async () => {
+    mkdirSync(sandboxDir, { recursive: true });
+    ({ DockerBackend, _resetDockerChecks } = await import('../tools/terminal/backends/docker.js'));
+    // Reset the module's Docker check state before every test.
+    _resetDockerChecks();
+  });
+  afterEach(() => {
+    // Best-effort cleanup of the per-test sandbox directory.
+    try {
+      rmSync(sandboxDir, { recursive: true, force: true });
+    } catch {}
+  });
+  test('constructor resolves symlinks in sandbox root', () => {
+    const targetDir = join(testTmpDir, 'docker-real');
+    const symlinkPath = join(testTmpDir, 'docker-link');
+    mkdirSync(targetDir, { recursive: true });
+    try {
+      symlinkSync(targetDir, symlinkPath);
+      // Hand the symlink to the constructor — it should resolve to the real
+      // path. Private fields cannot be inspected, and wrapping would fail at
+      // the preflight checks (Docker not available), so this only verifies
+      // that construction succeeds for a valid symlinked path.
+      const backend = new DockerBackend(symlinkPath, undefined, 1000, 1000);
+      expect(backend).toBeDefined();
+    } finally {
+      try {
+        rmSync(symlinkPath);
+      } catch {}
+      try {
+        rmSync(targetDir, { recursive: true, force: true });
+      } catch {}
+    }
+  });
+  test('constructor rejects sandbox root with null bytes', () => {
+    // realpathSync throws TypeError before validatePathSafety can run, so
+    // only "throws something" is asserted here.
+    const construct = () => new DockerBackend('/tmp/foo\0bar', undefined, 1000, 1000);
+    expect(construct).toThrow();
+  });
+  test('constructor rejects sandbox root with newlines', () => {
+    // Directory name contains a newline.
+    expectRootRejected('has\nnewline');
+  });
+  test('constructor rejects sandbox root with carriage returns', () => {
+    // Directory name contains a carriage return.
+    expectRootRejected('has\rreturn');
+  });
+  test('validates path safety after resolving symlinks', () => {
+    // A comma is allowed in directory names on most filesystems, so the
+    // rejection has to come from validatePathSafety.
+    expectRootRejected('has,comma');
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  6. Shell tool — input validation
+// ═══════════════════════════════════════════════════════════════════════════
+describe('Shell tool input validation', () => {
+  let shellTool: any;
+  // Execution context handed to every shellTool.execute call in this suite.
+  const baseContext = {
+    workingDir: testTmpDir,
+    conversationId: 'test-conv-1',
+    onOutput: () => {},
+  };
+  // Runs the shell tool with the given raw input against baseContext.
+  const runShell = (input: Record<string, unknown>) => shellTool.execute(input, baseContext);
+  beforeEach(async () => {
+    ({ shellTool } = await import('../tools/terminal/shell.js'));
+  });
+  test('rejects empty command', async () => {
+    const { isError, content } = await runShell({ command: '', reason: 'test' });
+    expect(isError).toBe(true);
+    expect(content).toContain('command is required');
+  });
+  test('rejects non-string command', async () => {
+    const { isError, content } = await runShell({ command: 123, reason: 'test' });
+    expect(isError).toBe(true);
+    expect(content).toContain('command is required');
+  });
+  test('rejects command with null bytes', async () => {
+    const { isError, content } = await runShell({
+      command: 'echo hello\0world',
+      reason: 'test',
+    });
+    expect(isError).toBe(true);
+    expect(content).toContain('null bytes');
+  });
+  test('rejects missing command', async () => {
+    const { isError, content } = await runShell({ reason: 'test' });
+    expect(isError).toBe(true);
+    expect(content).toContain('command is required');
+  });
+  test('executes simple command successfully', async () => {
+    const { isError, content } = await runShell({
+      command: 'echo test_output_12345',
+      reason: 'testing',
+    });
+    expect(isError).toBe(false);
+    expect(content).toContain('test_output_12345');
+  });
+  test('returns error for failed command', async () => {
+    const { isError } = await runShell({ command: 'false', reason: 'testing failure' });
+    expect(isError).toBe(true);
+  });
+  test('default network mode is off', async () => {
+    // With no network_mode supplied the tool should default to 'off'. The
+    // observable effect checked here is that the proxy session is never
+    // started, so clear any calls recorded by earlier tests first.
+    proxyGetOrStartSession.mockClear();
+    const { isError } = await runShell({ command: 'echo network_default', reason: 'testing' });
+    expect(isError).toBe(false);
+    expect(proxyGetOrStartSession).not.toHaveBeenCalled();
+  });
+  test('tool definition includes required schema fields', () => {
+    const { name, input_schema: schema } = shellTool.getDefinition();
+    expect(name).toBe('bash');
+    for (const requiredField of ['command', 'reason']) {
+      expect(schema.required).toContain(requiredField);
+    }
+    for (const property of ['command', 'timeout_seconds', 'network_mode', 'credential_ids']) {
+      expect(schema.properties[property]).toBeDefined();
+    }
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  7. Shell output formatting
+// ═══════════════════════════════════════════════════════════════════════════
+describe('formatShellOutput', () => {
+  // NOTE(review): judging by the cases below, the arguments appear to be
+  // (stdout, stderr, exitCode, timedOut, timeoutSeconds) — confirm against
+  // the implementation in tools/shared/shell-output.
+  let formatShellOutput: any;
+  beforeEach(async () => {
+    ({ formatShellOutput } = await import('../tools/shared/shell-output.js'));
+  });
+  test('successful command with output', () => {
+    const { content, isError, status } = formatShellOutput('hello world', '', 0, false, 120);
+    expect(content).toBe('hello world');
+    expect(isError).toBe(false);
+    expect(status).toBeUndefined();
+  });
+  test('successful command with no output shows completion tag', () => {
+    const { content, isError } = formatShellOutput('', '', 0, false, 120);
+    expect(content).toBe('<command_completed />');
+    expect(isError).toBe(false);
+  });
+  test('failed command with no output shows exit code tag', () => {
+    const { content, isError } = formatShellOutput('', '', 1, false, 120);
+    expect(content).toBe('<command_exit code="1" />');
+    expect(isError).toBe(true);
+  });
+  test('failed command with output includes exit code in status', () => {
+    const { content, isError, status } = formatShellOutput('some output', 'some error', 1, false, 120);
+    for (const fragment of ['some output', 'some error']) {
+      expect(content).toContain(fragment);
+    }
+    expect(isError).toBe(true);
+    expect(status).toContain('<command_exit code="1" />');
+  });
+  test('timed out command includes timeout tag', () => {
+    const timeoutTag = '<command_timeout seconds="30" />';
+    const { content, isError, status } = formatShellOutput('partial output', '', null, true, 30);
+    expect(content).toContain(timeoutTag);
+    expect(isError).toBe(true);
+    expect(status).toContain(timeoutTag);
+  });
+  test('combines stderr with stdout', () => {
+    const { content } = formatShellOutput('stdout', 'stderr', 0, false, 120);
+    for (const fragment of ['stdout', 'stderr']) {
+      expect(content).toContain(fragment);
+    }
+  });
+  test('truncates very long output', () => {
+    // 60,000 characters of output must come back shorter and carry the
+    // truncation marker.
+    const inputLength = 60_000;
+    const { content } = formatShellOutput('x'.repeat(inputLength), '', 0, false, 120);
+    expect(content).toContain('<output_truncated limit="50K" />');
+    expect(content.length).toBeLessThan(inputLength);
+  });
+});
+// ═══════════════════════════════════════════════════════════════════════════
+//  8. Evaluate TypeScript tool — input validation
+// ═══════════════════════════════════════════════════════════════════════════
+describe('EvaluateTypescriptTool input validation', () => {
+  let evalTool: any;
+  // Execution context handed to every evalTool.execute call in this suite.
+  const baseContext = {
+    workingDir: testTmpDir,
+    conversationId: 'test-conv-1',
+    onOutput: () => {},
+  };
+  // Identity-function source used by the mock_input_json cases.
+  const identitySource = 'export default (x: unknown) => x;';
+  // Runs the evaluate tool with the given raw input against baseContext.
+  const runEval = (input: Record<string, unknown>) => evalTool.execute(input, baseContext);
+  beforeEach(async () => {
+    const { evaluateTypescriptTool } = await import('../tools/terminal/evaluate-typescript.js');
+    evalTool = evaluateTypescriptTool;
+  });
+  test('rejects empty code', async () => {
+    const { isError, content } = await runEval({ code: '' });
+    expect(isError).toBe(true);
+    expect(content).toContain('code is required');
+  });
+  test('rejects non-string code', async () => {
+    const { isError, content } = await runEval({ code: 123 });
+    expect(isError).toBe(true);
+    expect(content).toContain('code is required');
+  });
+  test('rejects oversized code', async () => {
+    const oversizedCode = 'x'.repeat(100_001);
+    const { isError, content } = await runEval({ code: oversizedCode });
+    expect(isError).toBe(true);
+    expect(content).toContain('exceeds maximum size');
+  });
+  test('rejects invalid JSON in mock_input_json', async () => {
+    const { isError, content } = await runEval({
+      code: identitySource,
+      mock_input_json: '{invalid',
+    });
+    expect(isError).toBe(true);
+    expect(content).toContain('valid JSON');
+  });
+  test('rejects oversized mock_input_json', async () => {
+    // A quoted string of 100,001 characters, so the payload is syntactically
+    // valid JSON and only its size is at issue.
+    const oversizedJson = '"' + 'x'.repeat(100_001) + '"';
+    const { isError, content } = await runEval({
+      code: identitySource,
+      mock_input_json: oversizedJson,
+    });
+    expect(isError).toBe(true);
+    expect(content).toContain('exceeds maximum size');
+  });
+  test('tool definition has correct name and schema', () => {
+    const { name, input_schema: schema } = evalTool.getDefinition();
+    expect(name).toBe('evaluate_typescript_code');
+    expect(schema.required).toContain('code');
+    for (const property of ['code', 'mock_input_json', 'timeout_seconds', 'filename', 'entrypoint']) {
+      expect(schema.properties[property]).toBeDefined();
+    }
+  });
+});