npm - @vellumai/assistant - Versions diffs - 0.10.1 → 0.10.2-dev.202606241651.2d2b40d - Mend

@vellumai/assistant 0.10.1 → 0.10.2-dev.202606241651.2d2b40d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (367) hide show

package/src/__tests__/subagent-disposal.test.ts CHANGED Viewed

@@ -326,3 +326,68 @@ describe("SubagentManager terminal disposal", () => {
     asInternals(manager).stopSweep();
   });
 });
+// Covers the `usage` payload on the `subagent_status_changed` event that
+// `SubagentManager.abort` sends through the stored parent sender.
+describe("SubagentManager.abort usage", () => {
+  test("emits the conversation's latest usage on abort, not zeros", () => {
+    const manager = new SubagentManager();
+    const sent: ServerMessage[] = [];
+    const sender = (msg: ServerMessage) => sent.push(msg);
+    const subagentId = "sa-abort-usage";
+    // state.usage starts at {0,0,0}; the live (fake) conversation has accrued
+    // usage (makeFakeConversation → {100, 50, 0.005}). Wire `sender` as the
+    // stored parent sender so `setStatus` routes the terminal event through it.
+    injectFakeSubagent(manager, subagentId, makeState(subagentId), sender);
+    const aborted = manager.abort(subagentId, sender, undefined, {
+      suppressNotification: true,
+    });
+    expect(aborted).toBe(true);
+    const statusMsg = sent.find(
+      (m): m is Extract<ServerMessage, { type: "subagent_status_changed" }> =>
+        m.type === "subagent_status_changed",
+    );
+    expect(statusMsg).toBeDefined();
+    expect(statusMsg!.status).toBe("aborted");
+    // The emitted usage is the conversation's accrued total — NOT the {0,0,0}
+    // init — so the client doesn't flush the token panel to zero on stop.
+    expect(statusMsg!.usage).toEqual({
+      inputTokens: 100,
+      outputTokens: 50,
+      estimatedCost: 0.005,
+    });
+    asInternals(manager).stopSweep();
+  });
+  test("keeps the last-known state.usage when the conversation was already released", () => {
+    const manager = new SubagentManager();
+    const sent: ServerMessage[] = [];
+    const sender = (msg: ServerMessage) => sent.push(msg);
+    const subagentId = "sa-abort-no-conv";
+    // No live conversation (released), but state carries a last-known usage —
+    // the abort must surface that, not overwrite it.
+    const state = makeState(subagentId, {
+      usage: { inputTokens: 320, outputTokens: 80, estimatedCost: 0.004 },
+    });
+    injectFakeSubagent(manager, subagentId, state, sender, null);
+    manager.abort(subagentId, sender, undefined, {
+      suppressNotification: true,
+    });
+    const statusMsg = sent.find(
+      (m): m is Extract<ServerMessage, { type: "subagent_status_changed" }> =>
+        m.type === "subagent_status_changed",
+    );
+    // Fail with a clear assertion (not a TypeError on the `!` dereference
+    // below) if no status event was emitted at all.
+    expect(statusMsg).toBeDefined();
+    expect(statusMsg!.usage).toEqual({
+      inputTokens: 320,
+      outputTokens: 80,
+      estimatedCost: 0.004,
+    });
+    asInternals(manager).stopSweep();
+  });
+});

package/src/__tests__/subagent-notify-parent.test.ts CHANGED Viewed

@@ -4,6 +4,8 @@ import { describe, expect, mock, test } from "bun:test";
 // Mock conversation-crud before importing tool executors that depend on it.
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   setConversationOriginChannelIfUnset: () => {},
   updateConversationContextWindow: () => {},
   deleteMessageById: () => {},

package/src/__tests__/subagent-spawn-tool-fork.test.ts CHANGED Viewed

@@ -3,6 +3,8 @@ import { mock } from "bun:test";
 // Mock conversation-crud before importing tool executors that depend on it.
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   setConversationOriginChannelIfUnset: () => {},
   updateConversationContextWindow: () => {},
   deleteMessageById: () => {},

package/src/__tests__/subagent-tools.test.ts CHANGED Viewed

@@ -29,6 +29,8 @@ mock.module("../config/loader.js", () => ({
   }),
 }));
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   setConversationOriginChannelIfUnset: () => {},
   updateConversationContextWindow: () => {},
   deleteMessageById: () => {},

package/src/__tests__/suggestion-routes.test.ts CHANGED Viewed

@@ -50,6 +50,8 @@ const mockGetMessages = mock((_conversationId: string) => [
 ]);
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   getMessages: mockGetMessages,
   getConversation: (_id: string) => null,
   reserveMessage: mock(async () => ({ id: "msg-reserve" })),

package/src/__tests__/title-generate-hook.test.ts CHANGED Viewed

@@ -64,6 +64,8 @@ const mockGetConversation = mock(
     },
 );
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   getConversation: mockGetConversation,
 }));

package/src/__tests__/tool-executor-lifecycle-events.test.ts CHANGED Viewed

@@ -78,6 +78,8 @@ mock.module("../permissions/checker.js", () => ({
 }));
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   createConversation: (title: string) => ({ id: "conversation-1", title }),
   reserveMessage: mock(async () => ({ id: "msg-reserve" })),
 }));

package/src/__tests__/tool-executor.test.ts CHANGED Viewed

@@ -17,6 +17,7 @@ const mockConfig = {
     shellDefaultTimeoutSec: 120,
     shellMaxTimeoutSec: 600,
     permissionTimeoutSec: 300,
+    questionResponseTimeoutSec: 1800,
   },
   sandbox: {
     enabled: false,
@@ -1300,27 +1301,31 @@ describe("ToolExecutionResult includes risk metadata from classifier assessment"
 describe("computePerToolTimeoutMs ask_question budget", () => {
   // Regression guard: ask_question blocks on user input inside execute() via
-  // QuestionPrompter, which waits up to permissionTimeoutSec. The executor's
-  // generic toolExecutionTimeoutSec wrapper must give ask_question a budget
-  // strictly larger than that prompt timeout — otherwise the wrapper fires
-  // first and orphans the still-pending prompt behind the confusing "may
+  // QuestionPrompter, which waits up to questionResponseTimeoutSec. The
+  // executor's generic toolExecutionTimeoutSec wrapper must give ask_question a
+  // budget strictly larger than that prompt timeout — otherwise the wrapper
+  // fires first and orphans the still-pending prompt behind the confusing "may
   // still be running in the background" error. These assertions fail if the
-  // special case is removed and ask_question falls back to the generic budget.
-  test("execution-timeout budget exceeds the prompt's own permissionTimeoutSec", () => {
-    const { permissionTimeoutSec } = mockConfig.timeouts;
+  // special case is removed and ask_question falls back to the generic budget,
+  // or if the executor budget and the prompter timeout drift onto different
+  // config knobs.
+  test("execution-timeout budget exceeds the prompt's own questionResponseTimeoutSec", () => {
+    const { questionResponseTimeoutSec } = mockConfig.timeouts;
     const askQuestionBudgetMs = computePerToolTimeoutMs("ask_question", {});
-    expect(askQuestionBudgetMs).toBeGreaterThan(permissionTimeoutSec * 1000);
-    expect(askQuestionBudgetMs).toBe((permissionTimeoutSec + 5) * 1000);
+    expect(askQuestionBudgetMs).toBeGreaterThan(
+      questionResponseTimeoutSec * 1000,
+    );
+    expect(askQuestionBudgetMs).toBe((questionResponseTimeoutSec + 5) * 1000);
   });
   test("the generic budget that would otherwise apply is shorter than the prompt timeout", () => {
-    const { permissionTimeoutSec } = mockConfig.timeouts;
+    const { questionResponseTimeoutSec } = mockConfig.timeouts;
     const genericBudgetMs = computePerToolTimeoutMs("some_other_tool", {});
     // This is the collision the ask_question special case fixes: the generic
     // execution-timeout budget is shorter than the prompter's own wait, so
     // without the special case the wrapper trips first.
-    expect(genericBudgetMs).toBeLessThan(permissionTimeoutSec * 1000);
+    expect(genericBudgetMs).toBeLessThan(questionResponseTimeoutSec * 1000);
   });
 });

package/src/__tests__/tool-preview-lifecycle.test.ts CHANGED Viewed

@@ -60,6 +60,8 @@ const reserveMessageMock = mock(
 const updateMessageContentMock = mock((_id: string, _content: string) => {});
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   getConversation: () => null,
   getMessageById: () => null,
   updateMessageContent: updateMessageContentMock,

package/src/__tests__/tool-result-metadata-plumbing.test.ts CHANGED Viewed

@@ -40,6 +40,8 @@ mock.module("../config/loader.js", () => ({
 }));
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   addMessage: () => ({ id: "mock-msg-id" }),
   getMessageById: () => null,
   updateMessageContent: () => {},

package/src/__tests__/tool-start-timestamp.test.ts CHANGED Viewed

@@ -43,6 +43,8 @@ let mockedRowContent = "";
 const updates: Array<{ id: string; content: string }> = [];
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   addMessage: () => ({ id: "mock-msg-id" }),
   getMessageById: (id: string) =>
     mockedRowContent ? { id, content: mockedRowContent } : null,

package/src/__tests__/trusted-contact-inline-approval-integration.test.ts CHANGED Viewed

@@ -335,7 +335,7 @@ describe("(b) prompt-path flow: confirmation_request bridges to guardian", () =>
     };
   });
-  test("trusted-contact confirmation_request emits guardian.question and creates delivery records", () => {
+  test("trusted-contact confirmation_request emits guardian.question and creates delivery records", async () => {
     const canonicalRequest = createCanonicalGuardianRequest({
       id: `req-bridge-${Date.now()}`,
       kind: "tool_approval",
@@ -352,7 +352,7 @@ describe("(b) prompt-path flow: confirmation_request bridges to guardian", () =>
     const trustContext = makeTrustedContactTrustContext();
-    const result = bridgeConfirmationRequestToGuardian({
+    const result = await bridgeConfirmationRequestToGuardian({
       canonicalRequest,
       trustContext,
       conversationId: "conv-bridge-1",
@@ -371,7 +371,7 @@ describe("(b) prompt-path flow: confirmation_request bridges to guardian", () =>
     expect(payload.requesterIdentifier).toBe("@requester");
   });
-  test("bridge + tool_grant_request both use guardian.question for unified routing", () => {
+  test("bridge + tool_grant_request both use guardian.question for unified routing", async () => {
     // The confirmation_request bridge and tool_grant_request helper both
     // use 'guardian.question' as the notification signal, ensuring consistent
     // guardian routing regardless of the approval path.
@@ -391,7 +391,7 @@ describe("(b) prompt-path flow: confirmation_request bridges to guardian", () =>
     const trustContext = makeTrustedContactTrustContext();
-    bridgeConfirmationRequestToGuardian({
+    await bridgeConfirmationRequestToGuardian({
       canonicalRequest,
       trustContext,
       conversationId: "conv-unified-1",
@@ -432,7 +432,7 @@ describe("(c) no-binding flow: trusted contact fails fast without guardian bindi
     expect(state.promptWaitingAllowed).toBe(false);
   });
-  test("bridge skips when no guardian binding exists for channel", () => {
+  test("bridge skips when no guardian binding exists for channel", async () => {
     const canonicalRequest = createCanonicalGuardianRequest({
       id: `req-nobinding-${Date.now()}`,
       kind: "tool_approval",
@@ -449,7 +449,7 @@ describe("(c) no-binding flow: trusted contact fails fast without guardian bindi
     const trustContext = makeTrustedContactTrustContext();
-    const result = bridgeConfirmationRequestToGuardian({
+    const result = await bridgeConfirmationRequestToGuardian({
       canonicalRequest,
       trustContext,
       conversationId: "conv-nobinding",
@@ -543,7 +543,7 @@ describe("(d) unknown actor flow: fail-closed with no interactive approval", ()
     expect(resolveRoutingState(withoutRoute).canBeInteractive).toBe(false);
   });
-  test("bridge skips unknown actor sessions entirely", () => {
+  test("bridge skips unknown actor sessions entirely", async () => {
     const canonicalRequest = createCanonicalGuardianRequest({
       id: `req-unknown-${Date.now()}`,
       kind: "tool_approval",
@@ -563,7 +563,7 @@ describe("(d) unknown actor flow: fail-closed with no interactive approval", ()
       trustClass: "unknown",
     };
-    const result = bridgeConfirmationRequestToGuardian({
+    const result = await bridgeConfirmationRequestToGuardian({
       canonicalRequest,
       trustContext,
       conversationId: "conv-unknown",
@@ -965,7 +965,7 @@ describe("cross-milestone integration checks", () => {
     );
   });
-  test("M2+M4: bridge and tool_grant_request target the same guardian identity", () => {
+  test("M2+M4: bridge and tool_grant_request target the same guardian identity", async () => {
     // Both the confirmation_request bridge (M2) and tool grant request escalation (M4)
     // use the guardian binding's guardianExternalUserId to route notifications.
     // Verify this consistency:
@@ -986,7 +986,7 @@ describe("cross-milestone integration checks", () => {
     const trustContext = makeTrustedContactTrustContext();
-    const bridgeResult = bridgeConfirmationRequestToGuardian({
+    const bridgeResult = await bridgeConfirmationRequestToGuardian({
       canonicalRequest,
       trustContext,
       conversationId: "conv-consistency",

package/src/__tests__/twilio-routes.test.ts CHANGED Viewed

@@ -109,6 +109,25 @@ mock.module("../calls/channel-admission-reader.js", () => ({
   getChannelAdmissionPolicy: async () => mockAdmissionPolicy,
 }));
+// Stub for the inbound trust reader consulted by the media-stream preflight.
+// Each stubbed call records what was looked up in `lastInboundVerdictArgs`
+// and resolves to `mockInboundVerdict`, so tests can assert both which caller
+// was queried and that the verdict is threaded into the preflight routeSetup.
+let mockInboundVerdict: unknown = null;
+let lastInboundVerdictArgs: Record<string, unknown> | null = null;
+mock.module("../calls/inbound-trust-reader.js", () => {
+  const getInboundTrustVerdict = async (lookup: Record<string, unknown>) => {
+    lastInboundVerdictArgs = lookup;
+    return mockInboundVerdict;
+  };
+  const getPhoneCallerVerdict = async (
+    otherPartyNumber: string | undefined,
+  ) => {
+    // Falsy numbers (including "") are recorded as `undefined`.
+    const actorExternalId = otherPartyNumber || undefined;
+    lastInboundVerdictArgs = { channelType: "phone", actorExternalId };
+    return mockInboundVerdict;
+  };
+  return { getInboundTrustVerdict, getPhoneCallerVerdict };
+});
 mock.module("../config/env.js", () => ({
   isHttpAuthDisabled: () => true,
   getGatewayInternalBaseUrl: () => "http://gateway.internal:7830",
@@ -470,6 +489,8 @@ describe("twilio webhook routes", () => {
     // Reset admission policy + captured routeSetup context between tests
     mockAdmissionPolicy = null;
     lastRouteSetupCtx = null;
+    mockInboundVerdict = null;
+    lastInboundVerdictArgs = null;
     // Reset routeSetup mock to default normal_call
     mockRouteSetupResult = {
       outcome: { action: "normal_call", isInbound: true },
@@ -1269,6 +1290,81 @@ describe("twilio webhook routes", () => {
       expect(lastRouteSetupCtx?.admissionPolicy).toBe("guardian_only");
     });
+    test("media-stream inbound: fetches the caller's verdict (From) and threads it into the preflight routeSetup", async () => {
+      mockConfigObj.services.stt.provider = "openai-whisper" as any;
+      mockInboundVerdict = {
+        channelType: "phone",
+        actorExternalId: "+14155551234",
+        contactId: "contact-1",
+        channelId: "channel-1",
+        status: "verified",
+        policy: "allow",
+        resolutionFailed: false,
+      };
+      const req = makeInboundVoiceRequest({
+        CallSid: "CA_ms_verdict_inbound_1",
+        From: "+14155551234",
+        To: "+15550001111",
+      });
+      const res = await handleVoiceWebhook(req);
+      expect(res.status).toBe(200);
+      // Inbound: verdict fetched for the caller (From) on the phone channel.
+      expect(lastInboundVerdictArgs).toEqual({
+        channelType: "phone",
+        actorExternalId: "+14155551234",
+      });
+      // Verdict threaded into the preflight routeSetup.
+      expect(lastRouteSetupCtx?.verdict).toEqual(mockInboundVerdict);
+    });
+    test("media-stream inbound: a blocked/denied member verdict is classified deny in the preflight", async () => {
+      mockConfigObj.services.stt.provider = "openai-whisper" as any;
+      mockInboundVerdict = {
+        channelType: "phone",
+        actorExternalId: "+14155551234",
+        contactId: "contact-1",
+        channelId: "channel-1",
+        status: "blocked",
+        policy: "deny",
+        resolutionFailed: false,
+      };
+      // The real router returns `deny` for a blocked member verdict; the mock
+      // reflects that outcome. deny is supported on media-stream, so the
+      // preflight still emits Stream TwiML (denial spoken at stream start).
+      mockRouteSetupResult = {
+        outcome: {
+          action: "deny",
+          message:
+            "This number is not authorized to reach the assistant right now.",
+          logReason: "Inbound voice ACL: member blocked",
+        },
+        resolved: {
+          assistantId: "self",
+          isInbound: true,
+          otherPartyNumber: "+14155551234",
+          actorTrust: { trustClass: "unknown", memberRecord: null },
+        },
+      };
+      const req = makeInboundVoiceRequest({
+        CallSid: "CA_ms_verdict_deny_1",
+        From: "+14155551234",
+        To: "+15550001111",
+      });
+      const res = await handleVoiceWebhook(req);
+      expect(res.status).toBe(200);
+      // Verdict was threaded into routeSetup, which denied the caller.
+      expect(lastRouteSetupCtx?.verdict).toEqual(mockInboundVerdict);
+      const twiml = await res.text();
+      expect(twiml).toContain("<Stream");
+      expect(twiml).not.toContain("<ConversationRelay");
+    });
     test("media-stream: floor-denied caller classified deny still produces Stream TwiML (deny handled at stream level)", async () => {
       mockConfigObj.services.stt.provider = "openai-whisper" as any;
       mockAdmissionPolicy = "guardian_only";

package/src/__tests__/verification-control-plane-policy.test.ts CHANGED Viewed

@@ -72,6 +72,8 @@ mock.module("../permissions/checker.js", () => ({
 }));
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   createConversation: (title: string) => ({ id: "conversation-1", title }),
   reserveMessage: mock(async () => ({ id: "msg-reserve" })),
 }));

package/src/__tests__/web-search-backend-failure.test.ts CHANGED Viewed

@@ -57,6 +57,8 @@ mock.module("../config/loader.js", () => ({
 }));
 mock.module("../memory/conversation-crud.js", () => ({
+    setConversationProcessingStartedAt: () => {},
+    isConversationProcessing: () => false,
   addMessage: () => ({ id: "mock-msg-id" }),
   getMessageById: () => null,
   updateMessageContent: () => {},

package/src/__tests__/workspace-tool-loader.test.ts CHANGED Viewed

@@ -26,7 +26,7 @@
  *   and skipped (no silent provider-safe rewrite — operator must rename).
  * - Multiple workspace tools register in a single batch.
  */
-import { mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { mkdirSync, rmSync, utimesSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterAll, beforeEach, describe, expect, test } from "bun:test";
@@ -42,7 +42,10 @@ import {
   registerTool,
 } from "../tools/registry.js";
 import type { Tool, ToolContext, ToolExecutionResult } from "../tools/types.js";
-import { loadWorkspaceTools } from "../tools/workspace-tools/loader.js";
+import {
+  __resetWorkspaceToolCacheForTesting,
+  loadWorkspaceTools,
+} from "../tools/workspace-tools/loader.js";
 // Per-test counter so each writeTool() call lands in a unique tempdir,
 // defeating bun's per-URL ESM cache between tests. Without this, a
@@ -87,6 +90,23 @@ function writeRemovedSentinel(name: string): void {
   writeFileSync(join(toolsDir, `${name}.removed`), "");
 }
+/**
+ * Remove `<name><ext>` from the current workspace's `tools` directory.
+ * `ext` defaults to `.ts`. The removal passes `force: true` to `rmSync`.
+ */
+function removeToolFile(name: string, ext = ".ts"): void {
+  const target = join(currentWorkspaceDir, "tools", `${name}${ext}`);
+  rmSync(target, { force: true });
+}
+/**
+ * Replace the contents of an existing tool file, then set its access and
+ * modification times 5 seconds ahead of now. The forward-dated mtime is there
+ * so the reconcile's mtime gate re-imports the file even if the rewrite
+ * happens within the same millisecond as the original write.
+ */
+function rewriteTool(name: string, body: string, ext = ".ts"): void {
+  const aheadMs = 5000;
+  const toolPath = join(currentWorkspaceDir, "tools", `${name}${ext}`);
+  writeFileSync(toolPath, body);
+  const bumped = new Date(Date.now() + aheadMs);
+  utimesSync(toolPath, bumped, bumped);
+}
 function makeFakeCoreTool(name: string): Tool {
   return {
     name,
@@ -94,6 +114,9 @@ function makeFakeCoreTool(name: string): Tool {
     category: "test",
     defaultRiskLevel: RiskLevel.Low,
     executionTarget: "sandbox",
+    // Match the finalized shape the registry stores (defaults filled), so
+    // `getCoreToolOverride(name)` toEqual comparisons hold after registration.
+    exclusive: false,
     input_schema: { type: "object", properties: {}, required: [] },
     async execute(
       _input: Record<string, unknown>,
@@ -133,6 +156,7 @@ export default {
 describe("workspace tool loader", () => {
   beforeEach(() => {
     __clearRegistryForTesting();
+    __resetWorkspaceToolCacheForTesting();
     freshWorkspace();
   });
@@ -316,4 +340,173 @@ export default 42;
     const names = getWorkspaceToolNames().sort();
     expect(names).toEqual(["alpha", "beta", "gamma"]);
   });
+  // ── Reconcile-on-read behavior ─────────────────────────────────────────
+  //
+  // loadWorkspaceTools() is idempotent and re-derives registry state from
+  // disk on every call. These cases cover the deltas a repeat call applies,
+  // which is what replaces the old filesystem watcher.
+  test("repeat call with no disk changes is a no-op (does not throw or duplicate)", async () => {
+    writeTool("stable_tool", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    // A second reconcile must not throw on the already-registered name —
+    // the mtime cache recognizes the unchanged file and skips re-import.
+    await loadWorkspaceTools();
+    expect(getTool("stable_tool")).toBeDefined();
+    expect(getWorkspaceToolNames()).toEqual(["stable_tool"]);
+  });
+  test("a file added after the first reconcile registers on the next", async () => {
+    writeTool("first", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getWorkspaceToolNames()).toEqual(["first"]);
+    writeTool("second", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getWorkspaceToolNames().sort()).toEqual(["first", "second"]);
+  });
+  test("a changed file is re-imported on the next reconcile", async () => {
+    writeTool("mutable", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getTool("mutable")?.description).toBe("from workspace");
+    rewriteTool(
+      "mutable",
+      `
+export default {
+  description: "edited in place",
+  defaultRiskLevel: "low",
+  input_schema: { type: "object", properties: {}, required: [] },
+  async execute() {
+    return { content: "edited", isError: false };
+  },
+};
+`,
+    );
+    await loadWorkspaceTools();
+    expect(getTool("mutable")?.description).toBe("edited in place");
+  });
+  test("a deleted net-new tool file is unregistered on the next reconcile", async () => {
+    writeTool("ephemeral", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getTool("ephemeral")).toBeDefined();
+    removeToolFile("ephemeral");
+    await loadWorkspaceTools();
+    expect(getTool("ephemeral")).toBeUndefined();
+    expect(getWorkspaceToolNames()).toEqual([]);
+  });
+  test("deleting an override file restores the stashed core tool", async () => {
+    const core = makeFakeCoreTool("restore_me");
+    registerTool(core);
+    writeTool("restore_me", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getToolOwner("restore_me")?.kind).toBe("workspace");
+    removeToolFile("restore_me");
+    await loadWorkspaceTools();
+    expect(getToolOwner("restore_me")).toBeUndefined();
+    expect(getTool("restore_me")).toEqual(core);
+    expect(getCoreToolOverride("restore_me")).toBeUndefined();
+  });
+  test("deleting a .removed sentinel restores the stripped core tool", async () => {
+    const core = makeFakeCoreTool("strip_then_restore");
+    registerTool(core);
+    writeRemovedSentinel("strip_then_restore");
+    await loadWorkspaceTools();
+    expect(getTool("strip_then_restore")).toBeUndefined();
+    expect(getStrippedCoreToolNames()).toContain("strip_then_restore");
+    removeToolFile("strip_then_restore", ".removed");
+    await loadWorkspaceTools();
+    expect(getTool("strip_then_restore")).toEqual(core);
+    expect(getStrippedCoreToolNames()).not.toContain("strip_then_restore");
+  });
+  test("the registered name is the filename stem, ignoring the file's own name field", async () => {
+    // The default export sets a different `name` — the loader must pin the
+    // registered name to the stem ("stem_wins") so the mtime cache and the
+    // unregister-on-delete path stay keyed by the same name.
+    writeTool(
+      "stem_wins",
+      `
+export default {
+  name: "different_name",
+  description: "name field should be ignored",
+  defaultRiskLevel: "low",
+  input_schema: { type: "object", properties: {}, required: [] },
+  async execute() {
+    return { content: "ok", isError: false };
+  },
+};
+`,
+    );
+    await loadWorkspaceTools();
+    expect(getTool("stem_wins")).toBeDefined();
+    expect(getTool("different_name")).toBeUndefined();
+    expect(getWorkspaceToolNames()).toEqual(["stem_wins"]);
+    // Deleting the file unregisters by stem — no leaked "different_name".
+    removeToolFile("stem_wins");
+    await loadWorkspaceTools();
+    expect(getTool("stem_wins")).toBeUndefined();
+    expect(getTool("different_name")).toBeUndefined();
+  });
+  test("per-tool isolation on reconcile: a bad file does not drop a valid edited tool", async () => {
+    writeTool("good_edit", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getTool("good_edit")?.description).toBe("from workspace");
+    // Add a file that throws at import, and edit the good tool, in the same
+    // reconcile. The broken file must not prevent the edited tool from
+    // re-registering.
+    writeTool("broken_now", `throw new Error("boom at import");`);
+    rewriteTool(
+      "good_edit",
+      `
+export default {
+  description: "edited and still here",
+  defaultRiskLevel: "low",
+  input_schema: { type: "object", properties: {}, required: [] },
+  async execute() {
+    return { content: "ok", isError: false };
+  },
+};
+`,
+    );
+    await loadWorkspaceTools();
+    expect(getTool("broken_now")).toBeUndefined();
+    expect(getTool("good_edit")?.description).toBe("edited and still here");
+  });
+  test("an edit that breaks an existing tool keeps the prior registration", async () => {
+    writeTool("was_good", WELL_FORMED_BODY);
+    await loadWorkspaceTools();
+    expect(getTool("was_good")?.description).toBe("from workspace");
+    // Rewrite the file into something that throws at import. The prior,
+    // working registration must stay in place rather than being torn down.
+    rewriteTool("was_good", `throw new Error("now broken");`);
+    await loadWorkspaceTools();
+    expect(getTool("was_good")?.description).toBe("from workspace");
+  });
 });