npm - @vellumai/assistant - Versions diffs - 0.10.1-dev.202606240206.7c2bca6 → 0.10.1-dev.202606240317.ea25efe - Mend

@vellumai/assistant 0.10.1-dev.202606240206.7c2bca6 → 0.10.1-dev.202606240317.ea25efe

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/package.json +1 -1
package/src/__tests__/disk-pressure-guard.test.ts +41 -0
package/src/__tests__/provider-usage-tracking.test.ts +39 -0
package/src/__tests__/registry.test.ts +3 -0
package/src/__tests__/workspace-tool-loader.test.ts +3 -0
package/src/agent/loop-exclusive-tool.test.ts +150 -0
package/src/agent/loop.ts +56 -0
package/src/daemon/conversation.ts +4 -1
package/src/daemon/disk-pressure-guard.ts +12 -2
package/src/plugins/defaults/advisor/__tests__/consult.test.ts +44 -0
package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts +106 -0
package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts +60 -0
package/src/plugins/defaults/advisor/consult.ts +48 -6
package/src/plugins/defaults/advisor/context-pack.ts +288 -0
package/src/plugins/defaults/advisor/steering.ts +14 -2
package/src/plugins/defaults/advisor/tools/advisor.ts +28 -5
package/src/providers/anthropic/client.ts +5 -0
package/src/providers/call-site-routing.ts +4 -0
package/src/providers/openai/responses-provider.ts +5 -0
package/src/providers/openrouter/client.ts +5 -0
package/src/providers/provider-send-message.ts +4 -0
package/src/providers/ratelimit.ts +4 -0
package/src/providers/retry.ts +4 -0
package/src/providers/types.ts +9 -0
package/src/providers/usage-tracking.ts +4 -0
package/src/tools/tool-defaults.ts +2 -0
package/src/tools/types.ts +17 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.10.1-dev.202606240206.7c2bca6",
+  "version": "0.10.1-dev.202606240317.ea25efe",
   "license": "MIT",
   "type": "module",
   "exports": {

package/src/__tests__/disk-pressure-guard.test.ts CHANGED Viewed

@@ -38,6 +38,7 @@ mock.module("../runtime/assistant-event-hub.js", () => ({
 const {
   DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT,
+  DISK_PRESSURE_MIN_FREE_FLOOR_MB,
   DISK_PRESSURE_OVERRIDE_CONFIRMATION,
   DISK_PRESSURE_THRESHOLD_PERCENT,
   DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT,
@@ -342,4 +343,44 @@ describe("disk pressure guard", () => {
     setDiskUsage(DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT - 1);
     expect(evaluateDiskPressureNow().state).toBe("ok");
   });
+  test("stays ok at a critical usage percentage while ample free space remains", () => {
+    // 99% used of a large volume still leaves gigabytes free — above the floor.
+    const totalMb = 1_000_000;
+    const usedMb = Math.round(totalMb * 0.99); // freeMb ~= 10_000 MiB
+    setDiskUsage(usedMb, totalMb);
+    expect(diskSample!.freeMb).toBeGreaterThanOrEqual(
+      DISK_PRESSURE_MIN_FREE_FLOOR_MB,
+    );
+    const status = evaluateDiskPressureNow();
+    expect(status.state).toBe("ok");
+    expect(status.locked).toBe(false);
+    expect(status.effectivelyLocked).toBe(false);
+  });
+  test("stays ok at a warning usage percentage while ample free space remains", () => {
+    const totalMb = 1_000_000;
+    const usedMb = Math.round(totalMb * 0.85); // 85% used, freeMb ~= 150_000 MiB
+    setDiskUsage(usedMb, totalMb);
+    const status = evaluateDiskPressureNow();
+    expect(status.state).toBe("ok");
+  });
+  test("locks at a critical usage percentage once free space drops below the floor", () => {
+    // High percentage AND free space one unit under the floor, so the free-space floor cannot hold the state at "ok".
+    const totalMb = 100_000;
+    const freeMb = DISK_PRESSURE_MIN_FREE_FLOOR_MB - 1;
+    setDiskUsage(totalMb - freeMb, totalMb);
+    expect(diskSample!.freeMb).toBeLessThan(DISK_PRESSURE_MIN_FREE_FLOOR_MB);
+    const status = evaluateDiskPressureNow();
+    expect(status.state).toBe("critical");
+    expect(status.locked).toBe(true);
+    expect(status.effectivelyLocked).toBe(true);
+  });
 });

package/src/__tests__/provider-usage-tracking.test.ts CHANGED Viewed

@@ -200,3 +200,42 @@ describe("UsageTrackingProvider", () => {
     });
   });
 });
+describe("native web-search capability survives the wrapper chain", () => {
+  function leaf(supports: boolean | undefined): Provider { // Stub provider used as the innermost layer in these tests.
+    return {
+      name: "anthropic",
+      ...(supports === undefined ? {} : { supportsNativeWebSearch: supports }), // undefined => the property is omitted, not set
+      async sendMessage(): Promise<ProviderResponse> { // Canned response: one empty text block, zero usage.
+        return {
+          content: [{ type: "text", text: "" }],
+          model: "m",
+          usage: { inputTokens: 0, outputTokens: 0 },
+          stopReason: "end_turn",
+        };
+      },
+    };
+  }
+  test("UsageTrackingProvider forwards supportsNativeWebSearch", () => {
+    expect(new UsageTrackingProvider(leaf(true)).supportsNativeWebSearch).toBe(
+      true,
+    );
+    expect(new UsageTrackingProvider(leaf(false)).supportsNativeWebSearch).toBe(
+      false,
+    );
+    expect(
+      new UsageTrackingProvider(leaf(undefined)).supportsNativeWebSearch,
+    ).toBeUndefined();
+  });
+  test("CallSiteConfiguredProvider forwards it through a nested wrapper", () => {
+    // The exact chain getConfiguredProvider returns: CallSiteConfigured →
+    // UsageTracking → leaf. The advisor consult reads the flag off the top.
+    const wrapped = new CallSiteConfiguredProvider(
+      new UsageTrackingProvider(leaf(true)),
+      "advisor",
+    );
+    expect(wrapped.supportsNativeWebSearch).toBe(true);
+  });
+});

package/src/__tests__/registry.test.ts CHANGED Viewed

@@ -36,6 +36,9 @@ function makeFakeTool(name: string): Tool {
     category: "test",
     defaultRiskLevel: RiskLevel.Low,
     executionTarget: "sandbox",
+    // Match the finalized shape the registry stores, so identity comparisons
+    // (`getTool(name)` toEqual coreTool) hold after registration fills defaults.
+    exclusive: false,
     input_schema: { type: "object", properties: {}, required: [] },
     async execute(
       _input: Record<string, unknown>,

package/src/__tests__/workspace-tool-loader.test.ts CHANGED Viewed

@@ -94,6 +94,9 @@ function makeFakeCoreTool(name: string): Tool {
     category: "test",
     defaultRiskLevel: RiskLevel.Low,
     executionTarget: "sandbox",
+    // Match the finalized shape the registry stores (defaults filled), so
+    // `getCoreToolOverride(name)` toEqual comparisons hold after registration.
+    exclusive: false,
     input_schema: { type: "object", properties: {}, required: [] },
     async execute(
       _input: Record<string, unknown>,

package/src/agent/loop-exclusive-tool.test.ts ADDED Viewed

@@ -0,0 +1,150 @@
+/**
+ * Verifies the agent loop's exclusive-tool dispatch: when a tool the loop is
+ * told is exclusive (e.g. the advisor) appears in a multi-call turn, only that
+ * tool runs and the siblings are deferred un-run with a benign result — so the
+ * model incorporates the exclusive tool's output before acting on anything
+ * else. Drives the REAL loop, mocking only the provider boundary.
+ */
+import { describe, expect, test } from "bun:test";
+import { createMockProvider } from "../__tests__/helpers/mock-provider.js";
+import type { ContentBlock, ProviderResponse } from "../providers/types.js";
+import { AgentLoop } from "./loop.js";
+const endTurn = (text: string): ProviderResponse => ({ // Canned final response: a single text block, stopReason "end_turn".
+  content: [{ type: "text", text }],
+  model: "mock-model",
+  usage: { inputTokens: 1, outputTokens: 1 },
+  stopReason: "end_turn",
+});
+const toolUseTurn = (
+  blocks: Array<{ id: string; name: string }>,
+): ProviderResponse => ({ // Canned response that requests one tool call per entry in `blocks`.
+  content: [
+    { type: "text", text: "working" }, // a text block precedes the tool_use blocks
+    ...blocks.map((b) => ({
+      type: "tool_use" as const,
+      id: b.id,
+      name: b.name,
+      input: {}, // every call is issued with empty input
+    })),
+  ],
+  model: "mock-model",
+  usage: { inputTokens: 1, outputTokens: 1 },
+  stopReason: "tool_use",
+});
+function toolResults(history: { content: ContentBlock[] }[]) { // Collects every tool_result block from all messages, in history order.
+  return history
+    .flatMap((m) => m.content)
+    .filter(
+      (b): b is Extract<ContentBlock, { type: "tool_result" }> => // type predicate narrows the result array
+        b.type === "tool_result",
+    );
+}
+const baseRun = { // Run options shared by both tests; each test spreads this and adds its own `messages`.
+  requestId: "req-excl",
+  onEvent: () => {}, // events are ignored; assertions read the returned history instead
+  callSite: "mainAgent" as const,
+  trust: { sourceChannel: "vellum" as const, trustClass: "unknown" as const },
+};
+describe("AgentLoop — exclusive tool deferral", () => {
+  test("runs the exclusive tool alone and defers sibling calls un-run", async () => {
+    const { provider } = createMockProvider([
+      toolUseTurn([
+        { id: "call-advisor", name: "advisor" },
+        { id: "call-edit", name: "write_file" },
+      ]),
+      endTurn("done"),
+    ]);
+    const executed: string[] = [];
+    const loop = new AgentLoop({
+      provider,
+      systemPrompt: "sys",
+      conversationId: "excl-1",
+      tools: [
+        { name: "advisor", description: "", input_schema: { type: "object" } },
+        {
+          name: "write_file",
+          description: "",
+          input_schema: { type: "object" },
+        },
+      ],
+      toolExecutor: async (name) => {
+        executed.push(name);
+        return { content: `ran ${name}`, isError: false };
+      },
+      isExclusiveTool: (name) => name === "advisor",
+    });
+    const { history } = await loop.run({
+      ...baseRun,
+      messages: [{ role: "user", content: [{ type: "text", text: "do it" }] }],
+    });
+    // Only the exclusive tool actually executed.
+    expect(executed).toEqual(["advisor"]);
+    const results = toolResults(history);
+    const advisorResult = results.find(
+      (b) => b.tool_use_id === "call-advisor",
+    )!;
+    const editResult = results.find((b) => b.tool_use_id === "call-edit")!;
+    // The advisor ran; the sibling came back un-run (not an error) so the model
+    // can re-issue it after reading the guidance.
+    expect(advisorResult.content).toBe("ran advisor");
+    expect(editResult.content).toContain("not run");
+    expect(editResult.content).toContain("advisor");
+    expect(editResult.is_error).toBe(false);
+  });
+  test("runs sibling tools normally when no exclusive tool is present", async () => {
+    const { provider } = createMockProvider([
+      toolUseTurn([
+        { id: "call-read", name: "read_file" },
+        { id: "call-write", name: "write_file" },
+      ]),
+      endTurn("done"),
+    ]);
+    const executed: string[] = [];
+    const loop = new AgentLoop({
+      provider,
+      systemPrompt: "sys",
+      conversationId: "excl-2",
+      tools: [
+        {
+          name: "read_file",
+          description: "",
+          input_schema: { type: "object" },
+        },
+        {
+          name: "write_file",
+          description: "",
+          input_schema: { type: "object" },
+        },
+      ],
+      toolExecutor: async (name) => {
+        executed.push(name);
+        return { content: `ran ${name}`, isError: false };
+      },
+      isExclusiveTool: (name) => name === "advisor", // neither requested tool matches, so nothing is exclusive this turn
+    });
+    const { history } = await loop.run({
+      ...baseRun,
+      messages: [{ role: "user", content: [{ type: "text", text: "do it" }] }],
+    });
+    // Both non-exclusive tools ran, in either order (sort() reorders `executed` in place); nothing was deferred.
+    expect(executed.sort()).toEqual(["read_file", "write_file"]);
+    for (const result of toolResults(history)) {
+      expect(result.content).not.toContain("not run");
+    }
+  });
+});

package/src/agent/loop.ts CHANGED Viewed

@@ -625,6 +625,20 @@ export type LoopToolExecutor = (
   activityMetadata?: ToolActivityMetadata;
 }>;
+/**
+ * Builds the benign result text for a sibling tool call that was deferred
+ * because an exclusive tool ran in the same turn; it reads as "not run yet".
+ * @param exclusiveToolName Tool that ran alone; shown in backticks in the text.
+ * @returns Parenthesised note telling the model to re-issue the call if needed.
+ */
+function deferredForExclusiveMessage(exclusiveToolName: string): string {
+  return (
+    `(not run: \`${exclusiveToolName}\` was called this turn and runs first, on its own, ` +
+    `so the rest of your tool calls were held back. Read its output, then call this tool ` +
+    `again if it is still the right next step.)`
+  );
+}
 export interface AgentLoopConstructorOptions {
   /** LLM provider the loop issues every call through. */
   provider: Provider;
@@ -634,6 +648,14 @@ export interface AgentLoopConstructorOptions {
   tools?: ToolDefinition[];
   toolExecutor?: LoopToolExecutor;
   resolveTools?: (history: Message[]) => ToolDefinition[];
+  /**
+   * Decide whether a tool runs exclusively in its turn (see
+   * {@link ToolDefinition.exclusive}). When it returns true for a tool present
+   * in a multi-call turn, the loop runs only that tool and defers the siblings
+   * un-run. Injected by the conversation wiring, which can read the tool
+   * registry; lightweight loops that omit it never defer.
+   */
+  isExclusiveTool?: (toolName: string) => boolean;
   /**
    * Conversation this loop drives. Scopes the loop-held compaction circuit
    * breaker and is the source of truth the loop's pipeline contexts and
@@ -659,6 +681,7 @@ export class AgentLoop {
   private tools: ToolDefinition[];
   private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
   private toolExecutor: LoopToolExecutor | null;
+  private isExclusiveTool: ((toolName: string) => boolean) | null;
   /**
    * Conversation this loop drives. Source of truth for the `conversationId`
@@ -688,6 +711,7 @@ export class AgentLoop {
       tools,
       toolExecutor,
       resolveTools,
+      isExclusiveTool,
       conversationId,
       resolveConversationDir,
     } = options;
@@ -697,6 +721,7 @@ export class AgentLoop {
     this.tools = tools ?? [];
     this.resolveTools = resolveTools ?? null;
     this.toolExecutor = toolExecutor ?? null;
+    this.isExclusiveTool = isExclusiveTool ?? null;
     this.conversationId = conversationId;
     this.resolveConversationDir = resolveConversationDir ?? null;
     this.compactionCircuit = new CompactionCircuit(this.conversationId);
@@ -1883,8 +1908,39 @@ export class AgentLoop {
           "Tool execution start",
         );
+        // When an exclusive tool (e.g. the advisor) is among this turn's calls,
+        // it must run alone: the model should incorporate its output before
+        // acting on anything else. Run only the first exclusive call and defer
+        // the siblings with a benign, un-run result so the model re-issues them
+        // next turn if still needed. Every tool_use still gets a matching
+        // tool_result, so history stays well-formed.
+        const exclusiveBlock = this.isExclusiveTool
+          ? toolUseBlocks.find((block) => this.isExclusiveTool!(block.name))
+          : undefined;
+        const deferSiblings =
+          exclusiveBlock !== undefined && toolUseBlocks.length > 1;
+        if (deferSiblings) {
+          rlog.info(
+            {
+              turn: toolUseTurns,
+              exclusiveTool: exclusiveBlock!.name,
+              deferred: toolUseBlocks
+                .filter((block) => block !== exclusiveBlock)
+                .map((block) => block.name),
+            },
+            "Exclusive tool present — running it alone and deferring sibling tool calls this turn",
+          );
+        }
         const toolExecutionPromise = Promise.all(
           toolUseBlocks.map(async (toolUse) => {
+            if (deferSiblings && toolUse !== exclusiveBlock) {
+              const result: Awaited<ReturnType<LoopToolExecutor>> = {
+                content: deferredForExclusiveMessage(exclusiveBlock!.name),
+                isError: false,
+              };
+              return { toolUse, result };
+            }
             const result = await this.toolExecutor!(
               toolUse.name,
               toolUse.input,

package/src/daemon/conversation.ts CHANGED Viewed

@@ -91,7 +91,7 @@ import {
   isActivationMomentParam,
 } from "../telemetry/activation-funnel.js";
 import { ToolExecutor } from "../tools/executor.js";
-import { getAllToolDefinitions } from "../tools/registry.js";
+import { getAllToolDefinitions, getTool } from "../tools/registry.js";
 import type { ToolLifecycleEvent } from "../tools/types.js";
 import type { OnboardingContext } from "../types/onboarding-context.js";
 import type { AbortReason } from "../util/abort-reasons.js";
@@ -702,6 +702,9 @@ export class Conversation {
       tools: toolDefs.length > 0 ? toolDefs : undefined,
       toolExecutor: toolDefs.length > 0 ? toolExecutor : undefined,
       resolveTools,
+      // A tool the registry marks exclusive (e.g. `advisor`) runs alone in its
+      // turn; the loop defers any sibling calls until the next turn.
+      isExclusiveTool: (name) => getTool(name)?.exclusive === true,
       resolveConversationDir: () => {
         const conv = getConversation(this.conversationId);
         if (!conv) return null;

package/src/daemon/disk-pressure-guard.ts CHANGED Viewed

@@ -24,6 +24,12 @@ export const DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT = 90;
 // clears the warning state, which discards the banner's (state-scoped) dismissal
 // so it re-appears the moment usage ticks back up.
 export const DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT = 77;
+// Absolute free-space floor (MiB). Regardless of usage percentage, never enter
+// the warning or critical state while at least this much space remains free. A
+// high usage percentage on a large disk can still leave many gigabytes
+// available, where locking is pointless. Small volumes (where a high percentage
+// genuinely means near-full) drop below the floor and remain protected.
+export const DISK_PRESSURE_MIN_FREE_FLOOR_MB = 2048;
 export const DISK_PRESSURE_CHECK_INTERVAL_MS = 60_000;
 export const DISK_PRESSURE_OVERRIDE_CONFIRMATION = "I understand the risks";
 export const DISK_PRESSURE_BLOCKED_CAPABILITIES = [
@@ -219,7 +225,10 @@ export function evaluateDiskPressureNow(): DiskPressureStatus {
   const criticalThreshold = state.status.locked
     ? DISK_PRESSURE_CLEAR_THRESHOLD_PERCENT
     : DISK_PRESSURE_THRESHOLD_PERCENT;
-  const isCritical = usagePercent >= criticalThreshold;
+  // Absolute free-space floor overrides the percentage thresholds: while ample
+  // space remains free, report "ok" no matter how full the volume is by percent.
+  const hasAmpleFreeSpace = usageInfo.freeMb >= DISK_PRESSURE_MIN_FREE_FLOOR_MB;
+  const isCritical = !hasAmpleFreeSpace && usagePercent >= criticalThreshold;
   // Mirror the critical deadband for the warning band: once in an active
   // pressure state (warning or critical), hold warning until usage clears the
   // lower warning-clear threshold. Treating "critical" as active here matters
@@ -235,7 +244,8 @@ export function evaluateDiskPressureNow(): DiskPressureStatus {
   const warningThreshold = inActivePressureState
     ? DISK_PRESSURE_WARNING_CLEAR_THRESHOLD_PERCENT
     : DISK_PRESSURE_WARNING_THRESHOLD_PERCENT;
-  const isWarning = !isCritical && usagePercent >= warningThreshold;
+  const isWarning =
+    !hasAmpleFreeSpace && !isCritical && usagePercent >= warningThreshold;
   const lastCheckedAt = new Date().toISOString();
   if (!isCritical && !isWarning) {

package/src/plugins/defaults/advisor/__tests__/consult.test.ts CHANGED Viewed

@@ -8,10 +8,14 @@ let sendMessageArgs: Record<string, unknown> | null = null;
 let responseText = "Use a channel-based worker pool; drain on shutdown.";
 let sendMessageError: Error | null = null;
 let providerResolves = true;
+let providerSupportsWeb = false;
 let streamDeltas: string[] = [];
 const fakeProvider = {
   name: "mock-advisor-provider",
+  get supportsNativeWebSearch() {
+    return providerSupportsWeb;
+  },
   async sendMessage(messages: unknown, options: unknown) {
     sendMessageArgs = { messages, options } as Record<string, unknown>;
     if (sendMessageError) throw sendMessageError;
@@ -36,6 +40,14 @@ mock.module("../../../../providers/provider-send-message.js", () => ({
   getConfiguredProvider: async () => (providerResolves ? fakeProvider : null),
 }));
+// Keep the tool tests focused on the consult wiring: stub the context pack so
+// they don't reach into the registry / workspace / memory sources (those have
+// their own coverage). The consult itself never imports this module.
+mock.module("../context-pack.js", () => ({
+  buildAdvisorContext: async () => null,
+  deriveRecallQuery: () => null,
+}));
 const { consultAdvisor } = await import("../consult.js");
 const advisorTool = (await import("../tools/advisor.js")).default;
 const { recordSystemPrompt, recordMessages, resetAdvisorStateForTests } =
@@ -56,6 +68,7 @@ beforeEach(() => {
   responseText = "Use a channel-based worker pool; drain on shutdown.";
   sendMessageError = null;
   providerResolves = true;
+  providerSupportsWeb = false;
   streamDeltas = [];
   resetAdvisorStateForTests();
 });
@@ -108,6 +121,37 @@ describe("consultAdvisor", () => {
     expect(options.systemPrompt).toContain("You are a coding agent.");
   });
+  test("stays tool-less when the provider has no native web search", async () => {
+    providerSupportsWeb = false;
+    await consultAdvisor({ systemPrompt: null, messages: [userMsg("hi")] });
+    const options = sendMessageArgs?.options as { tools?: unknown };
+    expect(options.tools).toBeUndefined();
+    expect(optionConfig().tool_choice).toEqual({ type: "none" });
+  });
+  test("enables native web search when the provider supports it", async () => {
+    providerSupportsWeb = true;
+    await consultAdvisor({ systemPrompt: null, messages: [userMsg("hi")] });
+    const options = sendMessageArgs?.options as {
+      tools?: Array<{ name: string }>;
+    };
+    expect(options.tools?.map((t) => t.name)).toEqual(["web_search"]);
+    // tool_choice must not be `none`, or the provider suppresses its server tool.
+    expect(optionConfig().tool_choice).toEqual({ type: "auto" });
+  });
+  test("embeds the runtime context in the advisor system prompt", async () => {
+    await consultAdvisor({
+      systemPrompt: "You are a coding agent.",
+      messages: [userMsg("hi")],
+      runtimeContext: "## Available tools\n- bash — run commands",
+    });
+    const options = sendMessageArgs?.options as { systemPrompt: string };
+    expect(options.systemPrompt).toContain("<agent_runtime_context>");
+    expect(options.systemPrompt).toContain("- bash — run commands");
+  });
   test("soft-fails when no provider is configured", async () => {
     providerResolves = false;
     const advice = await consultAdvisor({

package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * Personal-memory gating for the advisor context pack: NOW.md and PKB must only
+ * reach the advisor when the turn's trust admits personal memory (and, for
+ * NOW.md, when the scratchpad-injection toggle is on) — the same policy the
+ * runtime memory injectors apply. Without it, a low-risk advisor consult on a
+ * remote/trusted-contact turn could forward private content the main agent
+ * would never receive.
+ *
+ * Mocks are isolated to this file (the test runner runs each file in its own
+ * process), so the broad module stubs here don't leak into other suites.
+ */
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+let personalAllowed = false;
+let scratchpadEnabled = true;
+let gateArg: unknown = null;
+mock.module("../../../../daemon/trust-context.js", () => ({
+  isPersonalMemoryAllowed: (trust: unknown) => {
+    gateArg = trust;
+    return personalAllowed;
+  },
+}));
+mock.module("../../../../daemon/now-scratchpad.js", () => ({
+  readNowScratchpad: () => "NOW-CONTENT",
+}));
+mock.module("../../../../memory/pkb/context.js", () => ({
+  readPkbContext: () => "PKB-CONTENT",
+}));
+mock.module("../../../../config/loader.js", () => ({
+  getConfig: () => ({
+    memory: {
+      retrieval: { scratchpadInjection: { enabled: scratchpadEnabled } },
+    },
+    llm: {},
+  }),
+}));
+// Keep every other section empty so the assertions isolate NOW.md / PKB.
+mock.module("../../../../daemon/conversation-workspace.js", () => ({
+  resolveWorkspaceTopLevelContext: () => null,
+}));
+mock.module("../../../../daemon/conversation-runtime-assembly.js", () => ({
+  buildActiveDocuments: () => null,
+}));
+mock.module("../../../../runtime/capabilities.js", () => ({
+  resolveCapabilities: () => ({ canAccessMemory: false }),
+}));
+mock.module("../../../../config/skills.js", () => ({
+  loadSkillCatalog: () => [],
+}));
+const { buildAdvisorContext } = await import("../context-pack.js");
+const sources = {
+  conversationId: "c1",
+  workingDir: "/tmp",
+  // A remote, non-guardian per-turn snapshot — the case the live-state read
+  // could have wrongly elevated.
+  trustClass: "unknown" as const,
+  sourceChannel: "telegram",
+  transcript: [],
+  allowedToolNames: new Set<string>(),
+};
+beforeEach(() => {
+  personalAllowed = false;
+  scratchpadEnabled = true;
+  gateArg = null;
+});
+describe("advisor context pack — personal-memory gating", () => {
+  test("withholds NOW.md and PKB when personal memory is disallowed", async () => {
+    personalAllowed = false;
+    const ctx = (await buildAdvisorContext(sources)) ?? "";
+    expect(ctx).not.toContain("NOW-CONTENT");
+    expect(ctx).not.toContain("PKB-CONTENT");
+  });
+  test("includes NOW.md and PKB when allowed and the scratchpad toggle is on", async () => {
+    personalAllowed = true;
+    scratchpadEnabled = true;
+    const ctx = await buildAdvisorContext(sources);
+    expect(ctx).toContain("NOW-CONTENT");
+    expect(ctx).toContain("PKB-CONTENT");
+  });
+  test("withholds NOW.md when the scratchpad toggle is off, PKB still allowed", async () => {
+    personalAllowed = true;
+    scratchpadEnabled = false;
+    const ctx = (await buildAdvisorContext(sources)) ?? "";
+    expect(ctx).not.toContain("NOW-CONTENT");
+    expect(ctx).toContain("PKB-CONTENT");
+  });
+  test("feeds the gate the per-turn trust snapshot, not live conversation state", async () => {
+    personalAllowed = true;
+    await buildAdvisorContext(sources);
+    // The gate must see exactly the snapshot threaded from ToolContext —
+    // trustClass + executionChannel (the gate is handed the channel under the
+    // `sourceChannel` key) — so a concurrent live-trust change can't elevate it.
+    expect(gateArg).toEqual({
+      sourceChannel: "telegram",
+      trustClass: "unknown",
+    });
+  });
+});

package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { describe, expect, test } from "bun:test";
+import type { Message } from "../../../../providers/types.js";
+import { buildAdvisorContext, deriveRecallQuery } from "../context-pack.js";
+const userMsg = (t: string): Message => ({ // Wraps `t` as a user message holding a single text block.
+  role: "user",
+  content: [{ type: "text", text: t }],
+});
+describe("deriveRecallQuery", () => {
+  test("returns the most recent user message text", () => {
+    const query = deriveRecallQuery([
+      userMsg("the original task"),
+      { role: "assistant", content: [{ type: "text", text: "ok" }] },
+      userMsg("the latest question"),
+    ]);
+    expect(query).toBe("the latest question");
+  });
+  test("returns null when there is no user text", () => {
+    expect(
+      deriveRecallQuery([
+        { role: "assistant", content: [{ type: "text", text: "hi" }] },
+      ]),
+    ).toBeNull();
+    expect(deriveRecallQuery([])).toBeNull();
+  });
+});
+describe("buildAdvisorContext", () => {
+  test("lists the agent's available tools, skipping the advisor itself", async () => {
+    const context = await buildAdvisorContext({
+      conversationId: "ctx-1",
+      workingDir: "/tmp/does-not-exist",
+      allowedToolNames: new Set(["bash", "advisor", "read_file"]),
+      trustClass: "unknown",
+      transcript: [userMsg("hi")],
+    });
+    expect(context).toContain("## Available tools");
+    expect(context).toContain("- bash");
+    expect(context).toContain("- read_file");
+    // The advisor advises; it never tells the agent to consult itself.
+    expect(context).not.toContain("- advisor");
+  });
+  test("omits the tools section when no tools are available", async () => {
+    const context = await buildAdvisorContext({
+      conversationId: "ctx-2",
+      workingDir: "/tmp/does-not-exist",
+      allowedToolNames: new Set(),
+      trustClass: "unknown",
+      transcript: [],
+    });
+    // Other sources (e.g. the skills catalog) may still contribute, but with no
+    // allowed tools the tools section must not appear.
+    if (context !== null) expect(context).not.toContain("## Available tools");
+  });
+});