npm - @vellumai/assistant - Versions diffs - 0.10.1-staging.2 → 0.10.1-staging.4 - Mend

@vellumai/assistant 0.10.1-staging.2 → 0.10.1-staging.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/package.json +1 -1
package/src/__tests__/anthropic-provider.test.ts +67 -0
package/src/__tests__/card-surface-data.test.ts +60 -0
package/src/__tests__/conversation-surfaces-activation-emit.test.ts +3 -3
package/src/__tests__/conversation-surfaces-task-progress.test.ts +352 -0
package/src/__tests__/dynamic-page-surface.test.ts +0 -94
package/src/__tests__/llm-resolver.test.ts +205 -5
package/src/api/events/ui-surface-show.ts +8 -3
package/src/api/index.ts +1 -0
package/src/api/responses/conversation-message.ts +4 -0
package/src/api/surfaces.ts +33 -0
package/src/config/llm-resolver.ts +151 -14
package/src/daemon/conversation-surfaces.ts +273 -18
package/src/daemon/message-types/surfaces.ts +11 -20
package/src/memory/embedding-gemini.ts +1 -1
package/src/providers/anthropic/client.ts +31 -0
package/src/tools/ui-surface/definitions.ts +0 -43

package/src/__tests__/llm-resolver.test.ts CHANGED Viewed

@@ -509,10 +509,13 @@ describe("resolveCallSiteConfig", () => {
     expect(resolved.model).toBe("claude-opus-4-7");
   });
-  test("thinking and contextWindow deep-merge across all five layers for non-main call sites", () => {
+  test("thinking and contextWindow deep-merge across the contributing layers for non-main call sites", () => {
     // Each layer touches a different leaf inside `thinking` and
     // `contextWindow.overflowRecovery` so we can verify deep merge composes
     // every contribution rather than wholesale-replacing the nested objects.
+    // The call site pins `siteProfile`, so the active profile is excluded — its
+    // leaves fall through to default while override, site profile, and the
+    // call-site fragment still compose.
     const llm = LLMSchema.parse({
       default: fullDefault,
       profiles: {
@@ -539,13 +542,15 @@ describe("resolveCallSiteConfig", () => {
     const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
       overrideProfile: "override",
     });
-    // Each layer's leaf survives because no higher layer touches it.
-    expect(resolved.thinking.enabled).toBe(false); // active
+    // Override, site profile, and the call-site fragment each contribute a leaf.
     expect(resolved.thinking.streamThinking).toBe(false); // override
-    expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(7); // active
     expect(resolved.contextWindow.overflowRecovery.safetyMarginRatio).toBe(0.1); // override
     expect(resolved.contextWindow.targetBudgetRatio).toBe(0.5); // siteProfile
     expect(resolved.contextWindow.compactThreshold).toBe(0.9); // callsite
+    // The active profile is excluded (the call site pins its own profile), so
+    // its leaves fall through to default instead of contributing.
+    expect(resolved.thinking.enabled).toBe(true); // default, NOT active's false
+    expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(3); // default, NOT active's 7
     // Untouched leaves at depth 2 fall through to default.
     expect(resolved.contextWindow.overflowRecovery.enabled).toBe(true);
     expect(
@@ -582,7 +587,9 @@ describe("resolveCallSiteConfig", () => {
     // Lower layers contribute fields the site fragment does not touch.
     expect(resolved.verbosity).toBe("high"); // from siteProfile
     expect(resolved.speed).toBe("fast"); // from override
-    expect(resolved.effort).toBe("low"); // from active
+    // The active profile is excluded when the call site pins its own profile,
+    // so `effort` falls through to default rather than active's "low".
+    expect(resolved.effort).toBe("max"); // default, NOT active's "low"
   });
   test("mainAgent activeProfile overrides static call-site defaults", () => {
@@ -1435,6 +1442,199 @@ describe("resolveCallSiteConfig logitBias provenance", () => {
   });
 });
+describe("resolveCallSiteConfig sampling-param provenance (temperature / top_p)", () => {
+  // Mirrors production: the active `balanced` profile carries `topP: 0.95` (a
+  // MiniMax tuning), while background call sites resolve to the Anthropic
+  // `cost-optimized` profile. A field-by-field deep-merge would leak the active
+  // profile's `top_p` onto those Anthropic requests.
+  const balancedActive = LLMSchema.parse({
+    default: fullDefault,
+    profiles: {
+      balanced: {
+        provider: "together",
+        model: "MiniMaxAI/MiniMax-M3",
+        topP: 0.95,
+      },
+      "cost-optimized": {
+        provider: "anthropic",
+        model: "claude-haiku-4-5-20251001",
+        effort: "low",
+        thinking: { enabled: false },
+      },
+    },
+    activeProfile: "balanced",
+  });
+  test("active profile's top_p does not leak into a profile-pinned call site (Option 1 + 2)", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        balanced: {
+          provider: "together",
+          model: "MiniMaxAI/MiniMax-M3",
+          topP: 0.95,
+        },
+        "cost-optimized": {
+          provider: "anthropic",
+          model: "claude-haiku-4-5-20251001",
+        },
+      },
+      activeProfile: "balanced",
+      callSites: { memoryExtraction: { profile: "cost-optimized" } },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm);
+    expect(resolved.provider).toBe("anthropic");
+    expect(resolved.model).toBe("claude-haiku-4-5-20251001");
+    // balanced (active) is shadowed by the pinned cost-optimized profile, so
+    // its top_p must not ride along onto the Anthropic request.
+    expect(resolved.topP).toBeNull();
+  });
+  test("homeGreeting / commitMessage resolve to a temperature with NO top_p", () => {
+    const greeting = resolveCallSiteConfig("homeGreeting", balancedActive);
+    expect(greeting.model).toBe("claude-haiku-4-5-20251001");
+    // Per-call-site temperature from CALL_SITE_DEFAULTS survives.
+    expect(greeting.temperature).toBe(0.7);
+    // The active profile's top_p does NOT — both together would trip
+    // Anthropic's "temperature and top_p cannot both be specified".
+    expect(greeting.topP).toBeNull();
+    const commit = resolveCallSiteConfig("commitMessage", balancedActive);
+    expect(commit.temperature).toBe(0.2);
+    expect(commit.topP).toBeNull();
+  });
+  test("profile-less call site still inherits the active profile's provider AND sampling", () => {
+    // `workflowLeaf` pins no profile, so the active profile is the legitimate
+    // fallback (Option 1 keeps it): it supplies provider/model and its own
+    // (coherent, same-provider) sampling.
+    const resolved = resolveCallSiteConfig("workflowLeaf", balancedActive);
+    expect(resolved.provider).toBe("together");
+    expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
+    expect(resolved.topP).toBe(0.95);
+  });
+  test("mainAgent keeps the active profile's top_p (balanced wins there)", () => {
+    const resolved = resolveCallSiteConfig("mainAgent", balancedActive);
+    expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
+    expect(resolved.topP).toBe(0.95);
+  });
+  test("an explicit call-site temperature override still wins over the winning profile", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { nucleus: { topP: 0.9, temperature: 0.1 } },
+      callSites: { memoryExtraction: { profile: "nucleus", temperature: 0.5 } },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm);
+    // Call-site override wins for the field it sets.
+    expect(resolved.temperature).toBe(0.5);
+    // The winning profile's top_p (no call-site override) still applies.
+    expect(resolved.topP).toBe(0.9);
+  });
+  test("a higher-precedence profile that omits top_p clears a lower profile's top_p (Option 2)", () => {
+    // No site profile is involved here, so the active profile IS folded in —
+    // this isolates Option 2: the override profile wins and omits top_p, so
+    // balanced's 0.95 must be cleared rather than surviving the merge.
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        balanced: {
+          provider: "together",
+          model: "MiniMaxAI/MiniMax-M3",
+          topP: 0.95,
+        },
+        plain: { provider: "anthropic", model: "claude-opus-4-7" },
+      },
+      activeProfile: "balanced",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm, {
+      overrideProfile: "plain",
+    });
+    expect(resolved.model).toBe("claude-opus-4-7");
+    expect(resolved.topP).toBeNull();
+  });
+  test("forceOverrideProfile: an explicit call-site temperature survives a forced profile silent on sampling", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        active: { verbosity: "low" },
+        sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
+        forced: { model: "claude-opus-4-7", effort: "high" },
+      },
+      callSites: {
+        memoryExtraction: {
+          profile: "sitep",
+          temperature: 0.7,
+          maxTokens: 1000,
+        },
+      },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
+      overrideProfile: "forced",
+      forceOverrideProfile: true,
+    });
+    // The forced profile floats to the top for fields it sets.
+    expect(resolved.model).toBe("claude-opus-4-7");
+    expect(resolved.effort).toBe("high");
+    // It is silent on temperature, so the deliberate call-site value survives —
+    // consistent with sibling call-site fields like maxTokens (which flow
+    // through the deep-merge).
+    expect(resolved.temperature).toBe(0.7);
+    expect(resolved.maxTokens).toBe(1000);
+  });
+  test("forceOverrideProfile: a forced profile that sets temperature wins over the call-site override", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
+        forced: { model: "claude-opus-4-7", temperature: 0.1 },
+      },
+      callSites: {
+        memoryExtraction: { profile: "sitep", temperature: 0.7 },
+      },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
+      overrideProfile: "forced",
+      forceOverrideProfile: true,
+    });
+    // The forced profile explicitly sets temperature, so it floats above the
+    // call-site override.
+    expect(resolved.temperature).toBe(0.1);
+  });
+  test("mainAgent: an explicit call-site temperature survives an active profile silent on sampling", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { active: { model: "claude-sonnet-4-7" } },
+      callSites: { mainAgent: { temperature: 0.5 } },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm);
+    // The active profile floats above the call-site for mainAgent but is silent
+    // on temperature, so the deliberate call-site value survives.
+    expect(resolved.model).toBe("claude-sonnet-4-7");
+    expect(resolved.temperature).toBe(0.5);
+  });
+  test("mainAgent: the active profile's explicit temperature wins over a call-site temperature", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { active: { model: "claude-sonnet-4-7", temperature: 0.2 } },
+      callSites: { mainAgent: { temperature: 0.5 } },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm);
+    // For mainAgent the active profile floats above the call-site override, so
+    // its explicit temperature wins.
+    expect(resolved.temperature).toBe(0.2);
+  });
+});
 describe("resolveCallSiteConfig — workflowLeaf default", () => {
   test("inherits the workspace default config rather than pinning cost-optimized", () => {
     const llm = LLMSchema.parse({

package/src/api/events/ui-surface-show.ts CHANGED Viewed

@@ -5,9 +5,14 @@
  * form, list, table, confirmation, dynamic_page, file_upload,
  * document_preview, task_preferences) inside the chat view. The
  * concrete `data` shape depends on `surfaceType` and is owned by the
- * surface-data subsystem in `daemon/message-types/surfaces.ts`; the
- * canonical schema treats `data` as opaque on the wire so this file
- * doesn't have to mirror eight nested-payload schemas.
+ * surface-data subsystem in `daemon/message-types/surfaces.ts`
+ * (`CardSurfaceDataSchema` et al.). `data` is intentionally opaque on the
+ * wire — not for brevity, but because (1) this event is a member of the
+ * `type`-discriminated `AssistantEventSchema`, and (2) the stream parser
+ * drops any event that fails validation, so a strict per-`surfaceType`
+ * payload schema would make renderable-but-messy LLM surfaces silently vanish.
+ * Consumers narrow `data` by parsing it with the canonical per-type schema
+ * (all-optional, so it never rejects a real surface) at their boundary.
  *
  * Lifecycle: a surface progresses `show` → (zero or more `update`s) →
  * (`dismiss` for cancellation OR `complete` with a `summary` /

package/src/api/index.ts CHANGED Viewed

@@ -471,6 +471,7 @@ export {
   type WorkflowLeaf,
   WorkflowLeafSchema,
 } from "./responses/workflow-journal.js";
+export { type CardSurfaceData, CardSurfaceDataSchema } from "./surfaces.js";
 /**
  * Canonical SSE event schema for the assistant runtime.

package/src/api/responses/conversation-message.ts CHANGED Viewed

@@ -225,6 +225,10 @@ export type ConversationMessageToolCall = z.infer<
 // Surface
 // ---------------------------------------------------------------------------
+// Intentionally more permissive than the canonical SurfaceActionSchema in
+// api/events/ui-surface-show.ts: the write-path schema uses z.enum for style
+// so new surfaces only emit known values; this read-path schema uses z.string
+// so historical surfaces with non-standard style values still parse.
 const SurfaceActionSchema = z.object({
   id: z.string(),
   label: z.string(),

package/src/api/surfaces.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * Canonical surface-data wire payloads.
+ *
+ * The `ui_surface_*` events and the conversation-message response all carry a
+ * surface `data` object whose shape depends on `surfaceType`. The wire keeps
+ * `data` opaque (`z.record`) — see `events/ui-surface-show.ts` for why — so
+ * consumers narrow it by parsing with the canonical per-type schema here. The
+ * schemas are deliberately tolerant (every field optional, Zod strip mode): a
+ * parse miss makes a renderable surface silently vanish, so they must never
+ * reject a real payload. The schema also defines what the daemon's `ui_show`
+ * normalizer *supports* — anything the model sends outside these fields is
+ * dropped (and logged) there, which is how we learn the shapes to recover.
+ *
+ * Card is the first surface type migrated to a canonical schema; the remaining
+ * types still live as hand-written interfaces in
+ * `daemon/message-types/surfaces.ts` pending migration.
+ */
+import { z } from "zod";
+export const CardSurfaceDataSchema = z.object({
+  title: z.string().optional(),
+  subtitle: z.string().optional(),
+  body: z.string().optional(),
+  metadata: z
+    .array(z.object({ label: z.coerce.string(), value: z.coerce.string() }))
+    .optional(),
+  /** Optional template name for specialized rendering (e.g. "weather_forecast"). */
+  template: z.string().optional(),
+  /** Arbitrary data consumed by the template renderer. Shape depends on template. */
+  templateData: z.record(z.string(), z.unknown()).optional(),
+});
+export type CardSurfaceData = z.infer<typeof CardSurfaceDataSchema>;

package/src/config/llm-resolver.ts CHANGED Viewed

@@ -17,7 +17,13 @@ import {
  * Merge layers (low → high precedence; later layers override earlier) for
  * non-main-agent call sites:
  *   1. `llm.default` fields (required base)
- *   2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile)
+ *   2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile) —
+ *      folded in ONLY when the call site resolves no profile of its own (a
+ *      profile-less leaf like `vision`/`workflowLeaf`, or a BYOK install whose
+ *      pinned managed profile was stripped). When the call site resolves a
+ *      profile, that profile is the authoritative provider config and the
+ *      active profile does not contribute — otherwise a deep-merge would let
+ *      its orphan fields bleed onto a different provider.
  *   3. `llm.profiles[opts.overrideProfile]` (per-call ad-hoc override)
  *   4. `llm.profiles[site.profile]` fields (call-site's named profile)
  *   5. `llm.callSites[callSite]` fields (call-site override)
@@ -50,6 +56,15 @@ import {
  * any nesting level merge into — rather than replace — the corresponding
  * base value.
  *
+ * `temperature` and `top_p` are provider-coupled, so they do NOT deep-merge
+ * field-by-field with the rest of the config: only the winning profile (the
+ * highest-precedence profile that determines provider/model) contributes them,
+ * and an explicit `llm.callSites[callSite]` override still wins. A lower-
+ * precedence profile whose model is shadowed never leaks its sampling onto a
+ * different provider (which would trip e.g. Anthropic's "temperature and top_p
+ * cannot both be specified" constraint). `logitBias` is winning-profile-scoped
+ * the same way.
+ *
  * `activeProfile` and `overrideProfile` are resolved by name lookup against
  * `llm.profiles`. Missing references silently fall through (no throw) so the
  * resolver stays pure; schema validation in `LLMSchema.superRefine` catches
@@ -108,6 +123,19 @@ export function resolveCallSiteConfig(
   // call-site default selected by `effectiveDefault`.
   const biasRef: LogitBiasRef = { preset: undefined };
+  // Effective sampling params, tracked outside the deep-merge for the same
+  // reason as `logitBias`: `temperature`/`top_p` are provider-coupled, so only
+  // the winning profile may contribute them. A profile clears what a lower
+  // PROFILE set where it is silent (so a shadowed profile's sampling can't
+  // leak), while an explicit call-site override is sticky and survives a later
+  // silent profile (see `applyProfileSampling` / `appendCallSiteLayers`).
+  const samplingRef: SamplingRef = {
+    temperature: undefined,
+    topP: undefined,
+    temperatureFromCallSite: false,
+    topPFromCallSite: false,
+  };
   const activeFragment = resolveProfileFragment(llm.activeProfile, llm, opts);
   const overrideFragment = resolveProfileFragment(
     opts.overrideProfile,
@@ -119,22 +147,55 @@ export function resolveCallSiteConfig(
     effectiveDefault(callSite, llm, opts.overrideProfile != null);
   if (callSite === "mainAgent") {
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
+    appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
   } else if (opts.forceOverrideProfile === true && overrideFragment != null) {
     // Escape hatch: float the override profile above the call-site layers,
     // mirroring mainAgent's treatment of the user's chat-model selection.
     // Guarded on a resolved fragment so a missing profile reference degrades
     // to the normal precedence below instead of silently dropping the
-    // call-site layers' standing.
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
+    // call-site layers' standing. The active profile stays the bottom fallback
+    // (its sampling can't leak — a higher profile's REPLACE clears it).
+    appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
   } else {
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
+    // The active profile is a low-precedence FALLBACK for call sites that
+    // resolve no profile of their own — profile-less leaves (`vision`,
+    // `workflowLeaf`) and BYOK installs where the pinned managed profile was
+    // stripped. When the call site DOES resolve its own profile, that profile
+    // is the authoritative provider config, so the active profile must not
+    // contribute its orphan fields to a different provider.
+    if (site?.profile == null) {
+      appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    }
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
   }
   const resolved = finalize(
@@ -149,11 +210,34 @@ export function resolveCallSiteConfig(
   } else {
     delete (resolved as { logitBias?: unknown }).logitBias;
   }
+  // `temperature`/`top_p` are winning-profile-scoped like `logitBias`, but an
+  // explicit call-site override may also set them. Apply the tracked value on
+  // top of the merged result; profile and call-site fragments have their
+  // sampling stripped before they are merged. An `undefined` ref means no
+  // profile or override opted in, so the `llm.default` base already in
+  // `resolved` stands.
+  if (samplingRef.temperature !== undefined) {
+    resolved.temperature = samplingRef.temperature;
+  }
+  if (samplingRef.topP !== undefined) {
+    resolved.topP = samplingRef.topP;
+  }
   return resolved;
 }
 type LogitBiasRef = { preset: ProfileEntry["logitBias"] };
+type SamplingRef = {
+  temperature: ProfileEntry["temperature"];
+  topP: ProfileEntry["topP"];
+  // Provenance of the current pair: `true` when a field came from an explicit
+  // call-site override (deliberate, sticky), `false` when it came from a profile
+  // (clearable by a higher-precedence profile that determines the model). Lets a
+  // silent higher profile clear a lower profile's sampling without discarding a
+  // deliberate call-site override.
+  temperatureFromCallSite: boolean;
+  topPFromCallSite: boolean;
+};
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
@@ -311,13 +395,41 @@ function withImpliedProviderForKnownModel(source: Mergeable): Mergeable {
   };
 }
+/**
+ * Fold a profile's sampling into `samplingRef`. A profile determines
+ * provider/model, so its pair supersedes any LOWER PROFILE's: set each field the
+ * profile specifies, and clear a lower profile's value where the profile is
+ * silent. A deliberate call-site override is NOT a profile and outranks a silent
+ * profile — it survives until a profile EXPLICITLY sets the field. (The mirror
+ * COALESCE for call-site overrides lives in `appendCallSiteLayers`.)
+ */
+function applyProfileSampling(
+  samplingRef: SamplingRef,
+  profile: ProfileEntry,
+): void {
+  if (profile.temperature !== undefined) {
+    samplingRef.temperature = profile.temperature;
+    samplingRef.temperatureFromCallSite = false;
+  } else if (!samplingRef.temperatureFromCallSite) {
+    samplingRef.temperature = undefined;
+  }
+  if (profile.topP !== undefined) {
+    samplingRef.topP = profile.topP;
+    samplingRef.topPFromCallSite = false;
+  } else if (!samplingRef.topPFromCallSite) {
+    samplingRef.topP = undefined;
+  }
+}
 function appendProfileLayer(
   layers: Mergeable[],
   profile: ProfileEntry | undefined,
   biasRef: LogitBiasRef,
+  samplingRef: SamplingRef,
 ): void {
   if (profile != null) {
     biasRef.preset = profile.logitBias;
+    applyProfileSampling(samplingRef, profile);
     layers.push(profileConfigFragment(profile));
   }
 }
@@ -329,6 +441,7 @@ function appendCallSiteLayers(
   site: z.infer<typeof LLMSchema>["callSites"][LLMCallSite] | undefined,
   opts: ResolveCallSiteOpts,
   biasRef: LogitBiasRef,
+  samplingRef: SamplingRef,
 ): void {
   if (site != null) {
     if (site.profile != null) {
@@ -343,11 +456,29 @@ function appendCallSiteLayers(
         );
       }
       biasRef.preset = profileFragment.logitBias;
+      applyProfileSampling(samplingRef, profileFragment);
       layers.push(profileConfigFragment(profileFragment));
     }
-    // Strip the `profile` discriminator before merging — it isn't a
-    // `LLMConfigBase` field.
-    const { profile: _profile, ...siteFragment } = site;
+    // Strip the `profile` discriminator (not a `LLMConfigBase` field) and the
+    // sampling params before merging. An explicit call-site `temperature` /
+    // `topP` is a deliberate per-site choice, so it COALESCES over the winning
+    // profile's pair (only overriding the fields it sets) and is marked sticky
+    // so a later silent profile can't clear it — routed through `samplingRef` so
+    // it never inherits a shadowed profile's value via merge.
+    const {
+      profile: _profile,
+      temperature: siteTemperature,
+      topP: siteTopP,
+      ...siteFragment
+    } = site;
+    if (siteTemperature !== undefined) {
+      samplingRef.temperature = siteTemperature;
+      samplingRef.temperatureFromCallSite = true;
+    }
+    if (siteTopP !== undefined) {
+      samplingRef.topP = siteTopP;
+      samplingRef.topPFromCallSite = true;
+    }
     layers.push(siteFragment as Mergeable);
   }
 }
@@ -369,6 +500,12 @@ function profileConfigFragment(profile: ProfileEntry): Mergeable {
     // Per-profile advisor toggle is profile identity, not inheritable model
     // config — strip it so it can't leak into the merged `LLMConfigBase`.
     advisorEnabled: _advisorEnabled,
+    // `temperature`/`top_p` are provider-coupled: only the winning profile
+    // contributes them (tracked via `samplingRef`, applied post-merge), so a
+    // shadowed profile's sampling can never reach a different provider through
+    // the deep-merge. Strip here so no profile's sampling enters the merge.
+    temperature: _temperature,
+    topP: _topP,
     ...config
   } = profile;
   return config as Mergeable;