npm - @vellumai/assistant - Versions diffs - 0.10.1-staging.2 → 0.10.1-staging.3 - Mend

@vellumai/assistant 0.10.1-staging.2 → 0.10.1-staging.3

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (6) hide show

package/package.json +1 -1
package/src/__tests__/anthropic-provider.test.ts +67 -0
package/src/__tests__/llm-resolver.test.ts +205 -5
package/src/config/llm-resolver.ts +151 -14
package/src/memory/embedding-gemini.ts +1 -1
package/src/providers/anthropic/client.ts +31 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.10.1-staging.2",
+  "version": "0.10.1-staging.3",
   "license": "MIT",
   "type": "module",
   "exports": {

package/src/__tests__/anthropic-provider.test.ts CHANGED Viewed

@@ -3191,3 +3191,70 @@ describe("AnthropicProvider — thinking block send-time filtering", () => {
     expect(signatures).toContain("sig-step2");
   });
 });
+describe("AnthropicProvider — deprecated sampling params (temperature / top_p / top_k)", () => {
+  beforeEach(() => {
+    lastStreamParams = null;
+  });
+  // opus-4-7 / opus-4-8 (and, conservatively, fable) reject `temperature`,
+  // `top_p`, and `top_k` with a 400; the provider must strip all three.
+  for (const model of [
+    "claude-opus-4-8",
+    "claude-opus-4-7",
+    "claude-fable-5",
+  ]) {
+    test(`strips temperature, top_p, and top_k for ${model}`, async () => {
+      const provider = new AnthropicProvider("sk-ant-test", model);
+      await provider.sendMessage([userMsg("Hi")], {
+        systemPrompt: "You are helpful.",
+        config: { temperature: 0, top_p: 0.95, top_k: 40 },
+      });
+      expect(lastStreamParams!).not.toHaveProperty("temperature");
+      expect(lastStreamParams!).not.toHaveProperty("top_p");
+      expect(lastStreamParams!).not.toHaveProperty("top_k");
+    });
+  }
+  // opus-4-6 / sonnet-4-6 still accept the params — they must pass through,
+  // including `temperature: 0` (a value check, not truthiness).
+  test("forwards temperature (including 0), top_p, and top_k for opus-4-6", async () => {
+    const provider = new AnthropicProvider("sk-ant-test", "claude-opus-4-6");
+    await provider.sendMessage([userMsg("Hi")], {
+      systemPrompt: "You are helpful.",
+      config: { temperature: 0, top_p: 0.95, top_k: 40 },
+    });
+    expect(lastStreamParams!.temperature).toBe(0);
+    expect(lastStreamParams!.top_p).toBe(0.95);
+    expect(lastStreamParams!.top_k).toBe(40);
+  });
+  test("forwards temperature, top_p, and top_k for sonnet-4-6", async () => {
+    const provider = new AnthropicProvider("sk-ant-test", "claude-sonnet-4-6");
+    await provider.sendMessage([userMsg("Hi")], {
+      systemPrompt: "You are helpful.",
+      config: { temperature: 0.7, top_p: 0.9, top_k: 20 },
+    });
+    expect(lastStreamParams!.temperature).toBe(0.7);
+    expect(lastStreamParams!.top_p).toBe(0.9);
+    expect(lastStreamParams!.top_k).toBe(20);
+  });
+  // A per-call model override targeting a deprecating model must win over the
+  // provider's default (accepting) model.
+  test("strips params when a per-call model override deprecates them", async () => {
+    const provider = new AnthropicProvider("sk-ant-test", "claude-sonnet-4-6");
+    await provider.sendMessage([userMsg("Hi")], {
+      systemPrompt: "You are helpful.",
+      config: {
+        temperature: 0,
+        top_p: 0.95,
+        top_k: 40,
+        model: "claude-opus-4-8",
+      },
+    });
+    expect(lastStreamParams!).not.toHaveProperty("temperature");
+    expect(lastStreamParams!).not.toHaveProperty("top_p");
+    expect(lastStreamParams!).not.toHaveProperty("top_k");
+  });
+});

package/src/__tests__/llm-resolver.test.ts CHANGED Viewed

@@ -509,10 +509,13 @@ describe("resolveCallSiteConfig", () => {
     expect(resolved.model).toBe("claude-opus-4-7");
   });
-  test("thinking and contextWindow deep-merge across all five layers for non-main call sites", () => {
+  test("thinking and contextWindow deep-merge across the contributing layers for non-main call sites", () => {
     // Each layer touches a different leaf inside `thinking` and
     // `contextWindow.overflowRecovery` so we can verify deep merge composes
     // every contribution rather than wholesale-replacing the nested objects.
+    // The call site pins `siteProfile`, so the active profile is excluded — its
+    // leaves fall through to default while override, site profile, and the
+    // call-site fragment still compose.
     const llm = LLMSchema.parse({
       default: fullDefault,
       profiles: {
@@ -539,13 +542,15 @@ describe("resolveCallSiteConfig", () => {
     const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
       overrideProfile: "override",
     });
-    // Each layer's leaf survives because no higher layer touches it.
-    expect(resolved.thinking.enabled).toBe(false); // active
+    // Override, site profile, and the call-site fragment each contribute a leaf.
     expect(resolved.thinking.streamThinking).toBe(false); // override
-    expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(7); // active
     expect(resolved.contextWindow.overflowRecovery.safetyMarginRatio).toBe(0.1); // override
     expect(resolved.contextWindow.targetBudgetRatio).toBe(0.5); // siteProfile
     expect(resolved.contextWindow.compactThreshold).toBe(0.9); // callsite
+    // The active profile is excluded (the call site pins its own profile), so
+    // its leaves fall through to default instead of contributing.
+    expect(resolved.thinking.enabled).toBe(true); // default, NOT active's false
+    expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(3); // default, NOT active's 7
     // Untouched leaves at depth 2 fall through to default.
     expect(resolved.contextWindow.overflowRecovery.enabled).toBe(true);
     expect(
@@ -582,7 +587,9 @@ describe("resolveCallSiteConfig", () => {
     // Lower layers contribute fields the site fragment does not touch.
     expect(resolved.verbosity).toBe("high"); // from siteProfile
     expect(resolved.speed).toBe("fast"); // from override
-    expect(resolved.effort).toBe("low"); // from active
+    // The active profile is excluded when the call site pins its own profile,
+    // so `effort` falls through to default rather than active's "low".
+    expect(resolved.effort).toBe("max"); // default, NOT active's "low"
   });
   test("mainAgent activeProfile overrides static call-site defaults", () => {
@@ -1435,6 +1442,199 @@ describe("resolveCallSiteConfig logitBias provenance", () => {
   });
 });
+describe("resolveCallSiteConfig sampling-param provenance (temperature / top_p)", () => {
+  // Mirrors production: the active `balanced` profile carries `topP: 0.95` (a
+  // MiniMax tuning), while background call sites resolve to the Anthropic
+  // `cost-optimized` profile. A field-by-field deep-merge would leak the active
+  // profile's `top_p` onto those Anthropic requests.
+  const balancedActive = LLMSchema.parse({
+    default: fullDefault,
+    profiles: {
+      balanced: {
+        provider: "together",
+        model: "MiniMaxAI/MiniMax-M3",
+        topP: 0.95,
+      },
+      "cost-optimized": {
+        provider: "anthropic",
+        model: "claude-haiku-4-5-20251001",
+        effort: "low",
+        thinking: { enabled: false },
+      },
+    },
+    activeProfile: "balanced",
+  });
+  test("active profile's top_p does not leak into a profile-pinned call site (Option 1 + 2)", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        balanced: {
+          provider: "together",
+          model: "MiniMaxAI/MiniMax-M3",
+          topP: 0.95,
+        },
+        "cost-optimized": {
+          provider: "anthropic",
+          model: "claude-haiku-4-5-20251001",
+        },
+      },
+      activeProfile: "balanced",
+      callSites: { memoryExtraction: { profile: "cost-optimized" } },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm);
+    expect(resolved.provider).toBe("anthropic");
+    expect(resolved.model).toBe("claude-haiku-4-5-20251001");
+    // balanced (active) is shadowed by the pinned cost-optimized profile, so
+    // its top_p must not ride along onto the Anthropic request.
+    expect(resolved.topP).toBeNull();
+  });
+  test("homeGreeting / commitMessage resolve to a temperature with NO top_p", () => {
+    const greeting = resolveCallSiteConfig("homeGreeting", balancedActive);
+    expect(greeting.model).toBe("claude-haiku-4-5-20251001");
+    // Per-call-site temperature from CALL_SITE_DEFAULTS survives.
+    expect(greeting.temperature).toBe(0.7);
+    // The active profile's top_p does NOT — both together would trip
+    // Anthropic's "temperature and top_p cannot both be specified".
+    expect(greeting.topP).toBeNull();
+    const commit = resolveCallSiteConfig("commitMessage", balancedActive);
+    expect(commit.temperature).toBe(0.2);
+    expect(commit.topP).toBeNull();
+  });
+  test("profile-less call site still inherits the active profile's provider AND sampling", () => {
+    // `workflowLeaf` pins no profile, so the active profile is the legitimate
+    // fallback (Option 1 keeps it): it supplies provider/model and its own
+    // (coherent, same-provider) sampling.
+    const resolved = resolveCallSiteConfig("workflowLeaf", balancedActive);
+    expect(resolved.provider).toBe("together");
+    expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
+    expect(resolved.topP).toBe(0.95);
+  });
+  test("mainAgent keeps the active profile's top_p (balanced wins there)", () => {
+    const resolved = resolveCallSiteConfig("mainAgent", balancedActive);
+    expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
+    expect(resolved.topP).toBe(0.95);
+  });
+  test("an explicit call-site temperature override still wins over the winning profile", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { nucleus: { topP: 0.9, temperature: 0.1 } },
+      callSites: { memoryExtraction: { profile: "nucleus", temperature: 0.5 } },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm);
+    // Call-site override wins for the field it sets.
+    expect(resolved.temperature).toBe(0.5);
+    // The winning profile's top_p (no call-site override) still applies.
+    expect(resolved.topP).toBe(0.9);
+  });
+  test("a higher-precedence profile that omits top_p clears a lower profile's top_p (Option 2)", () => {
+    // No site profile is involved here, so the active profile IS folded in —
+    // this isolates Option 2: the override profile wins and omits top_p, so
+    // balanced's 0.95 must be cleared rather than surviving the merge.
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        balanced: {
+          provider: "together",
+          model: "MiniMaxAI/MiniMax-M3",
+          topP: 0.95,
+        },
+        plain: { provider: "anthropic", model: "claude-opus-4-7" },
+      },
+      activeProfile: "balanced",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm, {
+      overrideProfile: "plain",
+    });
+    expect(resolved.model).toBe("claude-opus-4-7");
+    expect(resolved.topP).toBeNull();
+  });
+  test("forceOverrideProfile: an explicit call-site temperature survives a forced profile silent on sampling", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        active: { verbosity: "low" },
+        sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
+        forced: { model: "claude-opus-4-7", effort: "high" },
+      },
+      callSites: {
+        memoryExtraction: {
+          profile: "sitep",
+          temperature: 0.7,
+          maxTokens: 1000,
+        },
+      },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
+      overrideProfile: "forced",
+      forceOverrideProfile: true,
+    });
+    // The forced profile floats to the top for fields it sets.
+    expect(resolved.model).toBe("claude-opus-4-7");
+    expect(resolved.effort).toBe("high");
+    // It is silent on temperature, so the deliberate call-site value survives —
+    // consistent with sibling call-site fields like maxTokens (which flow
+    // through the deep-merge).
+    expect(resolved.temperature).toBe(0.7);
+    expect(resolved.maxTokens).toBe(1000);
+  });
+  test("forceOverrideProfile: a forced profile that sets temperature wins over the call-site override", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: {
+        sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
+        forced: { model: "claude-opus-4-7", temperature: 0.1 },
+      },
+      callSites: {
+        memoryExtraction: { profile: "sitep", temperature: 0.7 },
+      },
+    });
+    const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
+      overrideProfile: "forced",
+      forceOverrideProfile: true,
+    });
+    // The forced profile explicitly sets temperature, so it floats above the
+    // call-site override.
+    expect(resolved.temperature).toBe(0.1);
+  });
+  test("mainAgent: an explicit call-site temperature survives an active profile silent on sampling", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { active: { model: "claude-sonnet-4-7" } },
+      callSites: { mainAgent: { temperature: 0.5 } },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm);
+    // The active profile floats above the call-site for mainAgent but is silent
+    // on temperature, so the deliberate call-site value survives.
+    expect(resolved.model).toBe("claude-sonnet-4-7");
+    expect(resolved.temperature).toBe(0.5);
+  });
+  test("mainAgent: the active profile's explicit temperature wins over a call-site temperature", () => {
+    const llm = LLMSchema.parse({
+      default: fullDefault,
+      profiles: { active: { model: "claude-sonnet-4-7", temperature: 0.2 } },
+      callSites: { mainAgent: { temperature: 0.5 } },
+      activeProfile: "active",
+    });
+    const resolved = resolveCallSiteConfig("mainAgent", llm);
+    // For mainAgent the active profile floats above the call-site override, so
+    // its explicit temperature wins.
+    expect(resolved.temperature).toBe(0.2);
+  });
+});
 describe("resolveCallSiteConfig — workflowLeaf default", () => {
   test("inherits the workspace default config rather than pinning cost-optimized", () => {
     const llm = LLMSchema.parse({

package/src/config/llm-resolver.ts CHANGED Viewed

@@ -17,7 +17,13 @@ import {
  * Merge layers (low → high precedence; later layers override earlier) for
  * non-main-agent call sites:
  *   1. `llm.default` fields (required base)
- *   2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile)
+ *   2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile) —
+ *      folded in ONLY when the call site resolves no profile of its own (a
+ *      profile-less leaf like `vision`/`workflowLeaf`, or a BYOK install whose
+ *      pinned managed profile was stripped). When the call site resolves a
+ *      profile, that profile is the authoritative provider config and the
+ *      active profile does not contribute — otherwise a deep-merge would let
+ *      its orphan fields bleed onto a different provider.
  *   3. `llm.profiles[opts.overrideProfile]` (per-call ad-hoc override)
  *   4. `llm.profiles[site.profile]` fields (call-site's named profile)
  *   5. `llm.callSites[callSite]` fields (call-site override)
@@ -50,6 +56,15 @@ import {
  * any nesting level merge into — rather than replace — the corresponding
  * base value.
  *
+ * `temperature` and `top_p` are provider-coupled, so they do NOT deep-merge
+ * field-by-field with the rest of the config: only the winning profile (the
+ * highest-precedence profile that determines provider/model) contributes them,
+ * and an explicit `llm.callSites[callSite]` override still wins. A lower-
+ * precedence profile whose model is shadowed never leaks its sampling onto a
+ * different provider (which would trip e.g. Anthropic's "temperature and top_p
+ * cannot both be specified" constraint). `logitBias` is winning-profile-scoped
+ * the same way.
+ *
  * `activeProfile` and `overrideProfile` are resolved by name lookup against
  * `llm.profiles`. Missing references silently fall through (no throw) so the
  * resolver stays pure; schema validation in `LLMSchema.superRefine` catches
@@ -108,6 +123,19 @@ export function resolveCallSiteConfig(
   // call-site default selected by `effectiveDefault`.
   const biasRef: LogitBiasRef = { preset: undefined };
+  // Effective sampling params, tracked outside the deep-merge for the same
+  // reason as `logitBias`: `temperature`/`top_p` are provider-coupled, so only
+  // the winning profile may contribute them. A profile clears what a lower
+  // PROFILE set where it is silent (so a shadowed profile's sampling can't
+  // leak), while an explicit call-site override is sticky and survives a later
+  // silent profile (see `applyProfileSampling` / `appendCallSiteLayers`).
+  const samplingRef: SamplingRef = {
+    temperature: undefined,
+    topP: undefined,
+    temperatureFromCallSite: false,
+    topPFromCallSite: false,
+  };
   const activeFragment = resolveProfileFragment(llm.activeProfile, llm, opts);
   const overrideFragment = resolveProfileFragment(
     opts.overrideProfile,
@@ -119,22 +147,55 @@ export function resolveCallSiteConfig(
     effectiveDefault(callSite, llm, opts.overrideProfile != null);
   if (callSite === "mainAgent") {
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
+    appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
   } else if (opts.forceOverrideProfile === true && overrideFragment != null) {
     // Escape hatch: float the override profile above the call-site layers,
     // mirroring mainAgent's treatment of the user's chat-model selection.
     // Guarded on a resolved fragment so a missing profile reference degrades
     // to the normal precedence below instead of silently dropping the
-    // call-site layers' standing.
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
+    // call-site layers' standing. The active profile stays the bottom fallback
+    // (its sampling can't leak — a higher profile's REPLACE clears it).
+    appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
   } else {
-    appendProfileLayer(layers, activeFragment, biasRef);
-    appendProfileLayer(layers, overrideFragment, biasRef);
-    appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
+    // The active profile is a low-precedence FALLBACK for call sites that
+    // resolve no profile of their own — profile-less leaves (`vision`,
+    // `workflowLeaf`) and BYOK installs where the pinned managed profile was
+    // stripped. When the call site DOES resolve its own profile, that profile
+    // is the authoritative provider config, so the active profile must not
+    // contribute its orphan fields to a different provider.
+    if (site?.profile == null) {
+      appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
+    }
+    appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
+    appendCallSiteLayers(
+      layers,
+      callSite,
+      llm,
+      site,
+      opts,
+      biasRef,
+      samplingRef,
+    );
   }
   const resolved = finalize(
@@ -149,11 +210,34 @@ export function resolveCallSiteConfig(
   } else {
     delete (resolved as { logitBias?: unknown }).logitBias;
   }
+  // `temperature`/`top_p` are winning-profile-scoped like `logitBias`, but an
+  // explicit call-site override may also set them. Apply the tracked value,
+  // overriding whatever a shadowed profile may have left in the merge. An
+  // `undefined` ref means no profile or override opted in, so the `llm.default`
+  // base already in `resolved` stands.
+  if (samplingRef.temperature !== undefined) {
+    resolved.temperature = samplingRef.temperature;
+  }
+  if (samplingRef.topP !== undefined) {
+    resolved.topP = samplingRef.topP;
+  }
   return resolved;
 }
 type LogitBiasRef = { preset: ProfileEntry["logitBias"] };
+type SamplingRef = {
+  temperature: ProfileEntry["temperature"];
+  topP: ProfileEntry["topP"];
+  // Provenance of the current pair: `true` when a field came from an explicit
+  // call-site override (deliberate, sticky), `false` when it came from a profile
+  // (clearable by a higher-precedence profile that determines the model). Lets a
+  // silent higher profile clear a lower profile's sampling without discarding a
+  // deliberate call-site override.
+  temperatureFromCallSite: boolean;
+  topPFromCallSite: boolean;
+};
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
@@ -311,13 +395,41 @@ function withImpliedProviderForKnownModel(source: Mergeable): Mergeable {
   };
 }
+/**
+ * Fold a profile's sampling into `samplingRef`. A profile determines
+ * provider/model, so its pair supersedes any LOWER PROFILE's: set each field the
+ * profile specifies, and clear a lower profile's value where the profile is
+ * silent. A deliberate call-site override is NOT a profile and outranks a silent
+ * profile — it survives until a profile EXPLICITLY sets the field. (The mirror
+ * COALESCE for call-site overrides lives in `appendCallSiteLayers`.)
+ */
+function applyProfileSampling(
+  samplingRef: SamplingRef,
+  profile: ProfileEntry,
+): void {
+  if (profile.temperature !== undefined) {
+    samplingRef.temperature = profile.temperature;
+    samplingRef.temperatureFromCallSite = false;
+  } else if (!samplingRef.temperatureFromCallSite) {
+    samplingRef.temperature = undefined;
+  }
+  if (profile.topP !== undefined) {
+    samplingRef.topP = profile.topP;
+    samplingRef.topPFromCallSite = false;
+  } else if (!samplingRef.topPFromCallSite) {
+    samplingRef.topP = undefined;
+  }
+}
 function appendProfileLayer(
   layers: Mergeable[],
   profile: ProfileEntry | undefined,
   biasRef: LogitBiasRef,
+  samplingRef: SamplingRef,
 ): void {
   if (profile != null) {
     biasRef.preset = profile.logitBias;
+    applyProfileSampling(samplingRef, profile);
     layers.push(profileConfigFragment(profile));
   }
 }
@@ -329,6 +441,7 @@ function appendCallSiteLayers(
   site: z.infer<typeof LLMSchema>["callSites"][LLMCallSite] | undefined,
   opts: ResolveCallSiteOpts,
   biasRef: LogitBiasRef,
+  samplingRef: SamplingRef,
 ): void {
   if (site != null) {
     if (site.profile != null) {
@@ -343,11 +456,29 @@ function appendCallSiteLayers(
         );
       }
       biasRef.preset = profileFragment.logitBias;
+      applyProfileSampling(samplingRef, profileFragment);
       layers.push(profileConfigFragment(profileFragment));
     }
-    // Strip the `profile` discriminator before merging — it isn't a
-    // `LLMConfigBase` field.
-    const { profile: _profile, ...siteFragment } = site;
+    // Strip the `profile` discriminator (not a `LLMConfigBase` field) and the
+    // sampling params before merging. An explicit call-site `temperature` /
+    // `topP` is a deliberate per-site choice, so it COALESCES over the winning
+    // profile's pair (only overriding the fields it sets) and is marked sticky
+    // so a later silent profile can't clear it — routed through `samplingRef` so
+    // it never inherits a shadowed profile's value via merge.
+    const {
+      profile: _profile,
+      temperature: siteTemperature,
+      topP: siteTopP,
+      ...siteFragment
+    } = site;
+    if (siteTemperature !== undefined) {
+      samplingRef.temperature = siteTemperature;
+      samplingRef.temperatureFromCallSite = true;
+    }
+    if (siteTopP !== undefined) {
+      samplingRef.topP = siteTopP;
+      samplingRef.topPFromCallSite = true;
+    }
     layers.push(siteFragment as Mergeable);
   }
 }
@@ -369,6 +500,12 @@ function profileConfigFragment(profile: ProfileEntry): Mergeable {
     // Per-profile advisor toggle is profile identity, not inheritable model
     // config — strip it so it can't leak into the merged `LLMConfigBase`.
     advisorEnabled: _advisorEnabled,
+    // `temperature`/`top_p` are provider-coupled: only the winning profile
+    // contributes them (tracked via `samplingRef`, applied post-merge), so a
+    // shadowed profile's sampling can never reach a different provider through
+    // the deep-merge. Strip here so no profile's sampling enters the merge.
+    temperature: _temperature,
+    topP: _topP,
     ...config
   } = profile;
   return config as Mergeable;

package/src/memory/embedding-gemini.ts CHANGED Viewed

@@ -40,7 +40,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
     this.taskType = options?.taskType;
     this.dimensions = options?.dimensions;
     this.managedBaseUrl = options?.managedBaseUrl;
-    this.interCallDelayMs = options?.interCallDelayMs ?? 5000;
+    this.interCallDelayMs = options?.interCallDelayMs ?? 100;
   }
   /** True when requests route through the managed platform proxy. */

package/src/providers/anthropic/client.ts CHANGED Viewed

@@ -835,6 +835,11 @@ export class AnthropicProvider implements Provider {
         disableCache: _disableCache,
         max_tokens: callerMaxTokens,
         usageAttributionHeaders,
+        // Pulled out of `restConfig` so they are forwarded conditionally below:
+        // newer models reject them outright (see `deprecatesSamplingParams`).
+        temperature: callerTemperature,
+        top_p: callerTopP,
+        top_k: callerTopK,
         ...restConfig
       } = (config ?? {}) as Record<string, unknown> & {
         // "xhigh" is an intermediate tier between "high" and "max" supported
@@ -847,6 +852,9 @@ export class AnthropicProvider implements Provider {
         speed?: "standard" | "fast";
         output_config?: Record<string, unknown>;
         usageAttributionHeaders?: Record<string, string>;
+        temperature?: number;
+        top_p?: number;
+        top_k?: number;
       };
       // Haiku does not support the effort / output_config parameter or
       // extended cache TTL betas.
@@ -856,6 +864,16 @@ export class AnthropicProvider implements Provider {
         (restConfig as Record<string, unknown>).model?.toString() ?? this.model;
       const isHaiku = effectiveModel.includes("haiku");
       const supportsEffort = !isHaiku;
+      // opus-4-7 / opus-4-8 reject `temperature`, `top_p`, and `top_k` with a 400
+      // ("`temperature`/`top_p` is deprecated for this model") — model-wide, not
+      // effort-conditional (verified 2026-06-23). opus-4-6 / sonnet-4-6 /
+      // haiku-4-5 still accept them. fable-5 is included conservatively (a
+      // frontier model that could not be verified directly but follows the same
+      // deprecation direction). Stripping the params here keeps callers that set
+      // them (e.g. the memory-v3 L2 selector's `temperature: 0`) from 400ing.
+      const deprecatesSamplingParams =
+        /claude-opus-4-[78]\b/.test(effectiveModel) ||
+        effectiveModel.startsWith("claude-fable-");
       const mergedOutputConfig = {
         ...(output_config ?? {}),
         ...(effort && effort !== "none" && supportsEffort
@@ -883,6 +901,19 @@ export class AnthropicProvider implements Provider {
             : 64000,
         messages: sentMessages,
         ...restConfig,
+        // Forward `temperature` / `top_p` / `top_k` only to models that still
+        // accept them; newer models 400 on any of the deprecated sampler params.
+        // `temperature: 0` is preserved for accepting models (a `typeof ===
+        // "number"` check, not truthiness).
+        ...(deprecatesSamplingParams
+          ? {}
+          : {
+              ...(typeof callerTemperature === "number"
+                ? { temperature: callerTemperature }
+                : {}),
+              ...(typeof callerTopP === "number" ? { top_p: callerTopP } : {}),
+              ...(typeof callerTopK === "number" ? { top_k: callerTopK } : {}),
+            }),
         ...(Object.keys(mergedOutputConfig).length > 0
           ? { output_config: mergedOutputConfig }
           : {}),