@vellumai/assistant 0.10.1-staging.2 → 0.10.1-staging.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -509,10 +509,13 @@ describe("resolveCallSiteConfig", () => {
509
509
  expect(resolved.model).toBe("claude-opus-4-7");
510
510
  });
511
511
 
512
- test("thinking and contextWindow deep-merge across all five layers for non-main call sites", () => {
512
+ test("thinking and contextWindow deep-merge across the contributing layers for non-main call sites", () => {
513
513
  // Each layer touches a different leaf inside `thinking` and
514
514
  // `contextWindow.overflowRecovery` so we can verify deep merge composes
515
515
  // every contribution rather than wholesale-replacing the nested objects.
516
+ // The call site pins `siteProfile`, so the active profile is excluded — its
517
+ // leaves fall through to default while override, site profile, and the
518
+ // call-site fragment still compose.
516
519
  const llm = LLMSchema.parse({
517
520
  default: fullDefault,
518
521
  profiles: {
@@ -539,13 +542,15 @@ describe("resolveCallSiteConfig", () => {
539
542
  const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
540
543
  overrideProfile: "override",
541
544
  });
542
- // Each layer's leaf survives because no higher layer touches it.
543
- expect(resolved.thinking.enabled).toBe(false); // active
545
+ // Override, site profile, and the call-site fragment each contribute a leaf.
544
546
  expect(resolved.thinking.streamThinking).toBe(false); // override
545
- expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(7); // active
546
547
  expect(resolved.contextWindow.overflowRecovery.safetyMarginRatio).toBe(0.1); // override
547
548
  expect(resolved.contextWindow.targetBudgetRatio).toBe(0.5); // siteProfile
548
549
  expect(resolved.contextWindow.compactThreshold).toBe(0.9); // callsite
550
+ // The active profile is excluded (the call site pins its own profile), so
551
+ // its leaves fall through to default instead of contributing.
552
+ expect(resolved.thinking.enabled).toBe(true); // default, NOT active's false
553
+ expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(3); // default, NOT active's 7
549
554
  // Untouched leaves at depth 2 fall through to default.
550
555
  expect(resolved.contextWindow.overflowRecovery.enabled).toBe(true);
551
556
  expect(
@@ -582,7 +587,9 @@ describe("resolveCallSiteConfig", () => {
582
587
  // Lower layers contribute fields the site fragment does not touch.
583
588
  expect(resolved.verbosity).toBe("high"); // from siteProfile
584
589
  expect(resolved.speed).toBe("fast"); // from override
585
- expect(resolved.effort).toBe("low"); // from active
590
+ // The active profile is excluded when the call site pins its own profile,
591
+ // so `effort` falls through to default rather than active's "low".
592
+ expect(resolved.effort).toBe("max"); // default, NOT active's "low"
586
593
  });
587
594
 
588
595
  test("mainAgent activeProfile overrides static call-site defaults", () => {
@@ -1435,6 +1442,199 @@ describe("resolveCallSiteConfig logitBias provenance", () => {
1435
1442
  });
1436
1443
  });
1437
1444
 
1445
+ describe("resolveCallSiteConfig sampling-param provenance (temperature / top_p)", () => {
1446
+ // Mirrors production: the active `balanced` profile carries `topP: 0.95` (a
1447
+ // MiniMax tuning), while background call sites resolve to the Anthropic
1448
+ // `cost-optimized` profile. A field-by-field deep-merge would leak the active
1449
+ // profile's `top_p` onto those Anthropic requests.
1450
+ const balancedActive = LLMSchema.parse({
1451
+ default: fullDefault,
1452
+ profiles: {
1453
+ balanced: {
1454
+ provider: "together",
1455
+ model: "MiniMaxAI/MiniMax-M3",
1456
+ topP: 0.95,
1457
+ },
1458
+ "cost-optimized": {
1459
+ provider: "anthropic",
1460
+ model: "claude-haiku-4-5-20251001",
1461
+ effort: "low",
1462
+ thinking: { enabled: false },
1463
+ },
1464
+ },
1465
+ activeProfile: "balanced",
1466
+ });
1467
+
1468
+ test("active profile's top_p does not leak into a profile-pinned call site (Option 1 + 2)", () => {
1469
+ const llm = LLMSchema.parse({
1470
+ default: fullDefault,
1471
+ profiles: {
1472
+ balanced: {
1473
+ provider: "together",
1474
+ model: "MiniMaxAI/MiniMax-M3",
1475
+ topP: 0.95,
1476
+ },
1477
+ "cost-optimized": {
1478
+ provider: "anthropic",
1479
+ model: "claude-haiku-4-5-20251001",
1480
+ },
1481
+ },
1482
+ activeProfile: "balanced",
1483
+ callSites: { memoryExtraction: { profile: "cost-optimized" } },
1484
+ });
1485
+ const resolved = resolveCallSiteConfig("memoryExtraction", llm);
1486
+ expect(resolved.provider).toBe("anthropic");
1487
+ expect(resolved.model).toBe("claude-haiku-4-5-20251001");
1488
+ // balanced (active) is shadowed by the pinned cost-optimized profile, so
1489
+ // its top_p must not ride along onto the Anthropic request.
1490
+ expect(resolved.topP).toBeNull();
1491
+ });
1492
+
1493
+ test("homeGreeting / commitMessage resolve to a temperature with NO top_p", () => {
1494
+ const greeting = resolveCallSiteConfig("homeGreeting", balancedActive);
1495
+ expect(greeting.model).toBe("claude-haiku-4-5-20251001");
1496
+ // Per-call-site temperature from CALL_SITE_DEFAULTS survives.
1497
+ expect(greeting.temperature).toBe(0.7);
1498
+ // The active profile's top_p does NOT — both together would trip
1499
+ // Anthropic's "temperature and top_p cannot both be specified".
1500
+ expect(greeting.topP).toBeNull();
1501
+
1502
+ const commit = resolveCallSiteConfig("commitMessage", balancedActive);
1503
+ expect(commit.temperature).toBe(0.2);
1504
+ expect(commit.topP).toBeNull();
1505
+ });
1506
+
1507
+ test("profile-less call site still inherits the active profile's provider AND sampling", () => {
1508
+ // `workflowLeaf` pins no profile, so the active profile is the legitimate
1509
+ // fallback (Option 1 keeps it): it supplies provider/model and its own
1510
+ // (coherent, same-provider) sampling.
1511
+ const resolved = resolveCallSiteConfig("workflowLeaf", balancedActive);
1512
+ expect(resolved.provider).toBe("together");
1513
+ expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
1514
+ expect(resolved.topP).toBe(0.95);
1515
+ });
1516
+
1517
+ test("mainAgent keeps the active profile's top_p (balanced wins there)", () => {
1518
+ const resolved = resolveCallSiteConfig("mainAgent", balancedActive);
1519
+ expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
1520
+ expect(resolved.topP).toBe(0.95);
1521
+ });
1522
+
1523
+ test("an explicit call-site temperature override still wins over the winning profile", () => {
1524
+ const llm = LLMSchema.parse({
1525
+ default: fullDefault,
1526
+ profiles: { nucleus: { topP: 0.9, temperature: 0.1 } },
1527
+ callSites: { memoryExtraction: { profile: "nucleus", temperature: 0.5 } },
1528
+ });
1529
+ const resolved = resolveCallSiteConfig("memoryExtraction", llm);
1530
+ // Call-site override wins for the field it sets.
1531
+ expect(resolved.temperature).toBe(0.5);
1532
+ // The winning profile's top_p (no call-site override) still applies.
1533
+ expect(resolved.topP).toBe(0.9);
1534
+ });
1535
+
1536
+ test("a higher-precedence profile that omits top_p clears a lower profile's top_p (Option 2)", () => {
1537
+ // No site profile is involved here, so the active profile IS folded in —
1538
+ // this isolates Option 2: the override profile wins and omits top_p, so
1539
+ // balanced's 0.95 must be cleared rather than surviving the merge.
1540
+ const llm = LLMSchema.parse({
1541
+ default: fullDefault,
1542
+ profiles: {
1543
+ balanced: {
1544
+ provider: "together",
1545
+ model: "MiniMaxAI/MiniMax-M3",
1546
+ topP: 0.95,
1547
+ },
1548
+ plain: { provider: "anthropic", model: "claude-opus-4-7" },
1549
+ },
1550
+ activeProfile: "balanced",
1551
+ });
1552
+ const resolved = resolveCallSiteConfig("mainAgent", llm, {
1553
+ overrideProfile: "plain",
1554
+ });
1555
+ expect(resolved.model).toBe("claude-opus-4-7");
1556
+ expect(resolved.topP).toBeNull();
1557
+ });
1558
+
1559
+ test("forceOverrideProfile: an explicit call-site temperature survives a forced profile silent on sampling", () => {
1560
+ const llm = LLMSchema.parse({
1561
+ default: fullDefault,
1562
+ profiles: {
1563
+ active: { verbosity: "low" },
1564
+ sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
1565
+ forced: { model: "claude-opus-4-7", effort: "high" },
1566
+ },
1567
+ callSites: {
1568
+ memoryExtraction: {
1569
+ profile: "sitep",
1570
+ temperature: 0.7,
1571
+ maxTokens: 1000,
1572
+ },
1573
+ },
1574
+ activeProfile: "active",
1575
+ });
1576
+ const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
1577
+ overrideProfile: "forced",
1578
+ forceOverrideProfile: true,
1579
+ });
1580
+ // The forced profile floats to the top for fields it sets.
1581
+ expect(resolved.model).toBe("claude-opus-4-7");
1582
+ expect(resolved.effort).toBe("high");
1583
+ // It is silent on temperature, so the deliberate call-site value survives —
1584
+ // consistent with sibling call-site fields like maxTokens (which flow
1585
+ // through the deep-merge).
1586
+ expect(resolved.temperature).toBe(0.7);
1587
+ expect(resolved.maxTokens).toBe(1000);
1588
+ });
1589
+
1590
+ test("forceOverrideProfile: a forced profile that sets temperature wins over the call-site override", () => {
1591
+ const llm = LLMSchema.parse({
1592
+ default: fullDefault,
1593
+ profiles: {
1594
+ sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
1595
+ forced: { model: "claude-opus-4-7", temperature: 0.1 },
1596
+ },
1597
+ callSites: {
1598
+ memoryExtraction: { profile: "sitep", temperature: 0.7 },
1599
+ },
1600
+ });
1601
+ const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
1602
+ overrideProfile: "forced",
1603
+ forceOverrideProfile: true,
1604
+ });
1605
+ // The forced profile explicitly sets temperature, so it floats above the
1606
+ // call-site override.
1607
+ expect(resolved.temperature).toBe(0.1);
1608
+ });
1609
+
1610
+ test("mainAgent: an explicit call-site temperature survives an active profile silent on sampling", () => {
1611
+ const llm = LLMSchema.parse({
1612
+ default: fullDefault,
1613
+ profiles: { active: { model: "claude-sonnet-4-7" } },
1614
+ callSites: { mainAgent: { temperature: 0.5 } },
1615
+ activeProfile: "active",
1616
+ });
1617
+ const resolved = resolveCallSiteConfig("mainAgent", llm);
1618
+ // The active profile floats above the call-site for mainAgent but is silent
1619
+ // on temperature, so the deliberate call-site value survives.
1620
+ expect(resolved.model).toBe("claude-sonnet-4-7");
1621
+ expect(resolved.temperature).toBe(0.5);
1622
+ });
1623
+
1624
+ test("mainAgent: the active profile's explicit temperature wins over a call-site temperature", () => {
1625
+ const llm = LLMSchema.parse({
1626
+ default: fullDefault,
1627
+ profiles: { active: { model: "claude-sonnet-4-7", temperature: 0.2 } },
1628
+ callSites: { mainAgent: { temperature: 0.5 } },
1629
+ activeProfile: "active",
1630
+ });
1631
+ const resolved = resolveCallSiteConfig("mainAgent", llm);
1632
+ // For mainAgent the active profile floats above the call-site override, so
1633
+ // its explicit temperature wins.
1634
+ expect(resolved.temperature).toBe(0.2);
1635
+ });
1636
+ });
1637
+
1438
1638
  describe("resolveCallSiteConfig — workflowLeaf default", () => {
1439
1639
  test("inherits the workspace default config rather than pinning cost-optimized", () => {
1440
1640
  const llm = LLMSchema.parse({
@@ -5,9 +5,14 @@
5
5
  * form, list, table, confirmation, dynamic_page, file_upload,
6
6
  * document_preview, task_preferences) inside the chat view. The
7
7
  * concrete `data` shape depends on `surfaceType` and is owned by the
8
- * surface-data subsystem in `daemon/message-types/surfaces.ts`; the
9
- * canonical schema treats `data` as opaque on the wire so this file
10
- * doesn't have to mirror eight nested-payload schemas.
8
+ * surface-data subsystem in `daemon/message-types/surfaces.ts`
9
+ * (`CardSurfaceDataSchema` et al.). `data` is intentionally opaque on the
10
+ * wire — not for brevity, but because (1) this event is a member of the
11
+ * `type`-discriminated `AssistantEventSchema`, and (2) the stream parser
12
+ * drops any event that fails validation, so a strict per-`surfaceType`
13
+ * payload schema would make renderable-but-messy LLM surfaces silently vanish.
14
+ * Consumers narrow `data` by parsing it with the canonical per-type schema
15
+ * (all-optional, so it never rejects a real surface) at their boundary.
11
16
  *
12
17
  * Lifecycle: a surface progresses `show` → (zero or more `update`s) →
13
18
  * (`dismiss` for cancellation OR `complete` with a `summary` /
package/src/api/index.ts CHANGED
@@ -471,6 +471,7 @@ export {
471
471
  type WorkflowLeaf,
472
472
  WorkflowLeafSchema,
473
473
  } from "./responses/workflow-journal.js";
474
+ export { type CardSurfaceData, CardSurfaceDataSchema } from "./surfaces.js";
474
475
 
475
476
  /**
476
477
  * Canonical SSE event schema for the assistant runtime.
@@ -225,6 +225,10 @@ export type ConversationMessageToolCall = z.infer<
225
225
  // Surface
226
226
  // ---------------------------------------------------------------------------
227
227
 
228
+ // Intentionally more permissive than the canonical SurfaceActionSchema in
229
+ // api/events/ui-surface-show.ts: the write-path schema uses z.enum for style
230
+ // so new surfaces only emit known values; this read-path schema uses z.string
231
+ // so historical surfaces with non-standard style values still parse.
228
232
  const SurfaceActionSchema = z.object({
229
233
  id: z.string(),
230
234
  label: z.string(),
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Canonical surface-data wire payloads.
3
+ *
4
+ * The `ui_surface_*` events and the conversation-message response all carry a
5
+ * surface `data` object whose shape depends on `surfaceType`. The wire keeps
6
+ * `data` opaque (`z.record`) — see `events/ui-surface-show.ts` for why — so
7
+ * consumers narrow it by parsing with the canonical per-type schema here. The
8
+ * schemas are deliberately tolerant (every field optional, Zod strip mode): a
9
+ * parse miss makes a renderable surface silently vanish, so they must never
10
+ * reject a real payload. The schema also defines what the daemon's `ui_show`
11
+ * normalizer *supports* — anything the model sends outside these fields is
12
+ * dropped (and logged) there, which is how we learn the shapes to recover.
13
+ *
14
+ * Card is the first surface type migrated to a canonical schema; the remaining
15
+ * types still live as hand-written interfaces in
16
+ * `daemon/message-types/surfaces.ts` pending migration.
17
+ */
18
+
19
+ import { z } from "zod";
20
+
21
+ export const CardSurfaceDataSchema = z.object({
22
+ title: z.string().optional(),
23
+ subtitle: z.string().optional(),
24
+ body: z.string().optional(),
25
+ metadata: z
26
+ .array(z.object({ label: z.coerce.string(), value: z.coerce.string() }))
27
+ .optional(),
28
+ /** Optional template name for specialized rendering (e.g. "weather_forecast"). */
29
+ template: z.string().optional(),
30
+ /** Arbitrary data consumed by the template renderer. Shape depends on template. */
31
+ templateData: z.record(z.string(), z.unknown()).optional(),
32
+ });
33
+ export type CardSurfaceData = z.infer<typeof CardSurfaceDataSchema>;
@@ -17,7 +17,13 @@ import {
17
17
  * Merge layers (low → high precedence; later layers override earlier) for
18
18
  * non-main-agent call sites:
19
19
  * 1. `llm.default` fields (required base)
20
- * 2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile)
20
+ * 2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile)
21
+ * folded in ONLY when the call site resolves no profile of its own (a
22
+ * profile-less leaf like `vision`/`workflowLeaf`, or a BYOK install whose
23
+ * pinned managed profile was stripped). When the call site resolves a
24
+ * profile, that profile is the authoritative provider config and the
25
+ * active profile does not contribute — otherwise a deep-merge would let
26
+ * its orphan fields bleed onto a different provider.
21
27
  * 3. `llm.profiles[opts.overrideProfile]` (per-call ad-hoc override)
22
28
  * 4. `llm.profiles[site.profile]` fields (call-site's named profile)
23
29
  * 5. `llm.callSites[callSite]` fields (call-site override)
@@ -50,6 +56,15 @@ import {
50
56
  * any nesting level merge into — rather than replace — the corresponding
51
57
  * base value.
52
58
  *
59
+ * `temperature` and `top_p` are provider-coupled, so they do NOT deep-merge
60
+ * field-by-field with the rest of the config: only the winning profile (the
61
+ * highest-precedence profile that determines provider/model) contributes them,
62
+ * and an explicit `llm.callSites[callSite]` override still wins. A lower-
63
+ * precedence profile whose model is shadowed never leaks its sampling onto a
64
+ * different provider (which would trip e.g. Anthropic's "temperature and top_p
65
+ * cannot both be specified" constraint). `logitBias` is winning-profile-scoped
66
+ * the same way.
67
+ *
53
68
  * `activeProfile` and `overrideProfile` are resolved by name lookup against
54
69
  * `llm.profiles`. Missing references silently fall through (no throw) so the
55
70
  * resolver stays pure; schema validation in `LLMSchema.superRefine` catches
@@ -108,6 +123,19 @@ export function resolveCallSiteConfig(
108
123
  // call-site default selected by `effectiveDefault`.
109
124
  const biasRef: LogitBiasRef = { preset: undefined };
110
125
 
126
+ // Effective sampling params, tracked outside the deep-merge for the same
127
+ // reason as `logitBias`: `temperature`/`top_p` are provider-coupled, so only
128
+ // the winning profile may contribute them. A profile clears what a lower
129
+ // PROFILE set where it is silent (so a shadowed profile's sampling can't
130
+ // leak), while an explicit call-site override is sticky and survives a later
131
+ // silent profile (see `applyProfileSampling` / `appendCallSiteLayers`).
132
+ const samplingRef: SamplingRef = {
133
+ temperature: undefined,
134
+ topP: undefined,
135
+ temperatureFromCallSite: false,
136
+ topPFromCallSite: false,
137
+ };
138
+
111
139
  const activeFragment = resolveProfileFragment(llm.activeProfile, llm, opts);
112
140
  const overrideFragment = resolveProfileFragment(
113
141
  opts.overrideProfile,
@@ -119,22 +147,55 @@ export function resolveCallSiteConfig(
119
147
  effectiveDefault(callSite, llm, opts.overrideProfile != null);
120
148
 
121
149
  if (callSite === "mainAgent") {
122
- appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
123
- appendProfileLayer(layers, activeFragment, biasRef);
124
- appendProfileLayer(layers, overrideFragment, biasRef);
150
+ appendCallSiteLayers(
151
+ layers,
152
+ callSite,
153
+ llm,
154
+ site,
155
+ opts,
156
+ biasRef,
157
+ samplingRef,
158
+ );
159
+ appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
160
+ appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
125
161
  } else if (opts.forceOverrideProfile === true && overrideFragment != null) {
126
162
  // Escape hatch: float the override profile above the call-site layers,
127
163
  // mirroring mainAgent's treatment of the user's chat-model selection.
128
164
  // Guarded on a resolved fragment so a missing profile reference degrades
129
165
  // to the normal precedence below instead of silently dropping the
130
- // call-site layers' standing.
131
- appendProfileLayer(layers, activeFragment, biasRef);
132
- appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
133
- appendProfileLayer(layers, overrideFragment, biasRef);
166
+ // call-site layers' standing. The active profile stays the bottom fallback
167
+ // (its sampling can't leak — a higher profile's REPLACE clears it).
168
+ appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
169
+ appendCallSiteLayers(
170
+ layers,
171
+ callSite,
172
+ llm,
173
+ site,
174
+ opts,
175
+ biasRef,
176
+ samplingRef,
177
+ );
178
+ appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
134
179
  } else {
135
- appendProfileLayer(layers, activeFragment, biasRef);
136
- appendProfileLayer(layers, overrideFragment, biasRef);
137
- appendCallSiteLayers(layers, callSite, llm, site, opts, biasRef);
180
+ // The active profile is a low-precedence FALLBACK for call sites that
181
+ // resolve no profile of their own — profile-less leaves (`vision`,
182
+ // `workflowLeaf`) and BYOK installs where the pinned managed profile was
183
+ // stripped. When the call site DOES resolve its own profile, that profile
184
+ // is the authoritative provider config, so the active profile must not
185
+ // contribute its orphan fields to a different provider.
186
+ if (site?.profile == null) {
187
+ appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
188
+ }
189
+ appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
190
+ appendCallSiteLayers(
191
+ layers,
192
+ callSite,
193
+ llm,
194
+ site,
195
+ opts,
196
+ biasRef,
197
+ samplingRef,
198
+ );
138
199
  }
139
200
 
140
201
  const resolved = finalize(
@@ -149,11 +210,34 @@ export function resolveCallSiteConfig(
149
210
  } else {
150
211
  delete (resolved as { logitBias?: unknown }).logitBias;
151
212
  }
213
+ // `temperature`/`top_p` are winning-profile-scoped like `logitBias`, but an
214
+ // explicit call-site override may also set them. Apply the tracked value,
215
+ // overriding whatever a shadowed profile may have left in the merge. An
216
+ // `undefined` ref means no profile or override opted in, so the `llm.default`
217
+ // base already in `resolved` stands.
218
+ if (samplingRef.temperature !== undefined) {
219
+ resolved.temperature = samplingRef.temperature;
220
+ }
221
+ if (samplingRef.topP !== undefined) {
222
+ resolved.topP = samplingRef.topP;
223
+ }
152
224
  return resolved;
153
225
  }
154
226
 
155
227
  type LogitBiasRef = { preset: ProfileEntry["logitBias"] };
156
228
 
229
+ type SamplingRef = {
230
+ temperature: ProfileEntry["temperature"];
231
+ topP: ProfileEntry["topP"];
232
+ // Provenance of the current pair: `true` when a field came from an explicit
233
+ // call-site override (deliberate, sticky), `false` when it came from a profile
234
+ // (clearable by a higher-precedence profile that determines the model). Lets a
235
+ // silent higher profile clear a lower profile's sampling without discarding a
236
+ // deliberate call-site override.
237
+ temperatureFromCallSite: boolean;
238
+ topPFromCallSite: boolean;
239
+ };
240
+
157
241
  // ---------------------------------------------------------------------------
158
242
  // Internal helpers
159
243
  // ---------------------------------------------------------------------------
@@ -311,13 +395,41 @@ function withImpliedProviderForKnownModel(source: Mergeable): Mergeable {
311
395
  };
312
396
  }
313
397
 
398
+ /**
399
+ * Fold a profile's sampling into `samplingRef`. A profile determines
400
+ * provider/model, so its pair supersedes any LOWER PROFILE's: set each field the
401
+ * profile specifies, and clear a lower profile's value where the profile is
402
+ * silent. A deliberate call-site override is NOT a profile and outranks a silent
403
+ * profile — it survives until a profile EXPLICITLY sets the field. (The mirror
404
+ * COALESCE for call-site overrides lives in `appendCallSiteLayers`.)
405
+ */
406
+ function applyProfileSampling(
407
+ samplingRef: SamplingRef,
408
+ profile: ProfileEntry,
409
+ ): void {
410
+ if (profile.temperature !== undefined) {
411
+ samplingRef.temperature = profile.temperature;
412
+ samplingRef.temperatureFromCallSite = false;
413
+ } else if (!samplingRef.temperatureFromCallSite) {
414
+ samplingRef.temperature = undefined;
415
+ }
416
+ if (profile.topP !== undefined) {
417
+ samplingRef.topP = profile.topP;
418
+ samplingRef.topPFromCallSite = false;
419
+ } else if (!samplingRef.topPFromCallSite) {
420
+ samplingRef.topP = undefined;
421
+ }
422
+ }
423
+
314
424
  function appendProfileLayer(
315
425
  layers: Mergeable[],
316
426
  profile: ProfileEntry | undefined,
317
427
  biasRef: LogitBiasRef,
428
+ samplingRef: SamplingRef,
318
429
  ): void {
319
430
  if (profile != null) {
320
431
  biasRef.preset = profile.logitBias;
432
+ applyProfileSampling(samplingRef, profile);
321
433
  layers.push(profileConfigFragment(profile));
322
434
  }
323
435
  }
@@ -329,6 +441,7 @@ function appendCallSiteLayers(
329
441
  site: z.infer<typeof LLMSchema>["callSites"][LLMCallSite] | undefined,
330
442
  opts: ResolveCallSiteOpts,
331
443
  biasRef: LogitBiasRef,
444
+ samplingRef: SamplingRef,
332
445
  ): void {
333
446
  if (site != null) {
334
447
  if (site.profile != null) {
@@ -343,11 +456,29 @@ function appendCallSiteLayers(
343
456
  );
344
457
  }
345
458
  biasRef.preset = profileFragment.logitBias;
459
+ applyProfileSampling(samplingRef, profileFragment);
346
460
  layers.push(profileConfigFragment(profileFragment));
347
461
  }
348
- // Strip the `profile` discriminator before merging — it isn't a
349
- // `LLMConfigBase` field.
350
- const { profile: _profile, ...siteFragment } = site;
462
+ // Strip the `profile` discriminator (not a `LLMConfigBase` field) and the
463
+ // sampling params before merging. An explicit call-site `temperature` /
464
+ // `topP` is a deliberate per-site choice, so it COALESCES over the winning
465
+ // profile's pair (only overriding the fields it sets) and is marked sticky
466
+ // so a later silent profile can't clear it — routed through `samplingRef` so
467
+ // it never inherits a shadowed profile's value via merge.
468
+ const {
469
+ profile: _profile,
470
+ temperature: siteTemperature,
471
+ topP: siteTopP,
472
+ ...siteFragment
473
+ } = site;
474
+ if (siteTemperature !== undefined) {
475
+ samplingRef.temperature = siteTemperature;
476
+ samplingRef.temperatureFromCallSite = true;
477
+ }
478
+ if (siteTopP !== undefined) {
479
+ samplingRef.topP = siteTopP;
480
+ samplingRef.topPFromCallSite = true;
481
+ }
351
482
  layers.push(siteFragment as Mergeable);
352
483
  }
353
484
  }
@@ -369,6 +500,12 @@ function profileConfigFragment(profile: ProfileEntry): Mergeable {
369
500
  // Per-profile advisor toggle is profile identity, not inheritable model
370
501
  // config — strip it so it can't leak into the merged `LLMConfigBase`.
371
502
  advisorEnabled: _advisorEnabled,
503
+ // `temperature`/`top_p` are provider-coupled: only the winning profile
504
+ // contributes them (tracked via `samplingRef`, applied post-merge), so a
505
+ // shadowed profile's sampling can never reach a different provider through
506
+ // the deep-merge. Strip here so no profile's sampling enters the merge.
507
+ temperature: _temperature,
508
+ topP: _topP,
372
509
  ...config
373
510
  } = profile;
374
511
  return config as Mergeable;