@vellumai/assistant 0.10.1-staging.2 → 0.10.1-staging.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -3191,3 +3191,70 @@ describe("AnthropicProvider — thinking block send-time filtering", () => {
|
|
|
3191
3191
|
expect(signatures).toContain("sig-step2");
|
|
3192
3192
|
});
|
|
3193
3193
|
});
|
|
3194
|
+
|
|
3195
|
+
describe("AnthropicProvider — deprecated sampling params (temperature / top_p / top_k)", () => {
|
|
3196
|
+
beforeEach(() => {
|
|
3197
|
+
lastStreamParams = null;
|
|
3198
|
+
});
|
|
3199
|
+
|
|
3200
|
+
// opus-4-7 / opus-4-8 (and, conservatively, fable) reject `temperature`,
|
|
3201
|
+
// `top_p`, and `top_k` with a 400; the provider must strip all three.
|
|
3202
|
+
for (const model of [
|
|
3203
|
+
"claude-opus-4-8",
|
|
3204
|
+
"claude-opus-4-7",
|
|
3205
|
+
"claude-fable-5",
|
|
3206
|
+
]) {
|
|
3207
|
+
test(`strips temperature, top_p, and top_k for ${model}`, async () => {
|
|
3208
|
+
const provider = new AnthropicProvider("sk-ant-test", model);
|
|
3209
|
+
await provider.sendMessage([userMsg("Hi")], {
|
|
3210
|
+
systemPrompt: "You are helpful.",
|
|
3211
|
+
config: { temperature: 0, top_p: 0.95, top_k: 40 },
|
|
3212
|
+
});
|
|
3213
|
+
expect(lastStreamParams!).not.toHaveProperty("temperature");
|
|
3214
|
+
expect(lastStreamParams!).not.toHaveProperty("top_p");
|
|
3215
|
+
expect(lastStreamParams!).not.toHaveProperty("top_k");
|
|
3216
|
+
});
|
|
3217
|
+
}
|
|
3218
|
+
|
|
3219
|
+
// opus-4-6 / sonnet-4-6 still accept the params — they must pass through,
|
|
3220
|
+
// including `temperature: 0` (a value check, not truthiness).
|
|
3221
|
+
test("forwards temperature (including 0), top_p, and top_k for opus-4-6", async () => {
|
|
3222
|
+
const provider = new AnthropicProvider("sk-ant-test", "claude-opus-4-6");
|
|
3223
|
+
await provider.sendMessage([userMsg("Hi")], {
|
|
3224
|
+
systemPrompt: "You are helpful.",
|
|
3225
|
+
config: { temperature: 0, top_p: 0.95, top_k: 40 },
|
|
3226
|
+
});
|
|
3227
|
+
expect(lastStreamParams!.temperature).toBe(0);
|
|
3228
|
+
expect(lastStreamParams!.top_p).toBe(0.95);
|
|
3229
|
+
expect(lastStreamParams!.top_k).toBe(40);
|
|
3230
|
+
});
|
|
3231
|
+
|
|
3232
|
+
test("forwards temperature, top_p, and top_k for sonnet-4-6", async () => {
|
|
3233
|
+
const provider = new AnthropicProvider("sk-ant-test", "claude-sonnet-4-6");
|
|
3234
|
+
await provider.sendMessage([userMsg("Hi")], {
|
|
3235
|
+
systemPrompt: "You are helpful.",
|
|
3236
|
+
config: { temperature: 0.7, top_p: 0.9, top_k: 20 },
|
|
3237
|
+
});
|
|
3238
|
+
expect(lastStreamParams!.temperature).toBe(0.7);
|
|
3239
|
+
expect(lastStreamParams!.top_p).toBe(0.9);
|
|
3240
|
+
expect(lastStreamParams!.top_k).toBe(20);
|
|
3241
|
+
});
|
|
3242
|
+
|
|
3243
|
+
// A per-call model override targeting a deprecating model must win over the
|
|
3244
|
+
// provider's default (accepting) model.
|
|
3245
|
+
test("strips params when a per-call model override deprecates them", async () => {
|
|
3246
|
+
const provider = new AnthropicProvider("sk-ant-test", "claude-sonnet-4-6");
|
|
3247
|
+
await provider.sendMessage([userMsg("Hi")], {
|
|
3248
|
+
systemPrompt: "You are helpful.",
|
|
3249
|
+
config: {
|
|
3250
|
+
temperature: 0,
|
|
3251
|
+
top_p: 0.95,
|
|
3252
|
+
top_k: 40,
|
|
3253
|
+
model: "claude-opus-4-8",
|
|
3254
|
+
},
|
|
3255
|
+
});
|
|
3256
|
+
expect(lastStreamParams!).not.toHaveProperty("temperature");
|
|
3257
|
+
expect(lastStreamParams!).not.toHaveProperty("top_p");
|
|
3258
|
+
expect(lastStreamParams!).not.toHaveProperty("top_k");
|
|
3259
|
+
});
|
|
3260
|
+
});
|
|
@@ -509,10 +509,13 @@ describe("resolveCallSiteConfig", () => {
|
|
|
509
509
|
expect(resolved.model).toBe("claude-opus-4-7");
|
|
510
510
|
});
|
|
511
511
|
|
|
512
|
-
test("thinking and contextWindow deep-merge across
|
|
512
|
+
test("thinking and contextWindow deep-merge across the contributing layers for non-main call sites", () => {
|
|
513
513
|
// Each layer touches a different leaf inside `thinking` and
|
|
514
514
|
// `contextWindow.overflowRecovery` so we can verify deep merge composes
|
|
515
515
|
// every contribution rather than wholesale-replacing the nested objects.
|
|
516
|
+
// The call site pins `siteProfile`, so the active profile is excluded — its
|
|
517
|
+
// leaves fall through to default while override, site profile, and the
|
|
518
|
+
// call-site fragment still compose.
|
|
516
519
|
const llm = LLMSchema.parse({
|
|
517
520
|
default: fullDefault,
|
|
518
521
|
profiles: {
|
|
@@ -539,13 +542,15 @@ describe("resolveCallSiteConfig", () => {
|
|
|
539
542
|
const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
|
|
540
543
|
overrideProfile: "override",
|
|
541
544
|
});
|
|
542
|
-
//
|
|
543
|
-
expect(resolved.thinking.enabled).toBe(false); // active
|
|
545
|
+
// Override, site profile, and the call-site fragment each contribute a leaf.
|
|
544
546
|
expect(resolved.thinking.streamThinking).toBe(false); // override
|
|
545
|
-
expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(7); // active
|
|
546
547
|
expect(resolved.contextWindow.overflowRecovery.safetyMarginRatio).toBe(0.1); // override
|
|
547
548
|
expect(resolved.contextWindow.targetBudgetRatio).toBe(0.5); // siteProfile
|
|
548
549
|
expect(resolved.contextWindow.compactThreshold).toBe(0.9); // callsite
|
|
550
|
+
// The active profile is excluded (the call site pins its own profile), so
|
|
551
|
+
// its leaves fall through to default instead of contributing.
|
|
552
|
+
expect(resolved.thinking.enabled).toBe(true); // default, NOT active's false
|
|
553
|
+
expect(resolved.contextWindow.overflowRecovery.maxAttempts).toBe(3); // default, NOT active's 7
|
|
549
554
|
// Untouched leaves at depth 2 fall through to default.
|
|
550
555
|
expect(resolved.contextWindow.overflowRecovery.enabled).toBe(true);
|
|
551
556
|
expect(
|
|
@@ -582,7 +587,9 @@ describe("resolveCallSiteConfig", () => {
|
|
|
582
587
|
// Lower layers contribute fields the site fragment does not touch.
|
|
583
588
|
expect(resolved.verbosity).toBe("high"); // from siteProfile
|
|
584
589
|
expect(resolved.speed).toBe("fast"); // from override
|
|
585
|
-
|
|
590
|
+
// The active profile is excluded when the call site pins its own profile,
|
|
591
|
+
// so `effort` falls through to default rather than active's "low".
|
|
592
|
+
expect(resolved.effort).toBe("max"); // default, NOT active's "low"
|
|
586
593
|
});
|
|
587
594
|
|
|
588
595
|
test("mainAgent activeProfile overrides static call-site defaults", () => {
|
|
@@ -1435,6 +1442,199 @@ describe("resolveCallSiteConfig logitBias provenance", () => {
|
|
|
1435
1442
|
});
|
|
1436
1443
|
});
|
|
1437
1444
|
|
|
1445
|
+
describe("resolveCallSiteConfig sampling-param provenance (temperature / top_p)", () => {
|
|
1446
|
+
// Mirrors production: the active `balanced` profile carries `topP: 0.95` (a
|
|
1447
|
+
// MiniMax tuning), while background call sites resolve to the Anthropic
|
|
1448
|
+
// `cost-optimized` profile. A field-by-field deep-merge would leak the active
|
|
1449
|
+
// profile's `top_p` onto those Anthropic requests.
|
|
1450
|
+
const balancedActive = LLMSchema.parse({
|
|
1451
|
+
default: fullDefault,
|
|
1452
|
+
profiles: {
|
|
1453
|
+
balanced: {
|
|
1454
|
+
provider: "together",
|
|
1455
|
+
model: "MiniMaxAI/MiniMax-M3",
|
|
1456
|
+
topP: 0.95,
|
|
1457
|
+
},
|
|
1458
|
+
"cost-optimized": {
|
|
1459
|
+
provider: "anthropic",
|
|
1460
|
+
model: "claude-haiku-4-5-20251001",
|
|
1461
|
+
effort: "low",
|
|
1462
|
+
thinking: { enabled: false },
|
|
1463
|
+
},
|
|
1464
|
+
},
|
|
1465
|
+
activeProfile: "balanced",
|
|
1466
|
+
});
|
|
1467
|
+
|
|
1468
|
+
test("active profile's top_p does not leak into a profile-pinned call site (Option 1 + 2)", () => {
|
|
1469
|
+
const llm = LLMSchema.parse({
|
|
1470
|
+
default: fullDefault,
|
|
1471
|
+
profiles: {
|
|
1472
|
+
balanced: {
|
|
1473
|
+
provider: "together",
|
|
1474
|
+
model: "MiniMaxAI/MiniMax-M3",
|
|
1475
|
+
topP: 0.95,
|
|
1476
|
+
},
|
|
1477
|
+
"cost-optimized": {
|
|
1478
|
+
provider: "anthropic",
|
|
1479
|
+
model: "claude-haiku-4-5-20251001",
|
|
1480
|
+
},
|
|
1481
|
+
},
|
|
1482
|
+
activeProfile: "balanced",
|
|
1483
|
+
callSites: { memoryExtraction: { profile: "cost-optimized" } },
|
|
1484
|
+
});
|
|
1485
|
+
const resolved = resolveCallSiteConfig("memoryExtraction", llm);
|
|
1486
|
+
expect(resolved.provider).toBe("anthropic");
|
|
1487
|
+
expect(resolved.model).toBe("claude-haiku-4-5-20251001");
|
|
1488
|
+
// balanced (active) is shadowed by the pinned cost-optimized profile, so
|
|
1489
|
+
// its top_p must not ride along onto the Anthropic request.
|
|
1490
|
+
expect(resolved.topP).toBeNull();
|
|
1491
|
+
});
|
|
1492
|
+
|
|
1493
|
+
test("homeGreeting / commitMessage resolve to a temperature with NO top_p", () => {
|
|
1494
|
+
const greeting = resolveCallSiteConfig("homeGreeting", balancedActive);
|
|
1495
|
+
expect(greeting.model).toBe("claude-haiku-4-5-20251001");
|
|
1496
|
+
// Per-call-site temperature from CALL_SITE_DEFAULTS survives.
|
|
1497
|
+
expect(greeting.temperature).toBe(0.7);
|
|
1498
|
+
// The active profile's top_p does NOT — both together would trip
|
|
1499
|
+
// Anthropic's "temperature and top_p cannot both be specified".
|
|
1500
|
+
expect(greeting.topP).toBeNull();
|
|
1501
|
+
|
|
1502
|
+
const commit = resolveCallSiteConfig("commitMessage", balancedActive);
|
|
1503
|
+
expect(commit.temperature).toBe(0.2);
|
|
1504
|
+
expect(commit.topP).toBeNull();
|
|
1505
|
+
});
|
|
1506
|
+
|
|
1507
|
+
test("profile-less call site still inherits the active profile's provider AND sampling", () => {
|
|
1508
|
+
// `workflowLeaf` pins no profile, so the active profile is the legitimate
|
|
1509
|
+
// fallback (Option 1 keeps it): it supplies provider/model and its own
|
|
1510
|
+
// (coherent, same-provider) sampling.
|
|
1511
|
+
const resolved = resolveCallSiteConfig("workflowLeaf", balancedActive);
|
|
1512
|
+
expect(resolved.provider).toBe("together");
|
|
1513
|
+
expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
|
|
1514
|
+
expect(resolved.topP).toBe(0.95);
|
|
1515
|
+
});
|
|
1516
|
+
|
|
1517
|
+
test("mainAgent keeps the active profile's top_p (balanced wins there)", () => {
|
|
1518
|
+
const resolved = resolveCallSiteConfig("mainAgent", balancedActive);
|
|
1519
|
+
expect(resolved.model).toBe("MiniMaxAI/MiniMax-M3");
|
|
1520
|
+
expect(resolved.topP).toBe(0.95);
|
|
1521
|
+
});
|
|
1522
|
+
|
|
1523
|
+
test("an explicit call-site temperature override still wins over the winning profile", () => {
|
|
1524
|
+
const llm = LLMSchema.parse({
|
|
1525
|
+
default: fullDefault,
|
|
1526
|
+
profiles: { nucleus: { topP: 0.9, temperature: 0.1 } },
|
|
1527
|
+
callSites: { memoryExtraction: { profile: "nucleus", temperature: 0.5 } },
|
|
1528
|
+
});
|
|
1529
|
+
const resolved = resolveCallSiteConfig("memoryExtraction", llm);
|
|
1530
|
+
// Call-site override wins for the field it sets.
|
|
1531
|
+
expect(resolved.temperature).toBe(0.5);
|
|
1532
|
+
// The winning profile's top_p (no call-site override) still applies.
|
|
1533
|
+
expect(resolved.topP).toBe(0.9);
|
|
1534
|
+
});
|
|
1535
|
+
|
|
1536
|
+
test("a higher-precedence profile that omits top_p clears a lower profile's top_p (Option 2)", () => {
|
|
1537
|
+
// No site profile is involved here, so the active profile IS folded in —
|
|
1538
|
+
// this isolates Option 2: the override profile wins and omits top_p, so
|
|
1539
|
+
// balanced's 0.95 must be cleared rather than surviving the merge.
|
|
1540
|
+
const llm = LLMSchema.parse({
|
|
1541
|
+
default: fullDefault,
|
|
1542
|
+
profiles: {
|
|
1543
|
+
balanced: {
|
|
1544
|
+
provider: "together",
|
|
1545
|
+
model: "MiniMaxAI/MiniMax-M3",
|
|
1546
|
+
topP: 0.95,
|
|
1547
|
+
},
|
|
1548
|
+
plain: { provider: "anthropic", model: "claude-opus-4-7" },
|
|
1549
|
+
},
|
|
1550
|
+
activeProfile: "balanced",
|
|
1551
|
+
});
|
|
1552
|
+
const resolved = resolveCallSiteConfig("mainAgent", llm, {
|
|
1553
|
+
overrideProfile: "plain",
|
|
1554
|
+
});
|
|
1555
|
+
expect(resolved.model).toBe("claude-opus-4-7");
|
|
1556
|
+
expect(resolved.topP).toBeNull();
|
|
1557
|
+
});
|
|
1558
|
+
|
|
1559
|
+
test("forceOverrideProfile: an explicit call-site temperature survives a forced profile silent on sampling", () => {
|
|
1560
|
+
const llm = LLMSchema.parse({
|
|
1561
|
+
default: fullDefault,
|
|
1562
|
+
profiles: {
|
|
1563
|
+
active: { verbosity: "low" },
|
|
1564
|
+
sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
|
|
1565
|
+
forced: { model: "claude-opus-4-7", effort: "high" },
|
|
1566
|
+
},
|
|
1567
|
+
callSites: {
|
|
1568
|
+
memoryExtraction: {
|
|
1569
|
+
profile: "sitep",
|
|
1570
|
+
temperature: 0.7,
|
|
1571
|
+
maxTokens: 1000,
|
|
1572
|
+
},
|
|
1573
|
+
},
|
|
1574
|
+
activeProfile: "active",
|
|
1575
|
+
});
|
|
1576
|
+
const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
|
|
1577
|
+
overrideProfile: "forced",
|
|
1578
|
+
forceOverrideProfile: true,
|
|
1579
|
+
});
|
|
1580
|
+
// The forced profile floats to the top for fields it sets.
|
|
1581
|
+
expect(resolved.model).toBe("claude-opus-4-7");
|
|
1582
|
+
expect(resolved.effort).toBe("high");
|
|
1583
|
+
// It is silent on temperature, so the deliberate call-site value survives —
|
|
1584
|
+
// consistent with sibling call-site fields like maxTokens (which flow
|
|
1585
|
+
// through the deep-merge).
|
|
1586
|
+
expect(resolved.temperature).toBe(0.7);
|
|
1587
|
+
expect(resolved.maxTokens).toBe(1000);
|
|
1588
|
+
});
|
|
1589
|
+
|
|
1590
|
+
test("forceOverrideProfile: a forced profile that sets temperature wins over the call-site override", () => {
|
|
1591
|
+
const llm = LLMSchema.parse({
|
|
1592
|
+
default: fullDefault,
|
|
1593
|
+
profiles: {
|
|
1594
|
+
sitep: { provider: "anthropic", model: "claude-haiku-4-5-20251001" },
|
|
1595
|
+
forced: { model: "claude-opus-4-7", temperature: 0.1 },
|
|
1596
|
+
},
|
|
1597
|
+
callSites: {
|
|
1598
|
+
memoryExtraction: { profile: "sitep", temperature: 0.7 },
|
|
1599
|
+
},
|
|
1600
|
+
});
|
|
1601
|
+
const resolved = resolveCallSiteConfig("memoryExtraction", llm, {
|
|
1602
|
+
overrideProfile: "forced",
|
|
1603
|
+
forceOverrideProfile: true,
|
|
1604
|
+
});
|
|
1605
|
+
// The forced profile explicitly sets temperature, so it floats above the
|
|
1606
|
+
// call-site override.
|
|
1607
|
+
expect(resolved.temperature).toBe(0.1);
|
|
1608
|
+
});
|
|
1609
|
+
|
|
1610
|
+
test("mainAgent: an explicit call-site temperature survives an active profile silent on sampling", () => {
|
|
1611
|
+
const llm = LLMSchema.parse({
|
|
1612
|
+
default: fullDefault,
|
|
1613
|
+
profiles: { active: { model: "claude-sonnet-4-7" } },
|
|
1614
|
+
callSites: { mainAgent: { temperature: 0.5 } },
|
|
1615
|
+
activeProfile: "active",
|
|
1616
|
+
});
|
|
1617
|
+
const resolved = resolveCallSiteConfig("mainAgent", llm);
|
|
1618
|
+
// The active profile floats above the call-site for mainAgent but is silent
|
|
1619
|
+
// on temperature, so the deliberate call-site value survives.
|
|
1620
|
+
expect(resolved.model).toBe("claude-sonnet-4-7");
|
|
1621
|
+
expect(resolved.temperature).toBe(0.5);
|
|
1622
|
+
});
|
|
1623
|
+
|
|
1624
|
+
test("mainAgent: the active profile's explicit temperature wins over a call-site temperature", () => {
|
|
1625
|
+
const llm = LLMSchema.parse({
|
|
1626
|
+
default: fullDefault,
|
|
1627
|
+
profiles: { active: { model: "claude-sonnet-4-7", temperature: 0.2 } },
|
|
1628
|
+
callSites: { mainAgent: { temperature: 0.5 } },
|
|
1629
|
+
activeProfile: "active",
|
|
1630
|
+
});
|
|
1631
|
+
const resolved = resolveCallSiteConfig("mainAgent", llm);
|
|
1632
|
+
// For mainAgent the active profile floats above the call-site override, so
|
|
1633
|
+
// its explicit temperature wins.
|
|
1634
|
+
expect(resolved.temperature).toBe(0.2);
|
|
1635
|
+
});
|
|
1636
|
+
});
|
|
1637
|
+
|
|
1438
1638
|
describe("resolveCallSiteConfig — workflowLeaf default", () => {
|
|
1439
1639
|
test("inherits the workspace default config rather than pinning cost-optimized", () => {
|
|
1440
1640
|
const llm = LLMSchema.parse({
|
|
@@ -17,7 +17,13 @@ import {
|
|
|
17
17
|
* Merge layers (low → high precedence; later layers override earlier) for
|
|
18
18
|
* non-main-agent call sites:
|
|
19
19
|
* 1. `llm.default` fields (required base)
|
|
20
|
-
* 2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile)
|
|
20
|
+
* 2. `llm.profiles[llm.activeProfile]` (workspace-wide active profile) —
|
|
21
|
+
* folded in ONLY when the call site resolves no profile of its own (a
|
|
22
|
+
* profile-less leaf like `vision`/`workflowLeaf`, or a BYOK install whose
|
|
23
|
+
* pinned managed profile was stripped). When the call site resolves a
|
|
24
|
+
* profile, that profile is the authoritative provider config and the
|
|
25
|
+
* active profile does not contribute — otherwise a deep-merge would let
|
|
26
|
+
* its orphan fields bleed onto a different provider.
|
|
21
27
|
* 3. `llm.profiles[opts.overrideProfile]` (per-call ad-hoc override)
|
|
22
28
|
* 4. `llm.profiles[site.profile]` fields (call-site's named profile)
|
|
23
29
|
* 5. `llm.callSites[callSite]` fields (call-site override)
|
|
@@ -50,6 +56,15 @@ import {
|
|
|
50
56
|
* any nesting level merge into — rather than replace — the corresponding
|
|
51
57
|
* base value.
|
|
52
58
|
*
|
|
59
|
+
* `temperature` and `top_p` are provider-coupled, so they do NOT deep-merge
|
|
60
|
+
* field-by-field with the rest of the config: only the winning profile (the
|
|
61
|
+
* highest-precedence profile that determines provider/model) contributes them,
|
|
62
|
+
 * and an explicit `llm.callSites[callSite]` override still wins (unless a profile layered above it explicitly sets the field). A lower-
|
|
63
|
+
* precedence profile whose model is shadowed never leaks its sampling onto a
|
|
64
|
+
* different provider (which would trip e.g. Anthropic's "temperature and top_p
|
|
65
|
+
* cannot both be specified" constraint). `logitBias` is winning-profile-scoped
|
|
66
|
+
* the same way.
|
|
67
|
+
*
|
|
53
68
|
* `activeProfile` and `overrideProfile` are resolved by name lookup against
|
|
54
69
|
* `llm.profiles`. Missing references silently fall through (no throw) so the
|
|
55
70
|
* resolver stays pure; schema validation in `LLMSchema.superRefine` catches
|
|
@@ -108,6 +123,19 @@ export function resolveCallSiteConfig(
|
|
|
108
123
|
// call-site default selected by `effectiveDefault`.
|
|
109
124
|
const biasRef: LogitBiasRef = { preset: undefined };
|
|
110
125
|
|
|
126
|
+
// Effective sampling params, tracked outside the deep-merge for the same
|
|
127
|
+
// reason as `logitBias`: `temperature`/`top_p` are provider-coupled, so only
|
|
128
|
+
// the winning profile may contribute them. A profile clears what a lower
|
|
129
|
+
// PROFILE set where it is silent (so a shadowed profile's sampling can't
|
|
130
|
+
// leak), while an explicit call-site override is sticky and survives a later
|
|
131
|
+
// silent profile (see `applyProfileSampling` / `appendCallSiteLayers`).
|
|
132
|
+
const samplingRef: SamplingRef = {
|
|
133
|
+
temperature: undefined,
|
|
134
|
+
topP: undefined,
|
|
135
|
+
temperatureFromCallSite: false,
|
|
136
|
+
topPFromCallSite: false,
|
|
137
|
+
};
|
|
138
|
+
|
|
111
139
|
const activeFragment = resolveProfileFragment(llm.activeProfile, llm, opts);
|
|
112
140
|
const overrideFragment = resolveProfileFragment(
|
|
113
141
|
opts.overrideProfile,
|
|
@@ -119,22 +147,55 @@ export function resolveCallSiteConfig(
|
|
|
119
147
|
effectiveDefault(callSite, llm, opts.overrideProfile != null);
|
|
120
148
|
|
|
121
149
|
if (callSite === "mainAgent") {
|
|
122
|
-
appendCallSiteLayers(
|
|
123
|
-
|
|
124
|
-
|
|
150
|
+
appendCallSiteLayers(
|
|
151
|
+
layers,
|
|
152
|
+
callSite,
|
|
153
|
+
llm,
|
|
154
|
+
site,
|
|
155
|
+
opts,
|
|
156
|
+
biasRef,
|
|
157
|
+
samplingRef,
|
|
158
|
+
);
|
|
159
|
+
appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
|
|
160
|
+
appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
|
|
125
161
|
} else if (opts.forceOverrideProfile === true && overrideFragment != null) {
|
|
126
162
|
// Escape hatch: float the override profile above the call-site layers,
|
|
127
163
|
// mirroring mainAgent's treatment of the user's chat-model selection.
|
|
128
164
|
// Guarded on a resolved fragment so a missing profile reference degrades
|
|
129
165
|
// to the normal precedence below instead of silently dropping the
|
|
130
|
-
// call-site layers' standing.
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
166
|
+
// call-site layers' standing. The active profile stays the bottom fallback
|
|
167
|
+
// (its sampling can't leak — a higher profile's REPLACE clears it).
|
|
168
|
+
appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
|
|
169
|
+
appendCallSiteLayers(
|
|
170
|
+
layers,
|
|
171
|
+
callSite,
|
|
172
|
+
llm,
|
|
173
|
+
site,
|
|
174
|
+
opts,
|
|
175
|
+
biasRef,
|
|
176
|
+
samplingRef,
|
|
177
|
+
);
|
|
178
|
+
appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
|
|
134
179
|
} else {
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
180
|
+
// The active profile is a low-precedence FALLBACK for call sites that
|
|
181
|
+
// resolve no profile of their own — profile-less leaves (`vision`,
|
|
182
|
+
// `workflowLeaf`) and BYOK installs where the pinned managed profile was
|
|
183
|
+
// stripped. When the call site DOES resolve its own profile, that profile
|
|
184
|
+
// is the authoritative provider config, so the active profile must not
|
|
185
|
+
// contribute its orphan fields to a different provider.
|
|
186
|
+
if (site?.profile == null) {
|
|
187
|
+
appendProfileLayer(layers, activeFragment, biasRef, samplingRef);
|
|
188
|
+
}
|
|
189
|
+
appendProfileLayer(layers, overrideFragment, biasRef, samplingRef);
|
|
190
|
+
appendCallSiteLayers(
|
|
191
|
+
layers,
|
|
192
|
+
callSite,
|
|
193
|
+
llm,
|
|
194
|
+
site,
|
|
195
|
+
opts,
|
|
196
|
+
biasRef,
|
|
197
|
+
samplingRef,
|
|
198
|
+
);
|
|
138
199
|
}
|
|
139
200
|
|
|
140
201
|
const resolved = finalize(
|
|
@@ -149,11 +210,34 @@ export function resolveCallSiteConfig(
|
|
|
149
210
|
} else {
|
|
150
211
|
delete (resolved as { logitBias?: unknown }).logitBias;
|
|
151
212
|
}
|
|
213
|
+
// `temperature`/`top_p` are winning-profile-scoped like `logitBias`, but an
|
|
214
|
+
// explicit call-site override may also set them. Apply the tracked value,
|
|
215
|
+
// overriding whatever a shadowed profile may have left in the merge. An
|
|
216
|
+
// `undefined` ref means no profile or override opted in, so the `llm.default`
|
|
217
|
+
// base already in `resolved` stands.
|
|
218
|
+
if (samplingRef.temperature !== undefined) {
|
|
219
|
+
resolved.temperature = samplingRef.temperature;
|
|
220
|
+
}
|
|
221
|
+
if (samplingRef.topP !== undefined) {
|
|
222
|
+
resolved.topP = samplingRef.topP;
|
|
223
|
+
}
|
|
152
224
|
return resolved;
|
|
153
225
|
}
|
|
154
226
|
|
|
155
227
|
type LogitBiasRef = { preset: ProfileEntry["logitBias"] };
|
|
156
228
|
|
|
229
|
+
type SamplingRef = {
|
|
230
|
+
temperature: ProfileEntry["temperature"];
|
|
231
|
+
topP: ProfileEntry["topP"];
|
|
232
|
+
// Provenance of the current pair: `true` when a field came from an explicit
|
|
233
|
+
// call-site override (deliberate, sticky), `false` when it came from a profile
|
|
234
|
+
// (clearable by a higher-precedence profile that determines the model). Lets a
|
|
235
|
+
// silent higher profile clear a lower profile's sampling without discarding a
|
|
236
|
+
// deliberate call-site override.
|
|
237
|
+
temperatureFromCallSite: boolean;
|
|
238
|
+
topPFromCallSite: boolean;
|
|
239
|
+
};
|
|
240
|
+
|
|
157
241
|
// ---------------------------------------------------------------------------
|
|
158
242
|
// Internal helpers
|
|
159
243
|
// ---------------------------------------------------------------------------
|
|
@@ -311,13 +395,41 @@ function withImpliedProviderForKnownModel(source: Mergeable): Mergeable {
|
|
|
311
395
|
};
|
|
312
396
|
}
|
|
313
397
|
|
|
398
|
+
/**
|
|
399
|
+
* Fold a profile's sampling into `samplingRef`. A profile determines
|
|
400
|
+
* provider/model, so its pair supersedes any LOWER PROFILE's: set each field the
|
|
401
|
+
* profile specifies, and clear a lower profile's value where the profile is
|
|
402
|
+
* silent. A deliberate call-site override is NOT a profile and outranks a silent
|
|
403
|
+
* profile — it survives until a profile EXPLICITLY sets the field. (The mirror
|
|
404
|
+
* COALESCE for call-site overrides lives in `appendCallSiteLayers`.)
|
|
405
|
+
*/
|
|
406
|
+
function applyProfileSampling(
|
|
407
|
+
samplingRef: SamplingRef,
|
|
408
|
+
profile: ProfileEntry,
|
|
409
|
+
): void {
|
|
410
|
+
if (profile.temperature !== undefined) {
|
|
411
|
+
samplingRef.temperature = profile.temperature;
|
|
412
|
+
samplingRef.temperatureFromCallSite = false;
|
|
413
|
+
} else if (!samplingRef.temperatureFromCallSite) {
|
|
414
|
+
samplingRef.temperature = undefined;
|
|
415
|
+
}
|
|
416
|
+
if (profile.topP !== undefined) {
|
|
417
|
+
samplingRef.topP = profile.topP;
|
|
418
|
+
samplingRef.topPFromCallSite = false;
|
|
419
|
+
} else if (!samplingRef.topPFromCallSite) {
|
|
420
|
+
samplingRef.topP = undefined;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
314
424
|
function appendProfileLayer(
|
|
315
425
|
layers: Mergeable[],
|
|
316
426
|
profile: ProfileEntry | undefined,
|
|
317
427
|
biasRef: LogitBiasRef,
|
|
428
|
+
samplingRef: SamplingRef,
|
|
318
429
|
): void {
|
|
319
430
|
if (profile != null) {
|
|
320
431
|
biasRef.preset = profile.logitBias;
|
|
432
|
+
applyProfileSampling(samplingRef, profile);
|
|
321
433
|
layers.push(profileConfigFragment(profile));
|
|
322
434
|
}
|
|
323
435
|
}
|
|
@@ -329,6 +441,7 @@ function appendCallSiteLayers(
|
|
|
329
441
|
site: z.infer<typeof LLMSchema>["callSites"][LLMCallSite] | undefined,
|
|
330
442
|
opts: ResolveCallSiteOpts,
|
|
331
443
|
biasRef: LogitBiasRef,
|
|
444
|
+
samplingRef: SamplingRef,
|
|
332
445
|
): void {
|
|
333
446
|
if (site != null) {
|
|
334
447
|
if (site.profile != null) {
|
|
@@ -343,11 +456,29 @@ function appendCallSiteLayers(
|
|
|
343
456
|
);
|
|
344
457
|
}
|
|
345
458
|
biasRef.preset = profileFragment.logitBias;
|
|
459
|
+
applyProfileSampling(samplingRef, profileFragment);
|
|
346
460
|
layers.push(profileConfigFragment(profileFragment));
|
|
347
461
|
}
|
|
348
|
-
// Strip the `profile` discriminator
|
|
349
|
-
// `
|
|
350
|
-
|
|
462
|
+
// Strip the `profile` discriminator (not a `LLMConfigBase` field) and the
|
|
463
|
+
// sampling params before merging. An explicit call-site `temperature` /
|
|
464
|
+
// `topP` is a deliberate per-site choice, so it COALESCES over the winning
|
|
465
|
+
// profile's pair (only overriding the fields it sets) and is marked sticky
|
|
466
|
+
// so a later silent profile can't clear it — routed through `samplingRef` so
|
|
467
|
+
// it never inherits a shadowed profile's value via merge.
|
|
468
|
+
const {
|
|
469
|
+
profile: _profile,
|
|
470
|
+
temperature: siteTemperature,
|
|
471
|
+
topP: siteTopP,
|
|
472
|
+
...siteFragment
|
|
473
|
+
} = site;
|
|
474
|
+
if (siteTemperature !== undefined) {
|
|
475
|
+
samplingRef.temperature = siteTemperature;
|
|
476
|
+
samplingRef.temperatureFromCallSite = true;
|
|
477
|
+
}
|
|
478
|
+
if (siteTopP !== undefined) {
|
|
479
|
+
samplingRef.topP = siteTopP;
|
|
480
|
+
samplingRef.topPFromCallSite = true;
|
|
481
|
+
}
|
|
351
482
|
layers.push(siteFragment as Mergeable);
|
|
352
483
|
}
|
|
353
484
|
}
|
|
@@ -369,6 +500,12 @@ function profileConfigFragment(profile: ProfileEntry): Mergeable {
|
|
|
369
500
|
// Per-profile advisor toggle is profile identity, not inheritable model
|
|
370
501
|
// config — strip it so it can't leak into the merged `LLMConfigBase`.
|
|
371
502
|
advisorEnabled: _advisorEnabled,
|
|
503
|
+
// `temperature`/`top_p` are provider-coupled: only the winning profile
|
|
504
|
+
// contributes them (tracked via `samplingRef`, applied post-merge), so a
|
|
505
|
+
// shadowed profile's sampling can never reach a different provider through
|
|
506
|
+
// the deep-merge. Strip here so no profile's sampling enters the merge.
|
|
507
|
+
temperature: _temperature,
|
|
508
|
+
topP: _topP,
|
|
372
509
|
...config
|
|
373
510
|
} = profile;
|
|
374
511
|
return config as Mergeable;
|
|
@@ -40,7 +40,7 @@ export class GeminiEmbeddingBackend implements EmbeddingBackend {
|
|
|
40
40
|
this.taskType = options?.taskType;
|
|
41
41
|
this.dimensions = options?.dimensions;
|
|
42
42
|
this.managedBaseUrl = options?.managedBaseUrl;
|
|
43
|
-
this.interCallDelayMs = options?.interCallDelayMs ??
|
|
43
|
+
this.interCallDelayMs = options?.interCallDelayMs ?? 100;
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
/** True when requests route through the managed platform proxy. */
|
|
@@ -835,6 +835,11 @@ export class AnthropicProvider implements Provider {
|
|
|
835
835
|
disableCache: _disableCache,
|
|
836
836
|
max_tokens: callerMaxTokens,
|
|
837
837
|
usageAttributionHeaders,
|
|
838
|
+
// Pulled out of `restConfig` so they are forwarded conditionally below:
|
|
839
|
+
// newer models reject them outright (see `deprecatesSamplingParams`).
|
|
840
|
+
temperature: callerTemperature,
|
|
841
|
+
top_p: callerTopP,
|
|
842
|
+
top_k: callerTopK,
|
|
838
843
|
...restConfig
|
|
839
844
|
} = (config ?? {}) as Record<string, unknown> & {
|
|
840
845
|
// "xhigh" is an intermediate tier between "high" and "max" supported
|
|
@@ -847,6 +852,9 @@ export class AnthropicProvider implements Provider {
|
|
|
847
852
|
speed?: "standard" | "fast";
|
|
848
853
|
output_config?: Record<string, unknown>;
|
|
849
854
|
usageAttributionHeaders?: Record<string, string>;
|
|
855
|
+
temperature?: number;
|
|
856
|
+
top_p?: number;
|
|
857
|
+
top_k?: number;
|
|
850
858
|
};
|
|
851
859
|
// Haiku does not support the effort / output_config parameter or
|
|
852
860
|
// extended cache TTL betas.
|
|
@@ -856,6 +864,16 @@ export class AnthropicProvider implements Provider {
|
|
|
856
864
|
(restConfig as Record<string, unknown>).model?.toString() ?? this.model;
|
|
857
865
|
const isHaiku = effectiveModel.includes("haiku");
|
|
858
866
|
const supportsEffort = !isHaiku;
|
|
867
|
+
// opus-4-7 / opus-4-8 reject `temperature`, `top_p`, and `top_k` with a 400
|
|
868
|
+
// "`temperature`/`top_p` is deprecated for this model" — model-wide, not
|
|
869
|
+
// effort-conditional (verified 2026-06-23). opus-4-6 / sonnet-4-6 /
|
|
870
|
+
// haiku-4-5 still accept them. fable-5 is included conservatively (a
|
|
871
|
+
// frontier model that could not be verified directly but follows the same
|
|
872
|
+
// deprecation direction). Stripping the params here keeps callers that set
|
|
873
|
+
// them (e.g. the memory-v3 L2 selector's `temperature: 0`) from 400ing.
|
|
874
|
+
const deprecatesSamplingParams =
|
|
875
|
+
/claude-opus-4-[78]\b/.test(effectiveModel) ||
|
|
876
|
+
effectiveModel.startsWith("claude-fable-");
|
|
859
877
|
const mergedOutputConfig = {
|
|
860
878
|
...(output_config ?? {}),
|
|
861
879
|
...(effort && effort !== "none" && supportsEffort
|
|
@@ -883,6 +901,19 @@ export class AnthropicProvider implements Provider {
|
|
|
883
901
|
: 64000,
|
|
884
902
|
messages: sentMessages,
|
|
885
903
|
...restConfig,
|
|
904
|
+
// Forward `temperature` / `top_p` / `top_k` only to models that still
|
|
905
|
+
// accept them; newer models 400 on any of the deprecated sampler params.
|
|
906
|
+
// `temperature: 0` is preserved for accepting models (a `typeof ===
|
|
907
|
+
// "number"` check, not truthiness).
|
|
908
|
+
...(deprecatesSamplingParams
|
|
909
|
+
? {}
|
|
910
|
+
: {
|
|
911
|
+
...(typeof callerTemperature === "number"
|
|
912
|
+
? { temperature: callerTemperature }
|
|
913
|
+
: {}),
|
|
914
|
+
...(typeof callerTopP === "number" ? { top_p: callerTopP } : {}),
|
|
915
|
+
...(typeof callerTopK === "number" ? { top_k: callerTopK } : {}),
|
|
916
|
+
}),
|
|
886
917
|
...(Object.keys(mergedOutputConfig).length > 0
|
|
887
918
|
? { output_config: mergedOutputConfig }
|
|
888
919
|
: {}),
|