@jsonstudio/llms 0.6.3379 → 0.6.3409

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/conversion/compat/actions/claude-thinking-tools.d.ts +1 -14
  2. package/dist/conversion/compat/actions/claude-thinking-tools.js +3 -71
  3. package/dist/conversion/compat/actions/lmstudio-responses-fc-ids.d.ts +0 -8
  4. package/dist/conversion/compat/actions/lmstudio-responses-fc-ids.js +2 -57
  5. package/dist/conversion/compat/actions/normalize-tool-call-ids.d.ts +0 -9
  6. package/dist/conversion/compat/actions/normalize-tool-call-ids.js +6 -136
  7. package/dist/conversion/compat/actions/request-rules.js +2 -61
  8. package/dist/conversion/compat/actions/response-blacklist.d.ts +0 -4
  9. package/dist/conversion/compat/actions/response-blacklist.js +2 -77
  10. package/dist/conversion/compat/actions/response-normalize.js +2 -119
  11. package/dist/conversion/compat/actions/response-validate.js +2 -74
  12. package/dist/conversion/compat/actions/strip-orphan-function-calls-tag.js +2 -150
  13. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +24 -1
  14. package/dist/conversion/hub/pipeline/hub-pipeline.js +91 -0
  15. package/dist/conversion/shared/reasoning-tool-parser.js +7 -8
  16. package/dist/conversion/shared/responses-response-utils.js +3 -48
  17. package/dist/conversion/shared/responses-tool-utils.js +22 -126
  18. package/dist/conversion/shared/tool-call-id-manager.js +18 -21
  19. package/dist/native/router_hotpath_napi.node +0 -0
  20. package/dist/router/virtual-router/bootstrap/routing-config.d.ts +2 -1
  21. package/dist/router/virtual-router/bootstrap/routing-config.js +47 -2
  22. package/dist/router/virtual-router/bootstrap/web-search-config.js +25 -0
  23. package/dist/router/virtual-router/bootstrap.js +21 -16
  24. package/dist/router/virtual-router/engine-selection/native-compat-action-semantics.d.ts +6 -0
  25. package/dist/router/virtual-router/engine-selection/native-compat-action-semantics.js +171 -0
  26. package/dist/router/virtual-router/engine-selection/native-router-hotpath-loader.js +11 -0
  27. package/dist/router/virtual-router/engine-selection/native-shared-conversion-semantics.d.ts +5 -0
  28. package/dist/router/virtual-router/engine-selection/native-shared-conversion-semantics.js +137 -0
  29. package/dist/router/virtual-router/engine-selection/tier-load-balancing.d.ts +16 -0
  30. package/dist/router/virtual-router/engine-selection/tier-load-balancing.js +120 -0
  31. package/dist/router/virtual-router/engine-selection/tier-selection-quota-integration.d.ts +2 -0
  32. package/dist/router/virtual-router/engine-selection/tier-selection-quota-integration.js +44 -66
  33. package/dist/router/virtual-router/engine-selection/tier-selection-select.js +53 -84
  34. package/dist/router/virtual-router/types.d.ts +39 -0
  35. package/dist/servertool/handlers/web-search.js +26 -1
  36. package/dist/servertool/server-side-tools.js +11 -2
  37. package/dist/servertool/types.d.ts +4 -0
  38. package/package.json +1 -1
@@ -0,0 +1,120 @@
1
+ import { extractProviderId, getProviderModelId } from './key-parsing.js';
2
/**
 * Combine a tier's optional `loadBalancing` override with the global policy.
 *
 * Each field is resolved on its own: the tier value is used when it is not
 * null/undefined, otherwise the global value. The strategy finally defaults
 * to 'round-robin'; weights may stay undefined.
 *
 * @param tier Route pool tier; only its `loadBalancing` property is read.
 * @param globalPolicy Global load-balancing policy (may be null/undefined).
 * @returns An object with the resolved `strategy` and `weights`.
 */
export function resolveTierLoadBalancing(tier, globalPolicy) {
    const { strategy: tierStrategy, weights: tierWeights } = tier.loadBalancing ?? {};
    const strategy = tierStrategy ?? globalPolicy?.strategy ?? 'round-robin';
    const weights = tierWeights ?? globalPolicy?.weights;
    return { strategy, weights };
}
9
/**
 * Resolve the weight configured for a target group.
 *
 * Lookup order: the exact `groupId`, then the segment of `groupId` before its
 * first '.'. A configured value is only accepted when it is a finite number
 * greater than zero; otherwise the lookup continues.
 *
 * @param groupId Group identifier used as the lookup key.
 * @param weights Weight map; when falsy the result is 1.
 * @returns The first usable weight found, or 1 when none is usable.
 */
export function resolveGroupWeight(groupId, weights) {
    if (!weights) {
        return 1;
    }
    // Returns the configured value for `id` only when it is a usable weight.
    const usableWeight = (id) => {
        const value = weights[id];
        return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : undefined;
    };
    // `??` keeps the fallback lookup lazy: the id is only split when the
    // exact match did not yield a usable weight.
    return usableWeight(groupId) ?? usableWeight(groupId.split('.')[0] ?? groupId) ?? 1;
}
24
/**
 * Build a per-group weight map for the given groups.
 *
 * Every group id is resolved through `resolveGroupWeight`. The map is only
 * returned when at least one resolved weight differs from 1; otherwise (and
 * when there are no groups or no weights) the result is undefined.
 *
 * @param groups Keyed collection of groups; its `size` and keys are read.
 * @param weights Configured weight map (may be falsy).
 * @returns A `{ [groupId]: weight }` object, or undefined.
 */
export function buildGroupWeights(groups, weights) {
    if (!groups.size || !weights) {
        return undefined;
    }
    const out = {};
    let sawNonDefault = false;
    groups.forEach((_members, groupId) => {
        const weight = resolveGroupWeight(groupId, weights);
        out[groupId] = weight;
        if (weight !== 1) {
            sawNonDefault = true;
        }
    });
    return sawNonDefault ? out : undefined;
}
39
/**
 * Report whether the candidates carry differing weights.
 *
 * Only finite numeric entries of `weights` are compared; candidates without
 * such an entry are ignored. The first finite weight is the reference and any
 * later one differing from it by more than 1e-6 makes the result true.
 *
 * @param candidates Candidate keys to inspect.
 * @param weights Weight map keyed by candidate (may be falsy).
 * @returns true when at least two finite weights differ, false otherwise
 *          (including when `weights` is falsy or fewer than two candidates).
 */
export function hasNonUniformWeights(candidates, weights) {
    if (!weights || candidates.length < 2) {
        return false;
    }
    const finiteWeights = candidates
        .map((key) => weights[key])
        .filter((value) => typeof value === 'number' && Number.isFinite(value));
    const [reference, ...others] = finiteWeights;
    return others.some((value) => Math.abs(value - reference) > 1e-6);
}
58
/**
 * Build the per-candidate weight map handed to provider selection.
 *
 * For each candidate the dynamic weight (`dynamicWeights[key]`) and the static
 * weight (resolved by `resolveCandidateWeight`) are combined with
 * `multiplyPositiveWeights`. Candidates for which nothing resolves are left
 * out of the map.
 *
 * @param opts `{ candidates, providerRegistry, staticWeights, dynamicWeights }`.
 * @returns The weight map, or undefined when both weight sources are missing
 *          or empty, or when no resolved weight differs from 1.
 */
export function buildCandidateWeights(opts) {
    const { candidates, providerRegistry, staticWeights, dynamicWeights } = opts;
    const isMissingOrEmpty = (map) => !map || Object.keys(map).length === 0;
    if (isMissingOrEmpty(staticWeights) && isMissingOrEmpty(dynamicWeights)) {
        return undefined;
    }
    const out = {};
    let sawNonDefault = false;
    for (const key of candidates) {
        const resolved = multiplyPositiveWeights(dynamicWeights?.[key], resolveCandidateWeight(key, staticWeights, providerRegistry));
        if (resolved === undefined) {
            continue;
        }
        out[key] = resolved;
        if (resolved !== 1) {
            sawNonDefault = true;
        }
    }
    return sawNonDefault ? out : undefined;
}
81
/**
 * Resolve the static weight for one candidate key.
 *
 * Lookup order: the key itself, then `<providerId>.<modelId>`, then the
 * provider id alone. Each value must pass `normalizePositiveWeight`.
 *
 * @param key Candidate key.
 * @param weights Static weight map (may be falsy).
 * @param providerRegistry Registry passed through to `getProviderModelId`.
 * @returns The first usable weight, or undefined when none applies.
 */
function resolveCandidateWeight(key, weights, providerRegistry) {
    if (!weights) {
        return undefined;
    }
    const exact = normalizePositiveWeight(weights[key]);
    if (exact !== undefined) {
        return exact;
    }
    const providerId = extractProviderId(key);
    if (!providerId) {
        return undefined;
    }
    let grouped;
    try {
        const modelId = getProviderModelId(key, providerRegistry);
        grouped = modelId ? normalizePositiveWeight(weights[`${providerId}.${modelId}`]) : undefined;
    }
    catch {
        // Best effort: a failed model lookup falls through to the provider-only weight.
    }
    return grouped ?? normalizePositiveWeight(weights[providerId]);
}
107
/**
 * Accept a value as a weight only when it is a finite number above zero.
 *
 * @param value Any value.
 * @returns `value` unchanged when usable, otherwise undefined.
 */
function normalizePositiveWeight(value) {
    if (typeof value !== 'number') {
        return undefined;
    }
    if (!Number.isFinite(value) || value <= 0) {
        return undefined;
    }
    return value;
}
110
/**
 * Multiply together the usable weights among `values`.
 *
 * Values rejected by `normalizePositiveWeight` are skipped. The first usable
 * value is taken as-is; every further product is rounded to the nearest
 * integer and clamped to a minimum of 1.
 *
 * @param values Candidate weight values.
 * @returns The combined weight, or undefined when no value is usable.
 */
function multiplyPositiveWeights(...values) {
    return values
        .map((value) => normalizePositiveWeight(value))
        .filter((weight) => weight !== undefined)
        .reduce((product, weight) => (product === undefined ? weight : Math.max(1, Math.round(product * weight))), undefined);
}
@@ -3,6 +3,7 @@ import { type ResolvedContextWeightedConfig } from '../context-weighted.js';
3
3
  import { type ResolvedHealthWeightedConfig } from '../health-weighted.js';
4
4
  import type { RoutePoolTier } from '../types.js';
5
5
  import type { SelectionDeps, TrySelectFromTierOptions } from './selection-deps.js';
6
+ import { type ResolvedTierLoadBalancing } from './tier-load-balancing.js';
6
7
  export declare function selectProviderKeyWithQuotaBuckets(opts: {
7
8
  routeName: string;
8
9
  tier: RoutePoolTier;
@@ -19,6 +20,7 @@ export declare function selectProviderKeyWithQuotaBuckets(opts: {
19
20
  nowForWeights: number;
20
21
  healthWeightedCfg: ResolvedHealthWeightedConfig;
21
22
  contextWeightedCfg: ResolvedContextWeightedConfig;
23
+ tierLoadBalancing: ResolvedTierLoadBalancing;
22
24
  quotaView: NonNullable<SelectionDeps['quotaView']>;
23
25
  isAvailable: (key: string) => boolean;
24
26
  selectFirstAvailable: (keys: string[]) => string | null;
@@ -2,6 +2,7 @@ import { computeContextMultiplier } from '../context-weighted.js';
2
2
  import { computeHealthWeight } from '../health-weighted.js';
3
3
  import { buildQuotaBuckets } from './native-router-hotpath.js';
4
4
  import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
5
+ import { buildCandidateWeights, buildGroupWeights, hasNonUniformWeights } from './tier-load-balancing.js';
5
6
  import { pickPriorityGroup } from './tier-priority.js';
6
7
  import { extractProviderId, getProviderModelId } from './key-parsing.js';
7
8
  function buildPrimaryTargetGroups(candidates, deps) {
@@ -25,52 +26,8 @@ function buildPrimaryTargetGroups(candidates, deps) {
25
26
  }
26
27
  return groups;
27
28
  }
28
- function resolveGroupWeight(groupId, weights) {
29
- if (!weights) {
30
- return 1;
31
- }
32
- const direct = weights[groupId];
33
- if (typeof direct === 'number' && Number.isFinite(direct) && direct > 0) {
34
- return direct;
35
- }
36
- const providerId = groupId.split('.')[0] ?? groupId;
37
- const providerOnly = weights[providerId];
38
- if (typeof providerOnly === 'number' && Number.isFinite(providerOnly) && providerOnly > 0) {
39
- return providerOnly;
40
- }
41
- return 1;
42
- }
43
- function buildGroupWeights(groups, weights) {
44
- if (!groups.size) {
45
- return undefined;
46
- }
47
- const out = {};
48
- for (const [groupId] of groups.entries()) {
49
- out[groupId] = resolveGroupWeight(groupId, weights);
50
- }
51
- return out;
52
- }
53
- function hasNonUniformWeights(candidates, weights) {
54
- if (!weights || candidates.length < 2) {
55
- return false;
56
- }
57
- let ref;
58
- for (const key of candidates) {
59
- const raw = weights[key];
60
- if (typeof raw !== 'number' || !Number.isFinite(raw)) {
61
- continue;
62
- }
63
- if (ref === undefined) {
64
- ref = raw;
65
- }
66
- else if (Math.abs(raw - ref) > 1e-6) {
67
- return true;
68
- }
69
- }
70
- return false;
71
- }
72
29
  export function selectProviderKeyWithQuotaBuckets(opts) {
73
- const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg, quotaView, isAvailable, selectFirstAvailable, applyAliasStickyQueuePinning, preferAntigravityAliasesOnRetry } = opts;
30
+ const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg, tierLoadBalancing, quotaView, isAvailable, selectFirstAvailable, applyAliasStickyQueuePinning, preferAntigravityAliasesOnRetry } = opts;
74
31
  const bucketInputs = candidates.map((key, order) => {
75
32
  const entry = quotaView(key);
76
33
  const penaltyRaw = entry?.selectionPenalty;
@@ -100,35 +57,56 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
100
57
  bucketCandidates = preferAntigravityAliasesOnRetry(bucketCandidates);
101
58
  }
102
59
  bucketCandidates = applyAliasStickyQueuePinning(bucketCandidates);
103
- const bucketWeights = {};
60
+ const quotaWeights = {};
104
61
  for (const item of bucket) {
105
62
  if (healthWeightedCfg.enabled) {
106
63
  const entry = quotaView(item.key);
107
64
  const { weight } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
108
- bucketWeights[item.key] = weight;
65
+ quotaWeights[item.key] = weight;
109
66
  }
110
67
  else {
111
- bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
68
+ quotaWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
112
69
  }
113
70
  }
114
- if (isSafePool && contextWeightedCfg.enabled) {
71
+ const contextWeights = (() => {
72
+ if (!isSafePool || !contextWeightedCfg.enabled) {
73
+ return undefined;
74
+ }
115
75
  const ctx = computeContextWeightMultipliers({
116
76
  candidates: bucketCandidates,
117
77
  usage: contextResult.usage,
118
78
  warnRatio,
119
79
  cfg: contextWeightedCfg
120
80
  });
121
- if (ctx) {
122
- for (const key of bucketCandidates) {
123
- const m = computeContextMultiplier({
124
- effectiveSafeRefTokens: ctx.ref,
125
- effectiveSafeTokens: ctx.eff[key] ?? 1,
126
- cfg: contextWeightedCfg
127
- });
128
- bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
129
- }
81
+ if (!ctx) {
82
+ return undefined;
130
83
  }
131
- }
84
+ const out = {};
85
+ for (const key of bucketCandidates) {
86
+ const m = computeContextMultiplier({
87
+ effectiveSafeRefTokens: ctx.ref,
88
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
89
+ cfg: contextWeightedCfg
90
+ });
91
+ out[key] = Math.max(1, Math.round(100 * m));
92
+ }
93
+ return out;
94
+ })();
95
+ const bucketWeights = buildCandidateWeights({
96
+ candidates: bucketCandidates,
97
+ providerRegistry: deps.providerRegistry,
98
+ staticWeights: tierLoadBalancing.weights,
99
+ dynamicWeights: Object.keys(quotaWeights).length || contextWeights
100
+ ? Object.fromEntries(bucketCandidates.map((key) => {
101
+ const quotaWeight = quotaWeights[key];
102
+ const contextWeight = contextWeights?.[key];
103
+ const combined = typeof contextWeight === 'number'
104
+ ? Math.max(1, Math.round((quotaWeight ?? 1) * contextWeight))
105
+ : quotaWeight;
106
+ return [key, combined ?? 1];
107
+ }))
108
+ : undefined
109
+ });
132
110
  if (tier.mode === 'priority') {
133
111
  if (!isRecoveryAttempt) {
134
112
  const group = pickPriorityGroup({
@@ -142,13 +120,13 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
142
120
  }
143
121
  const groupWeights = {};
144
122
  for (const key of group.groupCandidates) {
145
- groupWeights[key] = bucketWeights[key] ?? 1;
123
+ groupWeights[key] = bucketWeights?.[key] ?? 1;
146
124
  }
147
125
  const allowGrouped = !hasNonUniformWeights(group.groupCandidates, bucketWeights);
148
- if (allowGrouped && deps.loadBalancer.getPolicy().strategy !== 'sticky') {
126
+ if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
149
127
  const groups = buildPrimaryTargetGroups(group.groupCandidates, deps);
150
128
  if (groups.size > 0) {
151
- const groupWeightMap = buildGroupWeights(groups, deps.loadBalancer.getPolicy().weights);
129
+ const groupWeightMap = buildGroupWeights(groups, tierLoadBalancing.weights);
152
130
  const selected = deps.loadBalancer.selectGrouped({
153
131
  routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
154
132
  groups,
@@ -185,17 +163,17 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
185
163
  continue;
186
164
  }
187
165
  const allowGrouped = !hasNonUniformWeights(bucketCandidates, bucketWeights);
188
- if (allowGrouped && deps.loadBalancer.getPolicy().strategy !== 'sticky') {
166
+ if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
189
167
  const groups = buildPrimaryTargetGroups(bucketCandidates, deps);
190
168
  if (groups.size > 0) {
191
- const groupWeightMap = buildGroupWeights(groups, deps.loadBalancer.getPolicy().weights);
169
+ const groupWeightMap = buildGroupWeights(groups, tierLoadBalancing.weights);
192
170
  const selected = deps.loadBalancer.selectGrouped({
193
171
  routeName: `${routeName}:${tier.id}:${priority}`,
194
172
  groups,
195
173
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
196
174
  weights: groupWeightMap,
197
175
  availabilityCheck: isAvailable
198
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
176
+ }, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
199
177
  if (selected) {
200
178
  return selected;
201
179
  }
@@ -207,7 +185,7 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
207
185
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
208
186
  weights: bucketWeights,
209
187
  availabilityCheck: isAvailable
210
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
188
+ }, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
211
189
  if (selected) {
212
190
  return selected;
213
191
  }
@@ -2,6 +2,7 @@ import { computeContextMultiplier } from '../context-weighted.js';
2
2
  import { pinCandidatesByAliasQueue, resolveAliasSelectionStrategy } from './alias-selection.js';
3
3
  import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
4
4
  import { extractKeyAlias, extractProviderId, getProviderModelId } from './key-parsing.js';
5
+ import { buildCandidateWeights, buildGroupWeights, hasNonUniformWeights, resolveTierLoadBalancing } from './tier-load-balancing.js';
5
6
  import { pickPriorityGroup } from './tier-priority.js';
6
7
  import { selectProviderKeyWithQuotaBuckets } from './tier-selection-quota-integration.js';
7
8
  function buildPrimaryTargetGroups(candidates, deps) {
@@ -25,50 +26,6 @@ function buildPrimaryTargetGroups(candidates, deps) {
25
26
  }
26
27
  return groups;
27
28
  }
28
- function resolveGroupWeight(groupId, weights) {
29
- if (!weights) {
30
- return 1;
31
- }
32
- const direct = weights[groupId];
33
- if (typeof direct === 'number' && Number.isFinite(direct) && direct > 0) {
34
- return direct;
35
- }
36
- const providerId = groupId.split('.')[0] ?? groupId;
37
- const providerOnly = weights[providerId];
38
- if (typeof providerOnly === 'number' && Number.isFinite(providerOnly) && providerOnly > 0) {
39
- return providerOnly;
40
- }
41
- return 1;
42
- }
43
- function buildGroupWeights(groups, weights) {
44
- if (!groups.size) {
45
- return undefined;
46
- }
47
- const out = {};
48
- for (const [groupId] of groups.entries()) {
49
- out[groupId] = resolveGroupWeight(groupId, weights);
50
- }
51
- return out;
52
- }
53
- function hasNonUniformWeights(candidates, weights) {
54
- if (!weights || candidates.length < 2) {
55
- return false;
56
- }
57
- let ref;
58
- for (const key of candidates) {
59
- const raw = weights[key];
60
- if (typeof raw !== 'number' || !Number.isFinite(raw)) {
61
- continue;
62
- }
63
- if (ref === undefined) {
64
- ref = raw;
65
- }
66
- else if (Math.abs(raw - ref) > 1e-6) {
67
- return true;
68
- }
69
- }
70
- return false;
71
- }
72
29
  function applyAliasStickyQueuePinning(opts) {
73
30
  const { candidates, orderedTargets, deps, excludedKeys } = opts;
74
31
  if (!Array.isArray(candidates) || candidates.length < 2) {
@@ -202,6 +159,7 @@ function preferAntigravityAliasesOnRetry(opts) {
202
159
  export function selectProviderKeyFromCandidatePool(opts) {
203
160
  const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, excludedKeys, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg } = opts;
204
161
  const quotaView = deps.quotaView;
162
+ const tierLoadBalancing = resolveTierLoadBalancing(tier, deps.loadBalancer.getPolicy());
205
163
  const isAvailable = (key) => {
206
164
  if (!quotaView) {
207
165
  return deps.healthManager.isAvailable(key);
@@ -265,28 +223,33 @@ export function selectProviderKeyFromCandidatePool(opts) {
265
223
  if (!group) {
266
224
  return null;
267
225
  }
268
- const weights = (() => {
269
- if (!isSafePool)
270
- return undefined;
271
- const ctx = computeContextWeightMultipliers({ candidates: group.groupCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
272
- if (!ctx)
273
- return undefined;
274
- const out = {};
275
- for (const key of group.groupCandidates) {
276
- const m = computeContextMultiplier({
277
- effectiveSafeRefTokens: ctx.ref,
278
- effectiveSafeTokens: ctx.eff[key] ?? 1,
279
- cfg: contextWeightedCfg
280
- });
281
- out[key] = Math.max(1, Math.round(100 * m));
282
- }
283
- return out;
284
- })();
226
+ const weights = buildCandidateWeights({
227
+ candidates: group.groupCandidates,
228
+ providerRegistry: deps.providerRegistry,
229
+ staticWeights: tierLoadBalancing.weights,
230
+ dynamicWeights: (() => {
231
+ if (!isSafePool)
232
+ return undefined;
233
+ const ctx = computeContextWeightMultipliers({ candidates: group.groupCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
234
+ if (!ctx)
235
+ return undefined;
236
+ const out = {};
237
+ for (const key of group.groupCandidates) {
238
+ const m = computeContextMultiplier({
239
+ effectiveSafeRefTokens: ctx.ref,
240
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
241
+ cfg: contextWeightedCfg
242
+ });
243
+ out[key] = Math.max(1, Math.round(100 * m));
244
+ }
245
+ return out;
246
+ })()
247
+ });
285
248
  const allowGrouped = !hasNonUniformWeights(group.groupCandidates, weights);
286
- if (allowGrouped && deps.loadBalancer.getPolicy().strategy !== 'sticky') {
249
+ if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
287
250
  const groups = buildPrimaryTargetGroups(group.groupCandidates, deps);
288
251
  if (groups.size > 0) {
289
- const groupWeights = buildGroupWeights(groups, deps.loadBalancer.getPolicy().weights);
252
+ const groupWeights = buildGroupWeights(groups, tierLoadBalancing.weights);
290
253
  const selected = deps.loadBalancer.selectGrouped({
291
254
  routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
292
255
  groups,
@@ -307,35 +270,40 @@ export function selectProviderKeyFromCandidatePool(opts) {
307
270
  availabilityCheck: isAvailable
308
271
  }, 'round-robin');
309
272
  }
310
- const weights = (() => {
311
- if (!isSafePool || !contextWeightedCfg.enabled)
312
- return undefined;
313
- const ctx = computeContextWeightMultipliers({ candidates: pinnedCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
314
- if (!ctx)
315
- return undefined;
316
- const out = {};
317
- for (const key of pinnedCandidates) {
318
- const m = computeContextMultiplier({
319
- effectiveSafeRefTokens: ctx.ref,
320
- effectiveSafeTokens: ctx.eff[key] ?? 1,
321
- cfg: contextWeightedCfg
322
- });
323
- out[key] = Math.max(1, Math.round(100 * m));
324
- }
325
- return out;
326
- })();
273
+ const weights = buildCandidateWeights({
274
+ candidates: pinnedCandidates,
275
+ providerRegistry: deps.providerRegistry,
276
+ staticWeights: tierLoadBalancing.weights,
277
+ dynamicWeights: (() => {
278
+ if (!isSafePool || !contextWeightedCfg.enabled)
279
+ return undefined;
280
+ const ctx = computeContextWeightMultipliers({ candidates: pinnedCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
281
+ if (!ctx)
282
+ return undefined;
283
+ const out = {};
284
+ for (const key of pinnedCandidates) {
285
+ const m = computeContextMultiplier({
286
+ effectiveSafeRefTokens: ctx.ref,
287
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
288
+ cfg: contextWeightedCfg
289
+ });
290
+ out[key] = Math.max(1, Math.round(100 * m));
291
+ }
292
+ return out;
293
+ })()
294
+ });
327
295
  const allowGrouped = !hasNonUniformWeights(pinnedCandidates, weights);
328
- if (allowGrouped && deps.loadBalancer.getPolicy().strategy !== 'sticky') {
296
+ if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
329
297
  const groups = buildPrimaryTargetGroups(pinnedCandidates, deps);
330
298
  if (groups.size > 0) {
331
- const groupWeights = buildGroupWeights(groups, deps.loadBalancer.getPolicy().weights);
299
+ const groupWeights = buildGroupWeights(groups, tierLoadBalancing.weights);
332
300
  const selected = deps.loadBalancer.selectGrouped({
333
301
  routeName: `${routeName}:${tier.id}`,
334
302
  groups,
335
303
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
336
304
  weights: groupWeights,
337
305
  availabilityCheck: isAvailable
338
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
306
+ }, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
339
307
  if (selected) {
340
308
  return selected;
341
309
  }
@@ -347,7 +315,7 @@ export function selectProviderKeyFromCandidatePool(opts) {
347
315
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
348
316
  weights,
349
317
  availabilityCheck: isAvailable
350
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
318
+ }, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
351
319
  }
352
320
  return selectProviderKeyWithQuotaBuckets({
353
321
  routeName,
@@ -365,6 +333,7 @@ export function selectProviderKeyFromCandidatePool(opts) {
365
333
  nowForWeights,
366
334
  healthWeightedCfg,
367
335
  contextWeightedCfg,
336
+ tierLoadBalancing,
368
337
  quotaView,
369
338
  isAvailable,
370
339
  selectFirstAvailable,
@@ -7,6 +7,17 @@ export declare const DEFAULT_ROUTE = "default";
7
7
  export declare const ROUTE_PRIORITY: string[];
8
8
  export type RoutingInstructionMode = 'force' | 'sticky' | 'none';
9
9
  export type RoutePoolMode = 'round-robin' | 'priority';
10
export interface RoutePoolLoadBalancingPolicy {
    /**
     * Pool-level provider selection strategy (optional). If it is left out,
     * the Virtual Router uses the global loadBalancing.strategy instead.
     */
    strategy?: 'round-robin' | 'weighted' | 'sticky';
    /**
     * Pool-local weights (optional). A key may name a runtime key, a
     * provider.model group, or a provider id.
     */
    weights?: { [key: string]: number };
}
10
21
  export interface RoutePoolTier {
11
22
  id: string;
12
23
  targets: string[];
@@ -25,6 +36,11 @@ export interface RoutePoolTier {
25
36
  * - routing.web_search: force server-side web_search flow.
26
37
  */
27
38
  force?: boolean;
39
+ /**
40
+ * Optional pool-scoped load-balancing override. This lets different route pools
41
+ * use different strategies/weights without mutating the global policy.
42
+ */
43
+ loadBalancing?: RoutePoolLoadBalancingPolicy;
28
44
  }
29
45
  export type RoutingPools = Record<string, RoutePoolTier[]>;
30
46
  export type StreamingPreference = 'auto' | 'always' | 'never';
@@ -215,11 +231,34 @@ export interface ProviderHealthConfig {
215
231
  cooldownMs: number;
216
232
  fatalCooldownMs?: number;
217
233
  }
234
+ export type VirtualRouterWebSearchExecutionMode = 'servertool' | 'direct';
235
+ export type VirtualRouterWebSearchDirectActivation = 'route' | 'builtin';
218
236
  export interface VirtualRouterWebSearchEngineConfig {
219
237
  id: string;
220
238
  providerKey: string;
221
239
  description?: string;
222
240
  default?: boolean;
241
+ /**
242
+ * Search execution mode:
243
+ * - servertool: expose canonical web_search tool and execute through servertool engine.
244
+ * - direct: route to a search-capable model/provider directly; servertool injection must skip it.
245
+ */
246
+ executionMode?: VirtualRouterWebSearchExecutionMode;
247
+ /**
248
+ * When executionMode=direct, controls how the upstream search capability is activated.
249
+ * - route: route selection itself enables native search behavior (e.g. deepseek-web search route).
250
+ * - builtin: upstream requires a provider-native builtin search tool/schema.
251
+ */
252
+ directActivation?: VirtualRouterWebSearchDirectActivation;
253
+ /**
254
+ * Optional target model id for direct-mode matching when request/compat layers need to detect
255
+ * which routed provider payload should receive native web search activation.
256
+ */
257
+ modelId?: string;
258
+ /**
259
+ * Optional builtin max-uses hint for providers that support builtin web search tools.
260
+ */
261
+ maxUses?: number;
223
262
  /**
224
263
  * When true, this engine will never be used by server-side tools
225
264
  * (e.g. web_search). It will also be omitted from injected tool
@@ -118,6 +118,27 @@ function getWebSearchConfig(ctx) {
118
118
  : undefined;
119
119
  if (!id || !providerKey)
120
120
  continue;
121
+ const rawExecutionMode = typeof obj.executionMode === 'string'
122
+ ? obj.executionMode.trim().toLowerCase()
123
+ : typeof obj.mode === 'string'
124
+ ? obj.mode.trim().toLowerCase()
125
+ : '';
126
+ const executionMode = rawExecutionMode === 'direct' ? 'direct' : 'servertool';
127
+ const rawDirectActivation = typeof obj.directActivation === 'string'
128
+ ? obj.directActivation.trim().toLowerCase()
129
+ : typeof obj.activation === 'string'
130
+ ? obj.activation.trim().toLowerCase()
131
+ : '';
132
+ const directActivation = rawDirectActivation === 'builtin'
133
+ ? 'builtin'
134
+ : rawDirectActivation === 'route'
135
+ ? 'route'
136
+ : executionMode === 'direct'
137
+ ? 'route'
138
+ : undefined;
139
+ const modelId = typeof obj.modelId === 'string' && obj.modelId.trim() ? obj.modelId.trim() : undefined;
140
+ const rawMaxUses = typeof obj.maxUses === 'number' ? obj.maxUses : Number(obj.maxUses);
141
+ const maxUses = Number.isFinite(rawMaxUses) && rawMaxUses > 0 ? Math.floor(rawMaxUses) : undefined;
121
142
  const serverToolsDisabled = obj.serverToolsDisabled === true ||
122
143
  (typeof obj.serverToolsDisabled === 'string' &&
123
144
  obj.serverToolsDisabled.trim().toLowerCase() === 'true') ||
@@ -142,6 +163,10 @@ function getWebSearchConfig(ctx) {
142
163
  providerKey,
143
164
  description: typeof obj.description === 'string' && obj.description.trim() ? obj.description.trim() : undefined,
144
165
  default: obj.default === true,
166
+ executionMode,
167
+ ...(directActivation ? { directActivation } : {}),
168
+ ...(modelId ? { modelId } : {}),
169
+ ...(maxUses ? { maxUses } : {}),
145
170
  ...(serverToolsDisabled ? { serverToolsDisabled: true } : {}),
146
171
  ...(searchEngineList ? { searchEngineList } : {})
147
172
  });
@@ -181,7 +206,7 @@ function resolveWebSearchEngine(config, engineId) {
181
206
  return undefined;
182
207
  }
183
208
  function buildEnginePriorityList(config, engineId) {
184
- const engines = (Array.isArray(config.engines) ? config.engines : []).filter((engine) => !engine.serverToolsDisabled);
209
+ const engines = (Array.isArray(config.engines) ? config.engines : []).filter((engine) => !engine.serverToolsDisabled && (engine.executionMode ?? 'servertool') === 'servertool');
185
210
  if (!engines.length) {
186
211
  return [];
187
212
  }
@@ -100,7 +100,7 @@ function normalizeFilterTokenSet(values) {
100
100
  return normalized.size > 0 ? normalized : null;
101
101
  }
102
102
  function isNameIncluded(name, includeSet, excludeSet) {
103
- const normalized = name.trim().toLowerCase();
103
+ const normalized = normalizeServerToolCallName(name);
104
104
  if (includeSet && !includeSet.has(normalized)) {
105
105
  return false;
106
106
  }
@@ -109,6 +109,13 @@ function isNameIncluded(name, includeSet, excludeSet) {
109
109
  }
110
110
  return true;
111
111
  }
112
/**
 * Canonicalise a server tool call name.
 *
 * The name is trimmed and lower-cased; the spellings 'websearch' and
 * 'web-search' are then mapped to 'web_search'. Any other name is returned in
 * its trimmed, lower-cased form.
 *
 * @param name Raw tool name (a string).
 * @returns The canonical tool name.
 */
function normalizeServerToolCallName(name) {
    const lowered = name.trim().toLowerCase();
    switch (lowered) {
        case 'websearch':
        case 'web-search':
            return 'web_search';
        default:
            return lowered;
    }
}
112
119
  function extractToolCallsFromMessage(message) {
113
120
  const toolCalls = getArray(message.tool_calls);
114
121
  const out = [];
@@ -120,7 +127,9 @@ function extractToolCallsFromMessage(message) {
120
127
  const fn = asObject(tc.function) ??
121
128
  asObject(tc.functionCall) ??
122
129
  asObject(tc.function_call);
123
- const name = fn && typeof fn.name === 'string' && String(fn.name).trim() ? String(fn.name).trim() : '';
130
+ const name = fn && typeof fn.name === 'string' && String(fn.name).trim()
131
+ ? normalizeServerToolCallName(String(fn.name))
132
+ : '';
124
133
  const rawArgs = (fn ? fn.arguments : undefined) ??
125
134
  (fn ? fn.args : undefined) ??
126
135
  (fn ? fn.input : undefined) ??
@@ -137,6 +137,10 @@ export type ServerToolBackendPlan = {
137
137
  providerKey: string;
138
138
  description?: string;
139
139
  default?: boolean;
140
+ executionMode?: 'servertool' | 'direct';
141
+ directActivation?: 'route' | 'builtin';
142
+ modelId?: string;
143
+ maxUses?: number;
140
144
  serverToolsDisabled?: boolean;
141
145
  searchEngineList?: string[];
142
146
  }[];