@jsonstudio/llms 0.6.3379 → 0.6.3409
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/compat/actions/claude-thinking-tools.d.ts +1 -14
- package/dist/conversion/compat/actions/claude-thinking-tools.js +3 -71
- package/dist/conversion/compat/actions/lmstudio-responses-fc-ids.d.ts +0 -8
- package/dist/conversion/compat/actions/lmstudio-responses-fc-ids.js +2 -57
- package/dist/conversion/compat/actions/normalize-tool-call-ids.d.ts +0 -9
- package/dist/conversion/compat/actions/normalize-tool-call-ids.js +6 -136
- package/dist/conversion/compat/actions/request-rules.js +2 -61
- package/dist/conversion/compat/actions/response-blacklist.d.ts +0 -4
- package/dist/conversion/compat/actions/response-blacklist.js +2 -77
- package/dist/conversion/compat/actions/response-normalize.js +2 -119
- package/dist/conversion/compat/actions/response-validate.js +2 -74
- package/dist/conversion/compat/actions/strip-orphan-function-calls-tag.js +2 -150
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +24 -1
- package/dist/conversion/hub/pipeline/hub-pipeline.js +91 -0
- package/dist/conversion/shared/reasoning-tool-parser.js +7 -8
- package/dist/conversion/shared/responses-response-utils.js +3 -48
- package/dist/conversion/shared/responses-tool-utils.js +22 -126
- package/dist/conversion/shared/tool-call-id-manager.js +18 -21
- package/dist/native/router_hotpath_napi.node +0 -0
- package/dist/router/virtual-router/bootstrap/routing-config.d.ts +2 -1
- package/dist/router/virtual-router/bootstrap/routing-config.js +47 -2
- package/dist/router/virtual-router/bootstrap/web-search-config.js +25 -0
- package/dist/router/virtual-router/bootstrap.js +21 -16
- package/dist/router/virtual-router/engine-selection/native-compat-action-semantics.d.ts +6 -0
- package/dist/router/virtual-router/engine-selection/native-compat-action-semantics.js +171 -0
- package/dist/router/virtual-router/engine-selection/native-router-hotpath-loader.js +11 -0
- package/dist/router/virtual-router/engine-selection/native-shared-conversion-semantics.d.ts +5 -0
- package/dist/router/virtual-router/engine-selection/native-shared-conversion-semantics.js +137 -0
- package/dist/router/virtual-router/engine-selection/tier-load-balancing.d.ts +16 -0
- package/dist/router/virtual-router/engine-selection/tier-load-balancing.js +120 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-quota-integration.d.ts +2 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-quota-integration.js +44 -66
- package/dist/router/virtual-router/engine-selection/tier-selection-select.js +53 -84
- package/dist/router/virtual-router/types.d.ts +39 -0
- package/dist/servertool/handlers/web-search.js +26 -1
- package/dist/servertool/server-side-tools.js +11 -2
- package/dist/servertool/types.d.ts +4 -0
- package/package.json +1 -1
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { extractProviderId, getProviderModelId } from './key-parsing.js';
|
|
2
|
+
/**
 * Merge a tier's own load-balancing settings with the global policy.
 *
 * Each field is resolved independently: the tier value wins, then the global
 * value, and the strategy finally defaults to 'round-robin'. Weights have no
 * default and stay undefined when neither level supplies them.
 *
 * @param {object} tier - Route pool tier; only its `loadBalancing` property is read.
 * @param {object|null|undefined} globalPolicy - Global policy (`strategy`, `weights`).
 * @returns {{strategy: string, weights: (object|undefined)}} The effective policy.
 */
export function resolveTierLoadBalancing(tier, globalPolicy) {
    const { loadBalancing: override } = tier;
    const strategy = override?.strategy ?? globalPolicy?.strategy ?? 'round-robin';
    const weights = override?.weights ?? globalPolicy?.weights;
    return { strategy, weights };
}
|
|
9
|
+
/**
 * Look up the weight for a group id, falling back to its provider prefix.
 *
 * Lookup order: the exact `groupId` key, then the text before the first '.'
 * in `groupId`. A table entry only counts when it is a finite number greater
 * than zero; anything else is ignored. When nothing usable is found, or no
 * table is given, the neutral weight 1 is returned.
 *
 * @param {string} groupId - Group identifier; the part before the first '.' is used as the fallback key.
 * @param {object|undefined} weights - Weight table keyed by group id or provider id.
 * @returns {number} The resolved weight, or 1 when none applies.
 */
export function resolveGroupWeight(groupId, weights) {
    const isUsable = (entry) => typeof entry === 'number' && Number.isFinite(entry) && entry > 0;
    if (!weights) {
        return 1;
    }
    const exact = weights[groupId];
    if (isUsable(exact)) {
        return exact;
    }
    // Only split once the exact key has missed, mirroring the lookup order above.
    const [providerPart] = groupId.split('.');
    const inherited = weights[providerPart];
    return isUsable(inherited) ? inherited : 1;
}
|
|
24
|
+
/**
 * Build a weight table for every group id in `groups`.
 *
 * Each group id is resolved through `resolveGroupWeight`. The table is only
 * returned when at least one group resolves to something other than 1;
 * otherwise (and when `groups` is empty or no weights are given) the result
 * is undefined.
 *
 * @param {Map<string, *>} groups - Collection exposing `size` and `entries()`; only the keys are used.
 * @param {object|undefined} weights - Weight table passed through to `resolveGroupWeight`.
 * @returns {object|undefined} Map of group id to weight, or undefined when all weights are 1.
 */
export function buildGroupWeights(groups, weights) {
    if (!(groups.size && weights)) {
        return undefined;
    }
    const resolvedByGroup = {};
    let sawNonDefault = false;
    for (const entry of groups.entries()) {
        const [id] = entry;
        const weight = resolveGroupWeight(id, weights);
        resolvedByGroup[id] = weight;
        sawNonDefault = sawNonDefault || weight !== 1;
    }
    if (!sawNonDefault) {
        return undefined;
    }
    return resolvedByGroup;
}
|
|
39
|
+
/**
 * Report whether the candidates carry differing weights.
 *
 * Only candidates whose table entry is a finite number take part in the
 * comparison; candidates without such an entry are skipped rather than
 * treated as weight 1. The first participating weight is the baseline and
 * any later one differing from it by more than 1e-6 makes the result true.
 *
 * @param {string[]} candidates - Candidate keys to inspect.
 * @param {object|undefined} weights - Weight table keyed by candidate.
 * @returns {boolean} True when at least two finite weights differ; false otherwise,
 *   including when there is no table or fewer than two candidates.
 */
export function hasNonUniformWeights(candidates, weights) {
    if (!weights || candidates.length < 2) {
        return false;
    }
    const finiteWeights = Array.from(candidates, (candidate) => weights[candidate])
        .filter((entry) => typeof entry === 'number' && Number.isFinite(entry));
    const [baseline, ...others] = finiteWeights;
    return others.some((entry) => Math.abs(entry - baseline) > 1e-6);
}
|
|
58
|
+
/**
 * Combine static (configured) and dynamic (computed) weights per candidate.
 *
 * For every candidate the dynamic entry for its key and the static weight
 * found by `resolveCandidateWeight` are merged with `multiplyPositiveWeights`.
 * Candidates for which neither side yields a weight are left out of the table.
 *
 * @param {object} opts
 * @param {string[]} opts.candidates - Candidate keys to weigh.
 * @param {*} opts.providerRegistry - Passed through to `resolveCandidateWeight`.
 * @param {object|undefined} opts.staticWeights - Configured weight table.
 * @param {object|undefined} opts.dynamicWeights - Per-candidate computed weights.
 * @returns {object|undefined} Map of candidate key to weight; undefined when both
 *   tables are absent or empty, or when no candidate ends up with a weight other than 1.
 */
export function buildCandidateWeights(opts) {
    const { candidates, providerRegistry, staticWeights, dynamicWeights } = opts;
    const isBlank = (table) => !table || Object.keys(table).length === 0;
    if (isBlank(staticWeights) && isBlank(dynamicWeights)) {
        return undefined;
    }
    const weightByCandidate = {};
    let sawNonDefault = false;
    for (const candidate of candidates) {
        const combined = multiplyPositiveWeights(
            dynamicWeights?.[candidate],
            resolveCandidateWeight(candidate, staticWeights, providerRegistry)
        );
        if (combined === undefined) {
            continue;
        }
        weightByCandidate[candidate] = combined;
        if (combined !== 1) {
            sawNonDefault = true;
        }
    }
    return sawNonDefault ? weightByCandidate : undefined;
}
|
|
81
|
+
/**
 * Find the configured weight for one candidate key.
 *
 * Lookup order, first usable (finite, positive) entry wins:
 *   1. the candidate key itself,
 *   2. `<providerId>.<modelId>`, when a model id can be obtained,
 *   3. the provider id alone.
 *
 * @param {string} key - Candidate key.
 * @param {object|undefined} weights - Configured weight table.
 * @param {*} providerRegistry - Passed to `getProviderModelId` to look up the model id.
 * @returns {number|undefined} The matching weight, or undefined when there is no
 *   table, no provider id can be extracted, or no usable entry exists.
 */
function resolveCandidateWeight(key, weights, providerRegistry) {
    if (!weights) {
        return undefined;
    }
    const exact = normalizePositiveWeight(weights[key]);
    if (exact !== undefined) {
        return exact;
    }
    const providerId = extractProviderId(key) ?? '';
    if (providerId) {
        try {
            const modelId = getProviderModelId(key, providerRegistry) ?? '';
            const perModel = modelId
                ? normalizePositiveWeight(weights[`${providerId}.${modelId}`])
                : undefined;
            if (perModel !== undefined) {
                return perModel;
            }
        }
        catch {
            // Ignore registry misses and fall back to provider-only weight.
        }
        return normalizePositiveWeight(weights[providerId]);
    }
    return undefined;
}
|
|
107
|
+
/**
 * Accept a value as a weight only when it is a finite number above zero.
 *
 * @param {*} value - Raw table entry.
 * @returns {number|undefined} The value unchanged when usable, otherwise undefined.
 */
function normalizePositiveWeight(value) {
    if (typeof value !== 'number') {
        return undefined;
    }
    if (!Number.isFinite(value) || value <= 0) {
        return undefined;
    }
    return value;
}
|
|
110
|
+
/**
 * Multiply together the usable weights among the arguments.
 *
 * Arguments rejected by `normalizePositiveWeight` are skipped. The first
 * usable weight is taken as-is; every further one is multiplied in and the
 * running product is rounded to the nearest integer and clamped to at least 1.
 *
 * @param {...*} values - Raw weights, any of which may be unusable.
 * @returns {number|undefined} The combined weight, or undefined when no argument is usable.
 */
function multiplyPositiveWeights(...values) {
    return values.reduce((product, value) => {
        const factor = normalizePositiveWeight(value);
        if (factor === undefined) {
            return product;
        }
        if (product === undefined) {
            return factor;
        }
        return Math.max(1, Math.round(product * factor));
    }, undefined);
}
|
|
@@ -3,6 +3,7 @@ import { type ResolvedContextWeightedConfig } from '../context-weighted.js';
|
|
|
3
3
|
import { type ResolvedHealthWeightedConfig } from '../health-weighted.js';
|
|
4
4
|
import type { RoutePoolTier } from '../types.js';
|
|
5
5
|
import type { SelectionDeps, TrySelectFromTierOptions } from './selection-deps.js';
|
|
6
|
+
import { type ResolvedTierLoadBalancing } from './tier-load-balancing.js';
|
|
6
7
|
export declare function selectProviderKeyWithQuotaBuckets(opts: {
|
|
7
8
|
routeName: string;
|
|
8
9
|
tier: RoutePoolTier;
|
|
@@ -19,6 +20,7 @@ export declare function selectProviderKeyWithQuotaBuckets(opts: {
|
|
|
19
20
|
nowForWeights: number;
|
|
20
21
|
healthWeightedCfg: ResolvedHealthWeightedConfig;
|
|
21
22
|
contextWeightedCfg: ResolvedContextWeightedConfig;
|
|
23
|
+
tierLoadBalancing: ResolvedTierLoadBalancing;
|
|
22
24
|
quotaView: NonNullable<SelectionDeps['quotaView']>;
|
|
23
25
|
isAvailable: (key: string) => boolean;
|
|
24
26
|
selectFirstAvailable: (keys: string[]) => string | null;
|
|
@@ -2,6 +2,7 @@ import { computeContextMultiplier } from '../context-weighted.js';
|
|
|
2
2
|
import { computeHealthWeight } from '../health-weighted.js';
|
|
3
3
|
import { buildQuotaBuckets } from './native-router-hotpath.js';
|
|
4
4
|
import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
|
|
5
|
+
import { buildCandidateWeights, buildGroupWeights, hasNonUniformWeights } from './tier-load-balancing.js';
|
|
5
6
|
import { pickPriorityGroup } from './tier-priority.js';
|
|
6
7
|
import { extractProviderId, getProviderModelId } from './key-parsing.js';
|
|
7
8
|
function buildPrimaryTargetGroups(candidates, deps) {
|
|
@@ -25,52 +26,8 @@ function buildPrimaryTargetGroups(candidates, deps) {
|
|
|
25
26
|
}
|
|
26
27
|
return groups;
|
|
27
28
|
}
|
|
28
|
-
function resolveGroupWeight(groupId, weights) {
|
|
29
|
-
if (!weights) {
|
|
30
|
-
return 1;
|
|
31
|
-
}
|
|
32
|
-
const direct = weights[groupId];
|
|
33
|
-
if (typeof direct === 'number' && Number.isFinite(direct) && direct > 0) {
|
|
34
|
-
return direct;
|
|
35
|
-
}
|
|
36
|
-
const providerId = groupId.split('.')[0] ?? groupId;
|
|
37
|
-
const providerOnly = weights[providerId];
|
|
38
|
-
if (typeof providerOnly === 'number' && Number.isFinite(providerOnly) && providerOnly > 0) {
|
|
39
|
-
return providerOnly;
|
|
40
|
-
}
|
|
41
|
-
return 1;
|
|
42
|
-
}
|
|
43
|
-
function buildGroupWeights(groups, weights) {
|
|
44
|
-
if (!groups.size) {
|
|
45
|
-
return undefined;
|
|
46
|
-
}
|
|
47
|
-
const out = {};
|
|
48
|
-
for (const [groupId] of groups.entries()) {
|
|
49
|
-
out[groupId] = resolveGroupWeight(groupId, weights);
|
|
50
|
-
}
|
|
51
|
-
return out;
|
|
52
|
-
}
|
|
53
|
-
function hasNonUniformWeights(candidates, weights) {
|
|
54
|
-
if (!weights || candidates.length < 2) {
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
let ref;
|
|
58
|
-
for (const key of candidates) {
|
|
59
|
-
const raw = weights[key];
|
|
60
|
-
if (typeof raw !== 'number' || !Number.isFinite(raw)) {
|
|
61
|
-
continue;
|
|
62
|
-
}
|
|
63
|
-
if (ref === undefined) {
|
|
64
|
-
ref = raw;
|
|
65
|
-
}
|
|
66
|
-
else if (Math.abs(raw - ref) > 1e-6) {
|
|
67
|
-
return true;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
return false;
|
|
71
|
-
}
|
|
72
29
|
export function selectProviderKeyWithQuotaBuckets(opts) {
|
|
73
|
-
const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg, quotaView, isAvailable, selectFirstAvailable, applyAliasStickyQueuePinning, preferAntigravityAliasesOnRetry } = opts;
|
|
30
|
+
const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg, tierLoadBalancing, quotaView, isAvailable, selectFirstAvailable, applyAliasStickyQueuePinning, preferAntigravityAliasesOnRetry } = opts;
|
|
74
31
|
const bucketInputs = candidates.map((key, order) => {
|
|
75
32
|
const entry = quotaView(key);
|
|
76
33
|
const penaltyRaw = entry?.selectionPenalty;
|
|
@@ -100,35 +57,56 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
|
|
|
100
57
|
bucketCandidates = preferAntigravityAliasesOnRetry(bucketCandidates);
|
|
101
58
|
}
|
|
102
59
|
bucketCandidates = applyAliasStickyQueuePinning(bucketCandidates);
|
|
103
|
-
const
|
|
60
|
+
const quotaWeights = {};
|
|
104
61
|
for (const item of bucket) {
|
|
105
62
|
if (healthWeightedCfg.enabled) {
|
|
106
63
|
const entry = quotaView(item.key);
|
|
107
64
|
const { weight } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
|
|
108
|
-
|
|
65
|
+
quotaWeights[item.key] = weight;
|
|
109
66
|
}
|
|
110
67
|
else {
|
|
111
|
-
|
|
68
|
+
quotaWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
|
|
112
69
|
}
|
|
113
70
|
}
|
|
114
|
-
|
|
71
|
+
const contextWeights = (() => {
|
|
72
|
+
if (!isSafePool || !contextWeightedCfg.enabled) {
|
|
73
|
+
return undefined;
|
|
74
|
+
}
|
|
115
75
|
const ctx = computeContextWeightMultipliers({
|
|
116
76
|
candidates: bucketCandidates,
|
|
117
77
|
usage: contextResult.usage,
|
|
118
78
|
warnRatio,
|
|
119
79
|
cfg: contextWeightedCfg
|
|
120
80
|
});
|
|
121
|
-
if (ctx) {
|
|
122
|
-
|
|
123
|
-
const m = computeContextMultiplier({
|
|
124
|
-
effectiveSafeRefTokens: ctx.ref,
|
|
125
|
-
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
126
|
-
cfg: contextWeightedCfg
|
|
127
|
-
});
|
|
128
|
-
bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
|
|
129
|
-
}
|
|
81
|
+
if (!ctx) {
|
|
82
|
+
return undefined;
|
|
130
83
|
}
|
|
131
|
-
|
|
84
|
+
const out = {};
|
|
85
|
+
for (const key of bucketCandidates) {
|
|
86
|
+
const m = computeContextMultiplier({
|
|
87
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
88
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
89
|
+
cfg: contextWeightedCfg
|
|
90
|
+
});
|
|
91
|
+
out[key] = Math.max(1, Math.round(100 * m));
|
|
92
|
+
}
|
|
93
|
+
return out;
|
|
94
|
+
})();
|
|
95
|
+
const bucketWeights = buildCandidateWeights({
|
|
96
|
+
candidates: bucketCandidates,
|
|
97
|
+
providerRegistry: deps.providerRegistry,
|
|
98
|
+
staticWeights: tierLoadBalancing.weights,
|
|
99
|
+
dynamicWeights: Object.keys(quotaWeights).length || contextWeights
|
|
100
|
+
? Object.fromEntries(bucketCandidates.map((key) => {
|
|
101
|
+
const quotaWeight = quotaWeights[key];
|
|
102
|
+
const contextWeight = contextWeights?.[key];
|
|
103
|
+
const combined = typeof contextWeight === 'number'
|
|
104
|
+
? Math.max(1, Math.round((quotaWeight ?? 1) * contextWeight))
|
|
105
|
+
: quotaWeight;
|
|
106
|
+
return [key, combined ?? 1];
|
|
107
|
+
}))
|
|
108
|
+
: undefined
|
|
109
|
+
});
|
|
132
110
|
if (tier.mode === 'priority') {
|
|
133
111
|
if (!isRecoveryAttempt) {
|
|
134
112
|
const group = pickPriorityGroup({
|
|
@@ -142,13 +120,13 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
|
|
|
142
120
|
}
|
|
143
121
|
const groupWeights = {};
|
|
144
122
|
for (const key of group.groupCandidates) {
|
|
145
|
-
groupWeights[key] = bucketWeights[key] ?? 1;
|
|
123
|
+
groupWeights[key] = bucketWeights?.[key] ?? 1;
|
|
146
124
|
}
|
|
147
125
|
const allowGrouped = !hasNonUniformWeights(group.groupCandidates, bucketWeights);
|
|
148
|
-
if (allowGrouped &&
|
|
126
|
+
if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
|
|
149
127
|
const groups = buildPrimaryTargetGroups(group.groupCandidates, deps);
|
|
150
128
|
if (groups.size > 0) {
|
|
151
|
-
const groupWeightMap = buildGroupWeights(groups,
|
|
129
|
+
const groupWeightMap = buildGroupWeights(groups, tierLoadBalancing.weights);
|
|
152
130
|
const selected = deps.loadBalancer.selectGrouped({
|
|
153
131
|
routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
|
|
154
132
|
groups,
|
|
@@ -185,17 +163,17 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
|
|
|
185
163
|
continue;
|
|
186
164
|
}
|
|
187
165
|
const allowGrouped = !hasNonUniformWeights(bucketCandidates, bucketWeights);
|
|
188
|
-
if (allowGrouped &&
|
|
166
|
+
if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
|
|
189
167
|
const groups = buildPrimaryTargetGroups(bucketCandidates, deps);
|
|
190
168
|
if (groups.size > 0) {
|
|
191
|
-
const groupWeightMap = buildGroupWeights(groups,
|
|
169
|
+
const groupWeightMap = buildGroupWeights(groups, tierLoadBalancing.weights);
|
|
192
170
|
const selected = deps.loadBalancer.selectGrouped({
|
|
193
171
|
routeName: `${routeName}:${tier.id}:${priority}`,
|
|
194
172
|
groups,
|
|
195
173
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
196
174
|
weights: groupWeightMap,
|
|
197
175
|
availabilityCheck: isAvailable
|
|
198
|
-
}, tier.mode === 'round-robin' ? 'round-robin' :
|
|
176
|
+
}, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
|
|
199
177
|
if (selected) {
|
|
200
178
|
return selected;
|
|
201
179
|
}
|
|
@@ -207,7 +185,7 @@ export function selectProviderKeyWithQuotaBuckets(opts) {
|
|
|
207
185
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
208
186
|
weights: bucketWeights,
|
|
209
187
|
availabilityCheck: isAvailable
|
|
210
|
-
}, tier.mode === 'round-robin' ? 'round-robin' :
|
|
188
|
+
}, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
|
|
211
189
|
if (selected) {
|
|
212
190
|
return selected;
|
|
213
191
|
}
|
|
@@ -2,6 +2,7 @@ import { computeContextMultiplier } from '../context-weighted.js';
|
|
|
2
2
|
import { pinCandidatesByAliasQueue, resolveAliasSelectionStrategy } from './alias-selection.js';
|
|
3
3
|
import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
|
|
4
4
|
import { extractKeyAlias, extractProviderId, getProviderModelId } from './key-parsing.js';
|
|
5
|
+
import { buildCandidateWeights, buildGroupWeights, hasNonUniformWeights, resolveTierLoadBalancing } from './tier-load-balancing.js';
|
|
5
6
|
import { pickPriorityGroup } from './tier-priority.js';
|
|
6
7
|
import { selectProviderKeyWithQuotaBuckets } from './tier-selection-quota-integration.js';
|
|
7
8
|
function buildPrimaryTargetGroups(candidates, deps) {
|
|
@@ -25,50 +26,6 @@ function buildPrimaryTargetGroups(candidates, deps) {
|
|
|
25
26
|
}
|
|
26
27
|
return groups;
|
|
27
28
|
}
|
|
28
|
-
function resolveGroupWeight(groupId, weights) {
|
|
29
|
-
if (!weights) {
|
|
30
|
-
return 1;
|
|
31
|
-
}
|
|
32
|
-
const direct = weights[groupId];
|
|
33
|
-
if (typeof direct === 'number' && Number.isFinite(direct) && direct > 0) {
|
|
34
|
-
return direct;
|
|
35
|
-
}
|
|
36
|
-
const providerId = groupId.split('.')[0] ?? groupId;
|
|
37
|
-
const providerOnly = weights[providerId];
|
|
38
|
-
if (typeof providerOnly === 'number' && Number.isFinite(providerOnly) && providerOnly > 0) {
|
|
39
|
-
return providerOnly;
|
|
40
|
-
}
|
|
41
|
-
return 1;
|
|
42
|
-
}
|
|
43
|
-
function buildGroupWeights(groups, weights) {
|
|
44
|
-
if (!groups.size) {
|
|
45
|
-
return undefined;
|
|
46
|
-
}
|
|
47
|
-
const out = {};
|
|
48
|
-
for (const [groupId] of groups.entries()) {
|
|
49
|
-
out[groupId] = resolveGroupWeight(groupId, weights);
|
|
50
|
-
}
|
|
51
|
-
return out;
|
|
52
|
-
}
|
|
53
|
-
function hasNonUniformWeights(candidates, weights) {
|
|
54
|
-
if (!weights || candidates.length < 2) {
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
let ref;
|
|
58
|
-
for (const key of candidates) {
|
|
59
|
-
const raw = weights[key];
|
|
60
|
-
if (typeof raw !== 'number' || !Number.isFinite(raw)) {
|
|
61
|
-
continue;
|
|
62
|
-
}
|
|
63
|
-
if (ref === undefined) {
|
|
64
|
-
ref = raw;
|
|
65
|
-
}
|
|
66
|
-
else if (Math.abs(raw - ref) > 1e-6) {
|
|
67
|
-
return true;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
return false;
|
|
71
|
-
}
|
|
72
29
|
function applyAliasStickyQueuePinning(opts) {
|
|
73
30
|
const { candidates, orderedTargets, deps, excludedKeys } = opts;
|
|
74
31
|
if (!Array.isArray(candidates) || candidates.length < 2) {
|
|
@@ -202,6 +159,7 @@ function preferAntigravityAliasesOnRetry(opts) {
|
|
|
202
159
|
export function selectProviderKeyFromCandidatePool(opts) {
|
|
203
160
|
const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, excludedKeys, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg } = opts;
|
|
204
161
|
const quotaView = deps.quotaView;
|
|
162
|
+
const tierLoadBalancing = resolveTierLoadBalancing(tier, deps.loadBalancer.getPolicy());
|
|
205
163
|
const isAvailable = (key) => {
|
|
206
164
|
if (!quotaView) {
|
|
207
165
|
return deps.healthManager.isAvailable(key);
|
|
@@ -265,28 +223,33 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
265
223
|
if (!group) {
|
|
266
224
|
return null;
|
|
267
225
|
}
|
|
268
|
-
const weights = (
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
226
|
+
const weights = buildCandidateWeights({
|
|
227
|
+
candidates: group.groupCandidates,
|
|
228
|
+
providerRegistry: deps.providerRegistry,
|
|
229
|
+
staticWeights: tierLoadBalancing.weights,
|
|
230
|
+
dynamicWeights: (() => {
|
|
231
|
+
if (!isSafePool)
|
|
232
|
+
return undefined;
|
|
233
|
+
const ctx = computeContextWeightMultipliers({ candidates: group.groupCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
|
|
234
|
+
if (!ctx)
|
|
235
|
+
return undefined;
|
|
236
|
+
const out = {};
|
|
237
|
+
for (const key of group.groupCandidates) {
|
|
238
|
+
const m = computeContextMultiplier({
|
|
239
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
240
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
241
|
+
cfg: contextWeightedCfg
|
|
242
|
+
});
|
|
243
|
+
out[key] = Math.max(1, Math.round(100 * m));
|
|
244
|
+
}
|
|
245
|
+
return out;
|
|
246
|
+
})()
|
|
247
|
+
});
|
|
285
248
|
const allowGrouped = !hasNonUniformWeights(group.groupCandidates, weights);
|
|
286
|
-
if (allowGrouped &&
|
|
249
|
+
if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
|
|
287
250
|
const groups = buildPrimaryTargetGroups(group.groupCandidates, deps);
|
|
288
251
|
if (groups.size > 0) {
|
|
289
|
-
const groupWeights = buildGroupWeights(groups,
|
|
252
|
+
const groupWeights = buildGroupWeights(groups, tierLoadBalancing.weights);
|
|
290
253
|
const selected = deps.loadBalancer.selectGrouped({
|
|
291
254
|
routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
|
|
292
255
|
groups,
|
|
@@ -307,35 +270,40 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
307
270
|
availabilityCheck: isAvailable
|
|
308
271
|
}, 'round-robin');
|
|
309
272
|
}
|
|
310
|
-
const weights = (
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
273
|
+
const weights = buildCandidateWeights({
|
|
274
|
+
candidates: pinnedCandidates,
|
|
275
|
+
providerRegistry: deps.providerRegistry,
|
|
276
|
+
staticWeights: tierLoadBalancing.weights,
|
|
277
|
+
dynamicWeights: (() => {
|
|
278
|
+
if (!isSafePool || !contextWeightedCfg.enabled)
|
|
279
|
+
return undefined;
|
|
280
|
+
const ctx = computeContextWeightMultipliers({ candidates: pinnedCandidates, usage: contextResult.usage, warnRatio, cfg: contextWeightedCfg });
|
|
281
|
+
if (!ctx)
|
|
282
|
+
return undefined;
|
|
283
|
+
const out = {};
|
|
284
|
+
for (const key of pinnedCandidates) {
|
|
285
|
+
const m = computeContextMultiplier({
|
|
286
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
287
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
288
|
+
cfg: contextWeightedCfg
|
|
289
|
+
});
|
|
290
|
+
out[key] = Math.max(1, Math.round(100 * m));
|
|
291
|
+
}
|
|
292
|
+
return out;
|
|
293
|
+
})()
|
|
294
|
+
});
|
|
327
295
|
const allowGrouped = !hasNonUniformWeights(pinnedCandidates, weights);
|
|
328
|
-
if (allowGrouped &&
|
|
296
|
+
if (allowGrouped && tierLoadBalancing.strategy !== 'sticky') {
|
|
329
297
|
const groups = buildPrimaryTargetGroups(pinnedCandidates, deps);
|
|
330
298
|
if (groups.size > 0) {
|
|
331
|
-
const groupWeights = buildGroupWeights(groups,
|
|
299
|
+
const groupWeights = buildGroupWeights(groups, tierLoadBalancing.weights);
|
|
332
300
|
const selected = deps.loadBalancer.selectGrouped({
|
|
333
301
|
routeName: `${routeName}:${tier.id}`,
|
|
334
302
|
groups,
|
|
335
303
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
336
304
|
weights: groupWeights,
|
|
337
305
|
availabilityCheck: isAvailable
|
|
338
|
-
}, tier.mode === 'round-robin' ? 'round-robin' :
|
|
306
|
+
}, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
|
|
339
307
|
if (selected) {
|
|
340
308
|
return selected;
|
|
341
309
|
}
|
|
@@ -347,7 +315,7 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
347
315
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
348
316
|
weights,
|
|
349
317
|
availabilityCheck: isAvailable
|
|
350
|
-
}, tier.mode === 'round-robin' ? 'round-robin' :
|
|
318
|
+
}, tier.mode === 'round-robin' ? 'round-robin' : tierLoadBalancing.strategy);
|
|
351
319
|
}
|
|
352
320
|
return selectProviderKeyWithQuotaBuckets({
|
|
353
321
|
routeName,
|
|
@@ -365,6 +333,7 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
365
333
|
nowForWeights,
|
|
366
334
|
healthWeightedCfg,
|
|
367
335
|
contextWeightedCfg,
|
|
336
|
+
tierLoadBalancing,
|
|
368
337
|
quotaView,
|
|
369
338
|
isAvailable,
|
|
370
339
|
selectFirstAvailable,
|
|
@@ -7,6 +7,17 @@ export declare const DEFAULT_ROUTE = "default";
|
|
|
7
7
|
export declare const ROUTE_PRIORITY: string[];
|
|
8
8
|
export type RoutingInstructionMode = 'force' | 'sticky' | 'none';
|
|
9
9
|
export type RoutePoolMode = 'round-robin' | 'priority';
|
|
10
|
+
export interface RoutePoolLoadBalancingPolicy {
|
|
11
|
+
/**
|
|
12
|
+
* Optional pool-level override for provider selection strategy.
|
|
13
|
+
* When omitted, Virtual Router falls back to the global loadBalancing.strategy.
|
|
14
|
+
*/
|
|
15
|
+
strategy?: 'round-robin' | 'weighted' | 'sticky';
|
|
16
|
+
/**
|
|
17
|
+
* Optional pool-local weights. Keys may target runtime keys, provider.model groups, or provider ids.
|
|
18
|
+
*/
|
|
19
|
+
weights?: Record<string, number>;
|
|
20
|
+
}
|
|
10
21
|
export interface RoutePoolTier {
|
|
11
22
|
id: string;
|
|
12
23
|
targets: string[];
|
|
@@ -25,6 +36,11 @@ export interface RoutePoolTier {
|
|
|
25
36
|
* - routing.web_search: force server-side web_search flow.
|
|
26
37
|
*/
|
|
27
38
|
force?: boolean;
|
|
39
|
+
/**
|
|
40
|
+
* Optional pool-scoped load-balancing override. This lets different route pools
|
|
41
|
+
* use different strategies/weights without mutating the global policy.
|
|
42
|
+
*/
|
|
43
|
+
loadBalancing?: RoutePoolLoadBalancingPolicy;
|
|
28
44
|
}
|
|
29
45
|
export type RoutingPools = Record<string, RoutePoolTier[]>;
|
|
30
46
|
export type StreamingPreference = 'auto' | 'always' | 'never';
|
|
@@ -215,11 +231,34 @@ export interface ProviderHealthConfig {
|
|
|
215
231
|
cooldownMs: number;
|
|
216
232
|
fatalCooldownMs?: number;
|
|
217
233
|
}
|
|
234
|
+
export type VirtualRouterWebSearchExecutionMode = 'servertool' | 'direct';
|
|
235
|
+
export type VirtualRouterWebSearchDirectActivation = 'route' | 'builtin';
|
|
218
236
|
export interface VirtualRouterWebSearchEngineConfig {
|
|
219
237
|
id: string;
|
|
220
238
|
providerKey: string;
|
|
221
239
|
description?: string;
|
|
222
240
|
default?: boolean;
|
|
241
|
+
/**
|
|
242
|
+
* Search execution mode:
|
|
243
|
+
* - servertool: expose canonical web_search tool and execute through servertool engine.
|
|
244
|
+
* - direct: route to a search-capable model/provider directly; servertool injection must skip it.
|
|
245
|
+
*/
|
|
246
|
+
executionMode?: VirtualRouterWebSearchExecutionMode;
|
|
247
|
+
/**
|
|
248
|
+
* When executionMode=direct, controls how the upstream search capability is activated.
|
|
249
|
+
* - route: route selection itself enables native search behavior (e.g. deepseek-web search route).
|
|
250
|
+
* - builtin: upstream requires a provider-native builtin search tool/schema.
|
|
251
|
+
*/
|
|
252
|
+
directActivation?: VirtualRouterWebSearchDirectActivation;
|
|
253
|
+
/**
|
|
254
|
+
* Optional target model id for direct-mode matching when request/compat layers need to detect
|
|
255
|
+
* which routed provider payload should receive native web search activation.
|
|
256
|
+
*/
|
|
257
|
+
modelId?: string;
|
|
258
|
+
/**
|
|
259
|
+
* Optional builtin max-uses hint for providers that support builtin web search tools.
|
|
260
|
+
*/
|
|
261
|
+
maxUses?: number;
|
|
223
262
|
/**
|
|
224
263
|
* When true, this engine will never be used by server-side tools
|
|
225
264
|
* (e.g. web_search). It will also be omitted from injected tool
|
|
@@ -118,6 +118,27 @@ function getWebSearchConfig(ctx) {
|
|
|
118
118
|
: undefined;
|
|
119
119
|
if (!id || !providerKey)
|
|
120
120
|
continue;
|
|
121
|
+
const rawExecutionMode = typeof obj.executionMode === 'string'
|
|
122
|
+
? obj.executionMode.trim().toLowerCase()
|
|
123
|
+
: typeof obj.mode === 'string'
|
|
124
|
+
? obj.mode.trim().toLowerCase()
|
|
125
|
+
: '';
|
|
126
|
+
const executionMode = rawExecutionMode === 'direct' ? 'direct' : 'servertool';
|
|
127
|
+
const rawDirectActivation = typeof obj.directActivation === 'string'
|
|
128
|
+
? obj.directActivation.trim().toLowerCase()
|
|
129
|
+
: typeof obj.activation === 'string'
|
|
130
|
+
? obj.activation.trim().toLowerCase()
|
|
131
|
+
: '';
|
|
132
|
+
const directActivation = rawDirectActivation === 'builtin'
|
|
133
|
+
? 'builtin'
|
|
134
|
+
: rawDirectActivation === 'route'
|
|
135
|
+
? 'route'
|
|
136
|
+
: executionMode === 'direct'
|
|
137
|
+
? 'route'
|
|
138
|
+
: undefined;
|
|
139
|
+
const modelId = typeof obj.modelId === 'string' && obj.modelId.trim() ? obj.modelId.trim() : undefined;
|
|
140
|
+
const rawMaxUses = typeof obj.maxUses === 'number' ? obj.maxUses : Number(obj.maxUses);
|
|
141
|
+
const maxUses = Number.isFinite(rawMaxUses) && rawMaxUses > 0 ? Math.floor(rawMaxUses) : undefined;
|
|
121
142
|
const serverToolsDisabled = obj.serverToolsDisabled === true ||
|
|
122
143
|
(typeof obj.serverToolsDisabled === 'string' &&
|
|
123
144
|
obj.serverToolsDisabled.trim().toLowerCase() === 'true') ||
|
|
@@ -142,6 +163,10 @@ function getWebSearchConfig(ctx) {
|
|
|
142
163
|
providerKey,
|
|
143
164
|
description: typeof obj.description === 'string' && obj.description.trim() ? obj.description.trim() : undefined,
|
|
144
165
|
default: obj.default === true,
|
|
166
|
+
executionMode,
|
|
167
|
+
...(directActivation ? { directActivation } : {}),
|
|
168
|
+
...(modelId ? { modelId } : {}),
|
|
169
|
+
...(maxUses ? { maxUses } : {}),
|
|
145
170
|
...(serverToolsDisabled ? { serverToolsDisabled: true } : {}),
|
|
146
171
|
...(searchEngineList ? { searchEngineList } : {})
|
|
147
172
|
});
|
|
@@ -181,7 +206,7 @@ function resolveWebSearchEngine(config, engineId) {
|
|
|
181
206
|
return undefined;
|
|
182
207
|
}
|
|
183
208
|
function buildEnginePriorityList(config, engineId) {
|
|
184
|
-
const engines = (Array.isArray(config.engines) ? config.engines : []).filter((engine) => !engine.serverToolsDisabled);
|
|
209
|
+
const engines = (Array.isArray(config.engines) ? config.engines : []).filter((engine) => !engine.serverToolsDisabled && (engine.executionMode ?? 'servertool') === 'servertool');
|
|
185
210
|
if (!engines.length) {
|
|
186
211
|
return [];
|
|
187
212
|
}
|
|
@@ -100,7 +100,7 @@ function normalizeFilterTokenSet(values) {
|
|
|
100
100
|
return normalized.size > 0 ? normalized : null;
|
|
101
101
|
}
|
|
102
102
|
function isNameIncluded(name, includeSet, excludeSet) {
|
|
103
|
-
const normalized = name
|
|
103
|
+
const normalized = normalizeServerToolCallName(name);
|
|
104
104
|
if (includeSet && !includeSet.has(normalized)) {
|
|
105
105
|
return false;
|
|
106
106
|
}
|
|
@@ -109,6 +109,13 @@ function isNameIncluded(name, includeSet, excludeSet) {
|
|
|
109
109
|
}
|
|
110
110
|
return true;
|
|
111
111
|
}
|
|
112
|
+
/**
 * Canonicalise a tool-call name for comparison.
 *
 * The name is trimmed and lower-cased; the spellings 'websearch' and
 * 'web-search' are then mapped to 'web_search'. Any other name is returned
 * in its trimmed, lower-cased form.
 *
 * @param {string} name - Tool name as received.
 * @returns {string} The canonical tool name.
 */
function normalizeServerToolCallName(name) {
    const lowered = name.trim().toLowerCase();
    switch (lowered) {
        case 'websearch':
        case 'web-search':
            return 'web_search';
        default:
            return lowered;
    }
}
|
|
112
119
|
function extractToolCallsFromMessage(message) {
|
|
113
120
|
const toolCalls = getArray(message.tool_calls);
|
|
114
121
|
const out = [];
|
|
@@ -120,7 +127,9 @@ function extractToolCallsFromMessage(message) {
|
|
|
120
127
|
const fn = asObject(tc.function) ??
|
|
121
128
|
asObject(tc.functionCall) ??
|
|
122
129
|
asObject(tc.function_call);
|
|
123
|
-
const name = fn && typeof fn.name === 'string' && String(fn.name).trim()
|
|
130
|
+
const name = fn && typeof fn.name === 'string' && String(fn.name).trim()
|
|
131
|
+
? normalizeServerToolCallName(String(fn.name))
|
|
132
|
+
: '';
|
|
124
133
|
const rawArgs = (fn ? fn.arguments : undefined) ??
|
|
125
134
|
(fn ? fn.args : undefined) ??
|
|
126
135
|
(fn ? fn.input : undefined) ??
|
|
@@ -137,6 +137,10 @@ export type ServerToolBackendPlan = {
|
|
|
137
137
|
providerKey: string;
|
|
138
138
|
description?: string;
|
|
139
139
|
default?: boolean;
|
|
140
|
+
executionMode?: 'servertool' | 'direct';
|
|
141
|
+
directActivation?: 'route' | 'builtin';
|
|
142
|
+
modelId?: string;
|
|
143
|
+
maxUses?: number;
|
|
140
144
|
serverToolsDisabled?: boolean;
|
|
141
145
|
searchEngineList?: string[];
|
|
142
146
|
}[];
|