@jsonstudio/llms 0.6.230 → 0.6.467
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/conversion/codecs/gemini-openai-codec.js +24 -2
- package/dist/conversion/compat/actions/gemini-web-search.d.ts +17 -0
- package/dist/conversion/compat/actions/gemini-web-search.js +68 -0
- package/dist/conversion/compat/actions/glm-image-content.d.ts +2 -0
- package/dist/conversion/compat/actions/glm-image-content.js +83 -0
- package/dist/conversion/compat/actions/glm-vision-prompt.d.ts +11 -0
- package/dist/conversion/compat/actions/glm-vision-prompt.js +177 -0
- package/dist/conversion/compat/actions/glm-web-search.js +25 -28
- package/dist/conversion/compat/actions/iflow-web-search.d.ts +18 -0
- package/dist/conversion/compat/actions/iflow-web-search.js +87 -0
- package/dist/conversion/compat/actions/universal-shape-filter.js +11 -0
- package/dist/conversion/compat/profiles/chat-gemini.json +17 -0
- package/dist/conversion/compat/profiles/chat-glm.json +194 -184
- package/dist/conversion/compat/profiles/chat-iflow.json +199 -195
- package/dist/conversion/compat/profiles/chat-lmstudio.json +43 -43
- package/dist/conversion/compat/profiles/chat-qwen.json +20 -20
- package/dist/conversion/compat/profiles/responses-c4m.json +42 -42
- package/dist/conversion/config/sample-config.json +1 -1
- package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +24 -0
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +32 -1
- package/dist/conversion/hub/pipeline/session-identifiers.d.ts +9 -0
- package/dist/conversion/hub/pipeline/session-identifiers.js +76 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +31 -2
- package/dist/conversion/hub/pipeline/target-utils.js +6 -0
- package/dist/conversion/hub/process/chat-process.js +186 -40
- package/dist/conversion/hub/response/provider-response.d.ts +13 -1
- package/dist/conversion/hub/response/provider-response.js +84 -35
- package/dist/conversion/hub/response/server-side-tools.js +61 -4
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +123 -3
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +17 -1
- package/dist/conversion/hub/standardized-bridge.js +14 -0
- package/dist/conversion/responses/responses-openai-bridge.js +110 -6
- package/dist/conversion/shared/anthropic-message-utils.js +133 -9
- package/dist/conversion/shared/bridge-message-utils.js +137 -10
- package/dist/conversion/shared/errors.d.ts +20 -0
- package/dist/conversion/shared/errors.js +28 -0
- package/dist/conversion/shared/responses-conversation-store.js +30 -3
- package/dist/conversion/shared/responses-output-builder.js +111 -8
- package/dist/conversion/shared/tool-filter-pipeline.js +1 -0
- package/dist/filters/special/request-toolcalls-stringify.d.ts +13 -0
- package/dist/filters/special/request-toolcalls-stringify.js +103 -3
- package/dist/filters/special/response-tool-text-canonicalize.d.ts +16 -0
- package/dist/filters/special/response-tool-text-canonicalize.js +27 -3
- package/dist/router/virtual-router/bootstrap.js +44 -12
- package/dist/router/virtual-router/classifier.js +13 -17
- package/dist/router/virtual-router/engine.d.ts +39 -0
- package/dist/router/virtual-router/engine.js +755 -55
- package/dist/router/virtual-router/features.js +1 -1
- package/dist/router/virtual-router/message-utils.js +36 -24
- package/dist/router/virtual-router/provider-registry.d.ts +15 -0
- package/dist/router/virtual-router/provider-registry.js +42 -1
- package/dist/router/virtual-router/routing-instructions.d.ts +34 -0
- package/dist/router/virtual-router/routing-instructions.js +383 -0
- package/dist/router/virtual-router/sticky-session-store.d.ts +3 -0
- package/dist/router/virtual-router/sticky-session-store.js +110 -0
- package/dist/router/virtual-router/token-counter.js +14 -3
- package/dist/router/virtual-router/tool-signals.js +0 -22
- package/dist/router/virtual-router/types.d.ts +80 -0
- package/dist/router/virtual-router/types.js +2 -1
- package/dist/servertool/engine.d.ts +27 -0
- package/dist/servertool/engine.js +101 -0
- package/dist/servertool/flow-types.d.ts +40 -0
- package/dist/servertool/flow-types.js +1 -0
- package/dist/servertool/handlers/vision.d.ts +1 -0
- package/dist/servertool/handlers/vision.js +194 -0
- package/dist/servertool/handlers/web-search.d.ts +1 -0
- package/dist/servertool/handlers/web-search.js +791 -0
- package/dist/servertool/orchestration-types.d.ts +33 -0
- package/dist/servertool/orchestration-types.js +1 -0
- package/dist/servertool/registry.d.ts +18 -0
- package/dist/servertool/registry.js +27 -0
- package/dist/servertool/server-side-tools.d.ts +8 -0
- package/dist/servertool/server-side-tools.js +208 -0
- package/dist/servertool/types.d.ts +94 -0
- package/dist/servertool/types.js +1 -0
- package/dist/servertool/vision-tool.d.ts +2 -0
- package/dist/servertool/vision-tool.js +185 -0
- package/dist/sse/sse-to-json/builders/response-builder.js +6 -3
- package/package.json +1 -1
|
@@ -6,6 +6,8 @@ import { buildRoutingFeatures } from './features.js';
|
|
|
6
6
|
import { ContextAdvisor } from './context-advisor.js';
|
|
7
7
|
import { DEFAULT_MODEL_CONTEXT_TOKENS, DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
8
8
|
import { getStatsCenter } from '../../telemetry/stats-center.js';
|
|
9
|
+
import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
|
|
10
|
+
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
9
11
|
export class VirtualRouterEngine {
|
|
10
12
|
routing = {};
|
|
11
13
|
providerRegistry = new ProviderRegistry();
|
|
@@ -18,6 +20,9 @@ export class VirtualRouterEngine {
|
|
|
18
20
|
debug = console; // thin hook; host may monkey-patch for colored logging
|
|
19
21
|
healthConfig = null;
|
|
20
22
|
statsCenter = getStatsCenter();
|
|
23
|
+
// Derived flags from VirtualRouterConfig/routing used by process / response layers.
|
|
24
|
+
webSearchForce = false;
|
|
25
|
+
routingInstructionState = new Map();
|
|
21
26
|
initialize(config) {
|
|
22
27
|
this.validateConfig(config);
|
|
23
28
|
this.routing = config.routing;
|
|
@@ -29,17 +34,48 @@ export class VirtualRouterEngine {
|
|
|
29
34
|
this.classifier = new RoutingClassifier(config.classifier);
|
|
30
35
|
this.contextRouting = config.contextRouting ?? { warnRatio: 0.9, hardLimit: false };
|
|
31
36
|
this.contextAdvisor.configure(this.contextRouting);
|
|
37
|
+
this.webSearchForce = config.webSearch?.force === true;
|
|
32
38
|
this.routeStats = new Map();
|
|
33
39
|
for (const routeName of Object.keys(this.routing)) {
|
|
34
40
|
this.routeStats.set(routeName, { hits: 0 });
|
|
35
41
|
}
|
|
36
42
|
}
|
|
37
43
|
route(request, metadata) {
|
|
44
|
+
const stickyKey = this.resolveStickyKey(metadata);
|
|
45
|
+
const baseState = this.getRoutingInstructionState(stickyKey);
|
|
46
|
+
let routingState = baseState;
|
|
47
|
+
const metadataInstructions = this.buildMetadataInstructions(metadata);
|
|
48
|
+
if (metadataInstructions.length > 0) {
|
|
49
|
+
routingState = applyRoutingInstructions(metadataInstructions, routingState);
|
|
50
|
+
}
|
|
51
|
+
const instructions = parseRoutingInstructions(request.messages);
|
|
52
|
+
if (instructions.length > 0) {
|
|
53
|
+
routingState = applyRoutingInstructions(instructions, routingState);
|
|
54
|
+
const effectiveKey = stickyKey || 'default';
|
|
55
|
+
this.routingInstructionState.set(effectiveKey, routingState);
|
|
56
|
+
request.messages = cleanMessagesFromRoutingInstructions(request.messages);
|
|
57
|
+
this.persistRoutingInstructionState(effectiveKey, routingState);
|
|
58
|
+
}
|
|
59
|
+
const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
|
|
38
60
|
const features = buildRoutingFeatures(request, metadata);
|
|
39
|
-
const classification =
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
61
|
+
const classification = metadata.routeHint && metadata.routeHint.trim()
|
|
62
|
+
? {
|
|
63
|
+
routeName: metadata.routeHint.trim(),
|
|
64
|
+
confidence: 1,
|
|
65
|
+
reasoning: `route_hint:${metadata.routeHint.trim()}`,
|
|
66
|
+
fallback: false,
|
|
67
|
+
candidates: [metadata.routeHint.trim()]
|
|
68
|
+
}
|
|
69
|
+
: this.classifier.classify(features);
|
|
70
|
+
const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
71
|
+
const selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
|
|
72
|
+
const baseTarget = this.providerRegistry.buildTarget(selection.providerKey);
|
|
73
|
+
const forceVision = this.routeHasForceFlag('vision');
|
|
74
|
+
const target = {
|
|
75
|
+
...baseTarget,
|
|
76
|
+
...(this.webSearchForce ? { forceWebSearch: true } : {}),
|
|
77
|
+
...(forceVision ? { forceVision: true } : {})
|
|
78
|
+
};
|
|
43
79
|
this.healthManager.recordSuccess(selection.providerKey);
|
|
44
80
|
this.incrementRouteStat(selection.routeUsed, selection.providerKey);
|
|
45
81
|
try {
|
|
@@ -56,15 +92,17 @@ export class VirtualRouterEngine {
|
|
|
56
92
|
catch {
|
|
57
93
|
// stats must never break routing
|
|
58
94
|
}
|
|
59
|
-
const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features);
|
|
60
|
-
const
|
|
95
|
+
const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features, routingMode);
|
|
96
|
+
const stickyScope = routingMode !== 'none' ? this.resolveSessionScope(metadata) : undefined;
|
|
97
|
+
const routeForLog = routingMode === 'sticky' ? 'sticky' : selection.routeUsed;
|
|
98
|
+
const formatted = this.formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope);
|
|
61
99
|
if (formatted) {
|
|
62
100
|
this.debug?.log?.(formatted);
|
|
63
101
|
}
|
|
64
102
|
else {
|
|
65
103
|
this.debug?.log?.('[virtual-router-hit]', selection.routeUsed, selection.providerKey, target.modelId || '', hitReason ? `reason=${hitReason}` : '');
|
|
66
104
|
}
|
|
67
|
-
const didFallback = selection.routeUsed !==
|
|
105
|
+
const didFallback = selection.routeUsed !== requestedRoute;
|
|
68
106
|
return {
|
|
69
107
|
target,
|
|
70
108
|
decision: {
|
|
@@ -123,6 +161,14 @@ export class VirtualRouterEngine {
|
|
|
123
161
|
health: this.healthManager.getSnapshot()
|
|
124
162
|
};
|
|
125
163
|
}
|
|
164
|
+
/**
|
|
165
|
+
* 将分类器产生的逻辑路由名直接归一化为配置中的路由键。
|
|
166
|
+
* 不再维护 "websearch" 之类的别名,调用方应显式使用 "web_search" 或 "search" 等实际路由名。
|
|
167
|
+
*/
|
|
168
|
+
normalizeRouteAlias(routeName) {
|
|
169
|
+
const base = routeName && routeName.trim() ? routeName.trim() : DEFAULT_ROUTE;
|
|
170
|
+
return base;
|
|
171
|
+
}
|
|
126
172
|
validateConfig(config) {
|
|
127
173
|
if (!config.routing || typeof config.routing !== 'object') {
|
|
128
174
|
throw new VirtualRouterError('routing configuration is required', VirtualRouterErrorCode.CONFIG_ERROR);
|
|
@@ -157,42 +203,180 @@ export class VirtualRouterEngine {
|
|
|
157
203
|
}
|
|
158
204
|
}
|
|
159
205
|
}
|
|
160
|
-
selectProvider(requestedRoute, metadata, classification, features) {
|
|
161
|
-
const
|
|
162
|
-
const
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
206
|
+
selectProvider(requestedRoute, metadata, classification, features, routingState) {
|
|
207
|
+
const activeState = routingState || this.getRoutingInstructionState(this.resolveStickyKey(metadata));
|
|
208
|
+
const forcedResolution = activeState.forcedTarget
|
|
209
|
+
? this.resolveInstructionTarget(activeState.forcedTarget)
|
|
210
|
+
: null;
|
|
211
|
+
if (forcedResolution && forcedResolution.mode === 'exact') {
|
|
212
|
+
const forcedKey = forcedResolution.keys[0];
|
|
213
|
+
return {
|
|
214
|
+
providerKey: forcedKey,
|
|
215
|
+
routeUsed: requestedRoute,
|
|
216
|
+
pool: [forcedKey],
|
|
217
|
+
poolId: 'forced'
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
// sticky 语义:
|
|
221
|
+
// - 显式绑定到具体 key(alias/index)时,直接使用该 key;
|
|
222
|
+
// - provider / model 级别 sticky 解析为一组 providerKey;
|
|
223
|
+
// 在 sticky 这组 key「可用」之前,不会回落到 default 中的非 sticky provider。
|
|
224
|
+
let stickyResolution = null;
|
|
225
|
+
let stickyKeySet;
|
|
226
|
+
if (!forcedResolution && activeState.stickyTarget) {
|
|
227
|
+
stickyResolution = this.resolveInstructionTarget(activeState.stickyTarget);
|
|
228
|
+
if (stickyResolution && stickyResolution.mode === 'exact') {
|
|
229
|
+
const stickyKey = stickyResolution.keys[0];
|
|
230
|
+
// 已经被健康管理标记为不可用的 key 不能被 sticky 语法“复活”
|
|
231
|
+
if (this.healthManager.isAvailable(stickyKey)) {
|
|
232
|
+
return {
|
|
233
|
+
providerKey: stickyKey,
|
|
234
|
+
routeUsed: requestedRoute,
|
|
235
|
+
pool: [stickyKey],
|
|
236
|
+
poolId: 'sticky'
|
|
237
|
+
};
|
|
238
|
+
}
|
|
173
239
|
}
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
240
|
+
if (stickyResolution && stickyResolution.mode === 'filter' && stickyResolution.keys.length > 0) {
|
|
241
|
+
// 仅保留当前仍可用的 key;已被熔断/拉黑的 key 不会被 sticky 语法重新加入池子
|
|
242
|
+
const liveKeys = stickyResolution.keys.filter((key) => this.healthManager.isAvailable(key));
|
|
243
|
+
if (liveKeys.length > 0) {
|
|
244
|
+
stickyKeySet = new Set(liveKeys);
|
|
245
|
+
}
|
|
179
246
|
}
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
247
|
+
}
|
|
248
|
+
const allowAliasRotation = Boolean(activeState.stickyTarget) &&
|
|
249
|
+
!activeState.stickyTarget?.keyAlias &&
|
|
250
|
+
activeState.stickyTarget?.keyIndex === undefined;
|
|
251
|
+
// force(filter) 优先级高于 sticky:显式 force 视为覆盖 sticky 约束。
|
|
252
|
+
if (forcedResolution && forcedResolution.mode === 'filter') {
|
|
253
|
+
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
254
|
+
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
255
|
+
if (filteredCandidates.length === 0) {
|
|
256
|
+
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
257
|
+
requestedRoute,
|
|
258
|
+
allowedProviders: Array.from(activeState.allowedProviders),
|
|
259
|
+
disabledProviders: Array.from(activeState.disabledProviders)
|
|
260
|
+
});
|
|
261
|
+
}
|
|
262
|
+
const forcedKeySet = new Set(forcedResolution.keys);
|
|
263
|
+
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, forcedKeySet, allowAliasRotation);
|
|
264
|
+
}
|
|
265
|
+
if (stickyKeySet && stickyKeySet.size > 0) {
|
|
266
|
+
const stickySelection = this.selectFromStickyPool(stickyKeySet, metadata, features, activeState, allowAliasRotation);
|
|
267
|
+
if (stickySelection) {
|
|
268
|
+
return stickySelection;
|
|
269
|
+
}
|
|
270
|
+
// sticky 池在本次请求中完全不可用(全部被黑名单/健康状态过滤):视为 sticky 池暂时失效,
|
|
271
|
+
// 本次回落到普通路由选择,但保留 stickyTarget,等待后续恢复。
|
|
272
|
+
}
|
|
273
|
+
// 无 sticky,或 sticky 池在本次请求中全部不可用(无可用 key):按原始分类结果执行正常路由选择。
|
|
274
|
+
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
275
|
+
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
276
|
+
if (filteredCandidates.length === 0) {
|
|
277
|
+
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
278
|
+
requestedRoute,
|
|
279
|
+
allowedProviders: Array.from(activeState.allowedProviders),
|
|
280
|
+
disabledProviders: Array.from(activeState.disabledProviders)
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, undefined, allowAliasRotation);
|
|
284
|
+
}
|
|
285
|
+
trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation) {
|
|
286
|
+
let targets = Array.isArray(tier.targets) ? tier.targets : [];
|
|
287
|
+
if (allowedProviders && allowedProviders.size > 0) {
|
|
288
|
+
targets = targets.filter(key => {
|
|
289
|
+
const providerId = this.extractProviderId(key);
|
|
290
|
+
return providerId && allowedProviders.has(providerId);
|
|
291
|
+
});
|
|
292
|
+
}
|
|
293
|
+
if (disabledProviders && disabledProviders.size > 0) {
|
|
294
|
+
targets = targets.filter((key) => {
|
|
295
|
+
const providerId = this.extractProviderId(key);
|
|
296
|
+
return providerId && !disabledProviders.has(providerId);
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
if (disabledKeysMap && disabledKeysMap.size > 0) {
|
|
300
|
+
targets = targets.filter((key) => {
|
|
301
|
+
const providerId = this.extractProviderId(key);
|
|
302
|
+
if (!providerId)
|
|
303
|
+
return true;
|
|
304
|
+
const disabledKeys = disabledKeysMap.get(providerId);
|
|
305
|
+
if (!disabledKeys || disabledKeys.size === 0)
|
|
306
|
+
return true;
|
|
307
|
+
const keyAlias = this.extractKeyAlias(key);
|
|
308
|
+
const keyIndex = this.extractKeyIndex(key);
|
|
309
|
+
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
310
|
+
return false;
|
|
186
311
|
}
|
|
187
|
-
if (
|
|
188
|
-
|
|
312
|
+
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
313
|
+
return false;
|
|
314
|
+
}
|
|
315
|
+
return true;
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
if (disabledModels && disabledModels.size > 0) {
|
|
319
|
+
targets = targets.filter((key) => {
|
|
320
|
+
const providerId = this.extractProviderId(key);
|
|
321
|
+
if (!providerId) {
|
|
322
|
+
return true;
|
|
323
|
+
}
|
|
324
|
+
const disabled = disabledModels.get(providerId);
|
|
325
|
+
if (!disabled || disabled.size === 0) {
|
|
326
|
+
return true;
|
|
327
|
+
}
|
|
328
|
+
const modelId = this.getProviderModelId(key);
|
|
329
|
+
if (!modelId) {
|
|
330
|
+
return true;
|
|
331
|
+
}
|
|
332
|
+
return !disabled.has(modelId);
|
|
333
|
+
});
|
|
334
|
+
}
|
|
335
|
+
if (requiredProviderKeys && requiredProviderKeys.size > 0) {
|
|
336
|
+
targets = targets.filter((key) => requiredProviderKeys.has(key));
|
|
337
|
+
}
|
|
338
|
+
const serverToolRequired = features.metadata?.serverToolRequired === true;
|
|
339
|
+
if (serverToolRequired) {
|
|
340
|
+
const filtered = [];
|
|
341
|
+
for (const key of targets) {
|
|
342
|
+
try {
|
|
343
|
+
const profile = this.providerRegistry.get(key);
|
|
344
|
+
if (!profile.serverToolsDisabled) {
|
|
345
|
+
filtered.push(key);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
catch {
|
|
349
|
+
// ignore unknown providers when filtering for servertools
|
|
189
350
|
}
|
|
190
351
|
}
|
|
352
|
+
targets = filtered;
|
|
353
|
+
}
|
|
354
|
+
// 当当前请求包含图片且路由为 default/thinking 时,优先在该路由池内选择
|
|
355
|
+
// Responses/Gemini 类型的 Provider,以便一次完成多模态推理;如果不存在则回退到原始列表。
|
|
356
|
+
if (features.hasImageAttachment && (routeName === DEFAULT_ROUTE || routeName === 'thinking')) {
|
|
357
|
+
const prioritized = [];
|
|
358
|
+
const fallthrough = [];
|
|
359
|
+
for (const key of targets) {
|
|
360
|
+
try {
|
|
361
|
+
const profile = this.providerRegistry.get(key);
|
|
362
|
+
if (profile.providerType === 'responses') {
|
|
363
|
+
prioritized.push(key);
|
|
364
|
+
}
|
|
365
|
+
else if (profile.providerType === 'gemini') {
|
|
366
|
+
prioritized.push(key);
|
|
367
|
+
}
|
|
368
|
+
else {
|
|
369
|
+
fallthrough.push(key);
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
catch {
|
|
373
|
+
fallthrough.push(key);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
if (prioritized.length) {
|
|
377
|
+
targets = prioritized;
|
|
378
|
+
}
|
|
191
379
|
}
|
|
192
|
-
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
193
|
-
}
|
|
194
|
-
trySelectFromTier(routeName, tier, stickyKey, estimatedTokens) {
|
|
195
|
-
const targets = Array.isArray(tier.targets) ? tier.targets : [];
|
|
196
380
|
if (!targets.length) {
|
|
197
381
|
return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
|
|
198
382
|
}
|
|
@@ -202,7 +386,7 @@ export class VirtualRouterEngine {
|
|
|
202
386
|
const providerKey = this.loadBalancer.select({
|
|
203
387
|
routeName: `${routeName}:${tier.id}`,
|
|
204
388
|
candidates: candidatePool,
|
|
205
|
-
stickyKey,
|
|
389
|
+
stickyKey: allowAliasRotation ? undefined : stickyKey,
|
|
206
390
|
availabilityCheck: (key) => this.healthManager.isAvailable(key)
|
|
207
391
|
});
|
|
208
392
|
if (providerKey) {
|
|
@@ -258,12 +442,415 @@ export class VirtualRouterEngine {
|
|
|
258
442
|
return prefix;
|
|
259
443
|
}
|
|
260
444
|
resolveStickyKey(metadata) {
|
|
445
|
+
const sessionScope = this.resolveSessionScope(metadata);
|
|
446
|
+
if (sessionScope) {
|
|
447
|
+
return sessionScope;
|
|
448
|
+
}
|
|
261
449
|
const resume = metadata.responsesResume;
|
|
262
450
|
if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
|
|
263
451
|
return resume.previousRequestId.trim();
|
|
264
452
|
}
|
|
265
453
|
return metadata.requestId;
|
|
266
454
|
}
|
|
455
|
+
resolveSessionScope(metadata) {
|
|
456
|
+
const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
|
|
457
|
+
if (sessionId) {
|
|
458
|
+
return `session:${sessionId}`;
|
|
459
|
+
}
|
|
460
|
+
const conversationId = typeof metadata.conversationId === 'string' ? metadata.conversationId.trim() : '';
|
|
461
|
+
if (conversationId) {
|
|
462
|
+
return `conversation:${conversationId}`;
|
|
463
|
+
}
|
|
464
|
+
return undefined;
|
|
465
|
+
}
|
|
466
|
+
getRoutingInstructionState(stickyKey) {
|
|
467
|
+
const key = stickyKey || 'default';
|
|
468
|
+
if (this.routingInstructionState.has(key)) {
|
|
469
|
+
return this.routingInstructionState.get(key);
|
|
470
|
+
}
|
|
471
|
+
let initial = null;
|
|
472
|
+
// 仅对 session:/conversation: 作用域的 key 尝试从磁盘恢复持久化状态
|
|
473
|
+
if (key.startsWith('session:') || key.startsWith('conversation:')) {
|
|
474
|
+
initial = loadRoutingInstructionStateSync(key);
|
|
475
|
+
}
|
|
476
|
+
if (!initial) {
|
|
477
|
+
initial = {
|
|
478
|
+
forcedTarget: undefined,
|
|
479
|
+
stickyTarget: undefined,
|
|
480
|
+
allowedProviders: new Set(),
|
|
481
|
+
disabledProviders: new Set(),
|
|
482
|
+
disabledKeys: new Map(),
|
|
483
|
+
disabledModels: new Map()
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
this.routingInstructionState.set(key, initial);
|
|
487
|
+
return initial;
|
|
488
|
+
}
|
|
489
|
+
buildMetadataInstructions(metadata) {
|
|
490
|
+
const instructions = [];
|
|
491
|
+
if (Array.isArray(metadata.disabledProviderKeyAliases)) {
|
|
492
|
+
for (const entry of metadata.disabledProviderKeyAliases) {
|
|
493
|
+
const parsed = this.parseMetadataDisableDescriptor(entry);
|
|
494
|
+
if (parsed) {
|
|
495
|
+
instructions.push({ type: 'disable', ...parsed });
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
return instructions;
|
|
500
|
+
}
|
|
501
|
+
parseMetadataDisableDescriptor(entry) {
|
|
502
|
+
if (typeof entry !== 'string') {
|
|
503
|
+
return null;
|
|
504
|
+
}
|
|
505
|
+
const trimmed = entry.trim();
|
|
506
|
+
if (!trimmed) {
|
|
507
|
+
return null;
|
|
508
|
+
}
|
|
509
|
+
const parts = trimmed.split('.');
|
|
510
|
+
if (parts.length < 2) {
|
|
511
|
+
return null;
|
|
512
|
+
}
|
|
513
|
+
const provider = parts[0];
|
|
514
|
+
const alias = parts[1];
|
|
515
|
+
if (!provider || !alias) {
|
|
516
|
+
return null;
|
|
517
|
+
}
|
|
518
|
+
if (/^\d+$/.test(alias)) {
|
|
519
|
+
return { provider, keyIndex: Number.parseInt(alias, 10) };
|
|
520
|
+
}
|
|
521
|
+
return { provider, keyAlias: alias };
|
|
522
|
+
}
|
|
523
|
+
resolveRoutingMode(instructions, state) {
|
|
524
|
+
const hasForce = instructions.some((inst) => inst.type === 'force');
|
|
525
|
+
const hasAllow = instructions.some((inst) => inst.type === 'allow');
|
|
526
|
+
const hasClear = instructions.some((inst) => inst.type === 'clear');
|
|
527
|
+
if (hasClear) {
|
|
528
|
+
return 'none';
|
|
529
|
+
}
|
|
530
|
+
if (hasAllow || state.allowedProviders.size > 0) {
|
|
531
|
+
return 'sticky';
|
|
532
|
+
}
|
|
533
|
+
if (hasForce || state.forcedTarget) {
|
|
534
|
+
return 'force';
|
|
535
|
+
}
|
|
536
|
+
if (state.stickyTarget) {
|
|
537
|
+
return 'sticky';
|
|
538
|
+
}
|
|
539
|
+
return 'none';
|
|
540
|
+
}
|
|
541
|
+
resolveInstructionTarget(target) {
|
|
542
|
+
if (!target || !target.provider) {
|
|
543
|
+
return null;
|
|
544
|
+
}
|
|
545
|
+
const providerId = target.provider;
|
|
546
|
+
const providerKeys = this.providerRegistry.listProviderKeys(providerId);
|
|
547
|
+
if (providerKeys.length === 0) {
|
|
548
|
+
return null;
|
|
549
|
+
}
|
|
550
|
+
const alias = typeof target.keyAlias === 'string' ? target.keyAlias.trim() : '';
|
|
551
|
+
const aliasExplicit = alias.length > 0 && target.pathLength === 3;
|
|
552
|
+
if (aliasExplicit) {
|
|
553
|
+
const runtimeKey = this.providerRegistry.resolveRuntimeKeyByAlias(providerId, alias);
|
|
554
|
+
if (runtimeKey) {
|
|
555
|
+
return { mode: 'exact', keys: [runtimeKey] };
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
if (typeof target.keyIndex === 'number' && target.keyIndex > 0) {
|
|
559
|
+
const runtimeKey = this.providerRegistry.resolveRuntimeKeyByIndex(providerId, target.keyIndex);
|
|
560
|
+
if (runtimeKey) {
|
|
561
|
+
return { mode: 'exact', keys: [runtimeKey] };
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
if (target.model && target.model.trim()) {
|
|
565
|
+
const normalizedModel = target.model.trim();
|
|
566
|
+
const matchingKeys = providerKeys.filter((key) => {
|
|
567
|
+
const modelId = this.getProviderModelId(key);
|
|
568
|
+
return modelId === normalizedModel;
|
|
569
|
+
});
|
|
570
|
+
if (matchingKeys.length > 0) {
|
|
571
|
+
return { mode: 'filter', keys: matchingKeys };
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
if (alias && !aliasExplicit) {
|
|
575
|
+
const legacyKey = this.providerRegistry.resolveRuntimeKeyByAlias(providerId, alias);
|
|
576
|
+
if (legacyKey) {
|
|
577
|
+
return { mode: 'exact', keys: [legacyKey] };
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
return { mode: 'filter', keys: providerKeys };
|
|
581
|
+
}
|
|
582
|
+
filterCandidatesByRoutingState(routes, state) {
|
|
583
|
+
// console.log('[filter] routes:', routes, 'state:', {
|
|
584
|
+
// allowed: Array.from(state.allowedProviders),
|
|
585
|
+
// disabled: Array.from(state.disabledProviders)
|
|
586
|
+
// });
|
|
587
|
+
if (state.allowedProviders.size === 0 &&
|
|
588
|
+
state.disabledProviders.size === 0 &&
|
|
589
|
+
state.disabledKeys.size === 0 &&
|
|
590
|
+
state.disabledModels.size === 0) {
|
|
591
|
+
return routes;
|
|
592
|
+
}
|
|
593
|
+
return routes.filter(routeName => {
|
|
594
|
+
const pools = this.routing[routeName];
|
|
595
|
+
if (!pools)
|
|
596
|
+
return false;
|
|
597
|
+
for (const pool of pools) {
|
|
598
|
+
if (!Array.isArray(pool.targets) || pool.targets.length === 0) {
|
|
599
|
+
continue;
|
|
600
|
+
}
|
|
601
|
+
for (const providerKey of pool.targets) {
|
|
602
|
+
const providerId = this.extractProviderId(providerKey);
|
|
603
|
+
// console.log('[filter] checking', providerKey, 'id=', providerId);
|
|
604
|
+
if (!providerId)
|
|
605
|
+
continue;
|
|
606
|
+
if (state.allowedProviders.size > 0 && !state.allowedProviders.has(providerId)) {
|
|
607
|
+
// console.log('[filter] dropped by allowed list');
|
|
608
|
+
continue;
|
|
609
|
+
}
|
|
610
|
+
if (state.disabledProviders.has(providerId)) {
|
|
611
|
+
continue;
|
|
612
|
+
}
|
|
613
|
+
const disabledKeys = state.disabledKeys.get(providerId);
|
|
614
|
+
if (disabledKeys && disabledKeys.size > 0) {
|
|
615
|
+
const keyAlias = this.extractKeyAlias(providerKey);
|
|
616
|
+
const keyIndex = this.extractKeyIndex(providerKey);
|
|
617
|
+
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
618
|
+
continue;
|
|
619
|
+
}
|
|
620
|
+
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
621
|
+
continue;
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
const disabledModels = state.disabledModels.get(providerId);
|
|
625
|
+
if (disabledModels && disabledModels.size > 0) {
|
|
626
|
+
const modelId = this.getProviderModelId(providerKey);
|
|
627
|
+
if (modelId && disabledModels.has(modelId)) {
|
|
628
|
+
continue;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
return true;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
return false;
|
|
635
|
+
});
|
|
636
|
+
}
|
|
637
|
+
selectFromCandidates(routes, metadata, classification, features, state, requiredProviderKeys, allowAliasRotation) {
|
|
638
|
+
const allowedProviders = new Set(state.allowedProviders);
|
|
639
|
+
const disabledProviders = new Set(state.disabledProviders);
|
|
640
|
+
const disabledKeysMap = new Map(Array.from(state.disabledKeys.entries()).map(([provider, keys]) => [
|
|
641
|
+
provider,
|
|
642
|
+
new Set(Array.from(keys).map(k => typeof k === 'string' ? k : k + 1))
|
|
643
|
+
]));
|
|
644
|
+
const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
|
|
645
|
+
const stickyKey = allowAliasRotation ? undefined : this.resolveStickyKey(metadata);
|
|
646
|
+
const attempted = [];
|
|
647
|
+
const visitedRoutes = new Set();
|
|
648
|
+
const routeQueue = this.initializeRouteQueue(routes);
|
|
649
|
+
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
650
|
+
? Math.max(0, features.estimatedTokens)
|
|
651
|
+
: 0;
|
|
652
|
+
while (routeQueue.length) {
|
|
653
|
+
const routeName = routeQueue.shift();
|
|
654
|
+
if (visitedRoutes.has(routeName)) {
|
|
655
|
+
continue;
|
|
656
|
+
}
|
|
657
|
+
const routePools = this.routing[routeName];
|
|
658
|
+
if (!this.routeHasTargets(routePools)) {
|
|
659
|
+
visitedRoutes.add(routeName);
|
|
660
|
+
attempted.push(`${routeName}:empty`);
|
|
661
|
+
continue;
|
|
662
|
+
}
|
|
663
|
+
visitedRoutes.add(routeName);
|
|
664
|
+
const orderedPools = this.sortRoutePools(routePools);
|
|
665
|
+
for (const poolTier of orderedPools) {
|
|
666
|
+
const { providerKey, poolTargets, tierId, failureHint } = this.trySelectFromTier(routeName, poolTier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation);
|
|
667
|
+
if (providerKey) {
|
|
668
|
+
return { providerKey, routeUsed: routeName, pool: poolTargets, poolId: tierId };
|
|
669
|
+
}
|
|
670
|
+
if (failureHint) {
|
|
671
|
+
attempted.push(failureHint);
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
676
|
+
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
677
|
+
}
|
|
678
|
+
extractProviderId(providerKey) {
|
|
679
|
+
const firstDot = providerKey.indexOf('.');
|
|
680
|
+
if (firstDot <= 0)
|
|
681
|
+
return null;
|
|
682
|
+
return providerKey.substring(0, firstDot);
|
|
683
|
+
}
|
|
684
|
+
/**
|
|
685
|
+
* 在已有候选路由集合上,筛选出真正挂载了 sticky 池内 providerKey 的路由,
|
|
686
|
+
* 并按 ROUTE_PRIORITY 进行排序;同时显式排除 tools 路由,保证一旦进入
|
|
687
|
+
* sticky 模式,就不会再命中独立的 tools 池(例如 glm/qwen 工具模型)。
|
|
688
|
+
* 若候选集合中完全没有挂载 sticky key 的路由,则尝试在 default 路由上兜底。
|
|
689
|
+
*/
|
|
690
|
+
buildStickyRouteCandidatesFromFiltered(filteredCandidates, stickyKeySet) {
|
|
691
|
+
const routesWithSticky = [];
|
|
692
|
+
const candidateSet = new Set(filteredCandidates.filter((name) => name && name !== 'tools'));
|
|
693
|
+
for (const routeName of candidateSet) {
|
|
694
|
+
const pools = this.routing[routeName];
|
|
695
|
+
if (!this.routeHasTargets(pools)) {
|
|
696
|
+
continue;
|
|
697
|
+
}
|
|
698
|
+
const targets = this.flattenPoolTargets(pools);
|
|
699
|
+
if (!targets.some((key) => stickyKeySet.has(key))) {
|
|
700
|
+
continue;
|
|
701
|
+
}
|
|
702
|
+
routesWithSticky.push(routeName);
|
|
703
|
+
}
|
|
704
|
+
// 若当前候选路由中没有任何挂载 sticky key 的路由,尝试直接在 default 路由上兜底;
|
|
705
|
+
// 若 default 也不包含 sticky key,则视为 sticky 配置失效,由调用方回落到非 sticky 逻辑。
|
|
706
|
+
if (routesWithSticky.length === 0) {
|
|
707
|
+
const defaultPools = this.routing[DEFAULT_ROUTE];
|
|
708
|
+
if (this.routeHasTargets(defaultPools)) {
|
|
709
|
+
const targets = this.flattenPoolTargets(defaultPools);
|
|
710
|
+
if (targets.some((key) => stickyKeySet.has(key))) {
|
|
711
|
+
return [DEFAULT_ROUTE];
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
return [];
|
|
715
|
+
}
|
|
716
|
+
const ordered = this.sortByPriority(routesWithSticky);
|
|
717
|
+
const result = [];
|
|
718
|
+
let hasDefault = false;
|
|
719
|
+
for (const routeName of ordered) {
|
|
720
|
+
if (routeName === DEFAULT_ROUTE) {
|
|
721
|
+
hasDefault = true;
|
|
722
|
+
continue;
|
|
723
|
+
}
|
|
724
|
+
if (!result.includes(routeName)) {
|
|
725
|
+
result.push(routeName);
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
// default 路由若包含 sticky key,则始终放在候选列表最后,用于 sticky 模式兜底。
|
|
729
|
+
if (hasDefault && !result.includes(DEFAULT_ROUTE)) {
|
|
730
|
+
result.push(DEFAULT_ROUTE);
|
|
731
|
+
}
|
|
732
|
+
return result;
|
|
733
|
+
}
|
|
734
|
+
/**
|
|
735
|
+
* 在 sticky 模式下,仅在 sticky 池内选择 Provider:
|
|
736
|
+
* - stickyKeySet 表示已经解析并通过健康检查的 providerKey 集合;
|
|
737
|
+
* - 不再依赖 routing[*].targets 中是否挂载这些 key,避免「未初始化路由池」导致 sticky 池为空;
|
|
738
|
+
* - 仍然尊重 allowed/disabledProviders、disabledKeys、disabledModels 以及上下文长度。
|
|
739
|
+
*/
|
|
740
|
+
selectFromStickyPool(stickyKeySet, metadata, features, state, allowAliasRotation) {
|
|
741
|
+
if (!stickyKeySet || stickyKeySet.size === 0) {
|
|
742
|
+
return null;
|
|
743
|
+
}
|
|
744
|
+
const allowedProviders = new Set(state.allowedProviders);
|
|
745
|
+
const disabledProviders = new Set(state.disabledProviders);
|
|
746
|
+
const disabledKeysMap = new Map(Array.from(state.disabledKeys.entries()).map(([provider, keys]) => [
|
|
747
|
+
provider,
|
|
748
|
+
new Set(Array.from(keys).map((k) => (typeof k === 'string' ? k : k + 1)))
|
|
749
|
+
]));
|
|
750
|
+
const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
|
|
751
|
+
// 初始候选集合:sticky 池中的所有 key
|
|
752
|
+
let candidates = Array.from(stickyKeySet);
|
|
753
|
+
// 应用 provider 白名单 / 黑名单
|
|
754
|
+
if (allowedProviders.size > 0) {
|
|
755
|
+
candidates = candidates.filter((key) => {
|
|
756
|
+
const providerId = this.extractProviderId(key);
|
|
757
|
+
return providerId && allowedProviders.has(providerId);
|
|
758
|
+
});
|
|
759
|
+
}
|
|
760
|
+
if (disabledProviders.size > 0) {
|
|
761
|
+
candidates = candidates.filter((key) => {
|
|
762
|
+
const providerId = this.extractProviderId(key);
|
|
763
|
+
return providerId && !disabledProviders.has(providerId);
|
|
764
|
+
});
|
|
765
|
+
}
|
|
766
|
+
// 应用 key / model 级别黑名单
|
|
767
|
+
if (disabledKeysMap.size > 0 || disabledModels.size > 0) {
|
|
768
|
+
candidates = candidates.filter((key) => {
|
|
769
|
+
const providerId = this.extractProviderId(key);
|
|
770
|
+
if (!providerId) {
|
|
771
|
+
return true;
|
|
772
|
+
}
|
|
773
|
+
const disabledKeys = disabledKeysMap.get(providerId);
|
|
774
|
+
if (disabledKeys && disabledKeys.size > 0) {
|
|
775
|
+
const keyAlias = this.extractKeyAlias(key);
|
|
776
|
+
const keyIndex = this.extractKeyIndex(key);
|
|
777
|
+
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
778
|
+
return false;
|
|
779
|
+
}
|
|
780
|
+
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
781
|
+
return false;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
const disabledModelSet = disabledModels.get(providerId);
|
|
785
|
+
if (disabledModelSet && disabledModelSet.size > 0) {
|
|
786
|
+
const modelId = this.getProviderModelId(key);
|
|
787
|
+
if (modelId && disabledModelSet.has(modelId)) {
|
|
788
|
+
return false;
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
return true;
|
|
792
|
+
});
|
|
793
|
+
}
|
|
794
|
+
if (!candidates.length) {
|
|
795
|
+
return null;
|
|
796
|
+
}
|
|
797
|
+
const stickyKey = allowAliasRotation ? undefined : this.resolveStickyKey(metadata);
|
|
798
|
+
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
799
|
+
? Math.max(0, features.estimatedTokens)
|
|
800
|
+
: 0;
|
|
801
|
+
const tier = {
|
|
802
|
+
id: 'sticky-primary',
|
|
803
|
+
targets: candidates,
|
|
804
|
+
priority: 0
|
|
805
|
+
};
|
|
806
|
+
const { providerKey, poolTargets, tierId } = this.trySelectFromTier('sticky', tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, stickyKeySet, allowAliasRotation);
|
|
807
|
+
if (!providerKey) {
|
|
808
|
+
return null;
|
|
809
|
+
}
|
|
810
|
+
return {
|
|
811
|
+
providerKey,
|
|
812
|
+
routeUsed: 'sticky',
|
|
813
|
+
pool: poolTargets,
|
|
814
|
+
poolId: tierId
|
|
815
|
+
};
|
|
816
|
+
}
|
|
817
|
+
extractKeyAlias(providerKey) {
|
|
818
|
+
const parts = providerKey.split('.');
|
|
819
|
+
if (parts.length === 3) {
|
|
820
|
+
return this.normalizeAliasDescriptor(parts[1]);
|
|
821
|
+
}
|
|
822
|
+
return null;
|
|
823
|
+
}
|
|
824
|
+
normalizeAliasDescriptor(alias) {
|
|
825
|
+
if (/^\d+-/.test(alias)) {
|
|
826
|
+
return alias.replace(/^\d+-/, '');
|
|
827
|
+
}
|
|
828
|
+
return alias;
|
|
829
|
+
}
|
|
830
|
+
extractKeyIndex(providerKey) {
|
|
831
|
+
const parts = providerKey.split('.');
|
|
832
|
+
if (parts.length === 2) {
|
|
833
|
+
const index = parseInt(parts[1], 10);
|
|
834
|
+
if (!isNaN(index) && index > 0) {
|
|
835
|
+
return index;
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
return undefined;
|
|
839
|
+
}
|
|
840
|
+
getProviderModelId(providerKey) {
|
|
841
|
+
const profile = this.providerRegistry.get(providerKey);
|
|
842
|
+
if (profile.modelId) {
|
|
843
|
+
return profile.modelId;
|
|
844
|
+
}
|
|
845
|
+
const parts = providerKey.split('.');
|
|
846
|
+
if (parts.length === 2) {
|
|
847
|
+
return parts[1] || null;
|
|
848
|
+
}
|
|
849
|
+
if (parts.length === 3) {
|
|
850
|
+
return parts[2] || null;
|
|
851
|
+
}
|
|
852
|
+
return null;
|
|
853
|
+
}
|
|
267
854
|
mapProviderError(event) {
|
|
268
855
|
// NOTE: mapProviderError is the only place where VirtualRouter translates providerErrorCenter
|
|
269
856
|
// events into health signals. Classification is intentionally coarse; upstream providers
|
|
@@ -346,12 +933,32 @@ export class VirtualRouterEngine {
|
|
|
346
933
|
return 'client_error';
|
|
347
934
|
return 'unknown';
|
|
348
935
|
}
|
|
349
|
-
buildRouteCandidates(requestedRoute, classificationCandidates) {
|
|
350
|
-
const
|
|
351
|
-
const
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
936
|
+
buildRouteCandidates(requestedRoute, classificationCandidates, features) {
|
|
937
|
+
const forceVision = this.routeHasForceFlag('vision');
|
|
938
|
+
const normalized = this.normalizeRouteAlias(requestedRoute || DEFAULT_ROUTE);
|
|
939
|
+
const baseList = [];
|
|
940
|
+
if (classificationCandidates && classificationCandidates.length) {
|
|
941
|
+
for (const candidate of classificationCandidates) {
|
|
942
|
+
baseList.push(this.normalizeRouteAlias(candidate));
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
else if (normalized) {
|
|
946
|
+
baseList.push(normalized);
|
|
947
|
+
}
|
|
948
|
+
// 当检测到当前请求包含图片时,确保 default/thinking 也参与候选集,
|
|
949
|
+
// 以便优先尝试内建多模态模型(Responses/Gemini),再回落到 vision 路由池。
|
|
950
|
+
if (features.hasImageAttachment && !forceVision) {
|
|
951
|
+
const visionAwareRoutes = [DEFAULT_ROUTE, 'thinking'];
|
|
952
|
+
for (const routeName of visionAwareRoutes) {
|
|
953
|
+
if (this.routeHasTargets(this.routing[routeName]) && !baseList.includes(routeName)) {
|
|
954
|
+
baseList.push(routeName);
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
let ordered = this.sortByPriority(baseList);
|
|
959
|
+
if (features.hasImageAttachment && !forceVision) {
|
|
960
|
+
ordered = this.reorderForInlineVision(ordered);
|
|
961
|
+
}
|
|
355
962
|
const deduped = [];
|
|
356
963
|
for (const routeName of ordered) {
|
|
357
964
|
if (routeName && !deduped.includes(routeName)) {
|
|
@@ -367,6 +974,53 @@ export class VirtualRouterEngine {
|
|
|
367
974
|
}
|
|
368
975
|
return filtered.length ? filtered : [DEFAULT_ROUTE];
|
|
369
976
|
}
|
|
977
|
+
reorderForInlineVision(routeNames) {
|
|
978
|
+
const unique = Array.from(new Set(routeNames.filter(Boolean)));
|
|
979
|
+
if (!unique.length) {
|
|
980
|
+
return unique;
|
|
981
|
+
}
|
|
982
|
+
// 仅当 default/thinking 中存在 Responses/Gemini 提供方时,才将其提前作为「一次完成」优先级。
|
|
983
|
+
const inlinePreferred = [];
|
|
984
|
+
const inlineRoutes = [DEFAULT_ROUTE, 'thinking'];
|
|
985
|
+
for (const routeName of inlineRoutes) {
|
|
986
|
+
if (this.routeSupportsInlineVision(routeName) && !inlinePreferred.includes(routeName)) {
|
|
987
|
+
inlinePreferred.push(routeName);
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
if (!inlinePreferred.length) {
|
|
991
|
+
return unique;
|
|
992
|
+
}
|
|
993
|
+
const remaining = [];
|
|
994
|
+
for (const routeName of unique) {
|
|
995
|
+
if (!inlinePreferred.includes(routeName)) {
|
|
996
|
+
remaining.push(routeName);
|
|
997
|
+
}
|
|
998
|
+
}
|
|
999
|
+
return [...inlinePreferred, ...remaining];
|
|
1000
|
+
}
|
|
1001
|
+
routeSupportsInlineVision(routeName) {
|
|
1002
|
+
const pools = this.routing[routeName];
|
|
1003
|
+
if (!Array.isArray(pools)) {
|
|
1004
|
+
return false;
|
|
1005
|
+
}
|
|
1006
|
+
for (const pool of pools) {
|
|
1007
|
+
if (!Array.isArray(pool.targets)) {
|
|
1008
|
+
continue;
|
|
1009
|
+
}
|
|
1010
|
+
for (const providerKey of pool.targets) {
|
|
1011
|
+
try {
|
|
1012
|
+
const profile = this.providerRegistry.get(providerKey);
|
|
1013
|
+
if (profile.providerType === 'responses' || profile.providerType === 'gemini') {
|
|
1014
|
+
return true;
|
|
1015
|
+
}
|
|
1016
|
+
}
|
|
1017
|
+
catch {
|
|
1018
|
+
// ignore unknown provider keys during capability probing
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
1022
|
+
return false;
|
|
1023
|
+
}
|
|
370
1024
|
sortByPriority(routeNames) {
|
|
371
1025
|
return [...routeNames].sort((a, b) => this.routeWeight(a) - this.routeWeight(b));
|
|
372
1026
|
}
|
|
@@ -374,6 +1028,13 @@ export class VirtualRouterEngine {
|
|
|
374
1028
|
const idx = ROUTE_PRIORITY.indexOf(routeName);
|
|
375
1029
|
return idx >= 0 ? idx : ROUTE_PRIORITY.length;
|
|
376
1030
|
}
|
|
1031
|
+
routeHasForceFlag(routeName) {
|
|
1032
|
+
const pools = this.routing[routeName];
|
|
1033
|
+
if (!Array.isArray(pools)) {
|
|
1034
|
+
return false;
|
|
1035
|
+
}
|
|
1036
|
+
return pools.some((pool) => pool.force);
|
|
1037
|
+
}
|
|
377
1038
|
routeHasTargets(pools) {
|
|
378
1039
|
if (!Array.isArray(pools)) {
|
|
379
1040
|
return false;
|
|
@@ -420,24 +1081,37 @@ export class VirtualRouterEngine {
|
|
|
420
1081
|
}
|
|
421
1082
|
return flattened;
|
|
422
1083
|
}
|
|
423
|
-
buildHitReason(routeUsed, providerKey, classification, features) {
|
|
1084
|
+
buildHitReason(routeUsed, providerKey, classification, features, mode) {
|
|
424
1085
|
const reasoning = classification.reasoning || '';
|
|
425
|
-
|
|
1086
|
+
let primary = reasoning.split('|')[0] || '';
|
|
426
1087
|
const commandDetail = features.lastAssistantToolLabel;
|
|
1088
|
+
const isStickyMode = mode === 'sticky';
|
|
1089
|
+
if (isStickyMode &&
|
|
1090
|
+
(routeUsed === 'tools' || routeUsed === 'thinking' || routeUsed === 'coding')) {
|
|
1091
|
+
// sticky 模式下不再把 tools/thinking/coding 作为主标签,统一折叠为 sticky,
|
|
1092
|
+
// 避免日志中出现 "tools:last-tool-*" 这类误导性前缀。
|
|
1093
|
+
primary = '';
|
|
1094
|
+
}
|
|
427
1095
|
const base = (() => {
|
|
428
1096
|
if (routeUsed === 'tools') {
|
|
429
|
-
|
|
1097
|
+
const label = isStickyMode ? 'sticky' : 'tools';
|
|
1098
|
+
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
430
1099
|
}
|
|
431
1100
|
if (routeUsed === 'thinking') {
|
|
432
|
-
|
|
1101
|
+
const label = isStickyMode ? 'sticky' : 'thinking';
|
|
1102
|
+
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
433
1103
|
}
|
|
434
1104
|
if (routeUsed === 'coding') {
|
|
435
|
-
|
|
1105
|
+
const label = isStickyMode ? 'sticky' : 'coding';
|
|
1106
|
+
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
436
1107
|
}
|
|
437
|
-
if (routeUsed === '
|
|
438
|
-
return this.decorateWithDetail(primary ||
|
|
1108
|
+
if (routeUsed === 'web_search' || routeUsed === 'search') {
|
|
1109
|
+
return this.decorateWithDetail(primary || routeUsed, primary, commandDetail);
|
|
439
1110
|
}
|
|
440
1111
|
if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
|
|
1112
|
+
if (isStickyMode) {
|
|
1113
|
+
return primary || 'sticky:default';
|
|
1114
|
+
}
|
|
441
1115
|
return primary || 'fallback:default';
|
|
442
1116
|
}
|
|
443
1117
|
if (primary) {
|
|
@@ -451,6 +1125,28 @@ export class VirtualRouterEngine {
|
|
|
451
1125
|
}
|
|
452
1126
|
return base;
|
|
453
1127
|
}
|
|
1128
|
+
isRoutingStateEmpty(state) {
|
|
1129
|
+
if (!state) {
|
|
1130
|
+
return true;
|
|
1131
|
+
}
|
|
1132
|
+
const noForced = !state.forcedTarget;
|
|
1133
|
+
const noSticky = !state.stickyTarget;
|
|
1134
|
+
const noAllowed = state.allowedProviders.size === 0;
|
|
1135
|
+
const noDisabledProviders = state.disabledProviders.size === 0;
|
|
1136
|
+
const noDisabledKeys = state.disabledKeys.size === 0;
|
|
1137
|
+
const noDisabledModels = state.disabledModels.size === 0;
|
|
1138
|
+
return noForced && noSticky && noAllowed && noDisabledProviders && noDisabledKeys && noDisabledModels;
|
|
1139
|
+
}
|
|
1140
|
+
persistRoutingInstructionState(key, state) {
|
|
1141
|
+
if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
|
|
1142
|
+
return;
|
|
1143
|
+
}
|
|
1144
|
+
if (this.isRoutingStateEmpty(state)) {
|
|
1145
|
+
saveRoutingInstructionStateAsync(key, null);
|
|
1146
|
+
return;
|
|
1147
|
+
}
|
|
1148
|
+
saveRoutingInstructionStateAsync(key, state);
|
|
1149
|
+
}
|
|
454
1150
|
decorateWithDetail(baseLabel, primaryReason, detail) {
|
|
455
1151
|
const normalizedDetail = detail && detail.trim();
|
|
456
1152
|
if (!normalizedDetail) {
|
|
@@ -461,7 +1157,7 @@ export class VirtualRouterEngine {
|
|
|
461
1157
|
}
|
|
462
1158
|
return `${baseLabel}(${normalizedDetail})`;
|
|
463
1159
|
}
|
|
464
|
-
formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason) {
|
|
1160
|
+
formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope) {
|
|
465
1161
|
try {
|
|
466
1162
|
// 生成本地时间戳
|
|
467
1163
|
const now = new Date();
|
|
@@ -472,20 +1168,23 @@ export class VirtualRouterEngine {
|
|
|
472
1168
|
const prefixColor = '\x1b[38;5;208m';
|
|
473
1169
|
const reset = '\x1b[0m';
|
|
474
1170
|
const timeColor = '\x1b[90m'; // 灰色
|
|
1171
|
+
const stickyColor = '\x1b[33m'; // 黄色
|
|
475
1172
|
const routeColor = this.resolveRouteColor(routeName);
|
|
476
1173
|
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
477
1174
|
const timeLabel = `${timeColor}${timestamp}${reset}`;
|
|
478
1175
|
const { providerLabel, resolvedModel } = this.describeTargetProvider(providerKey, modelId);
|
|
479
1176
|
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
480
1177
|
const targetLabel = `${routeLabel} -> ${providerLabel}${resolvedModel ? '.' + resolvedModel : ''}`;
|
|
1178
|
+
const stickyLabel = stickyScope ? ` ${stickyColor}[sticky:${stickyScope}]${reset}` : '';
|
|
481
1179
|
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
482
|
-
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${reasonLabel}${reset}`;
|
|
1180
|
+
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${reset}`;
|
|
483
1181
|
}
|
|
484
1182
|
catch {
|
|
485
1183
|
const now = new Date();
|
|
486
1184
|
const timestamp = now.toLocaleTimeString('zh-CN', { hour12: false });
|
|
487
1185
|
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
488
|
-
|
|
1186
|
+
const stickyLabel = stickyScope ? ` [sticky:${stickyScope}]` : '';
|
|
1187
|
+
return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}`;
|
|
489
1188
|
}
|
|
490
1189
|
}
|
|
491
1190
|
resolveRouteColor(routeName) {
|
|
@@ -495,7 +1194,8 @@ export class VirtualRouterEngine {
|
|
|
495
1194
|
thinking: '\x1b[34m',
|
|
496
1195
|
coding: '\x1b[35m',
|
|
497
1196
|
longcontext: '\x1b[38;5;141m',
|
|
498
|
-
|
|
1197
|
+
web_search: '\x1b[32m',
|
|
1198
|
+
search: '\x1b[38;5;34m',
|
|
499
1199
|
vision: '\x1b[38;5;207m',
|
|
500
1200
|
background: '\x1b[90m'
|
|
501
1201
|
};
|