@jsonstudio/llms 0.6.467 → 0.6.567
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/compat/actions/claude-thinking-tools.d.ts +15 -0
- package/dist/conversion/compat/actions/claude-thinking-tools.js +72 -0
- package/dist/conversion/compat/profiles/chat-gemini.json +1 -1
- package/dist/conversion/compat/profiles/responses-output2choices-test.json +12 -0
- package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +15 -0
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +15 -0
- package/dist/conversion/hub/process/chat-process.js +44 -17
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +8 -0
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +13 -8
- package/dist/conversion/hub/tool-session-compat.d.ts +26 -0
- package/dist/conversion/hub/tool-session-compat.js +299 -0
- package/dist/conversion/responses/responses-openai-bridge.d.ts +0 -1
- package/dist/conversion/responses/responses-openai-bridge.js +0 -71
- package/dist/conversion/shared/gemini-tool-utils.js +8 -0
- package/dist/conversion/shared/responses-output-builder.js +6 -68
- package/dist/conversion/shared/tool-governor.js +75 -4
- package/dist/conversion/shared/tool-mapping.js +14 -8
- package/dist/filters/special/request-toolcalls-stringify.js +5 -55
- package/dist/filters/special/request-tools-normalize.js +0 -19
- package/dist/guidance/index.js +25 -9
- package/dist/router/virtual-router/engine-health.d.ts +11 -0
- package/dist/router/virtual-router/engine-health.js +210 -0
- package/dist/router/virtual-router/engine-logging.d.ts +19 -0
- package/dist/router/virtual-router/engine-logging.js +165 -0
- package/dist/router/virtual-router/engine-selection.d.ts +32 -0
- package/dist/router/virtual-router/engine-selection.js +649 -0
- package/dist/router/virtual-router/engine.d.ts +4 -13
- package/dist/router/virtual-router/engine.js +64 -517
- package/dist/router/virtual-router/health-manager.d.ts +23 -0
- package/dist/router/virtual-router/health-manager.js +14 -0
- package/dist/router/virtual-router/message-utils.js +22 -0
- package/dist/router/virtual-router/routing-instructions.d.ts +6 -1
- package/dist/router/virtual-router/routing-instructions.js +129 -3
- package/dist/router/virtual-router/types.d.ts +6 -0
- package/dist/servertool/handlers/gemini-empty-reply-continue.d.ts +1 -0
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +120 -0
- package/dist/servertool/handlers/stop-message-auto.d.ts +1 -0
- package/dist/servertool/handlers/stop-message-auto.js +147 -0
- package/dist/servertool/handlers/vision.js +105 -7
- package/dist/servertool/server-side-tools.d.ts +2 -0
- package/dist/servertool/server-side-tools.js +2 -0
- package/dist/tools/tool-registry.js +195 -4
- package/package.json +1 -1
|
@@ -4,14 +4,18 @@ import { RouteLoadBalancer } from './load-balancer.js';
|
|
|
4
4
|
import { RoutingClassifier } from './classifier.js';
|
|
5
5
|
import { buildRoutingFeatures } from './features.js';
|
|
6
6
|
import { ContextAdvisor } from './context-advisor.js';
|
|
7
|
-
import {
|
|
7
|
+
import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
8
8
|
import { getStatsCenter } from '../../telemetry/stats-center.js';
|
|
9
9
|
import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
|
|
10
10
|
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
11
|
+
import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
12
|
+
import { selectProviderImpl } from './engine-selection.js';
|
|
13
|
+
import { applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
11
14
|
export class VirtualRouterEngine {
|
|
12
15
|
routing = {};
|
|
13
16
|
providerRegistry = new ProviderRegistry();
|
|
14
17
|
healthManager = new ProviderHealthManager();
|
|
18
|
+
providerCooldowns = new Map();
|
|
15
19
|
loadBalancer = new RouteLoadBalancer();
|
|
16
20
|
classifier = new RoutingClassifier({});
|
|
17
21
|
contextAdvisor = new ContextAdvisor();
|
|
@@ -76,7 +80,6 @@ export class VirtualRouterEngine {
|
|
|
76
80
|
...(this.webSearchForce ? { forceWebSearch: true } : {}),
|
|
77
81
|
...(forceVision ? { forceVision: true } : {})
|
|
78
82
|
};
|
|
79
|
-
this.healthManager.recordSuccess(selection.providerKey);
|
|
80
83
|
this.incrementRouteStat(selection.routeUsed, selection.providerKey);
|
|
81
84
|
try {
|
|
82
85
|
this.statsCenter.recordVirtualRouterHit({
|
|
@@ -92,10 +95,10 @@ export class VirtualRouterEngine {
|
|
|
92
95
|
catch {
|
|
93
96
|
// stats must never break routing
|
|
94
97
|
}
|
|
95
|
-
const hitReason =
|
|
98
|
+
const hitReason = buildHitReason(selection.routeUsed, selection.providerKey, classification, features, routingMode, { providerRegistry: this.providerRegistry, contextRouting: this.contextRouting });
|
|
96
99
|
const stickyScope = routingMode !== 'none' ? this.resolveSessionScope(metadata) : undefined;
|
|
97
100
|
const routeForLog = routingMode === 'sticky' ? 'sticky' : selection.routeUsed;
|
|
98
|
-
const formatted =
|
|
101
|
+
const formatted = formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope);
|
|
99
102
|
if (formatted) {
|
|
100
103
|
this.debug?.log?.(formatted);
|
|
101
104
|
}
|
|
@@ -126,21 +129,11 @@ export class VirtualRouterEngine {
|
|
|
126
129
|
};
|
|
127
130
|
}
|
|
128
131
|
handleProviderFailure(event) {
|
|
129
|
-
|
|
130
|
-
return;
|
|
131
|
-
}
|
|
132
|
-
if (event.affectsHealth === false) {
|
|
133
|
-
return;
|
|
134
|
-
}
|
|
135
|
-
if (event.fatal) {
|
|
136
|
-
this.healthManager.tripProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
|
|
137
|
-
}
|
|
138
|
-
else {
|
|
139
|
-
this.healthManager.recordFailure(event.providerKey, event.reason);
|
|
140
|
-
}
|
|
132
|
+
handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
|
|
141
133
|
}
|
|
142
134
|
handleProviderError(event) {
|
|
143
|
-
|
|
135
|
+
applySeriesCooldownImpl(event, this.providerRegistry, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
|
|
136
|
+
const derived = mapProviderErrorImpl(event, this.providerHealthConfig());
|
|
144
137
|
if (!derived) {
|
|
145
138
|
return;
|
|
146
139
|
}
|
|
@@ -205,200 +198,15 @@ export class VirtualRouterEngine {
|
|
|
205
198
|
}
|
|
206
199
|
selectProvider(requestedRoute, metadata, classification, features, routingState) {
|
|
207
200
|
const activeState = routingState || this.getRoutingInstructionState(this.resolveStickyKey(metadata));
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
:
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
poolId: 'forced'
|
|
218
|
-
};
|
|
219
|
-
}
|
|
220
|
-
// sticky 语义:
|
|
221
|
-
// - 显式绑定到具体 key(alias/index)时,直接使用该 key;
|
|
222
|
-
// - provider / model 级别 sticky 解析为一组 providerKey;
|
|
223
|
-
// 在 sticky 这组 key「可用」之前,不会回落到 default 中的非 sticky provider。
|
|
224
|
-
let stickyResolution = null;
|
|
225
|
-
let stickyKeySet;
|
|
226
|
-
if (!forcedResolution && activeState.stickyTarget) {
|
|
227
|
-
stickyResolution = this.resolveInstructionTarget(activeState.stickyTarget);
|
|
228
|
-
if (stickyResolution && stickyResolution.mode === 'exact') {
|
|
229
|
-
const stickyKey = stickyResolution.keys[0];
|
|
230
|
-
// 已经被健康管理标记为不可用的 key 不能被 sticky 语法“复活”
|
|
231
|
-
if (this.healthManager.isAvailable(stickyKey)) {
|
|
232
|
-
return {
|
|
233
|
-
providerKey: stickyKey,
|
|
234
|
-
routeUsed: requestedRoute,
|
|
235
|
-
pool: [stickyKey],
|
|
236
|
-
poolId: 'sticky'
|
|
237
|
-
};
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
if (stickyResolution && stickyResolution.mode === 'filter' && stickyResolution.keys.length > 0) {
|
|
241
|
-
// 仅保留当前仍可用的 key;已被熔断/拉黑的 key 不会被 sticky 语法重新加入池子
|
|
242
|
-
const liveKeys = stickyResolution.keys.filter((key) => this.healthManager.isAvailable(key));
|
|
243
|
-
if (liveKeys.length > 0) {
|
|
244
|
-
stickyKeySet = new Set(liveKeys);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
const allowAliasRotation = Boolean(activeState.stickyTarget) &&
|
|
249
|
-
!activeState.stickyTarget?.keyAlias &&
|
|
250
|
-
activeState.stickyTarget?.keyIndex === undefined;
|
|
251
|
-
// force(filter) 优先级高于 sticky:显式 force 视为覆盖 sticky 约束。
|
|
252
|
-
if (forcedResolution && forcedResolution.mode === 'filter') {
|
|
253
|
-
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
254
|
-
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
255
|
-
if (filteredCandidates.length === 0) {
|
|
256
|
-
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
257
|
-
requestedRoute,
|
|
258
|
-
allowedProviders: Array.from(activeState.allowedProviders),
|
|
259
|
-
disabledProviders: Array.from(activeState.disabledProviders)
|
|
260
|
-
});
|
|
261
|
-
}
|
|
262
|
-
const forcedKeySet = new Set(forcedResolution.keys);
|
|
263
|
-
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, forcedKeySet, allowAliasRotation);
|
|
264
|
-
}
|
|
265
|
-
if (stickyKeySet && stickyKeySet.size > 0) {
|
|
266
|
-
const stickySelection = this.selectFromStickyPool(stickyKeySet, metadata, features, activeState, allowAliasRotation);
|
|
267
|
-
if (stickySelection) {
|
|
268
|
-
return stickySelection;
|
|
269
|
-
}
|
|
270
|
-
// sticky 池在本次请求中完全不可用(全部被黑名单/健康状态过滤):视为 sticky 池暂时失效,
|
|
271
|
-
// 本次回落到普通路由选择,但保留 stickyTarget,等待后续恢复。
|
|
272
|
-
}
|
|
273
|
-
// 无 sticky,或 sticky 池在本次请求中全部不可用(无可用 key):按原始分类结果执行正常路由选择。
|
|
274
|
-
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
275
|
-
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
276
|
-
if (filteredCandidates.length === 0) {
|
|
277
|
-
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
278
|
-
requestedRoute,
|
|
279
|
-
allowedProviders: Array.from(activeState.allowedProviders),
|
|
280
|
-
disabledProviders: Array.from(activeState.disabledProviders)
|
|
281
|
-
});
|
|
282
|
-
}
|
|
283
|
-
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, undefined, allowAliasRotation);
|
|
284
|
-
}
|
|
285
|
-
trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation) {
|
|
286
|
-
let targets = Array.isArray(tier.targets) ? tier.targets : [];
|
|
287
|
-
if (allowedProviders && allowedProviders.size > 0) {
|
|
288
|
-
targets = targets.filter(key => {
|
|
289
|
-
const providerId = this.extractProviderId(key);
|
|
290
|
-
return providerId && allowedProviders.has(providerId);
|
|
291
|
-
});
|
|
292
|
-
}
|
|
293
|
-
if (disabledProviders && disabledProviders.size > 0) {
|
|
294
|
-
targets = targets.filter((key) => {
|
|
295
|
-
const providerId = this.extractProviderId(key);
|
|
296
|
-
return providerId && !disabledProviders.has(providerId);
|
|
297
|
-
});
|
|
298
|
-
}
|
|
299
|
-
if (disabledKeysMap && disabledKeysMap.size > 0) {
|
|
300
|
-
targets = targets.filter((key) => {
|
|
301
|
-
const providerId = this.extractProviderId(key);
|
|
302
|
-
if (!providerId)
|
|
303
|
-
return true;
|
|
304
|
-
const disabledKeys = disabledKeysMap.get(providerId);
|
|
305
|
-
if (!disabledKeys || disabledKeys.size === 0)
|
|
306
|
-
return true;
|
|
307
|
-
const keyAlias = this.extractKeyAlias(key);
|
|
308
|
-
const keyIndex = this.extractKeyIndex(key);
|
|
309
|
-
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
310
|
-
return false;
|
|
311
|
-
}
|
|
312
|
-
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
313
|
-
return false;
|
|
314
|
-
}
|
|
315
|
-
return true;
|
|
316
|
-
});
|
|
317
|
-
}
|
|
318
|
-
if (disabledModels && disabledModels.size > 0) {
|
|
319
|
-
targets = targets.filter((key) => {
|
|
320
|
-
const providerId = this.extractProviderId(key);
|
|
321
|
-
if (!providerId) {
|
|
322
|
-
return true;
|
|
323
|
-
}
|
|
324
|
-
const disabled = disabledModels.get(providerId);
|
|
325
|
-
if (!disabled || disabled.size === 0) {
|
|
326
|
-
return true;
|
|
327
|
-
}
|
|
328
|
-
const modelId = this.getProviderModelId(key);
|
|
329
|
-
if (!modelId) {
|
|
330
|
-
return true;
|
|
331
|
-
}
|
|
332
|
-
return !disabled.has(modelId);
|
|
333
|
-
});
|
|
334
|
-
}
|
|
335
|
-
if (requiredProviderKeys && requiredProviderKeys.size > 0) {
|
|
336
|
-
targets = targets.filter((key) => requiredProviderKeys.has(key));
|
|
337
|
-
}
|
|
338
|
-
const serverToolRequired = features.metadata?.serverToolRequired === true;
|
|
339
|
-
if (serverToolRequired) {
|
|
340
|
-
const filtered = [];
|
|
341
|
-
for (const key of targets) {
|
|
342
|
-
try {
|
|
343
|
-
const profile = this.providerRegistry.get(key);
|
|
344
|
-
if (!profile.serverToolsDisabled) {
|
|
345
|
-
filtered.push(key);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
catch {
|
|
349
|
-
// ignore unknown providers when filtering for servertools
|
|
350
|
-
}
|
|
351
|
-
}
|
|
352
|
-
targets = filtered;
|
|
353
|
-
}
|
|
354
|
-
// 当当前请求包含图片且路由为 default/thinking 时,优先在该路由池内选择
|
|
355
|
-
// Responses/Gemini 类型的 Provider,以便一次完成多模态推理;如果不存在则回退到原始列表。
|
|
356
|
-
if (features.hasImageAttachment && (routeName === DEFAULT_ROUTE || routeName === 'thinking')) {
|
|
357
|
-
const prioritized = [];
|
|
358
|
-
const fallthrough = [];
|
|
359
|
-
for (const key of targets) {
|
|
360
|
-
try {
|
|
361
|
-
const profile = this.providerRegistry.get(key);
|
|
362
|
-
if (profile.providerType === 'responses') {
|
|
363
|
-
prioritized.push(key);
|
|
364
|
-
}
|
|
365
|
-
else if (profile.providerType === 'gemini') {
|
|
366
|
-
prioritized.push(key);
|
|
367
|
-
}
|
|
368
|
-
else {
|
|
369
|
-
fallthrough.push(key);
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
catch {
|
|
373
|
-
fallthrough.push(key);
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
if (prioritized.length) {
|
|
377
|
-
targets = prioritized;
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
if (!targets.length) {
|
|
381
|
-
return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
|
|
382
|
-
}
|
|
383
|
-
const contextResult = this.contextAdvisor.classify(targets, estimatedTokens, (key) => this.providerRegistry.get(key));
|
|
384
|
-
const prioritizedPools = this.buildContextCandidatePools(contextResult);
|
|
385
|
-
for (const candidatePool of prioritizedPools) {
|
|
386
|
-
const providerKey = this.loadBalancer.select({
|
|
387
|
-
routeName: `${routeName}:${tier.id}`,
|
|
388
|
-
candidates: candidatePool,
|
|
389
|
-
stickyKey: allowAliasRotation ? undefined : stickyKey,
|
|
390
|
-
availabilityCheck: (key) => this.healthManager.isAvailable(key)
|
|
391
|
-
});
|
|
392
|
-
if (providerKey) {
|
|
393
|
-
return { providerKey, poolTargets: tier.targets, tierId: tier.id };
|
|
394
|
-
}
|
|
395
|
-
}
|
|
396
|
-
return {
|
|
397
|
-
providerKey: null,
|
|
398
|
-
poolTargets: tier.targets,
|
|
399
|
-
tierId: tier.id,
|
|
400
|
-
failureHint: this.describeAttempt(routeName, tier.id, contextResult)
|
|
401
|
-
};
|
|
201
|
+
return selectProviderImpl(requestedRoute, metadata, classification, features, activeState, {
|
|
202
|
+
routing: this.routing,
|
|
203
|
+
providerRegistry: this.providerRegistry,
|
|
204
|
+
healthManager: this.healthManager,
|
|
205
|
+
contextAdvisor: this.contextAdvisor,
|
|
206
|
+
loadBalancer: this.loadBalancer,
|
|
207
|
+
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
208
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m)
|
|
209
|
+
}, { routingState });
|
|
402
210
|
}
|
|
403
211
|
incrementRouteStat(routeName, providerKey) {
|
|
404
212
|
if (!this.routeStats.has(routeName)) {
|
|
@@ -412,35 +220,6 @@ export class VirtualRouterEngine {
|
|
|
412
220
|
providerHealthConfig() {
|
|
413
221
|
return this.healthManager.getConfig();
|
|
414
222
|
}
|
|
415
|
-
initializeRouteQueue(candidates) {
|
|
416
|
-
return Array.from(new Set(candidates));
|
|
417
|
-
}
|
|
418
|
-
buildContextCandidatePools(result) {
|
|
419
|
-
const ordered = [];
|
|
420
|
-
if (result.safe.length) {
|
|
421
|
-
ordered.push(result.safe);
|
|
422
|
-
// 如果存在安全候选,直接放弃当前处于警戒阈值的模型
|
|
423
|
-
return ordered;
|
|
424
|
-
}
|
|
425
|
-
if (result.risky.length) {
|
|
426
|
-
ordered.push(result.risky);
|
|
427
|
-
}
|
|
428
|
-
// ratio >= 1 视为上下文溢出,直接标记为不可用
|
|
429
|
-
return ordered;
|
|
430
|
-
}
|
|
431
|
-
describeAttempt(routeName, poolId, result) {
|
|
432
|
-
const prefix = poolId ? `${routeName}:${poolId}` : routeName;
|
|
433
|
-
if (result.safe.length > 0) {
|
|
434
|
-
return `${prefix}:health`;
|
|
435
|
-
}
|
|
436
|
-
if (result.risky.length > 0) {
|
|
437
|
-
return `${prefix}:context_risky`;
|
|
438
|
-
}
|
|
439
|
-
if (result.overflow.length > 0) {
|
|
440
|
-
return `${prefix}:max_context_window`;
|
|
441
|
-
}
|
|
442
|
-
return prefix;
|
|
443
|
-
}
|
|
444
223
|
resolveStickyKey(metadata) {
|
|
445
224
|
const sessionScope = this.resolveSessionScope(metadata);
|
|
446
225
|
if (sessionScope) {
|
|
@@ -480,7 +259,10 @@ export class VirtualRouterEngine {
|
|
|
480
259
|
allowedProviders: new Set(),
|
|
481
260
|
disabledProviders: new Set(),
|
|
482
261
|
disabledKeys: new Map(),
|
|
483
|
-
disabledModels: new Map()
|
|
262
|
+
disabledModels: new Map(),
|
|
263
|
+
stopMessageText: undefined,
|
|
264
|
+
stopMessageMaxRepeats: undefined,
|
|
265
|
+
stopMessageUsed: undefined
|
|
484
266
|
};
|
|
485
267
|
}
|
|
486
268
|
this.routingInstructionState.set(key, initial);
|
|
@@ -635,45 +417,16 @@ export class VirtualRouterEngine {
|
|
|
635
417
|
});
|
|
636
418
|
}
|
|
637
419
|
selectFromCandidates(routes, metadata, classification, features, state, requiredProviderKeys, allowAliasRotation) {
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
const routeQueue = this.initializeRouteQueue(routes);
|
|
649
|
-
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
650
|
-
? Math.max(0, features.estimatedTokens)
|
|
651
|
-
: 0;
|
|
652
|
-
while (routeQueue.length) {
|
|
653
|
-
const routeName = routeQueue.shift();
|
|
654
|
-
if (visitedRoutes.has(routeName)) {
|
|
655
|
-
continue;
|
|
656
|
-
}
|
|
657
|
-
const routePools = this.routing[routeName];
|
|
658
|
-
if (!this.routeHasTargets(routePools)) {
|
|
659
|
-
visitedRoutes.add(routeName);
|
|
660
|
-
attempted.push(`${routeName}:empty`);
|
|
661
|
-
continue;
|
|
662
|
-
}
|
|
663
|
-
visitedRoutes.add(routeName);
|
|
664
|
-
const orderedPools = this.sortRoutePools(routePools);
|
|
665
|
-
for (const poolTier of orderedPools) {
|
|
666
|
-
const { providerKey, poolTargets, tierId, failureHint } = this.trySelectFromTier(routeName, poolTier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation);
|
|
667
|
-
if (providerKey) {
|
|
668
|
-
return { providerKey, routeUsed: routeName, pool: poolTargets, poolId: tierId };
|
|
669
|
-
}
|
|
670
|
-
if (failureHint) {
|
|
671
|
-
attempted.push(failureHint);
|
|
672
|
-
}
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
676
|
-
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
420
|
+
// legacy helper kept for backward compatibility; selection logic moved to engine-selection.ts
|
|
421
|
+
return selectProviderImpl(this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE), metadata, classification, features, state, {
|
|
422
|
+
routing: this.routing,
|
|
423
|
+
providerRegistry: this.providerRegistry,
|
|
424
|
+
healthManager: this.healthManager,
|
|
425
|
+
contextAdvisor: this.contextAdvisor,
|
|
426
|
+
loadBalancer: this.loadBalancer,
|
|
427
|
+
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
428
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m)
|
|
429
|
+
}, { routingState: state });
|
|
677
430
|
}
|
|
678
431
|
extractProviderId(providerKey) {
|
|
679
432
|
const firstDot = providerKey.indexOf('.');
|
|
@@ -749,7 +502,7 @@ export class VirtualRouterEngine {
|
|
|
749
502
|
]));
|
|
750
503
|
const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
|
|
751
504
|
// 初始候选集合:sticky 池中的所有 key
|
|
752
|
-
let candidates = Array.from(stickyKeySet);
|
|
505
|
+
let candidates = Array.from(stickyKeySet).filter((key) => !this.isProviderCoolingDown(key));
|
|
753
506
|
// 应用 provider 白名单 / 黑名单
|
|
754
507
|
if (allowedProviders.size > 0) {
|
|
755
508
|
candidates = candidates.filter((key) => {
|
|
@@ -798,21 +551,8 @@ export class VirtualRouterEngine {
|
|
|
798
551
|
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
799
552
|
? Math.max(0, features.estimatedTokens)
|
|
800
553
|
: 0;
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
targets: candidates,
|
|
804
|
-
priority: 0
|
|
805
|
-
};
|
|
806
|
-
const { providerKey, poolTargets, tierId } = this.trySelectFromTier('sticky', tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, stickyKeySet, allowAliasRotation);
|
|
807
|
-
if (!providerKey) {
|
|
808
|
-
return null;
|
|
809
|
-
}
|
|
810
|
-
return {
|
|
811
|
-
providerKey,
|
|
812
|
-
routeUsed: 'sticky',
|
|
813
|
-
pool: poolTargets,
|
|
814
|
-
poolId: tierId
|
|
815
|
-
};
|
|
554
|
+
// delegate to selection module
|
|
555
|
+
return null;
|
|
816
556
|
}
|
|
817
557
|
extractKeyAlias(providerKey) {
|
|
818
558
|
const parts = providerKey.split('.');
|
|
@@ -851,87 +591,19 @@ export class VirtualRouterEngine {
|
|
|
851
591
|
}
|
|
852
592
|
return null;
|
|
853
593
|
}
|
|
854
|
-
mapProviderError
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
if (!event || !event.runtime) {
|
|
859
|
-
return null;
|
|
594
|
+
// mapProviderError/applySeriesCooldown moved to engine-health.ts
|
|
595
|
+
extractExcludedProviderKeySet(metadata) {
|
|
596
|
+
if (!metadata) {
|
|
597
|
+
return new Set();
|
|
860
598
|
}
|
|
861
|
-
const
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
if (!providerKey) {
|
|
865
|
-
return null;
|
|
599
|
+
const raw = metadata.excludedProviderKeys;
|
|
600
|
+
if (!Array.isArray(raw) || raw.length === 0) {
|
|
601
|
+
return new Set();
|
|
866
602
|
}
|
|
867
|
-
const
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
const recoverable = event.recoverable === true;
|
|
872
|
-
// 默认策略:只有显式可恢复的错误才视为非致命;其余一律按致命处理。
|
|
873
|
-
// 注意:provider 层已经对 429 做了「连续 4 次升级为不可恢复」的判断,这里不再把所有 429 强行当作可恢复。
|
|
874
|
-
let fatal = !recoverable;
|
|
875
|
-
let reason = this.deriveReason(code, stage, statusCode);
|
|
876
|
-
let cooldownOverrideMs;
|
|
877
|
-
// 401 / 402 / 500 / 524 以及所有未被标记为可恢复的错误一律视为不可恢复
|
|
878
|
-
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH')) {
|
|
879
|
-
fatal = true;
|
|
880
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
881
|
-
reason = 'auth';
|
|
882
|
-
}
|
|
883
|
-
else if (statusCode === 429 && !recoverable) {
|
|
884
|
-
// 连续 429 已在 provider 层被升级为不可恢复:这里按致命限流处理(长冷却,等同熔断)
|
|
885
|
-
fatal = true;
|
|
886
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
887
|
-
reason = 'rate_limit';
|
|
888
|
-
}
|
|
889
|
-
else if (statusCode && statusCode >= 500) {
|
|
890
|
-
fatal = true;
|
|
891
|
-
cooldownOverrideMs = Math.max(5 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 5 * 60_000);
|
|
892
|
-
reason = 'upstream_error';
|
|
893
|
-
}
|
|
894
|
-
else if (stage.includes('compat')) {
|
|
895
|
-
fatal = true;
|
|
896
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
897
|
-
reason = 'compatibility';
|
|
898
|
-
}
|
|
899
|
-
return {
|
|
900
|
-
providerKey,
|
|
901
|
-
routeName,
|
|
902
|
-
reason,
|
|
903
|
-
fatal,
|
|
904
|
-
statusCode,
|
|
905
|
-
errorCode: code,
|
|
906
|
-
retryable: recoverable,
|
|
907
|
-
// 是否影响健康由 provider 层决定;这里仅在 event.affectsHealth !== false 时才计入健康状态
|
|
908
|
-
affectsHealth: event.affectsHealth !== false,
|
|
909
|
-
cooldownOverrideMs,
|
|
910
|
-
metadata: {
|
|
911
|
-
...event.runtime,
|
|
912
|
-
stage,
|
|
913
|
-
eventCode: code,
|
|
914
|
-
originalMessage: event.message,
|
|
915
|
-
statusCode
|
|
916
|
-
}
|
|
917
|
-
};
|
|
918
|
-
}
|
|
919
|
-
deriveReason(code, stage, statusCode) {
|
|
920
|
-
if (code.includes('RATE') || code.includes('429'))
|
|
921
|
-
return 'rate_limit';
|
|
922
|
-
if (code.includes('AUTH') || statusCode === 401 || statusCode === 403)
|
|
923
|
-
return 'auth';
|
|
924
|
-
if (stage.includes('compat'))
|
|
925
|
-
return 'compatibility';
|
|
926
|
-
if (code.includes('SSE'))
|
|
927
|
-
return 'sse';
|
|
928
|
-
if (code.includes('TIMEOUT') || statusCode === 408 || statusCode === 504)
|
|
929
|
-
return 'timeout';
|
|
930
|
-
if (statusCode && statusCode >= 500)
|
|
931
|
-
return 'upstream_error';
|
|
932
|
-
if (statusCode && statusCode >= 400)
|
|
933
|
-
return 'client_error';
|
|
934
|
-
return 'unknown';
|
|
603
|
+
const normalized = raw
|
|
604
|
+
.map((value) => (typeof value === 'string' ? value.trim() : ''))
|
|
605
|
+
.filter((value) => Boolean(value));
|
|
606
|
+
return new Set(normalized);
|
|
935
607
|
}
|
|
936
608
|
buildRouteCandidates(requestedRoute, classificationCandidates, features) {
|
|
937
609
|
const forceVision = this.routeHasForceFlag('vision');
|
|
@@ -1081,50 +753,6 @@ export class VirtualRouterEngine {
|
|
|
1081
753
|
}
|
|
1082
754
|
return flattened;
|
|
1083
755
|
}
|
|
1084
|
-
buildHitReason(routeUsed, providerKey, classification, features, mode) {
|
|
1085
|
-
const reasoning = classification.reasoning || '';
|
|
1086
|
-
let primary = reasoning.split('|')[0] || '';
|
|
1087
|
-
const commandDetail = features.lastAssistantToolLabel;
|
|
1088
|
-
const isStickyMode = mode === 'sticky';
|
|
1089
|
-
if (isStickyMode &&
|
|
1090
|
-
(routeUsed === 'tools' || routeUsed === 'thinking' || routeUsed === 'coding')) {
|
|
1091
|
-
// sticky 模式下不再把 tools/thinking/coding 作为主标签,统一折叠为 sticky,
|
|
1092
|
-
// 避免日志中出现 "tools:last-tool-*" 这类误导性前缀。
|
|
1093
|
-
primary = '';
|
|
1094
|
-
}
|
|
1095
|
-
const base = (() => {
|
|
1096
|
-
if (routeUsed === 'tools') {
|
|
1097
|
-
const label = isStickyMode ? 'sticky' : 'tools';
|
|
1098
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1099
|
-
}
|
|
1100
|
-
if (routeUsed === 'thinking') {
|
|
1101
|
-
const label = isStickyMode ? 'sticky' : 'thinking';
|
|
1102
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1103
|
-
}
|
|
1104
|
-
if (routeUsed === 'coding') {
|
|
1105
|
-
const label = isStickyMode ? 'sticky' : 'coding';
|
|
1106
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1107
|
-
}
|
|
1108
|
-
if (routeUsed === 'web_search' || routeUsed === 'search') {
|
|
1109
|
-
return this.decorateWithDetail(primary || routeUsed, primary, commandDetail);
|
|
1110
|
-
}
|
|
1111
|
-
if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
|
|
1112
|
-
if (isStickyMode) {
|
|
1113
|
-
return primary || 'sticky:default';
|
|
1114
|
-
}
|
|
1115
|
-
return primary || 'fallback:default';
|
|
1116
|
-
}
|
|
1117
|
-
if (primary) {
|
|
1118
|
-
return primary;
|
|
1119
|
-
}
|
|
1120
|
-
return routeUsed ? `route:${routeUsed}` : 'route:unknown';
|
|
1121
|
-
})();
|
|
1122
|
-
const contextDetail = this.describeContextUsage(providerKey, features.estimatedTokens);
|
|
1123
|
-
if (contextDetail) {
|
|
1124
|
-
return `${base}|context:${contextDetail}`;
|
|
1125
|
-
}
|
|
1126
|
-
return base;
|
|
1127
|
-
}
|
|
1128
756
|
isRoutingStateEmpty(state) {
|
|
1129
757
|
if (!state) {
|
|
1130
758
|
return true;
|
|
@@ -1147,109 +775,28 @@ export class VirtualRouterEngine {
|
|
|
1147
775
|
}
|
|
1148
776
|
saveRoutingInstructionStateAsync(key, state);
|
|
1149
777
|
}
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
return primaryReason || baseLabel;
|
|
1154
|
-
}
|
|
1155
|
-
if (primaryReason) {
|
|
1156
|
-
return `${primaryReason}(${normalizedDetail})`;
|
|
1157
|
-
}
|
|
1158
|
-
return `${baseLabel}(${normalizedDetail})`;
|
|
1159
|
-
}
|
|
1160
|
-
formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope) {
|
|
1161
|
-
try {
|
|
1162
|
-
// 生成本地时间戳
|
|
1163
|
-
const now = new Date();
|
|
1164
|
-
const hours = String(now.getHours()).padStart(2, '0');
|
|
1165
|
-
const minutes = String(now.getMinutes()).padStart(2, '0');
|
|
1166
|
-
const seconds = String(now.getSeconds()).padStart(2, '0');
|
|
1167
|
-
const timestamp = `${hours}:${minutes}:${seconds}`;
|
|
1168
|
-
const prefixColor = '\x1b[38;5;208m';
|
|
1169
|
-
const reset = '\x1b[0m';
|
|
1170
|
-
const timeColor = '\x1b[90m'; // 灰色
|
|
1171
|
-
const stickyColor = '\x1b[33m'; // 黄色
|
|
1172
|
-
const routeColor = this.resolveRouteColor(routeName);
|
|
1173
|
-
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
1174
|
-
const timeLabel = `${timeColor}${timestamp}${reset}`;
|
|
1175
|
-
const { providerLabel, resolvedModel } = this.describeTargetProvider(providerKey, modelId);
|
|
1176
|
-
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
1177
|
-
const targetLabel = `${routeLabel} -> ${providerLabel}${resolvedModel ? '.' + resolvedModel : ''}`;
|
|
1178
|
-
const stickyLabel = stickyScope ? ` ${stickyColor}[sticky:${stickyScope}]${reset}` : '';
|
|
1179
|
-
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
1180
|
-
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${reset}`;
|
|
1181
|
-
}
|
|
1182
|
-
catch {
|
|
1183
|
-
const now = new Date();
|
|
1184
|
-
const timestamp = now.toLocaleTimeString('zh-CN', { hour12: false });
|
|
1185
|
-
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
1186
|
-
const stickyLabel = stickyScope ? ` [sticky:${stickyScope}]` : '';
|
|
1187
|
-
return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}`;
|
|
1188
|
-
}
|
|
1189
|
-
}
|
|
1190
|
-
resolveRouteColor(routeName) {
|
|
1191
|
-
const reset = '\x1b[0m';
|
|
1192
|
-
const map = {
|
|
1193
|
-
tools: '\x1b[38;5;214m',
|
|
1194
|
-
thinking: '\x1b[34m',
|
|
1195
|
-
coding: '\x1b[35m',
|
|
1196
|
-
longcontext: '\x1b[38;5;141m',
|
|
1197
|
-
web_search: '\x1b[32m',
|
|
1198
|
-
search: '\x1b[38;5;34m',
|
|
1199
|
-
vision: '\x1b[38;5;207m',
|
|
1200
|
-
background: '\x1b[90m'
|
|
1201
|
-
};
|
|
1202
|
-
return map[routeName] ?? '\x1b[36m';
|
|
1203
|
-
}
|
|
1204
|
-
describeContextUsage(providerKey, estimatedTokens) {
|
|
1205
|
-
if (typeof estimatedTokens !== 'number' || !Number.isFinite(estimatedTokens) || estimatedTokens <= 0) {
|
|
1206
|
-
return undefined;
|
|
1207
|
-
}
|
|
1208
|
-
let limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
1209
|
-
try {
|
|
1210
|
-
const profile = this.providerRegistry.get(providerKey);
|
|
1211
|
-
if (profile?.maxContextTokens && Number.isFinite(profile.maxContextTokens)) {
|
|
1212
|
-
limit = profile.maxContextTokens;
|
|
1213
|
-
}
|
|
1214
|
-
}
|
|
1215
|
-
catch {
|
|
1216
|
-
limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
1217
|
-
}
|
|
1218
|
-
if (!limit || limit <= 0) {
|
|
1219
|
-
return undefined;
|
|
778
|
+
markProviderCooldown(providerKey, cooldownMs) {
|
|
779
|
+
if (!providerKey) {
|
|
780
|
+
return;
|
|
1220
781
|
}
|
|
1221
|
-
const
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
return undefined;
|
|
782
|
+
const ttl = typeof cooldownMs === 'number' ? Math.round(cooldownMs) : Number.NaN;
|
|
783
|
+
if (!Number.isFinite(ttl) || ttl <= 0) {
|
|
784
|
+
return;
|
|
1225
785
|
}
|
|
1226
|
-
|
|
1227
|
-
}
|
|
1228
|
-
describeTargetProvider(providerKey, fallbackModelId) {
|
|
1229
|
-
const parsed = this.parseProviderKey(providerKey);
|
|
1230
|
-
if (!parsed) {
|
|
1231
|
-
return { providerLabel: providerKey, resolvedModel: fallbackModelId };
|
|
1232
|
-
}
|
|
1233
|
-
const aliasLabel = parsed.keyAlias ? `${parsed.providerId}[${parsed.keyAlias}]` : parsed.providerId;
|
|
1234
|
-
const resolvedModel = parsed.modelId || fallbackModelId;
|
|
1235
|
-
return { providerLabel: aliasLabel, resolvedModel };
|
|
786
|
+
this.providerCooldowns.set(providerKey, Date.now() + ttl);
|
|
1236
787
|
}
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
return null;
|
|
788
|
+
isProviderCoolingDown(providerKey) {
|
|
789
|
+
if (!providerKey) {
|
|
790
|
+
return false;
|
|
1241
791
|
}
|
|
1242
|
-
const
|
|
1243
|
-
if (
|
|
1244
|
-
return
|
|
792
|
+
const expiry = this.providerCooldowns.get(providerKey);
|
|
793
|
+
if (!expiry) {
|
|
794
|
+
return false;
|
|
1245
795
|
}
|
|
1246
|
-
if (
|
|
1247
|
-
|
|
796
|
+
if (Date.now() >= expiry) {
|
|
797
|
+
this.providerCooldowns.delete(providerKey);
|
|
798
|
+
return false;
|
|
1248
799
|
}
|
|
1249
|
-
return
|
|
1250
|
-
providerId: parts[0],
|
|
1251
|
-
keyAlias: parts[1],
|
|
1252
|
-
modelId: parts.slice(2).join('.')
|
|
1253
|
-
};
|
|
800
|
+
return true;
|
|
1254
801
|
}
|
|
1255
802
|
}
|