@jsonstudio/llms 0.6.473 → 0.6.568
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +33 -4
- package/dist/conversion/codecs/openai-openai-codec.js +2 -1
- package/dist/conversion/codecs/responses-openai-codec.js +3 -2
- package/dist/conversion/compat/actions/claude-thinking-tools.d.ts +15 -0
- package/dist/conversion/compat/actions/claude-thinking-tools.js +72 -0
- package/dist/conversion/compat/actions/glm-history-image-trim.d.ts +2 -0
- package/dist/conversion/compat/actions/glm-history-image-trim.js +88 -0
- package/dist/conversion/compat/profiles/chat-gemini.json +15 -14
- package/dist/conversion/compat/profiles/chat-glm.json +194 -194
- package/dist/conversion/compat/profiles/chat-iflow.json +199 -199
- package/dist/conversion/compat/profiles/chat-lmstudio.json +43 -43
- package/dist/conversion/compat/profiles/chat-qwen.json +20 -20
- package/dist/conversion/compat/profiles/responses-c4m.json +42 -42
- package/dist/conversion/compat/profiles/responses-output2choices-test.json +12 -0
- package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +6 -1
- package/dist/conversion/hub/pipeline/hub-pipeline.js +40 -13
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +15 -0
- package/dist/conversion/hub/process/chat-process.js +107 -26
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +8 -0
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +28 -10
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +51 -2
- package/dist/conversion/hub/tool-session-compat.d.ts +26 -0
- package/dist/conversion/hub/tool-session-compat.js +299 -0
- package/dist/conversion/hub/types/chat-envelope.d.ts +1 -0
- package/dist/conversion/responses/responses-openai-bridge.d.ts +0 -1
- package/dist/conversion/responses/responses-openai-bridge.js +0 -71
- package/dist/conversion/shared/anthropic-message-utils.js +54 -0
- package/dist/conversion/shared/args-mapping.js +11 -3
- package/dist/conversion/shared/gemini-tool-utils.js +8 -0
- package/dist/conversion/shared/responses-output-builder.js +47 -88
- package/dist/conversion/shared/streaming-text-extractor.d.ts +25 -0
- package/dist/conversion/shared/streaming-text-extractor.js +31 -38
- package/dist/conversion/shared/text-markup-normalizer.js +42 -27
- package/dist/conversion/shared/tool-filter-pipeline.js +2 -1
- package/dist/conversion/shared/tool-governor.js +75 -4
- package/dist/conversion/shared/tool-harvester.js +43 -12
- package/dist/conversion/shared/tool-mapping.d.ts +1 -0
- package/dist/conversion/shared/tool-mapping.js +33 -13
- package/dist/filters/index.d.ts +1 -0
- package/dist/filters/index.js +1 -0
- package/dist/filters/special/request-toolcalls-stringify.js +5 -55
- package/dist/filters/special/request-tools-normalize.js +14 -23
- package/dist/filters/special/response-apply-patch-toon-decode.d.ts +23 -0
- package/dist/filters/special/response-apply-patch-toon-decode.js +109 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.d.ts +10 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.js +55 -13
- package/dist/guidance/index.js +70 -27
- package/dist/router/virtual-router/bootstrap.js +10 -5
- package/dist/router/virtual-router/classifier.js +9 -4
- package/dist/router/virtual-router/engine-health.d.ts +22 -0
- package/dist/router/virtual-router/engine-health.js +423 -0
- package/dist/router/virtual-router/engine-logging.d.ts +20 -0
- package/dist/router/virtual-router/engine-logging.js +197 -0
- package/dist/router/virtual-router/engine-selection.d.ts +32 -0
- package/dist/router/virtual-router/engine-selection.js +649 -0
- package/dist/router/virtual-router/engine.d.ts +21 -14
- package/dist/router/virtual-router/engine.js +200 -523
- package/dist/router/virtual-router/message-utils.js +22 -0
- package/dist/router/virtual-router/routing-instructions.d.ts +8 -1
- package/dist/router/virtual-router/routing-instructions.js +137 -3
- package/dist/router/virtual-router/tool-signals.js +57 -11
- package/dist/router/virtual-router/types.d.ts +30 -0
- package/dist/router/virtual-router/types.js +1 -1
- package/dist/servertool/engine.js +3 -0
- package/dist/servertool/handlers/gemini-empty-reply-continue.d.ts +1 -0
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +120 -0
- package/dist/servertool/handlers/iflow-model-error-retry.d.ts +1 -0
- package/dist/servertool/handlers/iflow-model-error-retry.js +93 -0
- package/dist/servertool/handlers/stop-message-auto.d.ts +1 -0
- package/dist/servertool/handlers/stop-message-auto.js +204 -0
- package/dist/servertool/handlers/vision.js +105 -7
- package/dist/servertool/server-side-tools.d.ts +3 -0
- package/dist/servertool/server-side-tools.js +29 -0
- package/dist/sse/sse-to-json/builders/anthropic-response-builder.js +16 -0
- package/dist/tools/apply-patch-structured.d.ts +20 -0
- package/dist/tools/apply-patch-structured.js +239 -0
- package/dist/tools/tool-description-utils.d.ts +5 -0
- package/dist/tools/tool-description-utils.js +50 -0
- package/dist/tools/tool-registry.js +14 -5
- package/package.json +2 -2
|
@@ -4,14 +4,18 @@ import { RouteLoadBalancer } from './load-balancer.js';
|
|
|
4
4
|
import { RoutingClassifier } from './classifier.js';
|
|
5
5
|
import { buildRoutingFeatures } from './features.js';
|
|
6
6
|
import { ContextAdvisor } from './context-advisor.js';
|
|
7
|
-
import {
|
|
7
|
+
import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
8
8
|
import { getStatsCenter } from '../../telemetry/stats-center.js';
|
|
9
9
|
import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
|
|
10
10
|
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
11
|
+
import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
12
|
+
import { selectProviderImpl } from './engine-selection.js';
|
|
13
|
+
import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
11
14
|
export class VirtualRouterEngine {
|
|
12
15
|
routing = {};
|
|
13
16
|
providerRegistry = new ProviderRegistry();
|
|
14
17
|
healthManager = new ProviderHealthManager();
|
|
18
|
+
providerCooldowns = new Map();
|
|
15
19
|
loadBalancer = new RouteLoadBalancer();
|
|
16
20
|
classifier = new RoutingClassifier({});
|
|
17
21
|
contextAdvisor = new ContextAdvisor();
|
|
@@ -22,7 +26,20 @@ export class VirtualRouterEngine {
|
|
|
22
26
|
statsCenter = getStatsCenter();
|
|
23
27
|
// Derived flags from VirtualRouterConfig/routing used by process / response layers.
|
|
24
28
|
webSearchForce = false;
|
|
29
|
+
healthStore;
|
|
30
|
+
routingStateStore = {
|
|
31
|
+
loadSync: loadRoutingInstructionStateSync,
|
|
32
|
+
saveAsync: saveRoutingInstructionStateAsync
|
|
33
|
+
};
|
|
25
34
|
routingInstructionState = new Map();
|
|
35
|
+
constructor(deps) {
|
|
36
|
+
if (deps?.healthStore) {
|
|
37
|
+
this.healthStore = deps.healthStore;
|
|
38
|
+
}
|
|
39
|
+
if (deps?.routingStateStore) {
|
|
40
|
+
this.routingStateStore = deps.routingStateStore;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
26
43
|
initialize(config) {
|
|
27
44
|
this.validateConfig(config);
|
|
28
45
|
this.routing = config.routing;
|
|
@@ -30,6 +47,8 @@ export class VirtualRouterEngine {
|
|
|
30
47
|
this.healthManager.configure(config.health);
|
|
31
48
|
this.healthConfig = config.health ?? null;
|
|
32
49
|
this.healthManager.registerProviders(Object.keys(config.providers));
|
|
50
|
+
this.providerCooldowns.clear();
|
|
51
|
+
this.restoreHealthFromStore();
|
|
33
52
|
this.loadBalancer = new RouteLoadBalancer(config.loadBalancing);
|
|
34
53
|
this.classifier = new RoutingClassifier(config.classifier);
|
|
35
54
|
this.contextRouting = config.contextRouting ?? { warnRatio: 0.9, hardLimit: false };
|
|
@@ -48,6 +67,15 @@ export class VirtualRouterEngine {
|
|
|
48
67
|
if (metadataInstructions.length > 0) {
|
|
49
68
|
routingState = applyRoutingInstructions(metadataInstructions, routingState);
|
|
50
69
|
}
|
|
70
|
+
const disableStickyRoutes = metadata &&
|
|
71
|
+
typeof metadata === 'object' &&
|
|
72
|
+
metadata.disableStickyRoutes === true;
|
|
73
|
+
if (disableStickyRoutes && routingState.stickyTarget) {
|
|
74
|
+
routingState = {
|
|
75
|
+
...routingState,
|
|
76
|
+
stickyTarget: undefined
|
|
77
|
+
};
|
|
78
|
+
}
|
|
51
79
|
const instructions = parseRoutingInstructions(request.messages);
|
|
52
80
|
if (instructions.length > 0) {
|
|
53
81
|
routingState = applyRoutingInstructions(instructions, routingState);
|
|
@@ -76,7 +104,6 @@ export class VirtualRouterEngine {
|
|
|
76
104
|
...(this.webSearchForce ? { forceWebSearch: true } : {}),
|
|
77
105
|
...(forceVision ? { forceVision: true } : {})
|
|
78
106
|
};
|
|
79
|
-
this.healthManager.recordSuccess(selection.providerKey);
|
|
80
107
|
this.incrementRouteStat(selection.routeUsed, selection.providerKey);
|
|
81
108
|
try {
|
|
82
109
|
this.statsCenter.recordVirtualRouterHit({
|
|
@@ -92,10 +119,10 @@ export class VirtualRouterEngine {
|
|
|
92
119
|
catch {
|
|
93
120
|
// stats must never break routing
|
|
94
121
|
}
|
|
95
|
-
const hitReason =
|
|
122
|
+
const hitReason = buildHitReason(selection.routeUsed, selection.providerKey, classification, features, routingMode, { providerRegistry: this.providerRegistry, contextRouting: this.contextRouting });
|
|
96
123
|
const stickyScope = routingMode !== 'none' ? this.resolveSessionScope(metadata) : undefined;
|
|
97
124
|
const routeForLog = routingMode === 'sticky' ? 'sticky' : selection.routeUsed;
|
|
98
|
-
const formatted =
|
|
125
|
+
const formatted = formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope, routingState);
|
|
99
126
|
if (formatted) {
|
|
100
127
|
this.debug?.log?.(formatted);
|
|
101
128
|
}
|
|
@@ -126,26 +153,29 @@ export class VirtualRouterEngine {
|
|
|
126
153
|
};
|
|
127
154
|
}
|
|
128
155
|
handleProviderFailure(event) {
|
|
129
|
-
|
|
130
|
-
|
|
156
|
+
handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
|
|
157
|
+
}
|
|
158
|
+
handleProviderError(event) {
|
|
159
|
+
if (this.healthStore && typeof this.healthStore.recordProviderError === 'function') {
|
|
160
|
+
try {
|
|
161
|
+
this.healthStore.recordProviderError(event);
|
|
162
|
+
}
|
|
163
|
+
catch {
|
|
164
|
+
// ignore persistence errors
|
|
165
|
+
}
|
|
131
166
|
}
|
|
132
|
-
|
|
167
|
+
// 配额恢复事件优先处理:一旦识别到 virtualRouterQuotaRecovery,
|
|
168
|
+
// 直接清理健康状态/冷却 TTL,避免继续走常规错误映射逻辑。
|
|
169
|
+
const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
|
|
170
|
+
if (handledByQuota) {
|
|
133
171
|
return;
|
|
134
172
|
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
else if (event.reason === 'rate_limit' && event.statusCode === 429) {
|
|
139
|
-
// 对可恢复的 429 错误使用短冷静期:在 cooldownMs 内将该 key 标记为不可用,
|
|
140
|
-
// 以便 Virtual Router 在随后的选路中优先尝试其他 key 或模型。
|
|
141
|
-
this.healthManager.cooldownProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
|
|
142
|
-
}
|
|
143
|
-
else {
|
|
144
|
-
this.healthManager.recordFailure(event.providerKey, event.reason);
|
|
173
|
+
const handledByQuotaDepleted = applyQuotaDepletedImpl(event, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
|
|
174
|
+
if (handledByQuotaDepleted) {
|
|
175
|
+
return;
|
|
145
176
|
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
const derived = this.mapProviderError(event);
|
|
177
|
+
applySeriesCooldownImpl(event, this.providerRegistry, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
|
|
178
|
+
const derived = mapProviderErrorImpl(event, this.providerHealthConfig());
|
|
149
179
|
if (!derived) {
|
|
150
180
|
return;
|
|
151
181
|
}
|
|
@@ -210,213 +240,15 @@ export class VirtualRouterEngine {
|
|
|
210
240
|
}
|
|
211
241
|
selectProvider(requestedRoute, metadata, classification, features, routingState) {
|
|
212
242
|
const activeState = routingState || this.getRoutingInstructionState(this.resolveStickyKey(metadata));
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
poolId: 'forced'
|
|
223
|
-
};
|
|
224
|
-
}
|
|
225
|
-
// sticky 语义:
|
|
226
|
-
// - 显式绑定到具体 key(alias/index)时,直接使用该 key;
|
|
227
|
-
// - provider / model 级别 sticky 解析为一组 providerKey;
|
|
228
|
-
// 在 sticky 这组 key「可用」之前,不会回落到 default 中的非 sticky provider。
|
|
229
|
-
let stickyResolution = null;
|
|
230
|
-
let stickyKeySet;
|
|
231
|
-
if (!forcedResolution && activeState.stickyTarget) {
|
|
232
|
-
stickyResolution = this.resolveInstructionTarget(activeState.stickyTarget);
|
|
233
|
-
if (stickyResolution && stickyResolution.mode === 'exact') {
|
|
234
|
-
const stickyKey = stickyResolution.keys[0];
|
|
235
|
-
// 已经被健康管理标记为不可用的 key 不能被 sticky 语法“复活”
|
|
236
|
-
if (this.healthManager.isAvailable(stickyKey)) {
|
|
237
|
-
return {
|
|
238
|
-
providerKey: stickyKey,
|
|
239
|
-
routeUsed: requestedRoute,
|
|
240
|
-
pool: [stickyKey],
|
|
241
|
-
poolId: 'sticky'
|
|
242
|
-
};
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
if (stickyResolution && stickyResolution.mode === 'filter' && stickyResolution.keys.length > 0) {
|
|
246
|
-
// 仅保留当前仍可用的 key;已被熔断/拉黑的 key 不会被 sticky 语法重新加入池子
|
|
247
|
-
const liveKeys = stickyResolution.keys.filter((key) => this.healthManager.isAvailable(key));
|
|
248
|
-
if (liveKeys.length > 0) {
|
|
249
|
-
stickyKeySet = new Set(liveKeys);
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
const allowAliasRotation = Boolean(activeState.stickyTarget) &&
|
|
254
|
-
!activeState.stickyTarget?.keyAlias &&
|
|
255
|
-
activeState.stickyTarget?.keyIndex === undefined;
|
|
256
|
-
// force(filter) 优先级高于 sticky:显式 force 视为覆盖 sticky 约束。
|
|
257
|
-
if (forcedResolution && forcedResolution.mode === 'filter') {
|
|
258
|
-
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
259
|
-
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
260
|
-
if (filteredCandidates.length === 0) {
|
|
261
|
-
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
262
|
-
requestedRoute,
|
|
263
|
-
allowedProviders: Array.from(activeState.allowedProviders),
|
|
264
|
-
disabledProviders: Array.from(activeState.disabledProviders)
|
|
265
|
-
});
|
|
266
|
-
}
|
|
267
|
-
const forcedKeySet = new Set(forcedResolution.keys);
|
|
268
|
-
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, forcedKeySet, allowAliasRotation);
|
|
269
|
-
}
|
|
270
|
-
if (stickyKeySet && stickyKeySet.size > 0) {
|
|
271
|
-
const stickySelection = this.selectFromStickyPool(stickyKeySet, metadata, features, activeState, allowAliasRotation);
|
|
272
|
-
if (stickySelection) {
|
|
273
|
-
return stickySelection;
|
|
274
|
-
}
|
|
275
|
-
// sticky 池在本次请求中完全不可用(全部被黑名单/健康状态过滤):视为 sticky 池暂时失效,
|
|
276
|
-
// 本次回落到普通路由选择,但保留 stickyTarget,等待后续恢复。
|
|
277
|
-
}
|
|
278
|
-
// 无 sticky,或 sticky 池在本次请求中全部不可用(无可用 key):按原始分类结果执行正常路由选择。
|
|
279
|
-
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
|
|
280
|
-
const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
|
|
281
|
-
if (filteredCandidates.length === 0) {
|
|
282
|
-
throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
|
|
283
|
-
requestedRoute,
|
|
284
|
-
allowedProviders: Array.from(activeState.allowedProviders),
|
|
285
|
-
disabledProviders: Array.from(activeState.disabledProviders)
|
|
286
|
-
});
|
|
287
|
-
}
|
|
288
|
-
return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, undefined, allowAliasRotation);
|
|
289
|
-
}
|
|
290
|
-
trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation) {
|
|
291
|
-
let targets = Array.isArray(tier.targets) ? tier.targets : [];
|
|
292
|
-
// 基于本次请求 metadata 中的 excludedProviderKeys 做临时过滤:
|
|
293
|
-
// - 这些 key 仅在当前 route() 调用内被排除,不会写入 sticky 状态;
|
|
294
|
-
// - 主要用于 HTTP 层在同一请求内对 429 失败的 key 进行快速 failover。
|
|
295
|
-
const excludedRaw = features.metadata?.excludedProviderKeys &&
|
|
296
|
-
Array.isArray(features.metadata.excludedProviderKeys)
|
|
297
|
-
? features.metadata.excludedProviderKeys
|
|
298
|
-
: [];
|
|
299
|
-
const excludedKeys = new Set(excludedRaw
|
|
300
|
-
.map((val) => (typeof val === 'string' ? val.trim() : ''))
|
|
301
|
-
.filter((val) => Boolean(val)));
|
|
302
|
-
if (excludedKeys.size > 0) {
|
|
303
|
-
targets = targets.filter((key) => !excludedKeys.has(key));
|
|
304
|
-
}
|
|
305
|
-
if (allowedProviders && allowedProviders.size > 0) {
|
|
306
|
-
targets = targets.filter(key => {
|
|
307
|
-
const providerId = this.extractProviderId(key);
|
|
308
|
-
return providerId && allowedProviders.has(providerId);
|
|
309
|
-
});
|
|
310
|
-
}
|
|
311
|
-
if (disabledProviders && disabledProviders.size > 0) {
|
|
312
|
-
targets = targets.filter((key) => {
|
|
313
|
-
const providerId = this.extractProviderId(key);
|
|
314
|
-
return providerId && !disabledProviders.has(providerId);
|
|
315
|
-
});
|
|
316
|
-
}
|
|
317
|
-
if (disabledKeysMap && disabledKeysMap.size > 0) {
|
|
318
|
-
targets = targets.filter((key) => {
|
|
319
|
-
const providerId = this.extractProviderId(key);
|
|
320
|
-
if (!providerId)
|
|
321
|
-
return true;
|
|
322
|
-
const disabledKeys = disabledKeysMap.get(providerId);
|
|
323
|
-
if (!disabledKeys || disabledKeys.size === 0)
|
|
324
|
-
return true;
|
|
325
|
-
const keyAlias = this.extractKeyAlias(key);
|
|
326
|
-
const keyIndex = this.extractKeyIndex(key);
|
|
327
|
-
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
328
|
-
return false;
|
|
329
|
-
}
|
|
330
|
-
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
331
|
-
return false;
|
|
332
|
-
}
|
|
333
|
-
return true;
|
|
334
|
-
});
|
|
335
|
-
}
|
|
336
|
-
if (disabledModels && disabledModels.size > 0) {
|
|
337
|
-
targets = targets.filter((key) => {
|
|
338
|
-
const providerId = this.extractProviderId(key);
|
|
339
|
-
if (!providerId) {
|
|
340
|
-
return true;
|
|
341
|
-
}
|
|
342
|
-
const disabled = disabledModels.get(providerId);
|
|
343
|
-
if (!disabled || disabled.size === 0) {
|
|
344
|
-
return true;
|
|
345
|
-
}
|
|
346
|
-
const modelId = this.getProviderModelId(key);
|
|
347
|
-
if (!modelId) {
|
|
348
|
-
return true;
|
|
349
|
-
}
|
|
350
|
-
return !disabled.has(modelId);
|
|
351
|
-
});
|
|
352
|
-
}
|
|
353
|
-
if (requiredProviderKeys && requiredProviderKeys.size > 0) {
|
|
354
|
-
targets = targets.filter((key) => requiredProviderKeys.has(key));
|
|
355
|
-
}
|
|
356
|
-
const serverToolRequired = features.metadata?.serverToolRequired === true;
|
|
357
|
-
if (serverToolRequired) {
|
|
358
|
-
const filtered = [];
|
|
359
|
-
for (const key of targets) {
|
|
360
|
-
try {
|
|
361
|
-
const profile = this.providerRegistry.get(key);
|
|
362
|
-
if (!profile.serverToolsDisabled) {
|
|
363
|
-
filtered.push(key);
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
catch {
|
|
367
|
-
// ignore unknown providers when filtering for servertools
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
targets = filtered;
|
|
371
|
-
}
|
|
372
|
-
// 当当前请求包含图片且路由为 default/thinking 时,优先在该路由池内选择
|
|
373
|
-
// Responses/Gemini 类型的 Provider,以便一次完成多模态推理;如果不存在则回退到原始列表。
|
|
374
|
-
if (features.hasImageAttachment && (routeName === DEFAULT_ROUTE || routeName === 'thinking')) {
|
|
375
|
-
const prioritized = [];
|
|
376
|
-
const fallthrough = [];
|
|
377
|
-
for (const key of targets) {
|
|
378
|
-
try {
|
|
379
|
-
const profile = this.providerRegistry.get(key);
|
|
380
|
-
if (profile.providerType === 'responses') {
|
|
381
|
-
prioritized.push(key);
|
|
382
|
-
}
|
|
383
|
-
else if (profile.providerType === 'gemini') {
|
|
384
|
-
prioritized.push(key);
|
|
385
|
-
}
|
|
386
|
-
else {
|
|
387
|
-
fallthrough.push(key);
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
catch {
|
|
391
|
-
fallthrough.push(key);
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
if (prioritized.length) {
|
|
395
|
-
targets = prioritized;
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
if (!targets.length) {
|
|
399
|
-
return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
|
|
400
|
-
}
|
|
401
|
-
const contextResult = this.contextAdvisor.classify(targets, estimatedTokens, (key) => this.providerRegistry.get(key));
|
|
402
|
-
const prioritizedPools = this.buildContextCandidatePools(contextResult);
|
|
403
|
-
for (const candidatePool of prioritizedPools) {
|
|
404
|
-
const providerKey = this.loadBalancer.select({
|
|
405
|
-
routeName: `${routeName}:${tier.id}`,
|
|
406
|
-
candidates: candidatePool,
|
|
407
|
-
stickyKey: allowAliasRotation ? undefined : stickyKey,
|
|
408
|
-
availabilityCheck: (key) => this.healthManager.isAvailable(key)
|
|
409
|
-
});
|
|
410
|
-
if (providerKey) {
|
|
411
|
-
return { providerKey, poolTargets: tier.targets, tierId: tier.id };
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
return {
|
|
415
|
-
providerKey: null,
|
|
416
|
-
poolTargets: tier.targets,
|
|
417
|
-
tierId: tier.id,
|
|
418
|
-
failureHint: this.describeAttempt(routeName, tier.id, contextResult)
|
|
419
|
-
};
|
|
243
|
+
return selectProviderImpl(requestedRoute, metadata, classification, features, activeState, {
|
|
244
|
+
routing: this.routing,
|
|
245
|
+
providerRegistry: this.providerRegistry,
|
|
246
|
+
healthManager: this.healthManager,
|
|
247
|
+
contextAdvisor: this.contextAdvisor,
|
|
248
|
+
loadBalancer: this.loadBalancer,
|
|
249
|
+
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
250
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m)
|
|
251
|
+
}, { routingState });
|
|
420
252
|
}
|
|
421
253
|
incrementRouteStat(routeName, providerKey) {
|
|
422
254
|
if (!this.routeStats.has(routeName)) {
|
|
@@ -430,35 +262,6 @@ export class VirtualRouterEngine {
|
|
|
430
262
|
providerHealthConfig() {
|
|
431
263
|
return this.healthManager.getConfig();
|
|
432
264
|
}
|
|
433
|
-
initializeRouteQueue(candidates) {
|
|
434
|
-
return Array.from(new Set(candidates));
|
|
435
|
-
}
|
|
436
|
-
buildContextCandidatePools(result) {
|
|
437
|
-
const ordered = [];
|
|
438
|
-
if (result.safe.length) {
|
|
439
|
-
ordered.push(result.safe);
|
|
440
|
-
// 如果存在安全候选,直接放弃当前处于警戒阈值的模型
|
|
441
|
-
return ordered;
|
|
442
|
-
}
|
|
443
|
-
if (result.risky.length) {
|
|
444
|
-
ordered.push(result.risky);
|
|
445
|
-
}
|
|
446
|
-
// ratio >= 1 视为上下文溢出,直接标记为不可用
|
|
447
|
-
return ordered;
|
|
448
|
-
}
|
|
449
|
-
describeAttempt(routeName, poolId, result) {
|
|
450
|
-
const prefix = poolId ? `${routeName}:${poolId}` : routeName;
|
|
451
|
-
if (result.safe.length > 0) {
|
|
452
|
-
return `${prefix}:health`;
|
|
453
|
-
}
|
|
454
|
-
if (result.risky.length > 0) {
|
|
455
|
-
return `${prefix}:context_risky`;
|
|
456
|
-
}
|
|
457
|
-
if (result.overflow.length > 0) {
|
|
458
|
-
return `${prefix}:max_context_window`;
|
|
459
|
-
}
|
|
460
|
-
return prefix;
|
|
461
|
-
}
|
|
462
265
|
resolveStickyKey(metadata) {
|
|
463
266
|
const sessionScope = this.resolveSessionScope(metadata);
|
|
464
267
|
if (sessionScope) {
|
|
@@ -498,7 +301,12 @@ export class VirtualRouterEngine {
|
|
|
498
301
|
allowedProviders: new Set(),
|
|
499
302
|
disabledProviders: new Set(),
|
|
500
303
|
disabledKeys: new Map(),
|
|
501
|
-
disabledModels: new Map()
|
|
304
|
+
disabledModels: new Map(),
|
|
305
|
+
stopMessageText: undefined,
|
|
306
|
+
stopMessageMaxRepeats: undefined,
|
|
307
|
+
stopMessageUsed: undefined,
|
|
308
|
+
stopMessageUpdatedAt: undefined,
|
|
309
|
+
stopMessageLastUsedAt: undefined
|
|
502
310
|
};
|
|
503
311
|
}
|
|
504
312
|
this.routingInstructionState.set(key, initial);
|
|
@@ -653,45 +461,16 @@ export class VirtualRouterEngine {
|
|
|
653
461
|
});
|
|
654
462
|
}
|
|
655
463
|
selectFromCandidates(routes, metadata, classification, features, state, requiredProviderKeys, allowAliasRotation) {
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
const routeQueue = this.initializeRouteQueue(routes);
|
|
667
|
-
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
668
|
-
? Math.max(0, features.estimatedTokens)
|
|
669
|
-
: 0;
|
|
670
|
-
while (routeQueue.length) {
|
|
671
|
-
const routeName = routeQueue.shift();
|
|
672
|
-
if (visitedRoutes.has(routeName)) {
|
|
673
|
-
continue;
|
|
674
|
-
}
|
|
675
|
-
const routePools = this.routing[routeName];
|
|
676
|
-
if (!this.routeHasTargets(routePools)) {
|
|
677
|
-
visitedRoutes.add(routeName);
|
|
678
|
-
attempted.push(`${routeName}:empty`);
|
|
679
|
-
continue;
|
|
680
|
-
}
|
|
681
|
-
visitedRoutes.add(routeName);
|
|
682
|
-
const orderedPools = this.sortRoutePools(routePools);
|
|
683
|
-
for (const poolTier of orderedPools) {
|
|
684
|
-
const { providerKey, poolTargets, tierId, failureHint } = this.trySelectFromTier(routeName, poolTier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation);
|
|
685
|
-
if (providerKey) {
|
|
686
|
-
return { providerKey, routeUsed: routeName, pool: poolTargets, poolId: tierId };
|
|
687
|
-
}
|
|
688
|
-
if (failureHint) {
|
|
689
|
-
attempted.push(failureHint);
|
|
690
|
-
}
|
|
691
|
-
}
|
|
692
|
-
}
|
|
693
|
-
const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
694
|
-
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
464
|
+
// legacy helper kept for backward compatibility; selection logic moved to engine-selection.ts
|
|
465
|
+
return selectProviderImpl(this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE), metadata, classification, features, state, {
|
|
466
|
+
routing: this.routing,
|
|
467
|
+
providerRegistry: this.providerRegistry,
|
|
468
|
+
healthManager: this.healthManager,
|
|
469
|
+
contextAdvisor: this.contextAdvisor,
|
|
470
|
+
loadBalancer: this.loadBalancer,
|
|
471
|
+
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
472
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m)
|
|
473
|
+
}, { routingState: state });
|
|
695
474
|
}
|
|
696
475
|
extractProviderId(providerKey) {
|
|
697
476
|
const firstDot = providerKey.indexOf('.');
|
|
@@ -767,7 +546,7 @@ export class VirtualRouterEngine {
|
|
|
767
546
|
]));
|
|
768
547
|
const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
|
|
769
548
|
// 初始候选集合:sticky 池中的所有 key
|
|
770
|
-
let candidates = Array.from(stickyKeySet);
|
|
549
|
+
let candidates = Array.from(stickyKeySet).filter((key) => !this.isProviderCoolingDown(key));
|
|
771
550
|
// 应用 provider 白名单 / 黑名单
|
|
772
551
|
if (allowedProviders.size > 0) {
|
|
773
552
|
candidates = candidates.filter((key) => {
|
|
@@ -816,21 +595,8 @@ export class VirtualRouterEngine {
|
|
|
816
595
|
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
817
596
|
? Math.max(0, features.estimatedTokens)
|
|
818
597
|
: 0;
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
targets: candidates,
|
|
822
|
-
priority: 0
|
|
823
|
-
};
|
|
824
|
-
const { providerKey, poolTargets, tierId } = this.trySelectFromTier('sticky', tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, stickyKeySet, allowAliasRotation);
|
|
825
|
-
if (!providerKey) {
|
|
826
|
-
return null;
|
|
827
|
-
}
|
|
828
|
-
return {
|
|
829
|
-
providerKey,
|
|
830
|
-
routeUsed: 'sticky',
|
|
831
|
-
pool: poolTargets,
|
|
832
|
-
poolId: tierId
|
|
833
|
-
};
|
|
598
|
+
// delegate to selection module
|
|
599
|
+
return null;
|
|
834
600
|
}
|
|
835
601
|
extractKeyAlias(providerKey) {
|
|
836
602
|
const parts = providerKey.split('.');
|
|
@@ -869,87 +635,19 @@ export class VirtualRouterEngine {
|
|
|
869
635
|
}
|
|
870
636
|
return null;
|
|
871
637
|
}
|
|
872
|
-
mapProviderError
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
if (!event || !event.runtime) {
|
|
877
|
-
return null;
|
|
638
|
+
// mapProviderError/applySeriesCooldown moved to engine-health.ts
|
|
639
|
+
extractExcludedProviderKeySet(metadata) {
|
|
640
|
+
if (!metadata) {
|
|
641
|
+
return new Set();
|
|
878
642
|
}
|
|
879
|
-
const
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
if (!providerKey) {
|
|
883
|
-
return null;
|
|
884
|
-
}
|
|
885
|
-
const routeName = runtime.routeName;
|
|
886
|
-
const statusCode = event.status;
|
|
887
|
-
const code = event.code?.toUpperCase() ?? 'ERR_UNKNOWN';
|
|
888
|
-
const stage = event.stage?.toLowerCase() ?? 'unknown';
|
|
889
|
-
const recoverable = event.recoverable === true;
|
|
890
|
-
// 默认策略:只有显式可恢复的错误才视为非致命;其余一律按致命处理。
|
|
891
|
-
// 注意:provider 层已经对 429 做了「连续 4 次升级为不可恢复」的判断,这里不再把所有 429 强行当作可恢复。
|
|
892
|
-
let fatal = !recoverable;
|
|
893
|
-
let reason = this.deriveReason(code, stage, statusCode);
|
|
894
|
-
let cooldownOverrideMs;
|
|
895
|
-
// 401 / 402 / 500 / 524 以及所有未被标记为可恢复的错误一律视为不可恢复
|
|
896
|
-
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH')) {
|
|
897
|
-
fatal = true;
|
|
898
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
899
|
-
reason = 'auth';
|
|
900
|
-
}
|
|
901
|
-
else if (statusCode === 429 && !recoverable) {
|
|
902
|
-
// 连续 429 已在 provider 层被升级为不可恢复:这里按致命限流处理(长冷却,等同熔断)
|
|
903
|
-
fatal = true;
|
|
904
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
905
|
-
reason = 'rate_limit';
|
|
906
|
-
}
|
|
907
|
-
else if (statusCode && statusCode >= 500) {
|
|
908
|
-
fatal = true;
|
|
909
|
-
cooldownOverrideMs = Math.max(5 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 5 * 60_000);
|
|
910
|
-
reason = 'upstream_error';
|
|
911
|
-
}
|
|
912
|
-
else if (stage.includes('compat')) {
|
|
913
|
-
fatal = true;
|
|
914
|
-
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
915
|
-
reason = 'compatibility';
|
|
643
|
+
const raw = metadata.excludedProviderKeys;
|
|
644
|
+
if (!Array.isArray(raw) || raw.length === 0) {
|
|
645
|
+
return new Set();
|
|
916
646
|
}
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
fatal,
|
|
922
|
-
statusCode,
|
|
923
|
-
errorCode: code,
|
|
924
|
-
retryable: recoverable,
|
|
925
|
-
// 是否影响健康由 provider 层决定;这里仅在 event.affectsHealth !== false 时才计入健康状态
|
|
926
|
-
affectsHealth: event.affectsHealth !== false,
|
|
927
|
-
cooldownOverrideMs,
|
|
928
|
-
metadata: {
|
|
929
|
-
...event.runtime,
|
|
930
|
-
stage,
|
|
931
|
-
eventCode: code,
|
|
932
|
-
originalMessage: event.message,
|
|
933
|
-
statusCode
|
|
934
|
-
}
|
|
935
|
-
};
|
|
936
|
-
}
|
|
937
|
-
deriveReason(code, stage, statusCode) {
|
|
938
|
-
if (code.includes('RATE') || code.includes('429'))
|
|
939
|
-
return 'rate_limit';
|
|
940
|
-
if (code.includes('AUTH') || statusCode === 401 || statusCode === 403)
|
|
941
|
-
return 'auth';
|
|
942
|
-
if (stage.includes('compat'))
|
|
943
|
-
return 'compatibility';
|
|
944
|
-
if (code.includes('SSE'))
|
|
945
|
-
return 'sse';
|
|
946
|
-
if (code.includes('TIMEOUT') || statusCode === 408 || statusCode === 504)
|
|
947
|
-
return 'timeout';
|
|
948
|
-
if (statusCode && statusCode >= 500)
|
|
949
|
-
return 'upstream_error';
|
|
950
|
-
if (statusCode && statusCode >= 400)
|
|
951
|
-
return 'client_error';
|
|
952
|
-
return 'unknown';
|
|
647
|
+
const normalized = raw
|
|
648
|
+
.map((value) => (typeof value === 'string' ? value.trim() : ''))
|
|
649
|
+
.filter((value) => Boolean(value));
|
|
650
|
+
return new Set(normalized);
|
|
953
651
|
}
|
|
954
652
|
buildRouteCandidates(requestedRoute, classificationCandidates, features) {
|
|
955
653
|
const forceVision = this.routeHasForceFlag('vision');
|
|
@@ -1099,50 +797,6 @@ export class VirtualRouterEngine {
|
|
|
1099
797
|
}
|
|
1100
798
|
return flattened;
|
|
1101
799
|
}
|
|
1102
|
-
buildHitReason(routeUsed, providerKey, classification, features, mode) {
|
|
1103
|
-
const reasoning = classification.reasoning || '';
|
|
1104
|
-
let primary = reasoning.split('|')[0] || '';
|
|
1105
|
-
const commandDetail = features.lastAssistantToolLabel;
|
|
1106
|
-
const isStickyMode = mode === 'sticky';
|
|
1107
|
-
if (isStickyMode &&
|
|
1108
|
-
(routeUsed === 'tools' || routeUsed === 'thinking' || routeUsed === 'coding')) {
|
|
1109
|
-
// sticky 模式下不再把 tools/thinking/coding 作为主标签,统一折叠为 sticky,
|
|
1110
|
-
// 避免日志中出现 "tools:last-tool-*" 这类误导性前缀。
|
|
1111
|
-
primary = '';
|
|
1112
|
-
}
|
|
1113
|
-
const base = (() => {
|
|
1114
|
-
if (routeUsed === 'tools') {
|
|
1115
|
-
const label = isStickyMode ? 'sticky' : 'tools';
|
|
1116
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1117
|
-
}
|
|
1118
|
-
if (routeUsed === 'thinking') {
|
|
1119
|
-
const label = isStickyMode ? 'sticky' : 'thinking';
|
|
1120
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1121
|
-
}
|
|
1122
|
-
if (routeUsed === 'coding') {
|
|
1123
|
-
const label = isStickyMode ? 'sticky' : 'coding';
|
|
1124
|
-
return this.decorateWithDetail(primary || label, primary, commandDetail);
|
|
1125
|
-
}
|
|
1126
|
-
if (routeUsed === 'web_search' || routeUsed === 'search') {
|
|
1127
|
-
return this.decorateWithDetail(primary || routeUsed, primary, commandDetail);
|
|
1128
|
-
}
|
|
1129
|
-
if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
|
|
1130
|
-
if (isStickyMode) {
|
|
1131
|
-
return primary || 'sticky:default';
|
|
1132
|
-
}
|
|
1133
|
-
return primary || 'fallback:default';
|
|
1134
|
-
}
|
|
1135
|
-
if (primary) {
|
|
1136
|
-
return primary;
|
|
1137
|
-
}
|
|
1138
|
-
return routeUsed ? `route:${routeUsed}` : 'route:unknown';
|
|
1139
|
-
})();
|
|
1140
|
-
const contextDetail = this.describeContextUsage(providerKey, features.estimatedTokens);
|
|
1141
|
-
if (contextDetail) {
|
|
1142
|
-
return `${base}|context:${contextDetail}`;
|
|
1143
|
-
}
|
|
1144
|
-
return base;
|
|
1145
|
-
}
|
|
1146
800
|
isRoutingStateEmpty(state) {
|
|
1147
801
|
if (!state) {
|
|
1148
802
|
return true;
|
|
@@ -1153,121 +807,144 @@ export class VirtualRouterEngine {
|
|
|
1153
807
|
const noDisabledProviders = state.disabledProviders.size === 0;
|
|
1154
808
|
const noDisabledKeys = state.disabledKeys.size === 0;
|
|
1155
809
|
const noDisabledModels = state.disabledModels.size === 0;
|
|
1156
|
-
|
|
810
|
+
const noStopMessage = (!state.stopMessageText || !state.stopMessageText.trim()) &&
|
|
811
|
+
(typeof state.stopMessageMaxRepeats !== 'number' || !Number.isFinite(state.stopMessageMaxRepeats)) &&
|
|
812
|
+
(typeof state.stopMessageUsed !== 'number' || !Number.isFinite(state.stopMessageUsed)) &&
|
|
813
|
+
(typeof state.stopMessageUpdatedAt !== 'number' || !Number.isFinite(state.stopMessageUpdatedAt)) &&
|
|
814
|
+
(typeof state.stopMessageLastUsedAt !== 'number' || !Number.isFinite(state.stopMessageLastUsedAt));
|
|
815
|
+
return (noForced &&
|
|
816
|
+
noSticky &&
|
|
817
|
+
noAllowed &&
|
|
818
|
+
noDisabledProviders &&
|
|
819
|
+
noDisabledKeys &&
|
|
820
|
+
noDisabledModels &&
|
|
821
|
+
noStopMessage);
|
|
1157
822
|
}
|
|
1158
823
|
persistRoutingInstructionState(key, state) {
|
|
1159
824
|
if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
|
|
1160
825
|
return;
|
|
1161
826
|
}
|
|
1162
827
|
if (this.isRoutingStateEmpty(state)) {
|
|
1163
|
-
|
|
828
|
+
this.routingStateStore.saveAsync(key, null);
|
|
1164
829
|
return;
|
|
1165
830
|
}
|
|
1166
|
-
|
|
831
|
+
this.routingStateStore.saveAsync(key, state);
|
|
1167
832
|
}
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
return primaryReason || baseLabel;
|
|
833
|
+
markProviderCooldown(providerKey, cooldownMs) {
|
|
834
|
+
if (!providerKey) {
|
|
835
|
+
return;
|
|
1172
836
|
}
|
|
1173
|
-
|
|
1174
|
-
|
|
837
|
+
const ttl = typeof cooldownMs === 'number' ? Math.round(cooldownMs) : Number.NaN;
|
|
838
|
+
if (!Number.isFinite(ttl) || ttl <= 0) {
|
|
839
|
+
return;
|
|
1175
840
|
}
|
|
1176
|
-
|
|
841
|
+
this.providerCooldowns.set(providerKey, Date.now() + ttl);
|
|
842
|
+
this.persistHealthSnapshot();
|
|
1177
843
|
}
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
const now = new Date();
|
|
1182
|
-
const hours = String(now.getHours()).padStart(2, '0');
|
|
1183
|
-
const minutes = String(now.getMinutes()).padStart(2, '0');
|
|
1184
|
-
const seconds = String(now.getSeconds()).padStart(2, '0');
|
|
1185
|
-
const timestamp = `${hours}:${minutes}:${seconds}`;
|
|
1186
|
-
const prefixColor = '\x1b[38;5;208m';
|
|
1187
|
-
const reset = '\x1b[0m';
|
|
1188
|
-
const timeColor = '\x1b[90m'; // 灰色
|
|
1189
|
-
const stickyColor = '\x1b[33m'; // 黄色
|
|
1190
|
-
const routeColor = this.resolveRouteColor(routeName);
|
|
1191
|
-
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
1192
|
-
const timeLabel = `${timeColor}${timestamp}${reset}`;
|
|
1193
|
-
const { providerLabel, resolvedModel } = this.describeTargetProvider(providerKey, modelId);
|
|
1194
|
-
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
1195
|
-
const targetLabel = `${routeLabel} -> ${providerLabel}${resolvedModel ? '.' + resolvedModel : ''}`;
|
|
1196
|
-
const stickyLabel = stickyScope ? ` ${stickyColor}[sticky:${stickyScope}]${reset}` : '';
|
|
1197
|
-
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
1198
|
-
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${reset}`;
|
|
844
|
+
clearProviderCooldown(providerKey) {
|
|
845
|
+
if (!providerKey) {
|
|
846
|
+
return;
|
|
1199
847
|
}
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
const timestamp = now.toLocaleTimeString('zh-CN', { hour12: false });
|
|
1203
|
-
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
1204
|
-
const stickyLabel = stickyScope ? ` [sticky:${stickyScope}]` : '';
|
|
1205
|
-
return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}`;
|
|
848
|
+
if (this.providerCooldowns.delete(providerKey)) {
|
|
849
|
+
this.persistHealthSnapshot();
|
|
1206
850
|
}
|
|
1207
851
|
}
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
}
|
|
1220
|
-
return
|
|
852
|
+
isProviderCoolingDown(providerKey) {
|
|
853
|
+
if (!providerKey) {
|
|
854
|
+
return false;
|
|
855
|
+
}
|
|
856
|
+
const expiry = this.providerCooldowns.get(providerKey);
|
|
857
|
+
if (!expiry) {
|
|
858
|
+
return false;
|
|
859
|
+
}
|
|
860
|
+
if (Date.now() >= expiry) {
|
|
861
|
+
this.providerCooldowns.delete(providerKey);
|
|
862
|
+
return false;
|
|
863
|
+
}
|
|
864
|
+
return true;
|
|
1221
865
|
}
|
|
1222
|
-
|
|
1223
|
-
if (typeof
|
|
1224
|
-
return
|
|
866
|
+
restoreHealthFromStore() {
|
|
867
|
+
if (!this.healthStore || typeof this.healthStore.loadInitialSnapshot !== 'function') {
|
|
868
|
+
return;
|
|
1225
869
|
}
|
|
1226
|
-
let
|
|
870
|
+
let snapshot = null;
|
|
1227
871
|
try {
|
|
1228
|
-
|
|
1229
|
-
if (profile?.maxContextTokens && Number.isFinite(profile.maxContextTokens)) {
|
|
1230
|
-
limit = profile.maxContextTokens;
|
|
1231
|
-
}
|
|
872
|
+
snapshot = this.healthStore.loadInitialSnapshot();
|
|
1232
873
|
}
|
|
1233
874
|
catch {
|
|
1234
|
-
|
|
875
|
+
snapshot = null;
|
|
1235
876
|
}
|
|
1236
|
-
if (!
|
|
1237
|
-
return
|
|
877
|
+
if (!snapshot) {
|
|
878
|
+
return;
|
|
1238
879
|
}
|
|
1239
|
-
const
|
|
1240
|
-
const
|
|
1241
|
-
|
|
1242
|
-
|
|
880
|
+
const now = Date.now();
|
|
881
|
+
const providerKeys = new Set();
|
|
882
|
+
for (const pools of Object.values(this.routing)) {
|
|
883
|
+
for (const pool of pools) {
|
|
884
|
+
for (const key of pool.targets) {
|
|
885
|
+
if (typeof key === 'string' && key) {
|
|
886
|
+
providerKeys.add(key);
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
const byKey = new Map();
|
|
892
|
+
for (const entry of snapshot.cooldowns || []) {
|
|
893
|
+
if (!entry || !entry.providerKey) {
|
|
894
|
+
continue;
|
|
895
|
+
}
|
|
896
|
+
if (!providerKeys.has(entry.providerKey)) {
|
|
897
|
+
continue;
|
|
898
|
+
}
|
|
899
|
+
if (!Number.isFinite(entry.cooldownExpiresAt) || entry.cooldownExpiresAt <= now) {
|
|
900
|
+
continue;
|
|
901
|
+
}
|
|
902
|
+
byKey.set(entry.providerKey, entry);
|
|
903
|
+
this.providerCooldowns.set(entry.providerKey, entry.cooldownExpiresAt);
|
|
904
|
+
}
|
|
905
|
+
for (const state of snapshot.providers || []) {
|
|
906
|
+
if (!state || !state.providerKey) {
|
|
907
|
+
continue;
|
|
908
|
+
}
|
|
909
|
+
if (!providerKeys.has(state.providerKey)) {
|
|
910
|
+
continue;
|
|
911
|
+
}
|
|
912
|
+
if (state.cooldownExpiresAt && state.cooldownExpiresAt > now) {
|
|
913
|
+
const ttl = state.cooldownExpiresAt - now;
|
|
914
|
+
if (ttl > 0) {
|
|
915
|
+
this.healthManager.tripProvider(state.providerKey, state.reason, ttl);
|
|
916
|
+
if (!byKey.has(state.providerKey)) {
|
|
917
|
+
this.providerCooldowns.set(state.providerKey, state.cooldownExpiresAt);
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
}
|
|
1243
921
|
}
|
|
1244
|
-
return `${ratio.toFixed(2)}/${Math.round(limit)}`;
|
|
1245
922
|
}
|
|
1246
|
-
|
|
1247
|
-
const
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
923
|
+
buildHealthSnapshot() {
|
|
924
|
+
const providers = this.healthManager.getSnapshot();
|
|
925
|
+
const cooldowns = [];
|
|
926
|
+
const now = Date.now();
|
|
927
|
+
for (const [providerKey, expiry] of this.providerCooldowns.entries()) {
|
|
928
|
+
if (!expiry || expiry <= now) {
|
|
929
|
+
continue;
|
|
930
|
+
}
|
|
931
|
+
cooldowns.push({
|
|
932
|
+
providerKey,
|
|
933
|
+
cooldownExpiresAt: expiry
|
|
934
|
+
});
|
|
935
|
+
}
|
|
936
|
+
return { providers, cooldowns };
|
|
1254
937
|
}
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
return null;
|
|
938
|
+
persistHealthSnapshot() {
|
|
939
|
+
if (!this.healthStore || typeof this.healthStore.persistSnapshot !== 'function') {
|
|
940
|
+
return;
|
|
1259
941
|
}
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
942
|
+
try {
|
|
943
|
+
const snapshot = this.buildHealthSnapshot();
|
|
944
|
+
this.healthStore.persistSnapshot(snapshot);
|
|
1263
945
|
}
|
|
1264
|
-
|
|
1265
|
-
|
|
946
|
+
catch {
|
|
947
|
+
// 持久化失败不影响路由主流程
|
|
1266
948
|
}
|
|
1267
|
-
return {
|
|
1268
|
-
providerId: parts[0],
|
|
1269
|
-
keyAlias: parts[1],
|
|
1270
|
-
modelId: parts.slice(2).join('.')
|
|
1271
|
-
};
|
|
1272
949
|
}
|
|
1273
950
|
}
|