@jsonstudio/rcc 0.89.555 → 0.89.611

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/dist/build-info.js +2 -2
  2. package/dist/modules/llmswitch/bridge.d.ts +43 -0
  3. package/dist/modules/llmswitch/bridge.js +103 -0
  4. package/dist/modules/llmswitch/bridge.js.map +1 -1
  5. package/dist/monitoring/semantic-config-loader.js +3 -1
  6. package/dist/monitoring/semantic-config-loader.js.map +1 -1
  7. package/dist/providers/core/runtime/http-transport-provider.d.ts +3 -0
  8. package/dist/providers/core/runtime/http-transport-provider.js +70 -4
  9. package/dist/providers/core/runtime/http-transport-provider.js.map +1 -1
  10. package/dist/providers/core/runtime/responses-provider.d.ts +2 -2
  11. package/dist/providers/core/runtime/responses-provider.js +33 -28
  12. package/dist/providers/core/runtime/responses-provider.js.map +1 -1
  13. package/dist/providers/core/utils/provider-error-reporter.js +7 -7
  14. package/dist/providers/core/utils/provider-error-reporter.js.map +1 -1
  15. package/dist/providers/core/utils/snapshot-writer.js +6 -2
  16. package/dist/providers/core/utils/snapshot-writer.js.map +1 -1
  17. package/dist/server/runtime/http-server/index.js +59 -47
  18. package/dist/server/runtime/http-server/index.js.map +1 -1
  19. package/dist/server/runtime/http-server/llmswitch-loader.d.ts +0 -1
  20. package/dist/server/runtime/http-server/llmswitch-loader.js +17 -21
  21. package/dist/server/runtime/http-server/llmswitch-loader.js.map +1 -1
  22. package/dist/server/runtime/http-server/request-executor.d.ts +6 -0
  23. package/dist/server/runtime/http-server/request-executor.js +113 -37
  24. package/dist/server/runtime/http-server/request-executor.js.map +1 -1
  25. package/node_modules/@jsonstudio/llms/dist/conversion/codecs/gemini-openai-codec.js +15 -1
  26. package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/iflow-web-search.d.ts +18 -0
  27. package/node_modules/@jsonstudio/llms/dist/conversion/compat/actions/iflow-web-search.js +87 -0
  28. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-gemini.json +14 -15
  29. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-glm.json +194 -190
  30. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-iflow.json +199 -195
  31. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-lmstudio.json +43 -43
  32. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/chat-qwen.json +20 -20
  33. package/node_modules/@jsonstudio/llms/dist/conversion/compat/profiles/responses-c4m.json +42 -42
  34. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
  35. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/compat/compat-types.d.ts +2 -0
  36. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/hub-pipeline.js +5 -1
  37. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/session-identifiers.d.ts +9 -0
  38. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/session-identifiers.js +76 -0
  39. package/node_modules/@jsonstudio/llms/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +31 -2
  40. package/node_modules/@jsonstudio/llms/dist/conversion/hub/process/chat-process.js +89 -25
  41. package/node_modules/@jsonstudio/llms/dist/conversion/responses/responses-openai-bridge.js +75 -4
  42. package/node_modules/@jsonstudio/llms/dist/conversion/shared/anthropic-message-utils.js +41 -6
  43. package/node_modules/@jsonstudio/llms/dist/conversion/shared/errors.d.ts +20 -0
  44. package/node_modules/@jsonstudio/llms/dist/conversion/shared/errors.js +28 -0
  45. package/node_modules/@jsonstudio/llms/dist/conversion/shared/responses-conversation-store.js +30 -3
  46. package/node_modules/@jsonstudio/llms/dist/conversion/shared/responses-output-builder.js +68 -6
  47. package/node_modules/@jsonstudio/llms/dist/filters/special/request-toolcalls-stringify.d.ts +13 -0
  48. package/node_modules/@jsonstudio/llms/dist/filters/special/request-toolcalls-stringify.js +103 -3
  49. package/node_modules/@jsonstudio/llms/dist/filters/special/response-tool-text-canonicalize.d.ts +16 -0
  50. package/node_modules/@jsonstudio/llms/dist/filters/special/response-tool-text-canonicalize.js +27 -3
  51. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/classifier.js +4 -2
  52. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine.d.ts +30 -0
  53. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/engine.js +618 -42
  54. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/health-manager.d.ts +23 -0
  55. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/health-manager.js +14 -0
  56. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/provider-registry.d.ts +15 -0
  57. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/provider-registry.js +40 -0
  58. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/routing-instructions.d.ts +34 -0
  59. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/routing-instructions.js +393 -0
  60. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/sticky-session-store.d.ts +3 -0
  61. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/sticky-session-store.js +110 -0
  62. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/tool-signals.js +0 -22
  63. package/node_modules/@jsonstudio/llms/dist/router/virtual-router/types.d.ts +41 -0
  64. package/node_modules/@jsonstudio/llms/dist/servertool/engine.js +42 -1
  65. package/node_modules/@jsonstudio/llms/dist/servertool/handlers/web-search.js +157 -4
  66. package/node_modules/@jsonstudio/llms/dist/servertool/types.d.ts +6 -0
  67. package/node_modules/@jsonstudio/llms/package.json +1 -1
  68. package/package.json +8 -5
  69. package/scripts/mock-provider/run-regressions.mjs +38 -2
  70. package/scripts/verify-apply-patch.mjs +132 -0
@@ -6,6 +6,8 @@ import { buildRoutingFeatures } from './features.js';
6
6
  import { ContextAdvisor } from './context-advisor.js';
7
7
  import { DEFAULT_MODEL_CONTEXT_TOKENS, DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
8
8
  import { getStatsCenter } from '../../telemetry/stats-center.js';
9
+ import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
10
+ import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
9
11
  export class VirtualRouterEngine {
10
12
  routing = {};
11
13
  providerRegistry = new ProviderRegistry();
@@ -20,6 +22,7 @@ export class VirtualRouterEngine {
20
22
  statsCenter = getStatsCenter();
21
23
  // Derived flags from VirtualRouterConfig/routing used by process / response layers.
22
24
  webSearchForce = false;
25
+ routingInstructionState = new Map();
23
26
  initialize(config) {
24
27
  this.validateConfig(config);
25
28
  this.routing = config.routing;
@@ -38,6 +41,22 @@ export class VirtualRouterEngine {
38
41
  }
39
42
  }
40
43
  route(request, metadata) {
44
+ const stickyKey = this.resolveStickyKey(metadata);
45
+ const baseState = this.getRoutingInstructionState(stickyKey);
46
+ let routingState = baseState;
47
+ const metadataInstructions = this.buildMetadataInstructions(metadata);
48
+ if (metadataInstructions.length > 0) {
49
+ routingState = applyRoutingInstructions(metadataInstructions, routingState);
50
+ }
51
+ const instructions = parseRoutingInstructions(request.messages);
52
+ if (instructions.length > 0) {
53
+ routingState = applyRoutingInstructions(instructions, routingState);
54
+ const effectiveKey = stickyKey || 'default';
55
+ this.routingInstructionState.set(effectiveKey, routingState);
56
+ request.messages = cleanMessagesFromRoutingInstructions(request.messages);
57
+ this.persistRoutingInstructionState(effectiveKey, routingState);
58
+ }
59
+ const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
41
60
  const features = buildRoutingFeatures(request, metadata);
42
61
  const classification = metadata.routeHint && metadata.routeHint.trim()
43
62
  ? {
@@ -49,7 +68,7 @@ export class VirtualRouterEngine {
49
68
  }
50
69
  : this.classifier.classify(features);
51
70
  const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
52
- const selection = this.selectProvider(requestedRoute, metadata, classification, features);
71
+ const selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
53
72
  const baseTarget = this.providerRegistry.buildTarget(selection.providerKey);
54
73
  const forceVision = this.routeHasForceFlag('vision');
55
74
  const target = {
@@ -73,8 +92,10 @@ export class VirtualRouterEngine {
73
92
  catch {
74
93
  // stats must never break routing
75
94
  }
76
- const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features);
77
- const formatted = this.formatVirtualRouterHit(selection.routeUsed, selection.poolId, selection.providerKey, target.modelId || '', hitReason);
95
+ const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features, routingMode);
96
+ const stickyScope = routingMode !== 'none' ? this.resolveSessionScope(metadata) : undefined;
97
+ const routeForLog = routingMode === 'sticky' ? 'sticky' : selection.routeUsed;
98
+ const formatted = this.formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope);
78
99
  if (formatted) {
79
100
  this.debug?.log?.(formatted);
80
101
  }
@@ -114,6 +135,11 @@ export class VirtualRouterEngine {
114
135
  if (event.fatal) {
115
136
  this.healthManager.tripProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
116
137
  }
138
+ else if (event.reason === 'rate_limit' && event.statusCode === 429) {
139
+ // 对可恢复的 429 错误使用短冷静期:在 cooldownMs 内将该 key 标记为不可用,
140
+ // 以便 Virtual Router 在随后的选路中优先尝试其他 key 或模型。
141
+ this.healthManager.cooldownProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
142
+ }
117
143
  else {
118
144
  this.healthManager.recordFailure(event.providerKey, event.reason);
119
145
  }
@@ -182,42 +208,151 @@ export class VirtualRouterEngine {
182
208
  }
183
209
  }
184
210
  }
185
- selectProvider(requestedRoute, metadata, classification, features) {
186
- const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
187
- const stickyKey = this.resolveStickyKey(metadata);
188
- const attempted = [];
189
- const visitedRoutes = new Set();
190
- const routeQueue = this.initializeRouteQueue(candidates);
191
- const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
192
- ? Math.max(0, features.estimatedTokens)
193
- : 0;
194
- while (routeQueue.length) {
195
- const routeName = routeQueue.shift();
196
- if (visitedRoutes.has(routeName)) {
197
- continue;
198
- }
199
- const routePools = this.routing[routeName];
200
- if (!this.routeHasTargets(routePools)) {
201
- visitedRoutes.add(routeName);
202
- attempted.push(`${routeName}:empty`);
203
- continue;
204
- }
205
- visitedRoutes.add(routeName);
206
- const orderedPools = this.sortRoutePools(routePools);
207
- for (const poolTier of orderedPools) {
208
- const { providerKey, poolTargets, tierId, failureHint } = this.trySelectFromTier(routeName, poolTier, stickyKey, estimatedTokens, features);
209
- if (providerKey) {
210
- return { providerKey, routeUsed: routeName, pool: poolTargets, poolId: tierId };
211
+ selectProvider(requestedRoute, metadata, classification, features, routingState) {
212
+ const activeState = routingState || this.getRoutingInstructionState(this.resolveStickyKey(metadata));
213
+ const forcedResolution = activeState.forcedTarget
214
+ ? this.resolveInstructionTarget(activeState.forcedTarget)
215
+ : null;
216
+ if (forcedResolution && forcedResolution.mode === 'exact') {
217
+ const forcedKey = forcedResolution.keys[0];
218
+ return {
219
+ providerKey: forcedKey,
220
+ routeUsed: requestedRoute,
221
+ pool: [forcedKey],
222
+ poolId: 'forced'
223
+ };
224
+ }
225
+ // sticky 语义:
226
+ // - 显式绑定到具体 key(alias/index)时,直接使用该 key;
227
+ // - provider / model 级别 sticky 解析为一组 providerKey;
228
+ // 在 sticky 这组 key「可用」之前,不会回落到 default 中的非 sticky provider。
229
+ let stickyResolution = null;
230
+ let stickyKeySet;
231
+ if (!forcedResolution && activeState.stickyTarget) {
232
+ stickyResolution = this.resolveInstructionTarget(activeState.stickyTarget);
233
+ if (stickyResolution && stickyResolution.mode === 'exact') {
234
+ const stickyKey = stickyResolution.keys[0];
235
+ // 已经被健康管理标记为不可用的 key 不能被 sticky 语法“复活”
236
+ if (this.healthManager.isAvailable(stickyKey)) {
237
+ return {
238
+ providerKey: stickyKey,
239
+ routeUsed: requestedRoute,
240
+ pool: [stickyKey],
241
+ poolId: 'sticky'
242
+ };
211
243
  }
212
- if (failureHint) {
213
- attempted.push(failureHint);
244
+ }
245
+ if (stickyResolution && stickyResolution.mode === 'filter' && stickyResolution.keys.length > 0) {
246
+ // 仅保留当前仍可用的 key;已被熔断/拉黑的 key 不会被 sticky 语法重新加入池子
247
+ const liveKeys = stickyResolution.keys.filter((key) => this.healthManager.isAvailable(key));
248
+ if (liveKeys.length > 0) {
249
+ stickyKeySet = new Set(liveKeys);
214
250
  }
215
251
  }
216
252
  }
217
- throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
253
+ const allowAliasRotation = Boolean(activeState.stickyTarget) &&
254
+ !activeState.stickyTarget?.keyAlias &&
255
+ activeState.stickyTarget?.keyIndex === undefined;
256
+ // force(filter) 优先级高于 sticky:显式 force 视为覆盖 sticky 约束。
257
+ if (forcedResolution && forcedResolution.mode === 'filter') {
258
+ const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
259
+ const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
260
+ if (filteredCandidates.length === 0) {
261
+ throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
262
+ requestedRoute,
263
+ allowedProviders: Array.from(activeState.allowedProviders),
264
+ disabledProviders: Array.from(activeState.disabledProviders)
265
+ });
266
+ }
267
+ const forcedKeySet = new Set(forcedResolution.keys);
268
+ return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, forcedKeySet, allowAliasRotation);
269
+ }
270
+ if (stickyKeySet && stickyKeySet.size > 0) {
271
+ const stickySelection = this.selectFromStickyPool(stickyKeySet, metadata, features, activeState, allowAliasRotation);
272
+ if (stickySelection) {
273
+ return stickySelection;
274
+ }
275
+ // sticky 池在本次请求中完全不可用(全部被黑名单/健康状态过滤):视为 sticky 池暂时失效,
276
+ // 本次回落到普通路由选择,但保留 stickyTarget,等待后续恢复。
277
+ }
278
+ // 无 sticky,或 sticky 池在本次请求中全部不可用(无可用 key):按原始分类结果执行正常路由选择。
279
+ const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates, features);
280
+ const filteredCandidates = this.filterCandidatesByRoutingState(candidates, activeState);
281
+ if (filteredCandidates.length === 0) {
282
+ throw new VirtualRouterError('No available providers after applying routing instructions', VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, {
283
+ requestedRoute,
284
+ allowedProviders: Array.from(activeState.allowedProviders),
285
+ disabledProviders: Array.from(activeState.disabledProviders)
286
+ });
287
+ }
288
+ return this.selectFromCandidates(filteredCandidates, metadata, classification, features, activeState, undefined, allowAliasRotation);
218
289
  }
219
- trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features) {
290
+ trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation) {
220
291
  let targets = Array.isArray(tier.targets) ? tier.targets : [];
292
+ // 基于本次请求 metadata 中的 excludedProviderKeys 做临时过滤:
293
+ // - 这些 key 仅在当前 route() 调用内被排除,不会写入 sticky 状态;
294
+ // - 主要用于 HTTP 层在同一请求内对 429 失败的 key 进行快速 failover。
295
+ const excludedRaw = features.metadata?.excludedProviderKeys &&
296
+ Array.isArray(features.metadata.excludedProviderKeys)
297
+ ? features.metadata.excludedProviderKeys
298
+ : [];
299
+ const excludedKeys = new Set(excludedRaw
300
+ .map((val) => (typeof val === 'string' ? val.trim() : ''))
301
+ .filter((val) => Boolean(val)));
302
+ if (excludedKeys.size > 0) {
303
+ targets = targets.filter((key) => !excludedKeys.has(key));
304
+ }
305
+ if (allowedProviders && allowedProviders.size > 0) {
306
+ targets = targets.filter(key => {
307
+ const providerId = this.extractProviderId(key);
308
+ return providerId && allowedProviders.has(providerId);
309
+ });
310
+ }
311
+ if (disabledProviders && disabledProviders.size > 0) {
312
+ targets = targets.filter((key) => {
313
+ const providerId = this.extractProviderId(key);
314
+ return providerId && !disabledProviders.has(providerId);
315
+ });
316
+ }
317
+ if (disabledKeysMap && disabledKeysMap.size > 0) {
318
+ targets = targets.filter((key) => {
319
+ const providerId = this.extractProviderId(key);
320
+ if (!providerId)
321
+ return true;
322
+ const disabledKeys = disabledKeysMap.get(providerId);
323
+ if (!disabledKeys || disabledKeys.size === 0)
324
+ return true;
325
+ const keyAlias = this.extractKeyAlias(key);
326
+ const keyIndex = this.extractKeyIndex(key);
327
+ if (keyAlias && disabledKeys.has(keyAlias)) {
328
+ return false;
329
+ }
330
+ if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
331
+ return false;
332
+ }
333
+ return true;
334
+ });
335
+ }
336
+ if (disabledModels && disabledModels.size > 0) {
337
+ targets = targets.filter((key) => {
338
+ const providerId = this.extractProviderId(key);
339
+ if (!providerId) {
340
+ return true;
341
+ }
342
+ const disabled = disabledModels.get(providerId);
343
+ if (!disabled || disabled.size === 0) {
344
+ return true;
345
+ }
346
+ const modelId = this.getProviderModelId(key);
347
+ if (!modelId) {
348
+ return true;
349
+ }
350
+ return !disabled.has(modelId);
351
+ });
352
+ }
353
+ if (requiredProviderKeys && requiredProviderKeys.size > 0) {
354
+ targets = targets.filter((key) => requiredProviderKeys.has(key));
355
+ }
221
356
  const serverToolRequired = features.metadata?.serverToolRequired === true;
222
357
  if (serverToolRequired) {
223
358
  const filtered = [];
@@ -269,7 +404,7 @@ export class VirtualRouterEngine {
269
404
  const providerKey = this.loadBalancer.select({
270
405
  routeName: `${routeName}:${tier.id}`,
271
406
  candidates: candidatePool,
272
- stickyKey,
407
+ stickyKey: allowAliasRotation ? undefined : stickyKey,
273
408
  availabilityCheck: (key) => this.healthManager.isAvailable(key)
274
409
  });
275
410
  if (providerKey) {
@@ -325,12 +460,415 @@ export class VirtualRouterEngine {
325
460
  return prefix;
326
461
  }
327
462
  resolveStickyKey(metadata) {
463
+ const sessionScope = this.resolveSessionScope(metadata);
464
+ if (sessionScope) {
465
+ return sessionScope;
466
+ }
328
467
  const resume = metadata.responsesResume;
329
468
  if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
330
469
  return resume.previousRequestId.trim();
331
470
  }
332
471
  return metadata.requestId;
333
472
  }
473
+ resolveSessionScope(metadata) {
474
+ const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
475
+ if (sessionId) {
476
+ return `session:${sessionId}`;
477
+ }
478
+ const conversationId = typeof metadata.conversationId === 'string' ? metadata.conversationId.trim() : '';
479
+ if (conversationId) {
480
+ return `conversation:${conversationId}`;
481
+ }
482
+ return undefined;
483
+ }
484
+ getRoutingInstructionState(stickyKey) {
485
+ const key = stickyKey || 'default';
486
+ if (this.routingInstructionState.has(key)) {
487
+ return this.routingInstructionState.get(key);
488
+ }
489
+ let initial = null;
490
+ // 仅对 session:/conversation: 作用域的 key 尝试从磁盘恢复持久化状态
491
+ if (key.startsWith('session:') || key.startsWith('conversation:')) {
492
+ initial = loadRoutingInstructionStateSync(key);
493
+ }
494
+ if (!initial) {
495
+ initial = {
496
+ forcedTarget: undefined,
497
+ stickyTarget: undefined,
498
+ allowedProviders: new Set(),
499
+ disabledProviders: new Set(),
500
+ disabledKeys: new Map(),
501
+ disabledModels: new Map()
502
+ };
503
+ }
504
+ this.routingInstructionState.set(key, initial);
505
+ return initial;
506
+ }
507
+ buildMetadataInstructions(metadata) {
508
+ const instructions = [];
509
+ if (Array.isArray(metadata.disabledProviderKeyAliases)) {
510
+ for (const entry of metadata.disabledProviderKeyAliases) {
511
+ const parsed = this.parseMetadataDisableDescriptor(entry);
512
+ if (parsed) {
513
+ instructions.push({ type: 'disable', ...parsed });
514
+ }
515
+ }
516
+ }
517
+ return instructions;
518
+ }
519
+ parseMetadataDisableDescriptor(entry) {
520
+ if (typeof entry !== 'string') {
521
+ return null;
522
+ }
523
+ const trimmed = entry.trim();
524
+ if (!trimmed) {
525
+ return null;
526
+ }
527
+ const parts = trimmed.split('.');
528
+ if (parts.length < 2) {
529
+ return null;
530
+ }
531
+ const provider = parts[0];
532
+ const alias = parts[1];
533
+ if (!provider || !alias) {
534
+ return null;
535
+ }
536
+ if (/^\d+$/.test(alias)) {
537
+ return { provider, keyIndex: Number.parseInt(alias, 10) };
538
+ }
539
+ return { provider, keyAlias: alias };
540
+ }
541
+ resolveRoutingMode(instructions, state) {
542
+ const hasForce = instructions.some((inst) => inst.type === 'force');
543
+ const hasAllow = instructions.some((inst) => inst.type === 'allow');
544
+ const hasClear = instructions.some((inst) => inst.type === 'clear');
545
+ if (hasClear) {
546
+ return 'none';
547
+ }
548
+ if (hasAllow || state.allowedProviders.size > 0) {
549
+ return 'sticky';
550
+ }
551
+ if (hasForce || state.forcedTarget) {
552
+ return 'force';
553
+ }
554
+ if (state.stickyTarget) {
555
+ return 'sticky';
556
+ }
557
+ return 'none';
558
+ }
559
+ resolveInstructionTarget(target) {
560
+ if (!target || !target.provider) {
561
+ return null;
562
+ }
563
+ const providerId = target.provider;
564
+ const providerKeys = this.providerRegistry.listProviderKeys(providerId);
565
+ if (providerKeys.length === 0) {
566
+ return null;
567
+ }
568
+ const alias = typeof target.keyAlias === 'string' ? target.keyAlias.trim() : '';
569
+ const aliasExplicit = alias.length > 0 && target.pathLength === 3;
570
+ if (aliasExplicit) {
571
+ const runtimeKey = this.providerRegistry.resolveRuntimeKeyByAlias(providerId, alias);
572
+ if (runtimeKey) {
573
+ return { mode: 'exact', keys: [runtimeKey] };
574
+ }
575
+ }
576
+ if (typeof target.keyIndex === 'number' && target.keyIndex > 0) {
577
+ const runtimeKey = this.providerRegistry.resolveRuntimeKeyByIndex(providerId, target.keyIndex);
578
+ if (runtimeKey) {
579
+ return { mode: 'exact', keys: [runtimeKey] };
580
+ }
581
+ }
582
+ if (target.model && target.model.trim()) {
583
+ const normalizedModel = target.model.trim();
584
+ const matchingKeys = providerKeys.filter((key) => {
585
+ const modelId = this.getProviderModelId(key);
586
+ return modelId === normalizedModel;
587
+ });
588
+ if (matchingKeys.length > 0) {
589
+ return { mode: 'filter', keys: matchingKeys };
590
+ }
591
+ }
592
+ if (alias && !aliasExplicit) {
593
+ const legacyKey = this.providerRegistry.resolveRuntimeKeyByAlias(providerId, alias);
594
+ if (legacyKey) {
595
+ return { mode: 'exact', keys: [legacyKey] };
596
+ }
597
+ }
598
+ return { mode: 'filter', keys: providerKeys };
599
+ }
600
+ filterCandidatesByRoutingState(routes, state) {
601
+ // console.log('[filter] routes:', routes, 'state:', {
602
+ // allowed: Array.from(state.allowedProviders),
603
+ // disabled: Array.from(state.disabledProviders)
604
+ // });
605
+ if (state.allowedProviders.size === 0 &&
606
+ state.disabledProviders.size === 0 &&
607
+ state.disabledKeys.size === 0 &&
608
+ state.disabledModels.size === 0) {
609
+ return routes;
610
+ }
611
+ return routes.filter(routeName => {
612
+ const pools = this.routing[routeName];
613
+ if (!pools)
614
+ return false;
615
+ for (const pool of pools) {
616
+ if (!Array.isArray(pool.targets) || pool.targets.length === 0) {
617
+ continue;
618
+ }
619
+ for (const providerKey of pool.targets) {
620
+ const providerId = this.extractProviderId(providerKey);
621
+ // console.log('[filter] checking', providerKey, 'id=', providerId);
622
+ if (!providerId)
623
+ continue;
624
+ if (state.allowedProviders.size > 0 && !state.allowedProviders.has(providerId)) {
625
+ // console.log('[filter] dropped by allowed list');
626
+ continue;
627
+ }
628
+ if (state.disabledProviders.has(providerId)) {
629
+ continue;
630
+ }
631
+ const disabledKeys = state.disabledKeys.get(providerId);
632
+ if (disabledKeys && disabledKeys.size > 0) {
633
+ const keyAlias = this.extractKeyAlias(providerKey);
634
+ const keyIndex = this.extractKeyIndex(providerKey);
635
+ if (keyAlias && disabledKeys.has(keyAlias)) {
636
+ continue;
637
+ }
638
+ if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
639
+ continue;
640
+ }
641
+ }
642
+ const disabledModels = state.disabledModels.get(providerId);
643
+ if (disabledModels && disabledModels.size > 0) {
644
+ const modelId = this.getProviderModelId(providerKey);
645
+ if (modelId && disabledModels.has(modelId)) {
646
+ continue;
647
+ }
648
+ }
649
+ return true;
650
+ }
651
+ }
652
+ return false;
653
+ });
654
+ }
655
+ selectFromCandidates(routes, metadata, classification, features, state, requiredProviderKeys, allowAliasRotation) {
656
+ const allowedProviders = new Set(state.allowedProviders);
657
+ const disabledProviders = new Set(state.disabledProviders);
658
+ const disabledKeysMap = new Map(Array.from(state.disabledKeys.entries()).map(([provider, keys]) => [
659
+ provider,
660
+ new Set(Array.from(keys).map(k => typeof k === 'string' ? k : k + 1))
661
+ ]));
662
+ const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
663
+ const stickyKey = allowAliasRotation ? undefined : this.resolveStickyKey(metadata);
664
+ const attempted = [];
665
+ const visitedRoutes = new Set();
666
+ const routeQueue = this.initializeRouteQueue(routes);
667
+ const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
668
+ ? Math.max(0, features.estimatedTokens)
669
+ : 0;
670
+ while (routeQueue.length) {
671
+ const routeName = routeQueue.shift();
672
+ if (visitedRoutes.has(routeName)) {
673
+ continue;
674
+ }
675
+ const routePools = this.routing[routeName];
676
+ if (!this.routeHasTargets(routePools)) {
677
+ visitedRoutes.add(routeName);
678
+ attempted.push(`${routeName}:empty`);
679
+ continue;
680
+ }
681
+ visitedRoutes.add(routeName);
682
+ const orderedPools = this.sortRoutePools(routePools);
683
+ for (const poolTier of orderedPools) {
684
+ const { providerKey, poolTargets, tierId, failureHint } = this.trySelectFromTier(routeName, poolTier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys, allowAliasRotation);
685
+ if (providerKey) {
686
+ return { providerKey, routeUsed: routeName, pool: poolTargets, poolId: tierId };
687
+ }
688
+ if (failureHint) {
689
+ attempted.push(failureHint);
690
+ }
691
+ }
692
+ }
693
+ const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
694
+ throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
695
+ }
696
+ extractProviderId(providerKey) {
697
+ const firstDot = providerKey.indexOf('.');
698
+ if (firstDot <= 0)
699
+ return null;
700
+ return providerKey.substring(0, firstDot);
701
+ }
702
+ /**
703
+ * 在已有候选路由集合上,筛选出真正挂载了 sticky 池内 providerKey 的路由,
704
+ * 并按 ROUTE_PRIORITY 进行排序;同时显式排除 tools 路由,保证一旦进入
705
+ * sticky 模式,就不会再命中独立的 tools 池(例如 glm/qwen 工具模型)。
706
+ * 若候选集合中完全没有挂载 sticky key 的路由,则尝试在 default 路由上兜底。
707
+ */
708
+ buildStickyRouteCandidatesFromFiltered(filteredCandidates, stickyKeySet) {
709
+ const routesWithSticky = [];
710
+ const candidateSet = new Set(filteredCandidates.filter((name) => name && name !== 'tools'));
711
+ for (const routeName of candidateSet) {
712
+ const pools = this.routing[routeName];
713
+ if (!this.routeHasTargets(pools)) {
714
+ continue;
715
+ }
716
+ const targets = this.flattenPoolTargets(pools);
717
+ if (!targets.some((key) => stickyKeySet.has(key))) {
718
+ continue;
719
+ }
720
+ routesWithSticky.push(routeName);
721
+ }
722
+ // 若当前候选路由中没有任何挂载 sticky key 的路由,尝试直接在 default 路由上兜底;
723
+ // 若 default 也不包含 sticky key,则视为 sticky 配置失效,由调用方回落到非 sticky 逻辑。
724
+ if (routesWithSticky.length === 0) {
725
+ const defaultPools = this.routing[DEFAULT_ROUTE];
726
+ if (this.routeHasTargets(defaultPools)) {
727
+ const targets = this.flattenPoolTargets(defaultPools);
728
+ if (targets.some((key) => stickyKeySet.has(key))) {
729
+ return [DEFAULT_ROUTE];
730
+ }
731
+ }
732
+ return [];
733
+ }
734
+ const ordered = this.sortByPriority(routesWithSticky);
735
+ const result = [];
736
+ let hasDefault = false;
737
+ for (const routeName of ordered) {
738
+ if (routeName === DEFAULT_ROUTE) {
739
+ hasDefault = true;
740
+ continue;
741
+ }
742
+ if (!result.includes(routeName)) {
743
+ result.push(routeName);
744
+ }
745
+ }
746
+ // default 路由若包含 sticky key,则始终放在候选列表最后,用于 sticky 模式兜底。
747
+ if (hasDefault && !result.includes(DEFAULT_ROUTE)) {
748
+ result.push(DEFAULT_ROUTE);
749
+ }
750
+ return result;
751
+ }
752
+ /**
753
+ * 在 sticky 模式下,仅在 sticky 池内选择 Provider:
754
+ * - stickyKeySet 表示已经解析并通过健康检查的 providerKey 集合;
755
+ * - 不再依赖 routing[*].targets 中是否挂载这些 key,避免「未初始化路由池」导致 sticky 池为空;
756
+ * - 仍然尊重 allowed/disabledProviders、disabledKeys、disabledModels 以及上下文长度。
757
+ */
758
+ selectFromStickyPool(stickyKeySet, metadata, features, state, allowAliasRotation) {
759
+ if (!stickyKeySet || stickyKeySet.size === 0) {
760
+ return null;
761
+ }
762
+ const allowedProviders = new Set(state.allowedProviders);
763
+ const disabledProviders = new Set(state.disabledProviders);
764
+ const disabledKeysMap = new Map(Array.from(state.disabledKeys.entries()).map(([provider, keys]) => [
765
+ provider,
766
+ new Set(Array.from(keys).map((k) => (typeof k === 'string' ? k : k + 1)))
767
+ ]));
768
+ const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
769
+ // 初始候选集合:sticky 池中的所有 key
770
+ let candidates = Array.from(stickyKeySet);
771
+ // 应用 provider 白名单 / 黑名单
772
+ if (allowedProviders.size > 0) {
773
+ candidates = candidates.filter((key) => {
774
+ const providerId = this.extractProviderId(key);
775
+ return providerId && allowedProviders.has(providerId);
776
+ });
777
+ }
778
+ if (disabledProviders.size > 0) {
779
+ candidates = candidates.filter((key) => {
780
+ const providerId = this.extractProviderId(key);
781
+ return providerId && !disabledProviders.has(providerId);
782
+ });
783
+ }
784
+ // 应用 key / model 级别黑名单
785
+ if (disabledKeysMap.size > 0 || disabledModels.size > 0) {
786
+ candidates = candidates.filter((key) => {
787
+ const providerId = this.extractProviderId(key);
788
+ if (!providerId) {
789
+ return true;
790
+ }
791
+ const disabledKeys = disabledKeysMap.get(providerId);
792
+ if (disabledKeys && disabledKeys.size > 0) {
793
+ const keyAlias = this.extractKeyAlias(key);
794
+ const keyIndex = this.extractKeyIndex(key);
795
+ if (keyAlias && disabledKeys.has(keyAlias)) {
796
+ return false;
797
+ }
798
+ if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
799
+ return false;
800
+ }
801
+ }
802
+ const disabledModelSet = disabledModels.get(providerId);
803
+ if (disabledModelSet && disabledModelSet.size > 0) {
804
+ const modelId = this.getProviderModelId(key);
805
+ if (modelId && disabledModelSet.has(modelId)) {
806
+ return false;
807
+ }
808
+ }
809
+ return true;
810
+ });
811
+ }
812
+ if (!candidates.length) {
813
+ return null;
814
+ }
815
+ const stickyKey = allowAliasRotation ? undefined : this.resolveStickyKey(metadata);
816
+ const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
817
+ ? Math.max(0, features.estimatedTokens)
818
+ : 0;
819
+ const tier = {
820
+ id: 'sticky-primary',
821
+ targets: candidates,
822
+ priority: 0
823
+ };
824
+ const { providerKey, poolTargets, tierId } = this.trySelectFromTier('sticky', tier, stickyKey, estimatedTokens, features, disabledProviders, disabledKeysMap, allowedProviders, disabledModels, stickyKeySet, allowAliasRotation);
825
+ if (!providerKey) {
826
+ return null;
827
+ }
828
+ return {
829
+ providerKey,
830
+ routeUsed: 'sticky',
831
+ pool: poolTargets,
832
+ poolId: tierId
833
+ };
834
+ }
835
+ extractKeyAlias(providerKey) {
836
+ const parts = providerKey.split('.');
837
+ if (parts.length === 3) {
838
+ return this.normalizeAliasDescriptor(parts[1]);
839
+ }
840
+ return null;
841
+ }
842
+ normalizeAliasDescriptor(alias) {
843
+ if (/^\d+-/.test(alias)) {
844
+ return alias.replace(/^\d+-/, '');
845
+ }
846
+ return alias;
847
+ }
848
+ extractKeyIndex(providerKey) {
849
+ const parts = providerKey.split('.');
850
+ if (parts.length === 2) {
851
+ const index = parseInt(parts[1], 10);
852
+ if (!isNaN(index) && index > 0) {
853
+ return index;
854
+ }
855
+ }
856
+ return undefined;
857
+ }
858
+ getProviderModelId(providerKey) {
859
+ const profile = this.providerRegistry.get(providerKey);
860
+ if (profile.modelId) {
861
+ return profile.modelId;
862
+ }
863
+ const parts = providerKey.split('.');
864
+ if (parts.length === 2) {
865
+ return parts[1] || null;
866
+ }
867
+ if (parts.length === 3) {
868
+ return parts[2] || null;
869
+ }
870
+ return null;
871
+ }
334
872
  mapProviderError(event) {
335
873
  // NOTE: mapProviderError is the only place where VirtualRouter translates providerErrorCenter
336
874
  // events into health signals. Classification is intentionally coarse; upstream providers
@@ -561,24 +1099,37 @@ export class VirtualRouterEngine {
561
1099
  }
562
1100
  return flattened;
563
1101
  }
564
- buildHitReason(routeUsed, providerKey, classification, features) {
1102
+ buildHitReason(routeUsed, providerKey, classification, features, mode) {
565
1103
  const reasoning = classification.reasoning || '';
566
- const primary = reasoning.split('|')[0] || '';
1104
+ let primary = reasoning.split('|')[0] || '';
567
1105
  const commandDetail = features.lastAssistantToolLabel;
1106
+ const isStickyMode = mode === 'sticky';
1107
+ if (isStickyMode &&
1108
+ (routeUsed === 'tools' || routeUsed === 'thinking' || routeUsed === 'coding')) {
1109
+ // sticky 模式下不再把 tools/thinking/coding 作为主标签,统一折叠为 sticky,
1110
+ // 避免日志中出现 "tools:last-tool-*" 这类误导性前缀。
1111
+ primary = '';
1112
+ }
568
1113
  const base = (() => {
569
1114
  if (routeUsed === 'tools') {
570
- return this.decorateWithDetail(primary || 'tools', primary, commandDetail);
1115
+ const label = isStickyMode ? 'sticky' : 'tools';
1116
+ return this.decorateWithDetail(primary || label, primary, commandDetail);
571
1117
  }
572
1118
  if (routeUsed === 'thinking') {
573
- return this.decorateWithDetail(primary || 'thinking', primary, commandDetail);
1119
+ const label = isStickyMode ? 'sticky' : 'thinking';
1120
+ return this.decorateWithDetail(primary || label, primary, commandDetail);
574
1121
  }
575
1122
  if (routeUsed === 'coding') {
576
- return this.decorateWithDetail(primary || 'coding', primary, commandDetail);
1123
+ const label = isStickyMode ? 'sticky' : 'coding';
1124
+ return this.decorateWithDetail(primary || label, primary, commandDetail);
577
1125
  }
578
1126
  if (routeUsed === 'web_search' || routeUsed === 'search') {
579
1127
  return this.decorateWithDetail(primary || routeUsed, primary, commandDetail);
580
1128
  }
581
1129
  if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
1130
+ if (isStickyMode) {
1131
+ return primary || 'sticky:default';
1132
+ }
582
1133
  return primary || 'fallback:default';
583
1134
  }
584
1135
  if (primary) {
@@ -592,6 +1143,28 @@ export class VirtualRouterEngine {
592
1143
  }
593
1144
  return base;
594
1145
  }
1146
+ isRoutingStateEmpty(state) {
1147
+ if (!state) {
1148
+ return true;
1149
+ }
1150
+ const noForced = !state.forcedTarget;
1151
+ const noSticky = !state.stickyTarget;
1152
+ const noAllowed = state.allowedProviders.size === 0;
1153
+ const noDisabledProviders = state.disabledProviders.size === 0;
1154
+ const noDisabledKeys = state.disabledKeys.size === 0;
1155
+ const noDisabledModels = state.disabledModels.size === 0;
1156
+ return noForced && noSticky && noAllowed && noDisabledProviders && noDisabledKeys && noDisabledModels;
1157
+ }
1158
+ persistRoutingInstructionState(key, state) {
1159
+ if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
1160
+ return;
1161
+ }
1162
+ if (this.isRoutingStateEmpty(state)) {
1163
+ saveRoutingInstructionStateAsync(key, null);
1164
+ return;
1165
+ }
1166
+ saveRoutingInstructionStateAsync(key, state);
1167
+ }
595
1168
  decorateWithDetail(baseLabel, primaryReason, detail) {
596
1169
  const normalizedDetail = detail && detail.trim();
597
1170
  if (!normalizedDetail) {
@@ -602,7 +1175,7 @@ export class VirtualRouterEngine {
602
1175
  }
603
1176
  return `${baseLabel}(${normalizedDetail})`;
604
1177
  }
605
- formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason) {
1178
+ formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope) {
606
1179
  try {
607
1180
  // 生成本地时间戳
608
1181
  const now = new Date();
@@ -613,20 +1186,23 @@ export class VirtualRouterEngine {
613
1186
  const prefixColor = '\x1b[38;5;208m';
614
1187
  const reset = '\x1b[0m';
615
1188
  const timeColor = '\x1b[90m'; // 灰色
1189
+ const stickyColor = '\x1b[33m'; // 黄色
616
1190
  const routeColor = this.resolveRouteColor(routeName);
617
1191
  const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
618
1192
  const timeLabel = `${timeColor}${timestamp}${reset}`;
619
1193
  const { providerLabel, resolvedModel } = this.describeTargetProvider(providerKey, modelId);
620
1194
  const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
621
1195
  const targetLabel = `${routeLabel} -> ${providerLabel}${resolvedModel ? '.' + resolvedModel : ''}`;
1196
+ const stickyLabel = stickyScope ? ` ${stickyColor}[sticky:${stickyScope}]${reset}` : '';
622
1197
  const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
623
- return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${reasonLabel}${reset}`;
1198
+ return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${reset}`;
624
1199
  }
625
1200
  catch {
626
1201
  const now = new Date();
627
1202
  const timestamp = now.toLocaleTimeString('zh-CN', { hour12: false });
628
1203
  const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
629
- return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${hitReason ? ` reason=${hitReason}` : ''}`;
1204
+ const stickyLabel = stickyScope ? ` [sticky:${stickyScope}]` : '';
1205
+ return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}`;
630
1206
  }
631
1207
  }
632
1208
  resolveRouteColor(routeName) {