@jsonstudio/llms 0.6.954 → 0.6.1172

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/dist/conversion/hub/operation-table/operation-table-runner.d.ts +18 -0
  2. package/dist/conversion/hub/operation-table/operation-table-runner.js +158 -0
  3. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.d.ts +8 -0
  4. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +303 -0
  5. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.d.ts +8 -0
  6. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +413 -0
  7. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.d.ts +7 -0
  8. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +841 -0
  9. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.d.ts +21 -0
  10. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +535 -0
  11. package/dist/conversion/hub/ops/operations.d.ts +19 -0
  12. package/dist/conversion/hub/ops/operations.js +126 -0
  13. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +9 -0
  14. package/dist/conversion/hub/pipeline/hub-pipeline.js +489 -19
  15. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +6 -0
  16. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +11 -0
  17. package/dist/conversion/hub/policy/policy-engine.js +41 -9
  18. package/dist/conversion/hub/policy/protocol-spec.d.ts +25 -0
  19. package/dist/conversion/hub/policy/protocol-spec.js +73 -23
  20. package/dist/conversion/hub/process/chat-process.js +252 -41
  21. package/dist/conversion/hub/response/provider-response.js +175 -2
  22. package/dist/conversion/hub/response/response-runtime.js +1 -1
  23. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.d.ts +1 -8
  24. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +1 -365
  25. package/dist/conversion/hub/semantic-mappers/chat-mapper.d.ts +1 -8
  26. package/dist/conversion/hub/semantic-mappers/chat-mapper.js +1 -467
  27. package/dist/conversion/hub/semantic-mappers/gemini-mapper.d.ts +1 -7
  28. package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +1 -903
  29. package/dist/conversion/hub/semantic-mappers/responses-mapper.d.ts +1 -21
  30. package/dist/conversion/hub/semantic-mappers/responses-mapper.js +1 -593
  31. package/dist/conversion/hub/tool-surface/tool-surface-engine.d.ts +18 -0
  32. package/dist/conversion/hub/tool-surface/tool-surface-engine.js +571 -0
  33. package/dist/conversion/responses/responses-openai-bridge.js +14 -2
  34. package/dist/conversion/shared/bridge-message-utils.js +2 -8
  35. package/dist/conversion/shared/bridge-policies.js +5 -105
  36. package/dist/conversion/shared/gemini-tool-utils.js +89 -15
  37. package/dist/conversion/shared/protocol-field-allowlists.d.ts +7 -0
  38. package/dist/conversion/shared/protocol-field-allowlists.js +145 -0
  39. package/dist/conversion/shared/reasoning-tool-normalizer.js +4 -2
  40. package/dist/conversion/shared/snapshot-hooks.js +166 -3
  41. package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
  42. package/dist/conversion/shared/text-markup-normalizer.js +345 -9
  43. package/dist/conversion/shared/thought-signature-validator.d.ts +52 -0
  44. package/dist/conversion/shared/thought-signature-validator.js +170 -0
  45. package/dist/conversion/shared/tool-argument-repairer.d.ts +39 -0
  46. package/dist/conversion/shared/tool-argument-repairer.js +56 -0
  47. package/dist/conversion/shared/tool-call-id-manager.d.ts +113 -0
  48. package/dist/conversion/shared/tool-call-id-manager.js +231 -0
  49. package/dist/conversion/shared/tool-canonicalizer.js +2 -11
  50. package/dist/router/virtual-router/bootstrap.js +70 -5
  51. package/dist/router/virtual-router/context-advisor.d.ts +4 -0
  52. package/dist/router/virtual-router/context-advisor.js +3 -0
  53. package/dist/router/virtual-router/context-weighted.d.ts +31 -0
  54. package/dist/router/virtual-router/context-weighted.js +54 -0
  55. package/dist/router/virtual-router/engine-selection.js +284 -47
  56. package/dist/router/virtual-router/engine.d.ts +3 -0
  57. package/dist/router/virtual-router/engine.js +142 -33
  58. package/dist/router/virtual-router/health-weighted.d.ts +25 -0
  59. package/dist/router/virtual-router/health-weighted.js +63 -0
  60. package/dist/router/virtual-router/load-balancer.d.ts +2 -0
  61. package/dist/router/virtual-router/load-balancer.js +45 -16
  62. package/dist/router/virtual-router/routing-instructions.js +17 -1
  63. package/dist/router/virtual-router/sticky-session-store.js +136 -24
  64. package/dist/router/virtual-router/stop-message-file-resolver.d.ts +1 -0
  65. package/dist/router/virtual-router/stop-message-file-resolver.js +74 -0
  66. package/dist/router/virtual-router/stop-message-state-sync.d.ts +15 -0
  67. package/dist/router/virtual-router/stop-message-state-sync.js +57 -0
  68. package/dist/router/virtual-router/types.d.ts +98 -0
  69. package/dist/servertool/clock/config.d.ts +7 -0
  70. package/dist/servertool/clock/config.js +27 -0
  71. package/dist/servertool/clock/daemon.d.ts +3 -0
  72. package/dist/servertool/clock/daemon.js +79 -0
  73. package/dist/servertool/clock/io.d.ts +2 -0
  74. package/dist/servertool/clock/io.js +13 -0
  75. package/dist/servertool/clock/paths.d.ts +4 -0
  76. package/dist/servertool/clock/paths.js +25 -0
  77. package/dist/servertool/clock/session-store.d.ts +3 -0
  78. package/dist/servertool/clock/session-store.js +56 -0
  79. package/dist/servertool/clock/state.d.ts +5 -0
  80. package/dist/servertool/clock/state.js +62 -0
  81. package/dist/servertool/clock/task-store.d.ts +5 -0
  82. package/dist/servertool/clock/task-store.js +4 -0
  83. package/dist/servertool/clock/tasks.d.ts +17 -0
  84. package/dist/servertool/clock/tasks.js +221 -0
  85. package/dist/servertool/clock/types.d.ts +36 -0
  86. package/dist/servertool/clock/types.js +1 -0
  87. package/dist/servertool/engine.d.ts +2 -0
  88. package/dist/servertool/engine.js +161 -7
  89. package/dist/servertool/followup-shadow.d.ts +16 -0
  90. package/dist/servertool/followup-shadow.js +145 -0
  91. package/dist/servertool/handlers/apply-patch-guard.js +1 -265
  92. package/dist/servertool/handlers/clock-auto.d.ts +1 -0
  93. package/dist/servertool/handlers/clock-auto.js +160 -0
  94. package/dist/servertool/handlers/clock.d.ts +1 -0
  95. package/dist/servertool/handlers/clock.js +197 -0
  96. package/dist/servertool/handlers/exec-command-guard.js +7 -555
  97. package/dist/servertool/handlers/followup-request-builder.d.ts +15 -7
  98. package/dist/servertool/handlers/followup-request-builder.js +248 -28
  99. package/dist/servertool/handlers/gemini-empty-reply-continue.js +62 -169
  100. package/dist/servertool/handlers/iflow-model-error-retry.js +18 -28
  101. package/dist/servertool/handlers/recursive-detection-guard.d.ts +1 -0
  102. package/dist/servertool/handlers/recursive-detection-guard.js +333 -0
  103. package/dist/servertool/handlers/stop-message-auto.js +47 -175
  104. package/dist/servertool/handlers/vision.d.ts +7 -1
  105. package/dist/servertool/handlers/vision.js +61 -117
  106. package/dist/servertool/handlers/web-search.d.ts +7 -1
  107. package/dist/servertool/handlers/web-search.js +122 -105
  108. package/dist/servertool/reenter-backend.d.ts +23 -0
  109. package/dist/servertool/reenter-backend.js +18 -0
  110. package/dist/servertool/server-side-tools.d.ts +3 -2
  111. package/dist/servertool/server-side-tools.js +64 -10
  112. package/dist/servertool/types.d.ts +92 -3
  113. package/dist/sse/json-to-sse/event-generators/responses.js +3 -21
  114. package/dist/sse/shared/serializers/responses-event-serializer.d.ts +8 -0
  115. package/dist/sse/shared/serializers/responses-event-serializer.js +19 -0
  116. package/dist/sse/shared/writer.js +24 -7
  117. package/dist/tools/apply-patch/execution-capturer.js +3 -1
  118. package/dist/tools/apply-patch/json/parse-loose.d.ts +3 -0
  119. package/dist/tools/apply-patch/json/parse-loose.js +139 -0
  120. package/dist/tools/apply-patch/patch-text/context-diff.d.ts +1 -0
  121. package/dist/tools/apply-patch/patch-text/context-diff.js +173 -0
  122. package/dist/tools/apply-patch/patch-text/git-diff.d.ts +1 -0
  123. package/dist/tools/apply-patch/patch-text/git-diff.js +138 -0
  124. package/dist/tools/apply-patch/patch-text/looks-like-patch.d.ts +1 -0
  125. package/dist/tools/apply-patch/patch-text/looks-like-patch.js +13 -0
  126. package/dist/tools/apply-patch/patch-text/normalize.d.ts +3 -0
  127. package/dist/tools/apply-patch/patch-text/normalize.js +262 -0
  128. package/dist/tools/apply-patch/structured/coercion.d.ts +3 -0
  129. package/dist/tools/apply-patch/structured/coercion.js +82 -0
  130. package/dist/tools/apply-patch/validation/shared.d.ts +3 -0
  131. package/dist/tools/apply-patch/validation/shared.js +6 -0
  132. package/dist/tools/apply-patch/validator.d.ts +2 -2
  133. package/dist/tools/apply-patch/validator.js +6 -556
  134. package/package.json +1 -1
@@ -1,3 +1,5 @@
1
+ import { computeContextMultiplier, computeEffectiveSafeWindowTokens, resolveContextWeightedConfig } from './context-weighted.js';
2
+ import { computeHealthWeight, resolveHealthWeightedConfig } from './health-weighted.js';
1
3
  import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
2
4
  export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
3
5
  const state = options.routingState ?? activeState;
@@ -294,12 +296,13 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
294
296
  if (excludedKeys.size > 0) {
295
297
  targets = targets.filter((key) => !excludedKeys.has(key));
296
298
  }
299
+ const isRecoveryAttempt = excludedKeys.size > 0;
300
+ const singleCandidateFallback = targets.length === 1 ? targets[0] : undefined;
297
301
  if (targets.length > 0) {
298
- const cooled = targets.filter((key) => !deps.isProviderCoolingDown(key));
299
- // provider 兜底:当一个 tier 只有一个候选 key 时,不因 cooldown 造成路由池为空。
300
- if (cooled.length > 0 || targets.length !== 1) {
301
- targets = cooled;
302
- }
302
+ // Always respect cooldown signals. If a route/tier is depleted due to cooldown,
303
+ // routing is expected to fall back to other tiers/routes (e.g. longcontext → default),
304
+ // rather than repeatedly selecting the cooled-down provider.
305
+ targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
303
306
  }
304
307
  if (allowedProviders && allowedProviders.size > 0) {
305
308
  targets = targets.filter((key) => {
@@ -399,6 +402,10 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
399
402
  const prioritizedPools = buildContextCandidatePools(contextResult);
400
403
  const quotaView = deps.quotaView;
401
404
  const now = quotaView ? Date.now() : 0;
405
+ const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
406
+ const contextWeightedCfg = resolveContextWeightedConfig(deps.loadBalancer.getPolicy().contextWeighted);
407
+ const warnRatio = deps.contextAdvisor.getConfig().warnRatio;
408
+ const nowForWeights = Date.now();
402
409
  const selectFirstAvailable = (candidates) => {
403
410
  for (const key of candidates) {
404
411
  if (deps.healthManager.isAvailable(key)) {
@@ -407,32 +414,148 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
407
414
  }
408
415
  return null;
409
416
  };
410
- const selectWithQuota = (candidates) => {
417
+ const resolvePriorityMeta = (orderedTargets) => {
418
+ // Priority mode semantics (strict group priority + alias-level balancing):
419
+ // - Targets are interpreted as ordered (providerId, modelId) groups.
420
+ // - Group base priorities: 100, 90, 80, ... (step=10) by appearance order.
421
+ // - Within a group (different auth aliases), base scores: 100, 99, 98, ... (step=1).
422
+ //
423
+ // Group selection is strict: always use the best group until it is unavailable.
424
+ // Alias selection is balanced within the chosen group (RR / health-weighted / context-weighted).
425
+ const meta = new Map();
426
+ if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
427
+ return meta;
428
+ }
429
+ let groupIndex = -1;
430
+ let aliasOffset = 0;
431
+ let lastGroupKey = '';
432
+ for (const key of orderedTargets) {
433
+ const providerId = extractProviderId(key) ?? '';
434
+ const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
435
+ const groupKey = `${providerId}::${modelId}`;
436
+ if (groupKey !== lastGroupKey) {
437
+ groupIndex += 1;
438
+ aliasOffset = 0;
439
+ lastGroupKey = groupKey;
440
+ }
441
+ const groupBase = 100 - groupIndex * 10;
442
+ const base = groupBase - aliasOffset;
443
+ meta.set(key, { groupId: `${providerId}.${modelId}`, groupBase, base });
444
+ aliasOffset += 1;
445
+ }
446
+ return meta;
447
+ };
448
+ const pickPriorityGroup = (candidates, orderedTargets, penalties) => {
449
+ const meta = resolvePriorityMeta(orderedTargets);
450
+ let bestGroupId = null;
451
+ let bestScore = Number.NEGATIVE_INFINITY;
452
+ for (const key of candidates) {
453
+ if (!deps.healthManager.isAvailable(key))
454
+ continue;
455
+ const m = meta.get(key);
456
+ if (!m)
457
+ continue;
458
+ const penalty = penalties ? Math.max(0, Math.floor(penalties[key] ?? 0)) : 0;
459
+ const score = m.base - penalty;
460
+ if (score > bestScore) {
461
+ bestScore = score;
462
+ bestGroupId = m.groupId;
463
+ }
464
+ }
465
+ if (!bestGroupId)
466
+ return null;
467
+ const groupCandidates = candidates.filter((key) => meta.get(key)?.groupId === bestGroupId);
468
+ return groupCandidates.length ? { groupId: bestGroupId, groupCandidates } : null;
469
+ };
470
+ const computeContextWeightMultipliers = (candidates) => {
471
+ if (!contextWeightedCfg.enabled) {
472
+ return null;
473
+ }
474
+ const eff = {};
475
+ let ref = 1;
476
+ for (const key of candidates) {
477
+ const usage = contextResult.usage?.[key];
478
+ const limit = usage && typeof usage.limit === 'number' && Number.isFinite(usage.limit) ? Math.floor(usage.limit) : 0;
479
+ const safeEff = computeEffectiveSafeWindowTokens({
480
+ modelMaxTokens: Math.max(1, limit),
481
+ warnRatio,
482
+ clientCapTokens: contextWeightedCfg.clientCapTokens
483
+ });
484
+ eff[key] = safeEff;
485
+ if (safeEff > ref) {
486
+ ref = safeEff;
487
+ }
488
+ }
489
+ return { ref, eff };
490
+ };
491
+ const selectWithQuota = (candidates, isSafePool) => {
411
492
  if (!quotaView) {
412
493
  if (tier.mode === 'priority') {
413
- const selected = selectFirstAvailable(candidates);
414
- if (!selected && candidates.length === 1) {
415
- return candidates[0];
494
+ if (isRecoveryAttempt) {
495
+ return selectFirstAvailable(candidates);
496
+ }
497
+ const group = pickPriorityGroup(candidates, tier.targets);
498
+ if (!group) {
499
+ return null;
416
500
  }
417
- return selected;
501
+ const weights = (() => {
502
+ if (!isSafePool)
503
+ return undefined;
504
+ const ctx = computeContextWeightMultipliers(group.groupCandidates);
505
+ if (!ctx)
506
+ return undefined;
507
+ const out = {};
508
+ for (const key of group.groupCandidates) {
509
+ const m = computeContextMultiplier({
510
+ effectiveSafeRefTokens: ctx.ref,
511
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
512
+ cfg: contextWeightedCfg
513
+ });
514
+ out[key] = Math.max(1, Math.round(100 * m));
515
+ }
516
+ return out;
517
+ })();
518
+ return deps.loadBalancer.select({
519
+ routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
520
+ candidates: group.groupCandidates,
521
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
522
+ weights,
523
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
524
+ }, 'round-robin');
418
525
  }
526
+ const weights = (() => {
527
+ if (!isSafePool || !contextWeightedCfg.enabled)
528
+ return undefined;
529
+ const ctx = computeContextWeightMultipliers(candidates);
530
+ if (!ctx)
531
+ return undefined;
532
+ const out = {};
533
+ for (const key of candidates) {
534
+ const m = computeContextMultiplier({
535
+ effectiveSafeRefTokens: ctx.ref,
536
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
537
+ cfg: contextWeightedCfg
538
+ });
539
+ out[key] = Math.max(1, Math.round(100 * m));
540
+ }
541
+ return out;
542
+ })();
419
543
  const selected = deps.loadBalancer.select({
420
544
  routeName: `${routeName}:${tier.id}`,
421
545
  candidates,
422
546
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
547
+ weights,
423
548
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
424
549
  }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
425
- if (!selected && candidates.length === 1) {
426
- return candidates[0];
427
- }
428
550
  return selected;
429
551
  }
430
552
  const buckets = new Map();
553
+ let order = 0;
431
554
  for (const key of candidates) {
432
555
  const entry = quotaView(key);
433
556
  if (!entry) {
434
557
  const list = buckets.get(100) ?? [];
435
- list.push(key);
558
+ list.push({ key, penalty: 0, order: order++ });
436
559
  buckets.set(100, list);
437
560
  continue;
438
561
  }
@@ -448,50 +571,169 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
448
571
  const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier)
449
572
  ? entry.priorityTier
450
573
  : 100;
574
+ const penaltyRaw = entry.selectionPenalty;
575
+ const penalty = typeof penaltyRaw === 'number' && Number.isFinite(penaltyRaw) && penaltyRaw > 0 ? Math.floor(penaltyRaw) : 0;
451
576
  const list = buckets.get(tierPriority) ?? [];
452
- list.push(key);
577
+ list.push({ key, penalty, order: order++ });
453
578
  buckets.set(tierPriority, list);
454
579
  }
455
580
  const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
456
581
  for (const priority of sortedPriorities) {
457
- const bucketCandidates = buckets.get(priority) ?? [];
458
- if (!bucketCandidates.length) {
582
+ const bucket = buckets.get(priority) ?? [];
583
+ if (!bucket.length) {
459
584
  continue;
460
585
  }
461
- if (tier.mode === 'priority') {
462
- const selected = selectFirstAvailable(bucketCandidates);
463
- if (selected) {
464
- return selected;
586
+ bucket.sort((a, b) => (a.penalty - b.penalty) || (a.order - b.order));
587
+ const bucketCandidates = bucket.map((item) => item.key);
588
+ // antigravity special: avoid rotating across keys while the current key is healthy.
589
+ // Rationale: some upstream gateways reject rapid cross-key switching even when quota exists,
590
+ // causing repeated 429s. We therefore pin a single key per (providerId, modelId) until it is
591
+ // excluded by quota/cooldown, then fail over to the next available key.
592
+ //
593
+ // This is only applied when the request has no session-level sticky key, to avoid breaking
594
+ // explicit session stickiness.
595
+ const shouldPinAntigravityModel = (() => {
596
+ // Only respect explicit session/conversation stickiness. requestId-scoped sticky keys
597
+ // (used for request-chain pinning) should not prevent global antigravity key pinning.
598
+ if (typeof stickyKey === 'string' && (stickyKey.startsWith('session:') || stickyKey.startsWith('conversation:'))) {
599
+ return false;
600
+ }
601
+ if (bucketCandidates.length < 2) {
602
+ return false;
603
+ }
604
+ let modelId = null;
605
+ for (const key of bucketCandidates) {
606
+ const providerId = extractProviderId(key);
607
+ if (providerId !== 'antigravity') {
608
+ return false;
609
+ }
610
+ const candidateModel = getProviderModelId(key, deps.providerRegistry);
611
+ if (!candidateModel) {
612
+ return false;
613
+ }
614
+ if (modelId === null) {
615
+ modelId = candidateModel;
616
+ }
617
+ else if (modelId !== candidateModel) {
618
+ return false;
619
+ }
620
+ }
621
+ return Boolean(modelId);
622
+ })();
623
+ if (shouldPinAntigravityModel && !isRecoveryAttempt) {
624
+ const pinned = selectFirstAvailable(bucketCandidates);
625
+ if (pinned) {
626
+ return pinned;
465
627
  }
466
628
  }
467
- else {
468
- const selected = deps.loadBalancer.select({
469
- routeName: `${routeName}:${tier.id}`,
470
- candidates: bucketCandidates,
471
- stickyKey: options.allowAliasRotation ? undefined : stickyKey,
472
- availabilityCheck: (key) => deps.healthManager.isAvailable(key)
473
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
474
- if (selected) {
475
- return selected;
629
+ const bucketPenaltyMap = {};
630
+ for (const item of bucket) {
631
+ bucketPenaltyMap[item.key] = item.penalty;
632
+ }
633
+ const bucketWeights = {};
634
+ const bucketMultipliers = {};
635
+ for (const item of bucket) {
636
+ if (healthWeightedCfg.enabled) {
637
+ const entry = quotaView(item.key);
638
+ const { weight, multiplier } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
639
+ bucketWeights[item.key] = weight;
640
+ bucketMultipliers[item.key] = multiplier;
641
+ }
642
+ else {
643
+ // Legacy: penalty => lower weight, but never zero (unhealthy should still get a chance).
644
+ bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
645
+ bucketMultipliers[item.key] = 1;
646
+ }
647
+ }
648
+ if (isSafePool && contextWeightedCfg.enabled) {
649
+ const ctx = computeContextWeightMultipliers(bucketCandidates);
650
+ if (ctx) {
651
+ for (const key of bucketCandidates) {
652
+ const m = computeContextMultiplier({
653
+ effectiveSafeRefTokens: ctx.ref,
654
+ effectiveSafeTokens: ctx.eff[key] ?? 1,
655
+ cfg: contextWeightedCfg
656
+ });
657
+ bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
658
+ }
476
659
  }
477
660
  }
478
- }
479
- // default 路由永不因 quota gating 而“空池”:
480
- // 当 quotaView 过滤后没有任何可用候选时,默认路由允许忽略 quotaView,
481
- // 继续按健康/负载均衡选择一个 providerKey(但不覆盖 forced/required 约束)。
482
- const quotaBypassAllowed = routeName === DEFAULT_ROUTE && (!requiredProviderKeys || requiredProviderKeys.size === 0);
483
- if (quotaBypassAllowed) {
484
661
  if (tier.mode === 'priority') {
485
- const selected = selectFirstAvailable(candidates);
486
- if (selected) {
487
- return selected;
662
+ if (!isRecoveryAttempt) {
663
+ const group = pickPriorityGroup(bucketCandidates, tier.targets, bucketPenaltyMap);
664
+ if (!group) {
665
+ continue;
666
+ }
667
+ const groupWeights = {};
668
+ for (const key of group.groupCandidates) {
669
+ groupWeights[key] = bucketWeights[key] ?? 1;
670
+ }
671
+ const selected = deps.loadBalancer.select({
672
+ routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
673
+ candidates: group.groupCandidates,
674
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
675
+ weights: groupWeights,
676
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
677
+ }, 'round-robin');
678
+ if (selected) {
679
+ return selected;
680
+ }
681
+ continue;
488
682
  }
683
+ if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
684
+ let best = null;
685
+ let bestM = Number.NEGATIVE_INFINITY;
686
+ for (const key of bucketCandidates) {
687
+ if (!deps.healthManager.isAvailable(key))
688
+ continue;
689
+ const m = bucketMultipliers[key] ?? 1;
690
+ if (m > bestM) {
691
+ bestM = m;
692
+ best = key;
693
+ }
694
+ }
695
+ if (best) {
696
+ return best;
697
+ }
698
+ continue;
699
+ }
700
+ else if (isRecoveryAttempt) {
701
+ const recovered = selectFirstAvailable(bucketCandidates);
702
+ if (recovered)
703
+ return recovered;
704
+ continue;
705
+ }
706
+ // (unreachable) recovery handled above
489
707
  }
490
708
  else {
709
+ if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
710
+ let best = null;
711
+ let bestM = Number.NEGATIVE_INFINITY;
712
+ for (const key of bucketCandidates) {
713
+ if (!deps.healthManager.isAvailable(key))
714
+ continue;
715
+ const m = bucketMultipliers[key] ?? 1;
716
+ if (m > bestM) {
717
+ bestM = m;
718
+ best = key;
719
+ }
720
+ }
721
+ if (best) {
722
+ return best;
723
+ }
724
+ continue;
725
+ }
726
+ else if (isRecoveryAttempt) {
727
+ const recovered = selectFirstAvailable(bucketCandidates);
728
+ if (recovered)
729
+ return recovered;
730
+ continue;
731
+ }
491
732
  const selected = deps.loadBalancer.select({
492
- routeName: `${routeName}:${tier.id}:quota-bypass`,
493
- candidates,
733
+ routeName: `${routeName}:${tier.id}`,
734
+ candidates: bucketCandidates,
494
735
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
736
+ weights: bucketWeights,
495
737
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
496
738
  }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
497
739
  if (selected) {
@@ -499,14 +741,10 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
499
741
  }
500
742
  }
501
743
  }
502
- // 单 provider 兜底:当只剩一个候选 key 时,不因 quota/blacklist/cooldown 或健康状态过滤导致无 provider。
503
- if (candidates.length === 1) {
504
- return candidates[0];
505
- }
506
744
  return null;
507
745
  };
508
746
  for (const candidatePool of prioritizedPools) {
509
- const providerKey = selectWithQuota(candidatePool);
747
+ const providerKey = selectWithQuota(candidatePool, candidatePool === contextResult.safe);
510
748
  if (providerKey) {
511
749
  return { providerKey, poolTargets: tier.targets, tierId: tier.id };
512
750
  }
@@ -802,7 +1040,6 @@ function buildContextCandidatePools(result) {
802
1040
  const ordered = [];
803
1041
  if (result.safe.length) {
804
1042
  ordered.push(result.safe);
805
- return ordered;
806
1043
  }
807
1044
  if (result.risky.length) {
808
1045
  ordered.push(result.risky);
@@ -5,6 +5,7 @@ import type { ProviderQuotaView } from './types.js';
5
5
  interface RoutingInstructionStateStore {
6
6
  loadSync(key: string): RoutingInstructionState | null;
7
7
  saveAsync(key: string, state: RoutingInstructionState | null): void;
8
+ saveSync?: (key: string, state: RoutingInstructionState | null) => void;
8
9
  }
9
10
  export declare class VirtualRouterEngine {
10
11
  private routing;
@@ -63,9 +64,11 @@ export declare class VirtualRouterEngine {
63
64
  private providerHealthConfig;
64
65
  private resolveStickyKey;
65
66
  private resolveSessionScope;
67
+ private resolveStopMessageScope;
66
68
  private getRoutingInstructionState;
67
69
  private buildMetadataInstructions;
68
70
  private parseMetadataDisableDescriptor;
71
+ private parseMetadataForceProviderKey;
69
72
  private resolveRoutingMode;
70
73
  private resolveInstructionTarget;
71
74
  private filterCandidatesByRoutingState;