@jsonstudio/llms 0.6.954 → 0.6.1164

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/dist/conversion/hub/operation-table/operation-table-runner.d.ts +18 -0
  2. package/dist/conversion/hub/operation-table/operation-table-runner.js +158 -0
  3. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.d.ts +8 -0
  4. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +303 -0
  5. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.d.ts +8 -0
  6. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +413 -0
  7. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.d.ts +7 -0
  8. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +841 -0
  9. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.d.ts +21 -0
  10. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +535 -0
  11. package/dist/conversion/hub/ops/operations.d.ts +19 -0
  12. package/dist/conversion/hub/ops/operations.js +126 -0
  13. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +9 -0
  14. package/dist/conversion/hub/pipeline/hub-pipeline.js +489 -19
  15. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +6 -0
  16. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +11 -0
  17. package/dist/conversion/hub/policy/policy-engine.js +41 -9
  18. package/dist/conversion/hub/policy/protocol-spec.d.ts +25 -0
  19. package/dist/conversion/hub/policy/protocol-spec.js +73 -23
  20. package/dist/conversion/hub/process/chat-process.js +252 -41
  21. package/dist/conversion/hub/response/provider-response.js +175 -2
  22. package/dist/conversion/hub/response/response-runtime.js +1 -1
  23. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.d.ts +1 -8
  24. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +1 -365
  25. package/dist/conversion/hub/semantic-mappers/chat-mapper.d.ts +1 -8
  26. package/dist/conversion/hub/semantic-mappers/chat-mapper.js +1 -467
  27. package/dist/conversion/hub/semantic-mappers/gemini-mapper.d.ts +1 -7
  28. package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +1 -903
  29. package/dist/conversion/hub/semantic-mappers/responses-mapper.d.ts +1 -21
  30. package/dist/conversion/hub/semantic-mappers/responses-mapper.js +1 -593
  31. package/dist/conversion/hub/tool-surface/tool-surface-engine.d.ts +18 -0
  32. package/dist/conversion/hub/tool-surface/tool-surface-engine.js +571 -0
  33. package/dist/conversion/responses/responses-openai-bridge.js +14 -2
  34. package/dist/conversion/shared/bridge-message-utils.js +2 -8
  35. package/dist/conversion/shared/bridge-policies.js +5 -105
  36. package/dist/conversion/shared/gemini-tool-utils.js +89 -15
  37. package/dist/conversion/shared/protocol-field-allowlists.d.ts +7 -0
  38. package/dist/conversion/shared/protocol-field-allowlists.js +145 -0
  39. package/dist/conversion/shared/reasoning-tool-normalizer.js +4 -2
  40. package/dist/conversion/shared/snapshot-hooks.js +166 -3
  41. package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
  42. package/dist/conversion/shared/text-markup-normalizer.js +345 -9
  43. package/dist/conversion/shared/thought-signature-validator.d.ts +52 -0
  44. package/dist/conversion/shared/thought-signature-validator.js +170 -0
  45. package/dist/conversion/shared/tool-argument-repairer.d.ts +39 -0
  46. package/dist/conversion/shared/tool-argument-repairer.js +56 -0
  47. package/dist/conversion/shared/tool-call-id-manager.d.ts +113 -0
  48. package/dist/conversion/shared/tool-call-id-manager.js +231 -0
  49. package/dist/conversion/shared/tool-canonicalizer.js +2 -11
  50. package/dist/router/virtual-router/bootstrap.js +54 -5
  51. package/dist/router/virtual-router/engine-selection.js +132 -42
  52. package/dist/router/virtual-router/engine.d.ts +3 -0
  53. package/dist/router/virtual-router/engine.js +142 -33
  54. package/dist/router/virtual-router/health-weighted.d.ts +25 -0
  55. package/dist/router/virtual-router/health-weighted.js +63 -0
  56. package/dist/router/virtual-router/load-balancer.d.ts +2 -0
  57. package/dist/router/virtual-router/load-balancer.js +45 -16
  58. package/dist/router/virtual-router/routing-instructions.js +17 -1
  59. package/dist/router/virtual-router/sticky-session-store.js +136 -24
  60. package/dist/router/virtual-router/stop-message-file-resolver.d.ts +1 -0
  61. package/dist/router/virtual-router/stop-message-file-resolver.js +74 -0
  62. package/dist/router/virtual-router/stop-message-state-sync.d.ts +15 -0
  63. package/dist/router/virtual-router/stop-message-state-sync.js +57 -0
  64. package/dist/router/virtual-router/types.d.ts +70 -0
  65. package/dist/servertool/clock/config.d.ts +7 -0
  66. package/dist/servertool/clock/config.js +27 -0
  67. package/dist/servertool/clock/daemon.d.ts +3 -0
  68. package/dist/servertool/clock/daemon.js +79 -0
  69. package/dist/servertool/clock/io.d.ts +2 -0
  70. package/dist/servertool/clock/io.js +13 -0
  71. package/dist/servertool/clock/paths.d.ts +4 -0
  72. package/dist/servertool/clock/paths.js +25 -0
  73. package/dist/servertool/clock/session-store.d.ts +3 -0
  74. package/dist/servertool/clock/session-store.js +56 -0
  75. package/dist/servertool/clock/state.d.ts +5 -0
  76. package/dist/servertool/clock/state.js +62 -0
  77. package/dist/servertool/clock/task-store.d.ts +5 -0
  78. package/dist/servertool/clock/task-store.js +4 -0
  79. package/dist/servertool/clock/tasks.d.ts +17 -0
  80. package/dist/servertool/clock/tasks.js +221 -0
  81. package/dist/servertool/clock/types.d.ts +36 -0
  82. package/dist/servertool/clock/types.js +1 -0
  83. package/dist/servertool/engine.d.ts +2 -0
  84. package/dist/servertool/engine.js +161 -7
  85. package/dist/servertool/followup-shadow.d.ts +16 -0
  86. package/dist/servertool/followup-shadow.js +145 -0
  87. package/dist/servertool/handlers/apply-patch-guard.js +1 -265
  88. package/dist/servertool/handlers/clock-auto.d.ts +1 -0
  89. package/dist/servertool/handlers/clock-auto.js +160 -0
  90. package/dist/servertool/handlers/clock.d.ts +1 -0
  91. package/dist/servertool/handlers/clock.js +197 -0
  92. package/dist/servertool/handlers/exec-command-guard.js +7 -555
  93. package/dist/servertool/handlers/followup-request-builder.d.ts +15 -7
  94. package/dist/servertool/handlers/followup-request-builder.js +248 -28
  95. package/dist/servertool/handlers/gemini-empty-reply-continue.js +62 -169
  96. package/dist/servertool/handlers/iflow-model-error-retry.js +18 -28
  97. package/dist/servertool/handlers/recursive-detection-guard.d.ts +1 -0
  98. package/dist/servertool/handlers/recursive-detection-guard.js +333 -0
  99. package/dist/servertool/handlers/stop-message-auto.js +47 -175
  100. package/dist/servertool/handlers/vision.d.ts +7 -1
  101. package/dist/servertool/handlers/vision.js +61 -117
  102. package/dist/servertool/handlers/web-search.d.ts +7 -1
  103. package/dist/servertool/handlers/web-search.js +122 -105
  104. package/dist/servertool/reenter-backend.d.ts +23 -0
  105. package/dist/servertool/reenter-backend.js +18 -0
  106. package/dist/servertool/server-side-tools.d.ts +3 -2
  107. package/dist/servertool/server-side-tools.js +64 -10
  108. package/dist/servertool/types.d.ts +92 -3
  109. package/dist/sse/json-to-sse/event-generators/responses.js +3 -21
  110. package/dist/sse/shared/serializers/responses-event-serializer.d.ts +8 -0
  111. package/dist/sse/shared/serializers/responses-event-serializer.js +19 -0
  112. package/dist/sse/shared/writer.js +24 -7
  113. package/dist/tools/apply-patch/execution-capturer.js +3 -1
  114. package/dist/tools/apply-patch/json/parse-loose.d.ts +3 -0
  115. package/dist/tools/apply-patch/json/parse-loose.js +139 -0
  116. package/dist/tools/apply-patch/patch-text/context-diff.d.ts +1 -0
  117. package/dist/tools/apply-patch/patch-text/context-diff.js +173 -0
  118. package/dist/tools/apply-patch/patch-text/git-diff.d.ts +1 -0
  119. package/dist/tools/apply-patch/patch-text/git-diff.js +138 -0
  120. package/dist/tools/apply-patch/patch-text/looks-like-patch.d.ts +1 -0
  121. package/dist/tools/apply-patch/patch-text/looks-like-patch.js +13 -0
  122. package/dist/tools/apply-patch/patch-text/normalize.d.ts +3 -0
  123. package/dist/tools/apply-patch/patch-text/normalize.js +262 -0
  124. package/dist/tools/apply-patch/structured/coercion.d.ts +3 -0
  125. package/dist/tools/apply-patch/structured/coercion.js +82 -0
  126. package/dist/tools/apply-patch/validation/shared.d.ts +3 -0
  127. package/dist/tools/apply-patch/validation/shared.js +6 -0
  128. package/dist/tools/apply-patch/validator.d.ts +2 -2
  129. package/dist/tools/apply-patch/validator.js +6 -556
  130. package/package.json +1 -1
@@ -1,3 +1,4 @@
1
+ import { computeHealthWeight, resolveHealthWeightedConfig } from './health-weighted.js';
1
2
  import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
2
3
  export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
3
4
  const state = options.routingState ?? activeState;
@@ -294,12 +295,13 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
294
295
  if (excludedKeys.size > 0) {
295
296
  targets = targets.filter((key) => !excludedKeys.has(key));
296
297
  }
298
+ const isRecoveryAttempt = excludedKeys.size > 0;
299
+ const singleCandidateFallback = targets.length === 1 ? targets[0] : undefined;
297
300
  if (targets.length > 0) {
298
- const cooled = targets.filter((key) => !deps.isProviderCoolingDown(key));
299
- // provider 兜底:当一个 tier 只有一个候选 key 时,不因 cooldown 造成路由池为空。
300
- if (cooled.length > 0 || targets.length !== 1) {
301
- targets = cooled;
302
- }
301
+ // Always respect cooldown signals. If a route/tier is depleted due to cooldown,
302
+ // routing is expected to fall back to other tiers/routes (e.g. longcontext → default),
303
+ // rather than repeatedly selecting the cooled-down provider.
304
+ targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
303
305
  }
304
306
  if (allowedProviders && allowedProviders.size > 0) {
305
307
  targets = targets.filter((key) => {
@@ -399,6 +401,7 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
399
401
  const prioritizedPools = buildContextCandidatePools(contextResult);
400
402
  const quotaView = deps.quotaView;
401
403
  const now = quotaView ? Date.now() : 0;
404
+ const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
402
405
  const selectFirstAvailable = (candidates) => {
403
406
  for (const key of candidates) {
404
407
  if (deps.healthManager.isAvailable(key)) {
@@ -410,11 +413,15 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
410
413
  const selectWithQuota = (candidates) => {
411
414
  if (!quotaView) {
412
415
  if (tier.mode === 'priority') {
413
- const selected = selectFirstAvailable(candidates);
414
- if (!selected && candidates.length === 1) {
415
- return candidates[0];
416
+ if (isRecoveryAttempt) {
417
+ return selectFirstAvailable(candidates);
416
418
  }
417
- return selected;
419
+ return deps.loadBalancer.select({
420
+ routeName: `${routeName}:${tier.id}:priority`,
421
+ candidates,
422
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
423
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
424
+ }, 'round-robin');
418
425
  }
419
426
  const selected = deps.loadBalancer.select({
420
427
  routeName: `${routeName}:${tier.id}`,
@@ -422,17 +429,15 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
422
429
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
423
430
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
424
431
  }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
425
- if (!selected && candidates.length === 1) {
426
- return candidates[0];
427
- }
428
432
  return selected;
429
433
  }
430
434
  const buckets = new Map();
435
+ let order = 0;
431
436
  for (const key of candidates) {
432
437
  const entry = quotaView(key);
433
438
  if (!entry) {
434
439
  const list = buckets.get(100) ?? [];
435
- list.push(key);
440
+ list.push({ key, penalty: 0, order: order++ });
436
441
  buckets.set(100, list);
437
442
  continue;
438
443
  }
@@ -448,50 +453,140 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
448
453
  const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier)
449
454
  ? entry.priorityTier
450
455
  : 100;
456
+ const penaltyRaw = entry.selectionPenalty;
457
+ const penalty = typeof penaltyRaw === 'number' && Number.isFinite(penaltyRaw) && penaltyRaw > 0 ? Math.floor(penaltyRaw) : 0;
451
458
  const list = buckets.get(tierPriority) ?? [];
452
- list.push(key);
459
+ list.push({ key, penalty, order: order++ });
453
460
  buckets.set(tierPriority, list);
454
461
  }
455
462
  const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
456
463
  for (const priority of sortedPriorities) {
457
- const bucketCandidates = buckets.get(priority) ?? [];
458
- if (!bucketCandidates.length) {
464
+ const bucket = buckets.get(priority) ?? [];
465
+ if (!bucket.length) {
459
466
  continue;
460
467
  }
461
- if (tier.mode === 'priority') {
462
- const selected = selectFirstAvailable(bucketCandidates);
463
- if (selected) {
464
- return selected;
468
+ bucket.sort((a, b) => (a.penalty - b.penalty) || (a.order - b.order));
469
+ const bucketCandidates = bucket.map((item) => item.key);
470
+ // antigravity special: avoid rotating across keys while the current key is healthy.
471
+ // Rationale: some upstream gateways reject rapid cross-key switching even when quota exists,
472
+ // causing repeated 429s. We therefore pin a single key per (providerId, modelId) until it is
473
+ // excluded by quota/cooldown, then fail over to the next available key.
474
+ //
475
+ // This is only applied when the request has no session-level sticky key, to avoid breaking
476
+ // explicit session stickiness.
477
+ const shouldPinAntigravityModel = (() => {
478
+ // Only respect explicit session/conversation stickiness. requestId-scoped sticky keys
479
+ // (used for request-chain pinning) should not prevent global antigravity key pinning.
480
+ if (typeof stickyKey === 'string' && (stickyKey.startsWith('session:') || stickyKey.startsWith('conversation:'))) {
481
+ return false;
482
+ }
483
+ if (bucketCandidates.length < 2) {
484
+ return false;
485
+ }
486
+ let modelId = null;
487
+ for (const key of bucketCandidates) {
488
+ const providerId = extractProviderId(key);
489
+ if (providerId !== 'antigravity') {
490
+ return false;
491
+ }
492
+ const candidateModel = getProviderModelId(key, deps.providerRegistry);
493
+ if (!candidateModel) {
494
+ return false;
495
+ }
496
+ if (modelId === null) {
497
+ modelId = candidateModel;
498
+ }
499
+ else if (modelId !== candidateModel) {
500
+ return false;
501
+ }
502
+ }
503
+ return Boolean(modelId);
504
+ })();
505
+ if (shouldPinAntigravityModel && !isRecoveryAttempt) {
506
+ const pinned = selectFirstAvailable(bucketCandidates);
507
+ if (pinned) {
508
+ return pinned;
465
509
  }
466
510
  }
467
- else {
511
+ const bucketWeights = {};
512
+ const bucketMultipliers = {};
513
+ for (const item of bucket) {
514
+ if (healthWeightedCfg.enabled) {
515
+ const entry = quotaView(item.key);
516
+ const { weight, multiplier } = computeHealthWeight(entry, now, healthWeightedCfg);
517
+ bucketWeights[item.key] = weight;
518
+ bucketMultipliers[item.key] = multiplier;
519
+ }
520
+ else {
521
+ // Legacy: penalty => lower weight, but never zero (unhealthy should still get a chance).
522
+ bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
523
+ bucketMultipliers[item.key] = 1;
524
+ }
525
+ }
526
+ if (tier.mode === 'priority') {
527
+ if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
528
+ let best = null;
529
+ let bestM = Number.NEGATIVE_INFINITY;
530
+ for (const key of bucketCandidates) {
531
+ if (!deps.healthManager.isAvailable(key))
532
+ continue;
533
+ const m = bucketMultipliers[key] ?? 1;
534
+ if (m > bestM) {
535
+ bestM = m;
536
+ best = key;
537
+ }
538
+ }
539
+ if (best) {
540
+ return best;
541
+ }
542
+ continue;
543
+ }
544
+ else if (isRecoveryAttempt) {
545
+ const recovered = selectFirstAvailable(bucketCandidates);
546
+ if (recovered)
547
+ return recovered;
548
+ continue;
549
+ }
468
550
  const selected = deps.loadBalancer.select({
469
- routeName: `${routeName}:${tier.id}`,
551
+ routeName: `${routeName}:${tier.id}:priority:${priority}`,
470
552
  candidates: bucketCandidates,
471
553
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
554
+ weights: bucketWeights,
472
555
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
473
- }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
474
- if (selected) {
475
- return selected;
476
- }
477
- }
478
- }
479
- // default 路由永不因 quota gating 而“空池”:
480
- // 当 quotaView 过滤后没有任何可用候选时,默认路由允许忽略 quotaView,
481
- // 继续按健康/负载均衡选择一个 providerKey(但不覆盖 forced/required 约束)。
482
- const quotaBypassAllowed = routeName === DEFAULT_ROUTE && (!requiredProviderKeys || requiredProviderKeys.size === 0);
483
- if (quotaBypassAllowed) {
484
- if (tier.mode === 'priority') {
485
- const selected = selectFirstAvailable(candidates);
556
+ }, 'round-robin');
486
557
  if (selected) {
487
558
  return selected;
488
559
  }
489
560
  }
490
561
  else {
562
+ if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
563
+ let best = null;
564
+ let bestM = Number.NEGATIVE_INFINITY;
565
+ for (const key of bucketCandidates) {
566
+ if (!deps.healthManager.isAvailable(key))
567
+ continue;
568
+ const m = bucketMultipliers[key] ?? 1;
569
+ if (m > bestM) {
570
+ bestM = m;
571
+ best = key;
572
+ }
573
+ }
574
+ if (best) {
575
+ return best;
576
+ }
577
+ continue;
578
+ }
579
+ else if (isRecoveryAttempt) {
580
+ const recovered = selectFirstAvailable(bucketCandidates);
581
+ if (recovered)
582
+ return recovered;
583
+ continue;
584
+ }
491
585
  const selected = deps.loadBalancer.select({
492
- routeName: `${routeName}:${tier.id}:quota-bypass`,
493
- candidates,
586
+ routeName: `${routeName}:${tier.id}`,
587
+ candidates: bucketCandidates,
494
588
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
589
+ weights: bucketWeights,
495
590
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
496
591
  }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
497
592
  if (selected) {
@@ -499,10 +594,6 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
499
594
  }
500
595
  }
501
596
  }
502
- // 单 provider 兜底:当只剩一个候选 key 时,不因 quota/blacklist/cooldown 或健康状态过滤导致无 provider。
503
- if (candidates.length === 1) {
504
- return candidates[0];
505
- }
506
597
  return null;
507
598
  };
508
599
  for (const candidatePool of prioritizedPools) {
@@ -802,7 +893,6 @@ function buildContextCandidatePools(result) {
802
893
  const ordered = [];
803
894
  if (result.safe.length) {
804
895
  ordered.push(result.safe);
805
- return ordered;
806
896
  }
807
897
  if (result.risky.length) {
808
898
  ordered.push(result.risky);
@@ -5,6 +5,7 @@ import type { ProviderQuotaView } from './types.js';
5
5
  interface RoutingInstructionStateStore {
6
6
  loadSync(key: string): RoutingInstructionState | null;
7
7
  saveAsync(key: string, state: RoutingInstructionState | null): void;
8
+ saveSync?: (key: string, state: RoutingInstructionState | null) => void;
8
9
  }
9
10
  export declare class VirtualRouterEngine {
10
11
  private routing;
@@ -63,9 +64,11 @@ export declare class VirtualRouterEngine {
63
64
  private providerHealthConfig;
64
65
  private resolveStickyKey;
65
66
  private resolveSessionScope;
67
+ private resolveStopMessageScope;
66
68
  private getRoutingInstructionState;
67
69
  private buildMetadataInstructions;
68
70
  private parseMetadataDisableDescriptor;
71
+ private parseMetadataForceProviderKey;
69
72
  private resolveRoutingMode;
70
73
  private resolveInstructionTarget;
71
74
  private filterCandidatesByRoutingState;
@@ -7,10 +7,11 @@ import { ContextAdvisor } from './context-advisor.js';
7
7
  import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
8
8
  import { getStatsCenter } from '../../telemetry/stats-center.js';
9
9
  import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
10
- import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
10
+ import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync, saveRoutingInstructionStateSync } from './sticky-session-store.js';
11
11
  import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
12
12
  import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
13
13
  import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
14
+ import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
14
15
  export class VirtualRouterEngine {
15
16
  routing = {};
16
17
  providerRegistry = new ProviderRegistry();
@@ -29,7 +30,8 @@ export class VirtualRouterEngine {
29
30
  healthStore;
30
31
  routingStateStore = {
31
32
  loadSync: loadRoutingInstructionStateSync,
32
- saveAsync: saveRoutingInstructionStateAsync
33
+ saveAsync: saveRoutingInstructionStateAsync,
34
+ saveSync: saveRoutingInstructionStateSync
33
35
  };
34
36
  routingInstructionState = new Map();
35
37
  quotaView;
@@ -56,7 +58,8 @@ export class VirtualRouterEngine {
56
58
  deps.routingStateStore ??
57
59
  {
58
60
  loadSync: loadRoutingInstructionStateSync,
59
- saveAsync: saveRoutingInstructionStateAsync
61
+ saveAsync: saveRoutingInstructionStateAsync,
62
+ saveSync: saveRoutingInstructionStateSync
60
63
  };
61
64
  // Routing state store changes require clearing in-memory cache to avoid stale reads.
62
65
  this.routingInstructionState.clear();
@@ -106,6 +109,7 @@ export class VirtualRouterEngine {
106
109
  route(request, metadata) {
107
110
  const stickyKey = this.resolveStickyKey(metadata);
108
111
  const sessionScope = this.resolveSessionScope(metadata);
112
+ const stopMessageScope = this.resolveStopMessageScope(metadata);
109
113
  // Routing instructions should be session/conversation-scoped when available (including /v1/responses),
110
114
  // while auto-sticky for Responses remains request-chain scoped via resolveStickyKey().
111
115
  const stateKey = sessionScope || stickyKey || 'default';
@@ -125,8 +129,8 @@ export class VirtualRouterEngine {
125
129
  preferTarget: undefined
126
130
  };
127
131
  }
128
- if (sessionScope) {
129
- const sessionState = this.getRoutingInstructionState(sessionScope);
132
+ if (stopMessageScope) {
133
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
130
134
  if (typeof sessionState.stopMessageText === 'string' ||
131
135
  typeof sessionState.stopMessageMaxRepeats === 'number') {
132
136
  routingState = {
@@ -141,8 +145,8 @@ export class VirtualRouterEngine {
141
145
  }
142
146
  const parsedInstructions = parseRoutingInstructions(request.messages);
143
147
  let instructions = parsedInstructions;
144
- if (sessionScope && parsedInstructions.length > 0) {
145
- const sessionState = this.getRoutingInstructionState(sessionScope);
148
+ if (stopMessageScope && parsedInstructions.length > 0) {
149
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
146
150
  const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
147
151
  const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
148
152
  if (!hasStopMessageClear && stopMessageSets.length > 0) {
@@ -162,6 +166,14 @@ export class VirtualRouterEngine {
162
166
  }
163
167
  }
164
168
  }
169
+ // stopMessage must be session-scoped: require explicit sessionId in metadata.
170
+ // This prevents global/default persistence and ensures the trigger matches the setting sessionId.
171
+ if (parsedInstructions.length > 0) {
172
+ const hasStopMessageInstruction = parsedInstructions.some((entry) => entry.type === 'stopMessageSet' || entry.type === 'stopMessageClear');
173
+ if (hasStopMessageInstruction && !stopMessageScope) {
174
+ throw new VirtualRouterError('[stopMessage] requires sessionId (e.g. set x-session-id header or metadata.sessionId).', VirtualRouterErrorCode.CONFIG_ERROR, { requestId: metadata.requestId, entryEndpoint: metadata.entryEndpoint });
175
+ }
176
+ }
165
177
  if (parsedInstructions.length > 0) {
166
178
  request.messages = cleanMessagesFromRoutingInstructions(request.messages);
167
179
  }
@@ -171,21 +183,25 @@ export class VirtualRouterEngine {
171
183
  this.persistRoutingInstructionState(stateKey, routingState);
172
184
  // 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
173
185
  // 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
174
- if (sessionScope) {
186
+ // stopMessage is strictly session-scoped (sessionId only). Persist it under the session scope
187
+ // so servertool triggers always match the setting sessionId.
188
+ if (stopMessageScope) {
175
189
  const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
176
190
  const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
177
191
  if (hasStopMessageSet || hasStopMessageClear) {
178
- const sessionState = this.getRoutingInstructionState(sessionScope);
192
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
179
193
  let nextSessionState = {
180
194
  ...sessionState
181
195
  };
182
196
  let shouldPersistSessionState = false;
183
197
  if (hasStopMessageClear) {
198
+ const clearedAt = Date.now();
184
199
  nextSessionState.stopMessageText = undefined;
185
200
  nextSessionState.stopMessageMaxRepeats = undefined;
186
201
  nextSessionState.stopMessageUsed = undefined;
187
- nextSessionState.stopMessageUpdatedAt = undefined;
188
- nextSessionState.stopMessageLastUsedAt = undefined;
202
+ nextSessionState.stopMessageUpdatedAt = clearedAt;
203
+ nextSessionState.stopMessageLastUsedAt = clearedAt;
204
+ nextSessionState.stopMessageSource = undefined;
189
205
  shouldPersistSessionState = true;
190
206
  }
191
207
  else if (hasStopMessageSet) {
@@ -210,8 +226,8 @@ export class VirtualRouterEngine {
210
226
  }
211
227
  }
212
228
  if (shouldPersistSessionState) {
213
- this.routingInstructionState.set(sessionScope, nextSessionState);
214
- this.persistRoutingInstructionState(sessionScope, nextSessionState);
229
+ this.routingInstructionState.set(stopMessageScope, nextSessionState);
230
+ this.persistRoutingInstructionState(stopMessageScope, nextSessionState);
215
231
  }
216
232
  else {
217
233
  nextSessionState = sessionState;
@@ -228,8 +244,8 @@ export class VirtualRouterEngine {
228
244
  }
229
245
  }
230
246
  }
231
- if (instructions.length === 0 && sessionScope) {
232
- const sessionState = this.getRoutingInstructionState(sessionScope);
247
+ if (instructions.length === 0 && stopMessageScope) {
248
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
233
249
  if (typeof sessionState.stopMessageText === 'string' ||
234
250
  typeof sessionState.stopMessageMaxRepeats === 'number') {
235
251
  routingState.stopMessageText = sessionState.stopMessageText;
@@ -709,6 +725,13 @@ export class VirtualRouterEngine {
709
725
  }
710
726
  return undefined;
711
727
  }
728
+ resolveStopMessageScope(metadata) {
729
+ const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
730
+ if (sessionId) {
731
+ return `session:${sessionId}`;
732
+ }
733
+ return undefined;
734
+ }
712
735
  getRoutingInstructionState(stickyKey) {
713
736
  const key = stickyKey || 'default';
714
737
  const existing = this.routingInstructionState.get(key);
@@ -718,22 +741,13 @@ export class VirtualRouterEngine {
718
741
  if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
719
742
  try {
720
743
  const persisted = this.routingStateStore.loadSync(key);
721
- if (persisted) {
722
- // 以持久化状态为准(包括清空后的 undefined),避免 stopMessage 状态“卡死”在内存中。
723
- existing.stopMessageText = persisted.stopMessageText;
724
- existing.stopMessageMaxRepeats = persisted.stopMessageMaxRepeats;
725
- existing.stopMessageUsed = persisted.stopMessageUsed;
726
- existing.stopMessageUpdatedAt = persisted.stopMessageUpdatedAt;
727
- existing.stopMessageLastUsedAt = persisted.stopMessageLastUsedAt;
728
- }
729
- else {
730
- // 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
731
- existing.stopMessageText = undefined;
732
- existing.stopMessageMaxRepeats = undefined;
733
- existing.stopMessageUsed = undefined;
734
- existing.stopMessageUpdatedAt = undefined;
735
- existing.stopMessageLastUsedAt = undefined;
736
- }
744
+ const merged = mergeStopMessageFromPersisted(existing, persisted);
745
+ existing.stopMessageSource = merged.stopMessageSource;
746
+ existing.stopMessageText = merged.stopMessageText;
747
+ existing.stopMessageMaxRepeats = merged.stopMessageMaxRepeats;
748
+ existing.stopMessageUsed = merged.stopMessageUsed;
749
+ existing.stopMessageUpdatedAt = merged.stopMessageUpdatedAt;
750
+ existing.stopMessageLastUsedAt = merged.stopMessageLastUsedAt;
737
751
  }
738
752
  catch {
739
753
  // 刷新失败不影响原有内存状态
@@ -753,6 +767,7 @@ export class VirtualRouterEngine {
753
767
  disabledProviders: new Set(),
754
768
  disabledKeys: new Map(),
755
769
  disabledModels: new Map(),
770
+ stopMessageSource: undefined,
756
771
  stopMessageText: undefined,
757
772
  stopMessageMaxRepeats: undefined,
758
773
  stopMessageUsed: undefined,
@@ -765,6 +780,12 @@ export class VirtualRouterEngine {
765
780
  }
766
781
  buildMetadataInstructions(metadata) {
767
782
  const instructions = [];
783
+ const forcedProviderKeyRaw = metadata
784
+ .__shadowCompareForcedProviderKey;
785
+ const forcedProviderKey = this.parseMetadataForceProviderKey(forcedProviderKeyRaw);
786
+ if (forcedProviderKey) {
787
+ instructions.push({ type: 'force', ...forcedProviderKey });
788
+ }
768
789
  if (Array.isArray(metadata.disabledProviderKeyAliases)) {
769
790
  for (const entry of metadata.disabledProviderKeyAliases) {
770
791
  const parsed = this.parseMetadataDisableDescriptor(entry);
@@ -797,6 +818,76 @@ export class VirtualRouterEngine {
797
818
  }
798
819
  return { provider, keyAlias: alias };
799
820
  }
821
+ parseMetadataForceProviderKey(entry) {
822
+ if (typeof entry !== 'string') {
823
+ return null;
824
+ }
825
+ const trimmed = entry.trim();
826
+ if (!trimmed) {
827
+ return null;
828
+ }
829
+ // Accept the bracket notation used in virtual-router-hit logs: provider[alias].model
830
+ // - provider[].model means provider.model across all aliases
831
+ const bracketMatch = trimmed.match(/^([a-zA-Z0-9_-]+)\[([a-zA-Z0-9_-]*)\](?:\.(.+))?$/);
832
+ if (bracketMatch) {
833
+ const provider = bracketMatch[1]?.trim() || '';
834
+ const keyAlias = bracketMatch[2]?.trim() || '';
835
+ const model = typeof bracketMatch[3] === 'string' ? bracketMatch[3].trim() : '';
836
+ if (!provider) {
837
+ return null;
838
+ }
839
+ if (keyAlias) {
840
+ return {
841
+ provider,
842
+ keyAlias,
843
+ ...(model ? { model } : {}),
844
+ pathLength: 3
845
+ };
846
+ }
847
+ if (model) {
848
+ return {
849
+ provider,
850
+ model,
851
+ pathLength: 2
852
+ };
853
+ }
854
+ return { provider, pathLength: 1 };
855
+ }
856
+ // Accept provider.keyAlias.model and provider.model (model may contain dots when keyAlias is explicit).
857
+ const parts = trimmed.split('.').map((part) => part.trim()).filter(Boolean);
858
+ if (parts.length === 0) {
859
+ return null;
860
+ }
861
+ const provider = parts[0] || '';
862
+ if (!provider) {
863
+ return null;
864
+ }
865
+ if (parts.length === 1) {
866
+ return { provider, pathLength: 1 };
867
+ }
868
+ if (parts.length === 2) {
869
+ const second = parts[1] || '';
870
+ if (!second) {
871
+ return null;
872
+ }
873
+ if (/^\d+$/.test(second)) {
874
+ const keyIndex = Number.parseInt(second, 10);
875
+ return Number.isFinite(keyIndex) && keyIndex > 0 ? { provider, keyIndex, pathLength: 2 } : null;
876
+ }
877
+ return { provider, model: second, pathLength: 2 };
878
+ }
879
+ const keyAlias = parts[1] || '';
880
+ const model = parts.slice(2).join('.').trim();
881
+ if (!keyAlias) {
882
+ return null;
883
+ }
884
+ return {
885
+ provider,
886
+ keyAlias,
887
+ ...(model ? { model } : {}),
888
+ pathLength: 3
889
+ };
890
+ }
800
891
  resolveRoutingMode(instructions, state) {
801
892
  const hasForce = instructions.some((inst) => inst.type === 'force');
802
893
  const hasAllow = instructions.some((inst) => inst.type === 'allow');
@@ -1281,11 +1372,29 @@ export class VirtualRouterEngine {
1281
1372
  if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
1282
1373
  return;
1283
1374
  }
1375
+ const supportsSync = typeof this.routingStateStore.saveSync === 'function';
1376
+ const prefersSync = supportsSync &&
1377
+ key.startsWith('session:') &&
1378
+ (Boolean(state.stopMessageText && state.stopMessageText.trim()) ||
1379
+ (typeof state.stopMessageMaxRepeats === 'number' && Number.isFinite(state.stopMessageMaxRepeats)) ||
1380
+ (typeof state.stopMessageUsed === 'number' && Number.isFinite(state.stopMessageUsed)) ||
1381
+ (typeof state.stopMessageUpdatedAt === 'number' && Number.isFinite(state.stopMessageUpdatedAt)) ||
1382
+ (typeof state.stopMessageLastUsedAt === 'number' && Number.isFinite(state.stopMessageLastUsedAt)));
1284
1383
  if (this.isRoutingStateEmpty(state)) {
1285
- this.routingStateStore.saveAsync(key, null);
1384
+ if (prefersSync) {
1385
+ this.routingStateStore.saveSync(key, null);
1386
+ }
1387
+ else {
1388
+ this.routingStateStore.saveAsync(key, null);
1389
+ }
1286
1390
  return;
1287
1391
  }
1288
- this.routingStateStore.saveAsync(key, state);
1392
+ if (prefersSync) {
1393
+ this.routingStateStore.saveSync(key, state);
1394
+ }
1395
+ else {
1396
+ this.routingStateStore.saveAsync(key, state);
1397
+ }
1289
1398
  }
1290
1399
  markProviderCooldown(providerKey, cooldownMs) {
1291
1400
  if (!providerKey) {
@@ -0,0 +1,25 @@
1
+ import type { HealthWeightedLoadBalancingConfig, ProviderQuotaViewEntry } from './types.js';
2
/**
 * Health-weighted load-balancing settings with every field populated.
 */
export type ResolvedHealthWeightedConfig = {
    enabled: boolean;
    /** Weight assigned when the health multiplier is 1. */
    baseWeight: number;
    /** Lower bound applied to the health multiplier. */
    minMultiplier: number;
    /** Penalty subtracted from the multiplier per effective (decayed) error. */
    beta: number;
    /** Time in milliseconds after which the effect of the last error has halved. */
    halfLifeMs: number;
    recoverToBestOnRetry: boolean;
};
/**
 * Default AWRR constants.
 *
 * - `minMultiplier=0.5` acts as a floor of roughly half of a key's initial (equal) share
 *   within the same pool bucket; penalties never push a key below it.
 * - `halfLifeMs=10min`: absent new errors, the last error's effect halves every 10 minutes.
 * - `beta` sets how fast errors shrink a key's share; tune carefully.
 */
export declare const DEFAULT_HEALTH_WEIGHTED_CONFIG: ResolvedHealthWeightedConfig;
/** Fills in missing or invalid fields of `raw` from the defaults. */
export declare function resolveHealthWeightedConfig(
    raw?: HealthWeightedLoadBalancingConfig | null
): ResolvedHealthWeightedConfig;
/** Computes the health multiplier for a quota entry; a null entry yields 1. */
export declare function computeHealthMultiplier(
    entry: ProviderQuotaViewEntry | null,
    nowMs: number,
    cfg: ResolvedHealthWeightedConfig
): number;
/** Computes the rounded weight (never below 1) together with the multiplier it was derived from. */
export declare function computeHealthWeight(
    entry: ProviderQuotaViewEntry | null,
    nowMs: number,
    cfg: ResolvedHealthWeightedConfig
): {
    weight: number;
    multiplier: number;
};
@@ -0,0 +1,63 @@
1
/**
 * Default AWRR (health-weighted round-robin) constants.
 *
 * - `minMultiplier` of 0.5 is a floor at roughly half of a key's initial (equal) share
 *   within the same pool bucket: penalties never reduce a key below it.
 * - `halfLifeMs` of ten minutes: with no new errors, the effect of the last error
 *   halves every ten minutes.
 * - `beta` controls how quickly errors reduce a key's share; tune carefully.
 */
export const DEFAULT_HEALTH_WEIGHTED_CONFIG = {
    enabled: false,
    baseWeight: 100,
    minMultiplier: 0.5,
    beta: 0.1,
    // 10 minutes expressed in milliseconds.
    halfLifeMs: 1000 * 60 * 10,
    recoverToBestOnRetry: true
};
18
/**
 * Normalizes a (possibly partial or invalid) health-weighted config into a complete one.
 *
 * Each field falls back to the matching DEFAULT_HEALTH_WEIGHTED_CONFIG value when it is
 * missing or fails validation:
 * - `baseWeight`, `halfLifeMs`: finite numbers > 0, floored to an integer and clamped to >= 1.
 * - `minMultiplier`: finite number > 0, capped at 1.
 * - `beta`: finite number >= 0.
 * - `enabled`, `recoverToBestOnRetry`: used as given unless null/undefined.
 *
 * @param {object|null|undefined} raw - User-supplied configuration.
 * @returns {{enabled: boolean, baseWeight: number, minMultiplier: number, beta: number,
 *   halfLifeMs: number, recoverToBestOnRetry: boolean}} The resolved configuration.
 */
export function resolveHealthWeightedConfig(raw) {
    const defaults = DEFAULT_HEALTH_WEIGHTED_CONFIG;
    const isFiniteNumber = (value) => typeof value === 'number' && Number.isFinite(value);
    // Flooring a value in (0, 1) would produce 0 even though it passed the "> 0" check.
    // A zero halfLifeMs makes the decay computation divide by zero, and a zero baseWeight
    // contradicts the positive-weight requirement, so clamp the floored value to at least 1.
    const toPositiveInt = (value, fallback) => (isFiniteNumber(value) && value > 0
        ? Math.max(1, Math.floor(value))
        : fallback);
    return {
        enabled: raw?.enabled ?? defaults.enabled,
        baseWeight: toPositiveInt(raw?.baseWeight, defaults.baseWeight),
        minMultiplier: isFiniteNumber(raw?.minMultiplier) && raw.minMultiplier > 0
            ? Math.min(1, raw.minMultiplier)
            : defaults.minMultiplier,
        beta: isFiniteNumber(raw?.beta) && raw.beta >= 0 ? raw.beta : defaults.beta,
        halfLifeMs: toPositiveInt(raw?.halfLifeMs, defaults.halfLifeMs),
        recoverToBestOnRetry: raw?.recoverToBestOnRetry ?? defaults.recoverToBestOnRetry
    };
}
42
/**
 * Computes the health multiplier of a provider key from its recent error history.
 *
 * The consecutive error count is decayed exponentially by the time elapsed since the last
 * error (halving every `cfg.halfLifeMs`), scaled by `cfg.beta`, subtracted from 1 and
 * clamped to the range [`cfg.minMultiplier`, 1].
 *
 * @param {{lastErrorAtMs?: number, consecutiveErrorCount?: number}|null} entry - Quota view
 *   entry; null, a missing timestamp, or a non-positive error count all yield 1.
 * @param {number} nowMs - Current time in milliseconds.
 * @param {{beta: number, minMultiplier: number, halfLifeMs: number}} cfg - Resolved config.
 * @returns {number} Multiplier in [cfg.minMultiplier, 1].
 */
export function computeHealthMultiplier(entry, nowMs, cfg) {
    if (!entry) {
        return 1;
    }
    const lastErrorAtMs = typeof entry.lastErrorAtMs === 'number' && Number.isFinite(entry.lastErrorAtMs)
        ? entry.lastErrorAtMs
        : null;
    const consecutiveErrorCount = typeof entry.consecutiveErrorCount === 'number'
        && Number.isFinite(entry.consecutiveErrorCount)
        && entry.consecutiveErrorCount > 0
        ? Math.floor(entry.consecutiveErrorCount)
        : 0;
    if (!lastErrorAtMs || consecutiveErrorCount <= 0) {
        return 1;
    }
    // A non-finite clock reading would poison the result with NaN; treat it as "no time
    // has passed" so the penalty is applied undecayed.
    const elapsedMs = Number.isFinite(nowMs) ? Math.max(0, nowMs - lastErrorAtMs) : 0;
    // With a non-positive half-life the division below is 0/0 (NaN) when elapsedMs is 0.
    // Use the limiting behaviour instead: no decay at the instant of the error, full decay after.
    const decay = cfg.halfLifeMs > 0
        ? Math.exp((-Math.log(2) * elapsedMs) / cfg.halfLifeMs)
        : (elapsedMs > 0 ? 0 : 1);
    const effectiveErrors = consecutiveErrorCount * decay;
    const raw = 1 - cfg.beta * effectiveErrors;
    return Math.max(cfg.minMultiplier, Math.min(1, raw));
}
59
/**
 * Derives the scheduling weight of a provider key from its health multiplier.
 *
 * @param {object|null} entry - Quota view entry forwarded to computeHealthMultiplier.
 * @param {number} nowMs - Current time in milliseconds.
 * @param {{baseWeight: number}} cfg - Resolved config; `baseWeight` is scaled by the multiplier.
 * @returns {{weight: number, multiplier: number}} Rounded weight (at least 1) and the
 *   multiplier it was computed from.
 */
export function computeHealthWeight(entry, nowMs, cfg) {
    const multiplier = computeHealthMultiplier(entry, nowMs, cfg);
    const scaled = Math.round(cfg.baseWeight * multiplier);
    // Keep every key schedulable: the weight never drops below 1.
    return { weight: scaled < 1 ? 1 : scaled, multiplier };
}