@jsonstudio/llms 0.6.954 → 0.6.1172
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/hub/operation-table/operation-table-runner.d.ts +18 -0
- package/dist/conversion/hub/operation-table/operation-table-runner.js +158 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.d.ts +8 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +303 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.d.ts +8 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +413 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.d.ts +7 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +841 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.d.ts +21 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +535 -0
- package/dist/conversion/hub/ops/operations.d.ts +19 -0
- package/dist/conversion/hub/ops/operations.js +126 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +9 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +489 -19
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +6 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +11 -0
- package/dist/conversion/hub/policy/policy-engine.js +41 -9
- package/dist/conversion/hub/policy/protocol-spec.d.ts +25 -0
- package/dist/conversion/hub/policy/protocol-spec.js +73 -23
- package/dist/conversion/hub/process/chat-process.js +252 -41
- package/dist/conversion/hub/response/provider-response.js +175 -2
- package/dist/conversion/hub/response/response-runtime.js +1 -1
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.d.ts +1 -8
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +1 -365
- package/dist/conversion/hub/semantic-mappers/chat-mapper.d.ts +1 -8
- package/dist/conversion/hub/semantic-mappers/chat-mapper.js +1 -467
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.d.ts +1 -7
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +1 -903
- package/dist/conversion/hub/semantic-mappers/responses-mapper.d.ts +1 -21
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +1 -593
- package/dist/conversion/hub/tool-surface/tool-surface-engine.d.ts +18 -0
- package/dist/conversion/hub/tool-surface/tool-surface-engine.js +571 -0
- package/dist/conversion/responses/responses-openai-bridge.js +14 -2
- package/dist/conversion/shared/bridge-message-utils.js +2 -8
- package/dist/conversion/shared/bridge-policies.js +5 -105
- package/dist/conversion/shared/gemini-tool-utils.js +89 -15
- package/dist/conversion/shared/protocol-field-allowlists.d.ts +7 -0
- package/dist/conversion/shared/protocol-field-allowlists.js +145 -0
- package/dist/conversion/shared/reasoning-tool-normalizer.js +4 -2
- package/dist/conversion/shared/snapshot-hooks.js +166 -3
- package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
- package/dist/conversion/shared/text-markup-normalizer.js +345 -9
- package/dist/conversion/shared/thought-signature-validator.d.ts +52 -0
- package/dist/conversion/shared/thought-signature-validator.js +170 -0
- package/dist/conversion/shared/tool-argument-repairer.d.ts +39 -0
- package/dist/conversion/shared/tool-argument-repairer.js +56 -0
- package/dist/conversion/shared/tool-call-id-manager.d.ts +113 -0
- package/dist/conversion/shared/tool-call-id-manager.js +231 -0
- package/dist/conversion/shared/tool-canonicalizer.js +2 -11
- package/dist/router/virtual-router/bootstrap.js +70 -5
- package/dist/router/virtual-router/context-advisor.d.ts +4 -0
- package/dist/router/virtual-router/context-advisor.js +3 -0
- package/dist/router/virtual-router/context-weighted.d.ts +31 -0
- package/dist/router/virtual-router/context-weighted.js +54 -0
- package/dist/router/virtual-router/engine-selection.js +284 -47
- package/dist/router/virtual-router/engine.d.ts +3 -0
- package/dist/router/virtual-router/engine.js +142 -33
- package/dist/router/virtual-router/health-weighted.d.ts +25 -0
- package/dist/router/virtual-router/health-weighted.js +63 -0
- package/dist/router/virtual-router/load-balancer.d.ts +2 -0
- package/dist/router/virtual-router/load-balancer.js +45 -16
- package/dist/router/virtual-router/routing-instructions.js +17 -1
- package/dist/router/virtual-router/sticky-session-store.js +136 -24
- package/dist/router/virtual-router/stop-message-file-resolver.d.ts +1 -0
- package/dist/router/virtual-router/stop-message-file-resolver.js +74 -0
- package/dist/router/virtual-router/stop-message-state-sync.d.ts +15 -0
- package/dist/router/virtual-router/stop-message-state-sync.js +57 -0
- package/dist/router/virtual-router/types.d.ts +98 -0
- package/dist/servertool/clock/config.d.ts +7 -0
- package/dist/servertool/clock/config.js +27 -0
- package/dist/servertool/clock/daemon.d.ts +3 -0
- package/dist/servertool/clock/daemon.js +79 -0
- package/dist/servertool/clock/io.d.ts +2 -0
- package/dist/servertool/clock/io.js +13 -0
- package/dist/servertool/clock/paths.d.ts +4 -0
- package/dist/servertool/clock/paths.js +25 -0
- package/dist/servertool/clock/session-store.d.ts +3 -0
- package/dist/servertool/clock/session-store.js +56 -0
- package/dist/servertool/clock/state.d.ts +5 -0
- package/dist/servertool/clock/state.js +62 -0
- package/dist/servertool/clock/task-store.d.ts +5 -0
- package/dist/servertool/clock/task-store.js +4 -0
- package/dist/servertool/clock/tasks.d.ts +17 -0
- package/dist/servertool/clock/tasks.js +221 -0
- package/dist/servertool/clock/types.d.ts +36 -0
- package/dist/servertool/clock/types.js +1 -0
- package/dist/servertool/engine.d.ts +2 -0
- package/dist/servertool/engine.js +161 -7
- package/dist/servertool/followup-shadow.d.ts +16 -0
- package/dist/servertool/followup-shadow.js +145 -0
- package/dist/servertool/handlers/apply-patch-guard.js +1 -265
- package/dist/servertool/handlers/clock-auto.d.ts +1 -0
- package/dist/servertool/handlers/clock-auto.js +160 -0
- package/dist/servertool/handlers/clock.d.ts +1 -0
- package/dist/servertool/handlers/clock.js +197 -0
- package/dist/servertool/handlers/exec-command-guard.js +7 -555
- package/dist/servertool/handlers/followup-request-builder.d.ts +15 -7
- package/dist/servertool/handlers/followup-request-builder.js +248 -28
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +62 -169
- package/dist/servertool/handlers/iflow-model-error-retry.js +18 -28
- package/dist/servertool/handlers/recursive-detection-guard.d.ts +1 -0
- package/dist/servertool/handlers/recursive-detection-guard.js +333 -0
- package/dist/servertool/handlers/stop-message-auto.js +47 -175
- package/dist/servertool/handlers/vision.d.ts +7 -1
- package/dist/servertool/handlers/vision.js +61 -117
- package/dist/servertool/handlers/web-search.d.ts +7 -1
- package/dist/servertool/handlers/web-search.js +122 -105
- package/dist/servertool/reenter-backend.d.ts +23 -0
- package/dist/servertool/reenter-backend.js +18 -0
- package/dist/servertool/server-side-tools.d.ts +3 -2
- package/dist/servertool/server-side-tools.js +64 -10
- package/dist/servertool/types.d.ts +92 -3
- package/dist/sse/json-to-sse/event-generators/responses.js +3 -21
- package/dist/sse/shared/serializers/responses-event-serializer.d.ts +8 -0
- package/dist/sse/shared/serializers/responses-event-serializer.js +19 -0
- package/dist/sse/shared/writer.js +24 -7
- package/dist/tools/apply-patch/execution-capturer.js +3 -1
- package/dist/tools/apply-patch/json/parse-loose.d.ts +3 -0
- package/dist/tools/apply-patch/json/parse-loose.js +139 -0
- package/dist/tools/apply-patch/patch-text/context-diff.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/context-diff.js +173 -0
- package/dist/tools/apply-patch/patch-text/git-diff.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/git-diff.js +138 -0
- package/dist/tools/apply-patch/patch-text/looks-like-patch.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/looks-like-patch.js +13 -0
- package/dist/tools/apply-patch/patch-text/normalize.d.ts +3 -0
- package/dist/tools/apply-patch/patch-text/normalize.js +262 -0
- package/dist/tools/apply-patch/structured/coercion.d.ts +3 -0
- package/dist/tools/apply-patch/structured/coercion.js +82 -0
- package/dist/tools/apply-patch/validation/shared.d.ts +3 -0
- package/dist/tools/apply-patch/validation/shared.js +6 -0
- package/dist/tools/apply-patch/validator.d.ts +2 -2
- package/dist/tools/apply-patch/validator.js +6 -556
- package/package.json +1 -1
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { computeContextMultiplier, computeEffectiveSafeWindowTokens, resolveContextWeightedConfig } from './context-weighted.js';
|
|
2
|
+
import { computeHealthWeight, resolveHealthWeightedConfig } from './health-weighted.js';
|
|
1
3
|
import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
2
4
|
export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
|
|
3
5
|
const state = options.routingState ?? activeState;
|
|
@@ -294,12 +296,13 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
294
296
|
if (excludedKeys.size > 0) {
|
|
295
297
|
targets = targets.filter((key) => !excludedKeys.has(key));
|
|
296
298
|
}
|
|
299
|
+
const isRecoveryAttempt = excludedKeys.size > 0;
|
|
300
|
+
const singleCandidateFallback = targets.length === 1 ? targets[0] : undefined;
|
|
297
301
|
if (targets.length > 0) {
|
|
298
|
-
|
|
299
|
-
//
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
}
|
|
302
|
+
// Always respect cooldown signals. If a route/tier is depleted due to cooldown,
|
|
303
|
+
// routing is expected to fall back to other tiers/routes (e.g. longcontext → default),
|
|
304
|
+
// rather than repeatedly selecting the cooled-down provider.
|
|
305
|
+
targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
|
|
303
306
|
}
|
|
304
307
|
if (allowedProviders && allowedProviders.size > 0) {
|
|
305
308
|
targets = targets.filter((key) => {
|
|
@@ -399,6 +402,10 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
399
402
|
const prioritizedPools = buildContextCandidatePools(contextResult);
|
|
400
403
|
const quotaView = deps.quotaView;
|
|
401
404
|
const now = quotaView ? Date.now() : 0;
|
|
405
|
+
const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
|
|
406
|
+
const contextWeightedCfg = resolveContextWeightedConfig(deps.loadBalancer.getPolicy().contextWeighted);
|
|
407
|
+
const warnRatio = deps.contextAdvisor.getConfig().warnRatio;
|
|
408
|
+
const nowForWeights = Date.now();
|
|
402
409
|
const selectFirstAvailable = (candidates) => {
|
|
403
410
|
for (const key of candidates) {
|
|
404
411
|
if (deps.healthManager.isAvailable(key)) {
|
|
@@ -407,32 +414,148 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
407
414
|
}
|
|
408
415
|
return null;
|
|
409
416
|
};
|
|
410
|
-
const
|
|
417
|
+
const resolvePriorityMeta = (orderedTargets) => {
|
|
418
|
+
// Priority mode semantics (strict group priority + alias-level balancing):
|
|
419
|
+
// - Targets are interpreted as ordered (providerId, modelId) groups.
|
|
420
|
+
// - Group base priorities: 100, 90, 80, ... (step=10) by appearance order.
|
|
421
|
+
// - Within a group (different auth aliases), base scores: 100, 99, 98, ... (step=1).
|
|
422
|
+
//
|
|
423
|
+
// Group selection is strict: always use the best group until it is unavailable.
|
|
424
|
+
// Alias selection is balanced within the chosen group (RR / health-weighted / context-weighted).
|
|
425
|
+
const meta = new Map();
|
|
426
|
+
if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
|
|
427
|
+
return meta;
|
|
428
|
+
}
|
|
429
|
+
let groupIndex = -1;
|
|
430
|
+
let aliasOffset = 0;
|
|
431
|
+
let lastGroupKey = '';
|
|
432
|
+
for (const key of orderedTargets) {
|
|
433
|
+
const providerId = extractProviderId(key) ?? '';
|
|
434
|
+
const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
|
|
435
|
+
const groupKey = `${providerId}::${modelId}`;
|
|
436
|
+
if (groupKey !== lastGroupKey) {
|
|
437
|
+
groupIndex += 1;
|
|
438
|
+
aliasOffset = 0;
|
|
439
|
+
lastGroupKey = groupKey;
|
|
440
|
+
}
|
|
441
|
+
const groupBase = 100 - groupIndex * 10;
|
|
442
|
+
const base = groupBase - aliasOffset;
|
|
443
|
+
meta.set(key, { groupId: `${providerId}.${modelId}`, groupBase, base });
|
|
444
|
+
aliasOffset += 1;
|
|
445
|
+
}
|
|
446
|
+
return meta;
|
|
447
|
+
};
|
|
448
|
+
const pickPriorityGroup = (candidates, orderedTargets, penalties) => {
|
|
449
|
+
const meta = resolvePriorityMeta(orderedTargets);
|
|
450
|
+
let bestGroupId = null;
|
|
451
|
+
let bestScore = Number.NEGATIVE_INFINITY;
|
|
452
|
+
for (const key of candidates) {
|
|
453
|
+
if (!deps.healthManager.isAvailable(key))
|
|
454
|
+
continue;
|
|
455
|
+
const m = meta.get(key);
|
|
456
|
+
if (!m)
|
|
457
|
+
continue;
|
|
458
|
+
const penalty = penalties ? Math.max(0, Math.floor(penalties[key] ?? 0)) : 0;
|
|
459
|
+
const score = m.base - penalty;
|
|
460
|
+
if (score > bestScore) {
|
|
461
|
+
bestScore = score;
|
|
462
|
+
bestGroupId = m.groupId;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
if (!bestGroupId)
|
|
466
|
+
return null;
|
|
467
|
+
const groupCandidates = candidates.filter((key) => meta.get(key)?.groupId === bestGroupId);
|
|
468
|
+
return groupCandidates.length ? { groupId: bestGroupId, groupCandidates } : null;
|
|
469
|
+
};
|
|
470
|
+
const computeContextWeightMultipliers = (candidates) => {
|
|
471
|
+
if (!contextWeightedCfg.enabled) {
|
|
472
|
+
return null;
|
|
473
|
+
}
|
|
474
|
+
const eff = {};
|
|
475
|
+
let ref = 1;
|
|
476
|
+
for (const key of candidates) {
|
|
477
|
+
const usage = contextResult.usage?.[key];
|
|
478
|
+
const limit = usage && typeof usage.limit === 'number' && Number.isFinite(usage.limit) ? Math.floor(usage.limit) : 0;
|
|
479
|
+
const safeEff = computeEffectiveSafeWindowTokens({
|
|
480
|
+
modelMaxTokens: Math.max(1, limit),
|
|
481
|
+
warnRatio,
|
|
482
|
+
clientCapTokens: contextWeightedCfg.clientCapTokens
|
|
483
|
+
});
|
|
484
|
+
eff[key] = safeEff;
|
|
485
|
+
if (safeEff > ref) {
|
|
486
|
+
ref = safeEff;
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
return { ref, eff };
|
|
490
|
+
};
|
|
491
|
+
const selectWithQuota = (candidates, isSafePool) => {
|
|
411
492
|
if (!quotaView) {
|
|
412
493
|
if (tier.mode === 'priority') {
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
494
|
+
if (isRecoveryAttempt) {
|
|
495
|
+
return selectFirstAvailable(candidates);
|
|
496
|
+
}
|
|
497
|
+
const group = pickPriorityGroup(candidates, tier.targets);
|
|
498
|
+
if (!group) {
|
|
499
|
+
return null;
|
|
416
500
|
}
|
|
417
|
-
|
|
501
|
+
const weights = (() => {
|
|
502
|
+
if (!isSafePool)
|
|
503
|
+
return undefined;
|
|
504
|
+
const ctx = computeContextWeightMultipliers(group.groupCandidates);
|
|
505
|
+
if (!ctx)
|
|
506
|
+
return undefined;
|
|
507
|
+
const out = {};
|
|
508
|
+
for (const key of group.groupCandidates) {
|
|
509
|
+
const m = computeContextMultiplier({
|
|
510
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
511
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
512
|
+
cfg: contextWeightedCfg
|
|
513
|
+
});
|
|
514
|
+
out[key] = Math.max(1, Math.round(100 * m));
|
|
515
|
+
}
|
|
516
|
+
return out;
|
|
517
|
+
})();
|
|
518
|
+
return deps.loadBalancer.select({
|
|
519
|
+
routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
|
|
520
|
+
candidates: group.groupCandidates,
|
|
521
|
+
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
522
|
+
weights,
|
|
523
|
+
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
524
|
+
}, 'round-robin');
|
|
418
525
|
}
|
|
526
|
+
const weights = (() => {
|
|
527
|
+
if (!isSafePool || !contextWeightedCfg.enabled)
|
|
528
|
+
return undefined;
|
|
529
|
+
const ctx = computeContextWeightMultipliers(candidates);
|
|
530
|
+
if (!ctx)
|
|
531
|
+
return undefined;
|
|
532
|
+
const out = {};
|
|
533
|
+
for (const key of candidates) {
|
|
534
|
+
const m = computeContextMultiplier({
|
|
535
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
536
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
537
|
+
cfg: contextWeightedCfg
|
|
538
|
+
});
|
|
539
|
+
out[key] = Math.max(1, Math.round(100 * m));
|
|
540
|
+
}
|
|
541
|
+
return out;
|
|
542
|
+
})();
|
|
419
543
|
const selected = deps.loadBalancer.select({
|
|
420
544
|
routeName: `${routeName}:${tier.id}`,
|
|
421
545
|
candidates,
|
|
422
546
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
547
|
+
weights,
|
|
423
548
|
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
424
549
|
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
425
|
-
if (!selected && candidates.length === 1) {
|
|
426
|
-
return candidates[0];
|
|
427
|
-
}
|
|
428
550
|
return selected;
|
|
429
551
|
}
|
|
430
552
|
const buckets = new Map();
|
|
553
|
+
let order = 0;
|
|
431
554
|
for (const key of candidates) {
|
|
432
555
|
const entry = quotaView(key);
|
|
433
556
|
if (!entry) {
|
|
434
557
|
const list = buckets.get(100) ?? [];
|
|
435
|
-
list.push(key);
|
|
558
|
+
list.push({ key, penalty: 0, order: order++ });
|
|
436
559
|
buckets.set(100, list);
|
|
437
560
|
continue;
|
|
438
561
|
}
|
|
@@ -448,50 +571,169 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
448
571
|
const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier)
|
|
449
572
|
? entry.priorityTier
|
|
450
573
|
: 100;
|
|
574
|
+
const penaltyRaw = entry.selectionPenalty;
|
|
575
|
+
const penalty = typeof penaltyRaw === 'number' && Number.isFinite(penaltyRaw) && penaltyRaw > 0 ? Math.floor(penaltyRaw) : 0;
|
|
451
576
|
const list = buckets.get(tierPriority) ?? [];
|
|
452
|
-
list.push(key);
|
|
577
|
+
list.push({ key, penalty, order: order++ });
|
|
453
578
|
buckets.set(tierPriority, list);
|
|
454
579
|
}
|
|
455
580
|
const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
|
|
456
581
|
for (const priority of sortedPriorities) {
|
|
457
|
-
const
|
|
458
|
-
if (!
|
|
582
|
+
const bucket = buckets.get(priority) ?? [];
|
|
583
|
+
if (!bucket.length) {
|
|
459
584
|
continue;
|
|
460
585
|
}
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
586
|
+
bucket.sort((a, b) => (a.penalty - b.penalty) || (a.order - b.order));
|
|
587
|
+
const bucketCandidates = bucket.map((item) => item.key);
|
|
588
|
+
// antigravity special: avoid rotating across keys while the current key is healthy.
|
|
589
|
+
// Rationale: some upstream gateways reject rapid cross-key switching even when quota exists,
|
|
590
|
+
// causing repeated 429s. We therefore pin a single key per (providerId, modelId) until it is
|
|
591
|
+
// excluded by quota/cooldown, then fail over to the next available key.
|
|
592
|
+
//
|
|
593
|
+
// This is only applied when the request has no session-level sticky key, to avoid breaking
|
|
594
|
+
// explicit session stickiness.
|
|
595
|
+
const shouldPinAntigravityModel = (() => {
|
|
596
|
+
// Only respect explicit session/conversation stickiness. requestId-scoped sticky keys
|
|
597
|
+
// (used for request-chain pinning) should not prevent global antigravity key pinning.
|
|
598
|
+
if (typeof stickyKey === 'string' && (stickyKey.startsWith('session:') || stickyKey.startsWith('conversation:'))) {
|
|
599
|
+
return false;
|
|
600
|
+
}
|
|
601
|
+
if (bucketCandidates.length < 2) {
|
|
602
|
+
return false;
|
|
603
|
+
}
|
|
604
|
+
let modelId = null;
|
|
605
|
+
for (const key of bucketCandidates) {
|
|
606
|
+
const providerId = extractProviderId(key);
|
|
607
|
+
if (providerId !== 'antigravity') {
|
|
608
|
+
return false;
|
|
609
|
+
}
|
|
610
|
+
const candidateModel = getProviderModelId(key, deps.providerRegistry);
|
|
611
|
+
if (!candidateModel) {
|
|
612
|
+
return false;
|
|
613
|
+
}
|
|
614
|
+
if (modelId === null) {
|
|
615
|
+
modelId = candidateModel;
|
|
616
|
+
}
|
|
617
|
+
else if (modelId !== candidateModel) {
|
|
618
|
+
return false;
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
return Boolean(modelId);
|
|
622
|
+
})();
|
|
623
|
+
if (shouldPinAntigravityModel && !isRecoveryAttempt) {
|
|
624
|
+
const pinned = selectFirstAvailable(bucketCandidates);
|
|
625
|
+
if (pinned) {
|
|
626
|
+
return pinned;
|
|
465
627
|
}
|
|
466
628
|
}
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
if (
|
|
475
|
-
|
|
629
|
+
const bucketPenaltyMap = {};
|
|
630
|
+
for (const item of bucket) {
|
|
631
|
+
bucketPenaltyMap[item.key] = item.penalty;
|
|
632
|
+
}
|
|
633
|
+
const bucketWeights = {};
|
|
634
|
+
const bucketMultipliers = {};
|
|
635
|
+
for (const item of bucket) {
|
|
636
|
+
if (healthWeightedCfg.enabled) {
|
|
637
|
+
const entry = quotaView(item.key);
|
|
638
|
+
const { weight, multiplier } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
|
|
639
|
+
bucketWeights[item.key] = weight;
|
|
640
|
+
bucketMultipliers[item.key] = multiplier;
|
|
641
|
+
}
|
|
642
|
+
else {
|
|
643
|
+
// Legacy: penalty => lower weight, but never zero (unhealthy should still get a chance).
|
|
644
|
+
bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
|
|
645
|
+
bucketMultipliers[item.key] = 1;
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
if (isSafePool && contextWeightedCfg.enabled) {
|
|
649
|
+
const ctx = computeContextWeightMultipliers(bucketCandidates);
|
|
650
|
+
if (ctx) {
|
|
651
|
+
for (const key of bucketCandidates) {
|
|
652
|
+
const m = computeContextMultiplier({
|
|
653
|
+
effectiveSafeRefTokens: ctx.ref,
|
|
654
|
+
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
655
|
+
cfg: contextWeightedCfg
|
|
656
|
+
});
|
|
657
|
+
bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
|
|
658
|
+
}
|
|
476
659
|
}
|
|
477
660
|
}
|
|
478
|
-
}
|
|
479
|
-
// default 路由永不因 quota gating 而“空池”:
|
|
480
|
-
// 当 quotaView 过滤后没有任何可用候选时,默认路由允许忽略 quotaView,
|
|
481
|
-
// 继续按健康/负载均衡选择一个 providerKey(但不覆盖 forced/required 约束)。
|
|
482
|
-
const quotaBypassAllowed = routeName === DEFAULT_ROUTE && (!requiredProviderKeys || requiredProviderKeys.size === 0);
|
|
483
|
-
if (quotaBypassAllowed) {
|
|
484
661
|
if (tier.mode === 'priority') {
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
662
|
+
if (!isRecoveryAttempt) {
|
|
663
|
+
const group = pickPriorityGroup(bucketCandidates, tier.targets, bucketPenaltyMap);
|
|
664
|
+
if (!group) {
|
|
665
|
+
continue;
|
|
666
|
+
}
|
|
667
|
+
const groupWeights = {};
|
|
668
|
+
for (const key of group.groupCandidates) {
|
|
669
|
+
groupWeights[key] = bucketWeights[key] ?? 1;
|
|
670
|
+
}
|
|
671
|
+
const selected = deps.loadBalancer.select({
|
|
672
|
+
routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
|
|
673
|
+
candidates: group.groupCandidates,
|
|
674
|
+
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
675
|
+
weights: groupWeights,
|
|
676
|
+
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
677
|
+
}, 'round-robin');
|
|
678
|
+
if (selected) {
|
|
679
|
+
return selected;
|
|
680
|
+
}
|
|
681
|
+
continue;
|
|
488
682
|
}
|
|
683
|
+
if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
|
|
684
|
+
let best = null;
|
|
685
|
+
let bestM = Number.NEGATIVE_INFINITY;
|
|
686
|
+
for (const key of bucketCandidates) {
|
|
687
|
+
if (!deps.healthManager.isAvailable(key))
|
|
688
|
+
continue;
|
|
689
|
+
const m = bucketMultipliers[key] ?? 1;
|
|
690
|
+
if (m > bestM) {
|
|
691
|
+
bestM = m;
|
|
692
|
+
best = key;
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
if (best) {
|
|
696
|
+
return best;
|
|
697
|
+
}
|
|
698
|
+
continue;
|
|
699
|
+
}
|
|
700
|
+
else if (isRecoveryAttempt) {
|
|
701
|
+
const recovered = selectFirstAvailable(bucketCandidates);
|
|
702
|
+
if (recovered)
|
|
703
|
+
return recovered;
|
|
704
|
+
continue;
|
|
705
|
+
}
|
|
706
|
+
// (unreachable) recovery handled above
|
|
489
707
|
}
|
|
490
708
|
else {
|
|
709
|
+
if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
|
|
710
|
+
let best = null;
|
|
711
|
+
let bestM = Number.NEGATIVE_INFINITY;
|
|
712
|
+
for (const key of bucketCandidates) {
|
|
713
|
+
if (!deps.healthManager.isAvailable(key))
|
|
714
|
+
continue;
|
|
715
|
+
const m = bucketMultipliers[key] ?? 1;
|
|
716
|
+
if (m > bestM) {
|
|
717
|
+
bestM = m;
|
|
718
|
+
best = key;
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
if (best) {
|
|
722
|
+
return best;
|
|
723
|
+
}
|
|
724
|
+
continue;
|
|
725
|
+
}
|
|
726
|
+
else if (isRecoveryAttempt) {
|
|
727
|
+
const recovered = selectFirstAvailable(bucketCandidates);
|
|
728
|
+
if (recovered)
|
|
729
|
+
return recovered;
|
|
730
|
+
continue;
|
|
731
|
+
}
|
|
491
732
|
const selected = deps.loadBalancer.select({
|
|
492
|
-
routeName: `${routeName}:${tier.id}
|
|
493
|
-
candidates,
|
|
733
|
+
routeName: `${routeName}:${tier.id}`,
|
|
734
|
+
candidates: bucketCandidates,
|
|
494
735
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
736
|
+
weights: bucketWeights,
|
|
495
737
|
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
496
738
|
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
497
739
|
if (selected) {
|
|
@@ -499,14 +741,10 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
499
741
|
}
|
|
500
742
|
}
|
|
501
743
|
}
|
|
502
|
-
// 单 provider 兜底:当只剩一个候选 key 时,不因 quota/blacklist/cooldown 或健康状态过滤导致无 provider。
|
|
503
|
-
if (candidates.length === 1) {
|
|
504
|
-
return candidates[0];
|
|
505
|
-
}
|
|
506
744
|
return null;
|
|
507
745
|
};
|
|
508
746
|
for (const candidatePool of prioritizedPools) {
|
|
509
|
-
const providerKey = selectWithQuota(candidatePool);
|
|
747
|
+
const providerKey = selectWithQuota(candidatePool, candidatePool === contextResult.safe);
|
|
510
748
|
if (providerKey) {
|
|
511
749
|
return { providerKey, poolTargets: tier.targets, tierId: tier.id };
|
|
512
750
|
}
|
|
@@ -802,7 +1040,6 @@ function buildContextCandidatePools(result) {
|
|
|
802
1040
|
const ordered = [];
|
|
803
1041
|
if (result.safe.length) {
|
|
804
1042
|
ordered.push(result.safe);
|
|
805
|
-
return ordered;
|
|
806
1043
|
}
|
|
807
1044
|
if (result.risky.length) {
|
|
808
1045
|
ordered.push(result.risky);
|
|
@@ -5,6 +5,7 @@ import type { ProviderQuotaView } from './types.js';
|
|
|
5
5
|
interface RoutingInstructionStateStore {
|
|
6
6
|
loadSync(key: string): RoutingInstructionState | null;
|
|
7
7
|
saveAsync(key: string, state: RoutingInstructionState | null): void;
|
|
8
|
+
saveSync?: (key: string, state: RoutingInstructionState | null) => void;
|
|
8
9
|
}
|
|
9
10
|
export declare class VirtualRouterEngine {
|
|
10
11
|
private routing;
|
|
@@ -63,9 +64,11 @@ export declare class VirtualRouterEngine {
|
|
|
63
64
|
private providerHealthConfig;
|
|
64
65
|
private resolveStickyKey;
|
|
65
66
|
private resolveSessionScope;
|
|
67
|
+
private resolveStopMessageScope;
|
|
66
68
|
private getRoutingInstructionState;
|
|
67
69
|
private buildMetadataInstructions;
|
|
68
70
|
private parseMetadataDisableDescriptor;
|
|
71
|
+
private parseMetadataForceProviderKey;
|
|
69
72
|
private resolveRoutingMode;
|
|
70
73
|
private resolveInstructionTarget;
|
|
71
74
|
private filterCandidatesByRoutingState;
|