@jsonstudio/llms 0.6.1354 → 0.6.1397
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/compat/profiles/chat-gemini.json +5 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +310 -87
- package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.js +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.js +6 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.d.ts +10 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.js +172 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.d.ts +10 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.js +71 -0
- package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.d.ts +14 -0
- package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.js +289 -0
- package/dist/conversion/hub/response/provider-response.js +6 -0
- package/dist/router/virtual-router/bootstrap.js +6 -0
- package/dist/router/virtual-router/engine-selection/alias-selection.d.ts +15 -0
- package/dist/router/virtual-router/engine-selection/alias-selection.js +85 -4
- package/dist/router/virtual-router/engine-selection/tier-selection-select.js +40 -17
- package/dist/router/virtual-router/engine-selection/tier-selection.js +5 -2
- package/dist/router/virtual-router/engine.js +9 -1
- package/dist/router/virtual-router/types.d.ts +14 -1
- package/package.json +1 -1
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
const COOLDOWN_EMPTY_THRESHOLD_MS = 30_000;
|
|
1
2
|
// Default provider-level strategy table.
|
|
2
3
|
// This is a data-only default; callers can override via `loadBalancing.aliasSelection.providers`.
|
|
3
4
|
export const DEFAULT_PROVIDER_ALIAS_SELECTION = {
|
|
4
|
-
// Antigravity:
|
|
5
|
-
antigravity: '
|
|
5
|
+
// Antigravity: prefer the alias with highest remaining quota; fall back to sticky-queue when quota is unknown.
|
|
6
|
+
antigravity: 'best-quota'
|
|
6
7
|
};
|
|
7
8
|
export function resolveAliasSelectionStrategy(providerId, cfg) {
|
|
8
9
|
if (!providerId)
|
|
@@ -11,11 +12,11 @@ export function resolveAliasSelectionStrategy(providerId, cfg) {
|
|
|
11
12
|
return 'none';
|
|
12
13
|
const overrides = cfg?.providers ?? {};
|
|
13
14
|
const override = overrides[providerId];
|
|
14
|
-
if (override === 'none' || override === 'sticky-queue') {
|
|
15
|
+
if (override === 'none' || override === 'sticky-queue' || override === 'best-quota') {
|
|
15
16
|
return override;
|
|
16
17
|
}
|
|
17
18
|
const def = cfg?.defaultStrategy;
|
|
18
|
-
if (def === 'none' || def === 'sticky-queue') {
|
|
19
|
+
if (def === 'none' || def === 'sticky-queue' || def === 'best-quota') {
|
|
19
20
|
return def;
|
|
20
21
|
}
|
|
21
22
|
const table = DEFAULT_PROVIDER_ALIAS_SELECTION[providerId];
|
|
@@ -100,6 +101,86 @@ export function pinCandidatesByAliasQueue(opts) {
|
|
|
100
101
|
const selectedSet = new Set(selectedKeys);
|
|
101
102
|
return candidates.filter((key) => selectedSet.has(key));
|
|
102
103
|
}
|
|
104
|
+
/**
 * Narrow a candidate group down to the keys of the alias with the most remaining quota.
 *
 * Candidates are bucketed by alias; each alias is judged by the quota entry of the
 * first key in its bucket. An alias is skipped when its entry is missing, has
 * `inPool === false`, is blacklisted past `now`, has a cooldown that still lasts at
 * least `COOLDOWN_EMPTY_THRESHOLD_MS`, or has no positive finite `remainingFraction`.
 * The surviving alias with the highest `remainingFraction` wins; ties go to the alias
 * that appeared first in `candidates`.
 *
 * @param {object} opts
 * @param {string} opts.providerId - Provider every candidate key must belong to (`<providerId>.` prefix).
 * @param {string} opts.modelId - Model every candidate key must resolve to via `modelIdOfKey`.
 * @param {string[]} opts.candidates - Provider keys to choose from; at least two are required.
 * @param {string[]} [opts.orderedTargets] - Forwarded unchanged to `resolveAliasOrderFromTargets`.
 * @param {(key: string) => (string|null|undefined)} opts.aliasOfKey - Extracts the alias of a key.
 * @param {(key: string) => (string|null|undefined)} opts.modelIdOfKey - Extracts the model id of a key.
 * @param {(key: string) => (object|null|undefined)} [opts.quotaView] - Quota lookup for a key.
 * @param {number} [opts.now] - Current time in ms; falls back to `Date.now()` when not a finite number.
 * @returns {string[]|null} The candidates belonging to the chosen alias (in their original
 *   order), or `null` when no decision can be made and the caller should fall back.
 */
export function pinCandidatesByBestQuota(opts) {
    const { providerId, modelId, candidates, orderedTargets, aliasOfKey, modelIdOfKey, quotaView } = opts;
    if (!quotaView)
        return null;
    if (!providerId || !modelId)
        return null;
    if (!Array.isArray(candidates) || candidates.length < 2)
        return null;
    // Without a usable timestamp the comparisons below would all evaluate to false
    // (`x > undefined`, `NaN >= n`), silently treating blacklisted / cooling aliases
    // as eligible. Fall back to the wall clock instead.
    const now = typeof opts.now === 'number' && Number.isFinite(opts.now) ? opts.now : Date.now();
    const aliasBuckets = new Map();
    const aliasOrder = new Map();
    let order = 0;
    for (const key of candidates) {
        if (!key || typeof key !== 'string')
            continue;
        // The group must be homogeneous: any key from another provider/model, or a key
        // without an alias, aborts the whole pinning attempt.
        if (!key.startsWith(`${providerId}.`))
            return null;
        const m = modelIdOfKey(key);
        if (!m || m !== modelId)
            return null;
        const alias = aliasOfKey(key);
        if (!alias)
            return null;
        const list = aliasBuckets.get(alias) ?? [];
        list.push(key);
        aliasBuckets.set(alias, list);
        if (!aliasOrder.has(alias)) {
            aliasOrder.set(alias, order++);
        }
    }
    // With a single alias there is nothing to choose between.
    if (aliasBuckets.size <= 1)
        return null;
    const preferredOrder = resolveAliasOrderFromTargets({
        orderedTargets,
        providerId,
        modelId,
        aliasOfKey,
        modelIdOfKey,
        allowedAliases: new Set(aliasBuckets.keys())
    });
    // NOTE(review): every bucketed alias already received a rank in the loop above, so
    // this loop can only add ranks for aliases that are never looked up below. The
    // tie-break therefore follows candidate order, not target order - confirm intent.
    for (const alias of preferredOrder) {
        if (!aliasOrder.has(alias)) {
            aliasOrder.set(alias, order++);
        }
    }
    const eligible = [];
    for (const [alias, keys] of aliasBuckets.entries()) {
        // Only the first key of the bucket is consulted for the alias' quota state.
        const entry = quotaView(keys[0] ?? '');
        if (!entry || entry.inPool === false) {
            continue;
        }
        if (entry.blacklistUntil && entry.blacklistUntil > now) {
            continue;
        }
        // Cooldowns shorter than the threshold do not disqualify an alias.
        if (entry.cooldownUntil && entry.cooldownUntil - now >= COOLDOWN_EMPTY_THRESHOLD_MS) {
            continue;
        }
        const remainingRaw = entry.remainingFraction;
        // Unknown / non-finite quota counts as 0 and is therefore not eligible.
        const remaining = typeof remainingRaw === 'number' && Number.isFinite(remainingRaw) ? remainingRaw : 0;
        if (remaining <= 0) {
            continue;
        }
        eligible.push({
            alias,
            score: remaining,
            order: aliasOrder.get(alias) ?? Number.MAX_SAFE_INTEGER
        });
    }
    if (!eligible.length) {
        return null;
    }
    // Highest remaining quota first; equal scores fall back to first-seen order.
    eligible.sort((a, b) => (b.score - a.score) || (a.order - b.order));
    const selectedAlias = eligible[0]?.alias;
    if (!selectedAlias)
        return null;
    const selectedKeys = aliasBuckets.get(selectedAlias) ?? [];
    if (!selectedKeys.length)
        return null;
    const selectedSet = new Set(selectedKeys);
    return candidates.filter((key) => selectedSet.has(key));
}
|
|
103
184
|
function resolveAliasOrderFromTargets(opts) {
|
|
104
185
|
const { orderedTargets, providerId, modelId, aliasOfKey, modelIdOfKey, allowedAliases } = opts;
|
|
105
186
|
if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { computeContextMultiplier } from '../context-weighted.js';
|
|
2
2
|
import { computeHealthWeight } from '../health-weighted.js';
|
|
3
|
-
import { pinCandidatesByAliasQueue, resolveAliasSelectionStrategy } from './alias-selection.js';
|
|
3
|
+
import { pinCandidatesByAliasQueue, pinCandidatesByBestQuota, resolveAliasSelectionStrategy } from './alias-selection.js';
|
|
4
4
|
import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
|
|
5
5
|
import { extractKeyAlias, extractProviderId, getProviderModelId } from './key-parsing.js';
|
|
6
6
|
import { pickPriorityGroup } from './tier-priority.js';
|
|
7
|
+
const ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS = 30_000;
|
|
7
8
|
function applyAliasStickyQueuePinning(opts) {
|
|
8
|
-
const { candidates, orderedTargets, deps, excludedKeys } = opts;
|
|
9
|
+
const { candidates, orderedTargets, deps, excludedKeys, now } = opts;
|
|
9
10
|
if (!Array.isArray(candidates) || candidates.length < 2) {
|
|
10
11
|
return candidates;
|
|
11
12
|
}
|
|
@@ -26,7 +27,7 @@ function applyAliasStickyQueuePinning(opts) {
|
|
|
26
27
|
continue;
|
|
27
28
|
}
|
|
28
29
|
const strategy = resolveAliasSelectionStrategy(providerId, deps.loadBalancer.getPolicy().aliasSelection);
|
|
29
|
-
if (strategy !== 'sticky-queue') {
|
|
30
|
+
if (strategy !== 'sticky-queue' && strategy !== 'best-quota') {
|
|
30
31
|
continue;
|
|
31
32
|
}
|
|
32
33
|
const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
|
|
@@ -58,17 +59,33 @@ function applyAliasStickyQueuePinning(opts) {
|
|
|
58
59
|
if (aliases.size < 2) {
|
|
59
60
|
continue;
|
|
60
61
|
}
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
62
|
+
const strategy = resolveAliasSelectionStrategy(group.providerId, deps.loadBalancer.getPolicy().aliasSelection);
|
|
63
|
+
let pinned = null;
|
|
64
|
+
if (strategy === 'best-quota') {
|
|
65
|
+
pinned = pinCandidatesByBestQuota({
|
|
66
|
+
providerId: group.providerId,
|
|
67
|
+
modelId: group.modelId,
|
|
68
|
+
candidates: group.keys,
|
|
69
|
+
orderedTargets,
|
|
70
|
+
aliasOfKey: extractKeyAlias,
|
|
71
|
+
modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
|
|
72
|
+
quotaView: deps.quotaView,
|
|
73
|
+
now
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
if (!pinned) {
|
|
77
|
+
pinned = pinCandidatesByAliasQueue({
|
|
78
|
+
queueStore: store,
|
|
79
|
+
providerId: group.providerId,
|
|
80
|
+
modelId: group.modelId,
|
|
81
|
+
candidates: group.keys,
|
|
82
|
+
orderedTargets,
|
|
83
|
+
excludedProviderKeys: excludedKeys,
|
|
84
|
+
aliasOfKey: extractKeyAlias,
|
|
85
|
+
modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
|
|
86
|
+
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
87
|
+
});
|
|
88
|
+
}
|
|
72
89
|
if (pinned && pinned.length) {
|
|
73
90
|
pinnedByGroup.set(groupId, new Set(pinned));
|
|
74
91
|
}
|
|
@@ -161,7 +178,8 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
161
178
|
candidates: retryPreferredCandidates,
|
|
162
179
|
orderedTargets: tier.targets,
|
|
163
180
|
deps,
|
|
164
|
-
excludedKeys
|
|
181
|
+
excludedKeys,
|
|
182
|
+
now
|
|
165
183
|
});
|
|
166
184
|
if (tier.mode === 'priority') {
|
|
167
185
|
if (isRecoveryAttempt) {
|
|
@@ -240,7 +258,11 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
240
258
|
continue;
|
|
241
259
|
}
|
|
242
260
|
if (entry.cooldownUntil && entry.cooldownUntil > now) {
|
|
243
|
-
|
|
261
|
+
const providerId = extractProviderId(key) ?? '';
|
|
262
|
+
const cooldownMs = entry.cooldownUntil - now;
|
|
263
|
+
if (providerId !== 'antigravity' || cooldownMs >= ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS) {
|
|
264
|
+
continue;
|
|
265
|
+
}
|
|
244
266
|
}
|
|
245
267
|
if (entry.blacklistUntil && entry.blacklistUntil > now) {
|
|
246
268
|
continue;
|
|
@@ -273,7 +295,8 @@ export function selectProviderKeyFromCandidatePool(opts) {
|
|
|
273
295
|
candidates: bucketCandidates,
|
|
274
296
|
orderedTargets: tier.targets,
|
|
275
297
|
deps,
|
|
276
|
-
excludedKeys
|
|
298
|
+
excludedKeys,
|
|
299
|
+
now
|
|
277
300
|
});
|
|
278
301
|
const bucketPenaltyMap = {};
|
|
279
302
|
for (const item of bucket) {
|
|
@@ -123,7 +123,7 @@ export function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, f
|
|
|
123
123
|
return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
|
|
124
124
|
}
|
|
125
125
|
const contextResult = deps.contextAdvisor.classify(targets, estimatedTokens, (key) => deps.providerRegistry.get(key));
|
|
126
|
-
const prioritizedPools = buildContextCandidatePools(contextResult);
|
|
126
|
+
const prioritizedPools = buildContextCandidatePools(contextResult, routeName);
|
|
127
127
|
const quotaView = deps.quotaView;
|
|
128
128
|
const now = quotaView ? Date.now() : 0;
|
|
129
129
|
const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
|
|
@@ -200,7 +200,7 @@ function recordAliasQueueFailuresFromExcludedKeys(excludedKeys, orderedTargets,
|
|
|
200
200
|
}
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
|
-
function buildContextCandidatePools(result) {
|
|
203
|
+
function buildContextCandidatePools(result, routeName) {
|
|
204
204
|
const ordered = [];
|
|
205
205
|
if (result.safe.length) {
|
|
206
206
|
ordered.push(result.safe);
|
|
@@ -208,6 +208,9 @@ function buildContextCandidatePools(result) {
|
|
|
208
208
|
if (result.risky.length) {
|
|
209
209
|
ordered.push(result.risky);
|
|
210
210
|
}
|
|
211
|
+
if (routeName === 'longcontext' && result.overflow.length) {
|
|
212
|
+
ordered.push(result.overflow);
|
|
213
|
+
}
|
|
211
214
|
return ordered;
|
|
212
215
|
}
|
|
213
216
|
function describeAttempt(routeName, poolId, result) {
|
|
@@ -12,6 +12,7 @@ import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
|
12
12
|
import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
|
|
13
13
|
import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
14
14
|
import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
|
|
15
|
+
const ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS = 30_000;
|
|
15
16
|
export class VirtualRouterEngine {
|
|
16
17
|
routing = {};
|
|
17
18
|
providerRegistry = new ProviderRegistry();
|
|
@@ -1468,10 +1469,17 @@ export class VirtualRouterEngine {
|
|
|
1468
1469
|
if (!expiry) {
|
|
1469
1470
|
return false;
|
|
1470
1471
|
}
|
|
1471
|
-
|
|
1472
|
+
const now = Date.now();
|
|
1473
|
+
if (now >= expiry) {
|
|
1472
1474
|
this.providerCooldowns.delete(providerKey);
|
|
1473
1475
|
return false;
|
|
1474
1476
|
}
|
|
1477
|
+
if (providerKey.startsWith('antigravity.')) {
|
|
1478
|
+
const remaining = expiry - now;
|
|
1479
|
+
if (remaining < ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS) {
|
|
1480
|
+
return false;
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
1475
1483
|
return true;
|
|
1476
1484
|
}
|
|
1477
1485
|
restoreHealthFromStore() {
|
|
@@ -152,7 +152,7 @@ export interface HealthWeightedLoadBalancingConfig {
|
|
|
152
152
|
*/
|
|
153
153
|
recoverToBestOnRetry?: boolean;
|
|
154
154
|
}
|
|
155
|
-
export type AliasSelectionStrategy = 'none' | 'sticky-queue';
|
|
155
|
+
export type AliasSelectionStrategy = 'none' | 'sticky-queue' | 'best-quota';
|
|
156
156
|
export interface AliasSelectionConfig {
|
|
157
157
|
/**
|
|
158
158
|
* Global on/off switch. When false, no alias-level selection is applied.
|
|
@@ -514,6 +514,19 @@ export interface ProviderQuotaViewEntry {
|
|
|
514
514
|
inPool: boolean;
|
|
515
515
|
reason?: string;
|
|
516
516
|
priorityTier?: number;
|
|
517
|
+
/**
|
|
518
|
+
* Optional remaining quota fraction for the provider key (0..1).
|
|
519
|
+
* Used by alias-selection strategies that prefer higher remaining quota.
|
|
520
|
+
*/
|
|
521
|
+
remainingFraction?: number | null;
|
|
522
|
+
/**
|
|
523
|
+
* Optional quota reset timestamp (ms since epoch) for the provider key.
|
|
524
|
+
*/
|
|
525
|
+
quotaResetAtMs?: number | null;
|
|
526
|
+
/**
|
|
527
|
+
* Optional quota fetch timestamp (ms since epoch) for the provider key.
|
|
528
|
+
*/
|
|
529
|
+
quotaFetchedAtMs?: number | null;
|
|
517
530
|
/**
|
|
518
531
|
* Optional soft penalty hint for selection ordering.
|
|
519
532
|
* - 0 / undefined means no penalty
|