@jsonstudio/llms 0.6.1172 → 0.6.1397
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.d.ts +3 -1
- package/dist/conversion/codecs/gemini-openai-codec.js +10 -4
- package/dist/conversion/compat/actions/gemini-web-search.d.ts +1 -1
- package/dist/conversion/compat/actions/gemini-web-search.js +5 -2
- package/dist/conversion/compat/actions/iflow-tool-text-fallback.d.ts +12 -0
- package/dist/conversion/compat/actions/iflow-tool-text-fallback.js +199 -0
- package/dist/conversion/compat/actions/iflow-web-search.d.ts +1 -1
- package/dist/conversion/compat/actions/iflow-web-search.js +5 -2
- package/dist/conversion/compat/profiles/chat-gemini.json +5 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +47 -56
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +1 -13
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +748 -52
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +18 -38
- package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.d.ts +10 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.js +142 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.d.ts +6 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.js +79 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.js +46 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.d.ts +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.js +366 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.d.ts +9 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.js +390 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/node-results.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/node-results.js +14 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.js +144 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/policy.d.ts +4 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/policy.js +32 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/protocol.d.ts +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/protocol.js +63 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.js +43 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.d.ts +1 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.js +29 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.js +16 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/types.d.ts +116 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/types.js +1 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +3 -95
- package/dist/conversion/hub/pipeline/hub-pipeline.js +19 -1281
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage1_format_parse/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.d.ts +7 -0
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +65 -1
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +25 -22
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.d.ts +1 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.d.ts +10 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.js +172 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage1_tool_governance/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage2_route_select/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +11 -11
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage2_format_parse/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.d.ts +1 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.js +4 -2
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.d.ts +10 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.js +71 -0
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.d.ts +1 -0
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.js +17 -9
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage2_sse_stream/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +40 -2
- package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage2_finalize/index.js +1 -1
- package/dist/conversion/hub/pipeline/target-utils.js +9 -5
- package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.d.ts +14 -0
- package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.js +289 -0
- package/dist/conversion/hub/process/chat-process.js +256 -16
- package/dist/conversion/hub/response/provider-response.d.ts +8 -0
- package/dist/conversion/hub/response/provider-response.js +91 -27
- package/dist/conversion/hub/response/response-mappers.d.ts +10 -3
- package/dist/conversion/hub/response/response-mappers.js +30 -6
- package/dist/conversion/hub/response/response-runtime.js +4 -38
- package/dist/conversion/hub/snapshot-recorder.js +5 -1
- package/dist/conversion/hub/standardized-bridge.js +23 -15
- package/dist/conversion/pipeline/codecs/v2/anthropic-openai-pipeline.js +36 -5
- package/dist/conversion/responses/responses-openai-bridge.js +20 -4
- package/dist/conversion/shared/gemini-tool-utils.d.ts +8 -1
- package/dist/conversion/shared/gemini-tool-utils.js +580 -108
- package/dist/conversion/shared/jsonish.js +1 -1
- package/dist/conversion/shared/mcp-injection.js +67 -33
- package/dist/conversion/shared/openai-finalizer.js +2 -1
- package/dist/conversion/shared/openai-message-normalize.js +76 -21
- package/dist/conversion/shared/responses-output-builder.js +6 -0
- package/dist/conversion/shared/runtime-metadata.d.ts +7 -0
- package/dist/conversion/shared/runtime-metadata.js +23 -0
- package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
- package/dist/conversion/shared/text-markup-normalizer.js +284 -4
- package/dist/conversion/shared/tool-canonicalizer.js +2 -1
- package/dist/conversion/shared/tool-governor.js +3 -3
- package/dist/filters/engine.js +5 -5
- package/dist/filters/special/request-tool-list-filter.js +194 -60
- package/dist/filters/special/request-tools-normalize.js +1 -1
- package/dist/filters/special/response-tool-text-canonicalize.d.ts +4 -7
- package/dist/filters/special/response-tool-text-canonicalize.js +7 -35
- package/dist/filters/special/tool-filter-hooks.js +58 -62
- package/dist/guidance/index.js +5 -1
- package/dist/http/sse-response.js +6 -6
- package/dist/router/virtual-router/bootstrap.js +54 -4
- package/dist/router/virtual-router/engine-health.d.ts +1 -1
- package/dist/router/virtual-router/engine-health.js +11 -110
- package/dist/router/virtual-router/engine-selection/alias-selection.d.ts +30 -0
- package/dist/router/virtual-router/engine-selection/alias-selection.js +237 -0
- package/dist/router/virtual-router/engine-selection/context-weight-multipliers.d.ts +11 -0
- package/dist/router/virtual-router/engine-selection/context-weight-multipliers.js +23 -0
- package/dist/router/virtual-router/engine-selection/direct-provider-model.d.ts +9 -0
- package/dist/router/virtual-router/engine-selection/direct-provider-model.js +49 -0
- package/dist/router/virtual-router/engine-selection/instruction-target.d.ts +6 -0
- package/dist/router/virtual-router/engine-selection/instruction-target.js +54 -0
- package/dist/router/virtual-router/engine-selection/key-parsing.d.ts +8 -0
- package/dist/router/virtual-router/engine-selection/key-parsing.js +64 -0
- package/dist/router/virtual-router/engine-selection/route-utils.d.ts +12 -0
- package/dist/router/virtual-router/engine-selection/route-utils.js +150 -0
- package/dist/router/virtual-router/engine-selection/routing-state-filter.d.ts +4 -0
- package/dist/router/virtual-router/engine-selection/routing-state-filter.js +50 -0
- package/dist/router/virtual-router/engine-selection/selection-deps.d.ts +39 -0
- package/dist/router/virtual-router/engine-selection/selection-deps.js +1 -0
- package/dist/router/virtual-router/engine-selection/sticky-pool.d.ts +11 -0
- package/dist/router/virtual-router/engine-selection/sticky-pool.js +109 -0
- package/dist/router/virtual-router/engine-selection/tier-priority.d.ts +12 -0
- package/dist/router/virtual-router/engine-selection/tier-priority.js +55 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-select.d.ts +22 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-select.js +423 -0
- package/dist/router/virtual-router/engine-selection/tier-selection.d.ts +3 -0
- package/dist/router/virtual-router/engine-selection/tier-selection.js +228 -0
- package/dist/router/virtual-router/engine-selection.d.ts +4 -30
- package/dist/router/virtual-router/engine-selection.js +10 -962
- package/dist/router/virtual-router/engine.d.ts +1 -0
- package/dist/router/virtual-router/engine.js +64 -11
- package/dist/router/virtual-router/routing-instructions.js +6 -1
- package/dist/router/virtual-router/stop-message-state-sync.d.ts +5 -0
- package/dist/router/virtual-router/stop-message-state-sync.js +6 -14
- package/dist/router/virtual-router/types.d.ts +38 -1
- package/dist/servertool/clock/config.d.ts +8 -0
- package/dist/servertool/clock/config.js +22 -0
- package/dist/servertool/clock/log.d.ts +3 -0
- package/dist/servertool/clock/log.js +13 -0
- package/dist/servertool/clock/task-store.d.ts +1 -1
- package/dist/servertool/clock/task-store.js +1 -1
- package/dist/servertool/clock/tasks.js +1 -1
- package/dist/servertool/engine.js +146 -21
- package/dist/servertool/handlers/clock-auto.js +11 -6
- package/dist/servertool/handlers/clock.js +36 -10
- package/dist/servertool/handlers/followup-request-builder.js +8 -2
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +15 -9
- package/dist/servertool/handlers/iflow-model-error-retry.js +6 -4
- package/dist/servertool/handlers/recursive-detection-guard.js +4 -2
- package/dist/servertool/handlers/stop-message-auto.js +100 -10
- package/dist/servertool/handlers/vision.js +4 -1
- package/dist/servertool/handlers/web-search.js +3 -1
- package/dist/servertool/pending-session.d.ts +19 -0
- package/dist/servertool/pending-session.js +97 -0
- package/dist/servertool/reenter-backend.js +5 -3
- package/dist/servertool/server-side-tools.js +235 -6
- package/dist/servertool/types.d.ts +13 -0
- package/dist/sse/json-to-sse/event-generators/responses.js +1 -1
- package/dist/sse/shared/chat-serializer.js +2 -2
- package/dist/sse/shared/constants.js +1 -1
- package/dist/sse/sse-to-json/anthropic-sse-to-json-converter.d.ts +7 -1
- package/dist/sse/sse-to-json/builders/response-builder.js +16 -0
- package/dist/sse/sse-to-json/responses-sse-to-json-converter.d.ts +1 -1
- package/dist/tools/apply-patch/execution-capturer.js +1 -1
- package/dist/tools/exec-command/normalize.js +4 -0
- package/dist/tools/exec-command/regression-capturer.js +1 -1
- package/package.json +10 -5
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { DEFAULT_ROUTE, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
2
|
+
import { extractExcludedProviderKeySet, extractProviderId } from './engine-selection/key-parsing.js';
|
|
3
|
+
import { trySelectFromTier } from './engine-selection/tier-selection.js';
|
|
4
|
+
import { resolveInstructionTarget } from './engine-selection/instruction-target.js';
|
|
5
|
+
import { filterCandidatesByRoutingState } from './engine-selection/routing-state-filter.js';
|
|
6
|
+
import { selectFromStickyPool as selectFromStickyPoolImpl } from './engine-selection/sticky-pool.js';
|
|
7
|
+
export { selectDirectProviderModel } from './engine-selection/direct-provider-model.js';
|
|
8
|
+
export { selectFromStickyPool } from './engine-selection/sticky-pool.js';
|
|
9
|
+
import { buildRouteCandidates, extendRouteCandidatesForState, initializeRouteQueue, normalizeRouteAlias, routeHasTargets, sortRoutePools } from './engine-selection/route-utils.js';
|
|
4
10
|
export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
|
|
5
11
|
const state = options.routingState ?? activeState;
|
|
6
12
|
const quotaView = deps.quotaView;
|
|
@@ -122,7 +128,7 @@ export function selectProviderImpl(requestedRoute, metadata, classification, fea
|
|
|
122
128
|
}
|
|
123
129
|
}
|
|
124
130
|
if (stickyKeySet && stickyKeySet.size > 0) {
|
|
125
|
-
const stickySelection =
|
|
131
|
+
const stickySelection = selectFromStickyPoolImpl(stickyKeySet, metadata, features, state, deps, { allowAliasRotation });
|
|
126
132
|
if (stickySelection) {
|
|
127
133
|
return stickySelection;
|
|
128
134
|
}
|
|
@@ -169,72 +175,6 @@ export function selectProviderImpl(requestedRoute, metadata, classification, fea
|
|
|
169
175
|
allowAliasRotation
|
|
170
176
|
});
|
|
171
177
|
}
|
|
172
|
-
function extendRouteCandidatesForState(candidates, state, routing) {
|
|
173
|
-
// When provider allowlists are active (e.g. "<**!glm**>"), routing should not be bounded by
|
|
174
|
-
// classifier candidates only. Otherwise, a perfectly valid provider that exists in config
|
|
175
|
-
// (e.g. in a backup/default pool) can become unreachable and cause PROVIDER_NOT_AVAILABLE.
|
|
176
|
-
//
|
|
177
|
-
// We keep original ordering, then append all known routes (by priority) as a fallback search space.
|
|
178
|
-
if (!state.allowedProviders || state.allowedProviders.size === 0) {
|
|
179
|
-
return candidates;
|
|
180
|
-
}
|
|
181
|
-
const allRoutes = sortByPriority(Object.keys(routing).filter((routeName) => routeName && routeHasTargets(routing[routeName])));
|
|
182
|
-
const expanded = Array.isArray(candidates) ? [...candidates] : [];
|
|
183
|
-
for (const routeName of allRoutes) {
|
|
184
|
-
if (!expanded.includes(routeName)) {
|
|
185
|
-
expanded.push(routeName);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
return expanded;
|
|
189
|
-
}
|
|
190
|
-
export function selectDirectProviderModel(providerId, modelId, metadata, features, activeState, deps) {
|
|
191
|
-
const normalizedProvider = typeof providerId === 'string' ? providerId.trim() : '';
|
|
192
|
-
const normalizedModel = typeof modelId === 'string' ? modelId.trim() : '';
|
|
193
|
-
if (!normalizedProvider || !normalizedModel) {
|
|
194
|
-
return null;
|
|
195
|
-
}
|
|
196
|
-
const providerKeys = deps.providerRegistry.listProviderKeys(normalizedProvider);
|
|
197
|
-
if (providerKeys.length === 0) {
|
|
198
|
-
return null;
|
|
199
|
-
}
|
|
200
|
-
const matchingKeys = providerKeys.filter((key) => {
|
|
201
|
-
try {
|
|
202
|
-
const profile = deps.providerRegistry.get(key);
|
|
203
|
-
return profile?.modelId === normalizedModel;
|
|
204
|
-
}
|
|
205
|
-
catch {
|
|
206
|
-
return false;
|
|
207
|
-
}
|
|
208
|
-
});
|
|
209
|
-
if (matchingKeys.length === 0) {
|
|
210
|
-
return null;
|
|
211
|
-
}
|
|
212
|
-
const attempted = [];
|
|
213
|
-
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
214
|
-
? Math.max(0, features.estimatedTokens)
|
|
215
|
-
: 0;
|
|
216
|
-
const tier = {
|
|
217
|
-
id: `direct:${normalizedProvider}.${normalizedModel}`,
|
|
218
|
-
targets: matchingKeys,
|
|
219
|
-
priority: 100,
|
|
220
|
-
mode: 'round-robin',
|
|
221
|
-
backup: false
|
|
222
|
-
};
|
|
223
|
-
const { providerKey, poolTargets, tierId, failureHint } = trySelectFromTier('direct', tier, undefined, estimatedTokens, features, deps, {
|
|
224
|
-
disabledProviders: new Set(activeState.disabledProviders),
|
|
225
|
-
disabledKeysMap: new Map(activeState.disabledKeys),
|
|
226
|
-
allowedProviders: new Set(activeState.allowedProviders),
|
|
227
|
-
disabledModels: new Map(activeState.disabledModels),
|
|
228
|
-
allowAliasRotation: true
|
|
229
|
-
});
|
|
230
|
-
if (providerKey) {
|
|
231
|
-
return { providerKey, routeUsed: 'direct', pool: poolTargets, poolId: tierId };
|
|
232
|
-
}
|
|
233
|
-
if (failureHint) {
|
|
234
|
-
attempted.push(failureHint);
|
|
235
|
-
}
|
|
236
|
-
return null;
|
|
237
|
-
}
|
|
238
178
|
function selectFromCandidates(routes, metadata, classification, features, state, deps, options) {
|
|
239
179
|
const allowedProviders = new Set(state.allowedProviders);
|
|
240
180
|
const disabledProviders = new Set(state.disabledProviders);
|
|
@@ -283,895 +223,3 @@ function selectFromCandidates(routes, metadata, classification, features, state,
|
|
|
283
223
|
const requestedRoute = normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
284
224
|
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
285
225
|
}
|
|
286
|
-
function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, deps, options) {
|
|
287
|
-
const { disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys } = options;
|
|
288
|
-
let targets = Array.isArray(tier.targets) ? tier.targets : [];
|
|
289
|
-
const excludedRaw = features.metadata?.excludedProviderKeys &&
|
|
290
|
-
Array.isArray(features.metadata.excludedProviderKeys)
|
|
291
|
-
? features.metadata.excludedProviderKeys
|
|
292
|
-
: [];
|
|
293
|
-
const excludedKeys = new Set(excludedRaw
|
|
294
|
-
.map((val) => (typeof val === 'string' ? val.trim() : ''))
|
|
295
|
-
.filter((val) => Boolean(val)));
|
|
296
|
-
if (excludedKeys.size > 0) {
|
|
297
|
-
targets = targets.filter((key) => !excludedKeys.has(key));
|
|
298
|
-
}
|
|
299
|
-
const isRecoveryAttempt = excludedKeys.size > 0;
|
|
300
|
-
const singleCandidateFallback = targets.length === 1 ? targets[0] : undefined;
|
|
301
|
-
if (targets.length > 0) {
|
|
302
|
-
// Always respect cooldown signals. If a route/tier is depleted due to cooldown,
|
|
303
|
-
// routing is expected to fall back to other tiers/routes (e.g. longcontext → default),
|
|
304
|
-
// rather than repeatedly selecting the cooled-down provider.
|
|
305
|
-
targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
|
|
306
|
-
}
|
|
307
|
-
if (allowedProviders && allowedProviders.size > 0) {
|
|
308
|
-
targets = targets.filter((key) => {
|
|
309
|
-
const providerId = extractProviderId(key);
|
|
310
|
-
return providerId && allowedProviders.has(providerId);
|
|
311
|
-
});
|
|
312
|
-
}
|
|
313
|
-
if (disabledProviders && disabledProviders.size > 0) {
|
|
314
|
-
targets = targets.filter((key) => {
|
|
315
|
-
const providerId = extractProviderId(key);
|
|
316
|
-
return providerId && !disabledProviders.has(providerId);
|
|
317
|
-
});
|
|
318
|
-
}
|
|
319
|
-
if (disabledKeysMap && disabledKeysMap.size > 0) {
|
|
320
|
-
targets = targets.filter((key) => {
|
|
321
|
-
const providerId = extractProviderId(key);
|
|
322
|
-
if (!providerId)
|
|
323
|
-
return true;
|
|
324
|
-
const disabledKeys = disabledKeysMap.get(providerId);
|
|
325
|
-
if (!disabledKeys || disabledKeys.size === 0)
|
|
326
|
-
return true;
|
|
327
|
-
const keyAlias = extractKeyAlias(key);
|
|
328
|
-
const keyIndex = extractKeyIndex(key);
|
|
329
|
-
if (keyAlias && disabledKeys.has(keyAlias)) {
|
|
330
|
-
return false;
|
|
331
|
-
}
|
|
332
|
-
if (keyIndex !== undefined && disabledKeys.has(keyIndex + 1)) {
|
|
333
|
-
return false;
|
|
334
|
-
}
|
|
335
|
-
return true;
|
|
336
|
-
});
|
|
337
|
-
}
|
|
338
|
-
if (disabledModels && disabledModels.size > 0) {
|
|
339
|
-
targets = targets.filter((key) => {
|
|
340
|
-
const providerId = extractProviderId(key);
|
|
341
|
-
if (!providerId) {
|
|
342
|
-
return true;
|
|
343
|
-
}
|
|
344
|
-
const disabled = disabledModels.get(providerId);
|
|
345
|
-
if (!disabled || disabled.size === 0) {
|
|
346
|
-
return true;
|
|
347
|
-
}
|
|
348
|
-
const modelId = getProviderModelId(key, deps.providerRegistry);
|
|
349
|
-
if (!modelId) {
|
|
350
|
-
return true;
|
|
351
|
-
}
|
|
352
|
-
return !disabled.has(modelId);
|
|
353
|
-
});
|
|
354
|
-
}
|
|
355
|
-
if (requiredProviderKeys && requiredProviderKeys.size > 0) {
|
|
356
|
-
targets = targets.filter((key) => requiredProviderKeys.has(key));
|
|
357
|
-
}
|
|
358
|
-
const serverToolRequired = features.metadata?.serverToolRequired === true;
|
|
359
|
-
if (serverToolRequired) {
|
|
360
|
-
const filtered = [];
|
|
361
|
-
for (const key of targets) {
|
|
362
|
-
try {
|
|
363
|
-
const profile = deps.providerRegistry.get(key);
|
|
364
|
-
if (!profile.serverToolsDisabled) {
|
|
365
|
-
filtered.push(key);
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
catch {
|
|
369
|
-
// ignore unknown providers when filtering for servertools
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
targets = filtered;
|
|
373
|
-
}
|
|
374
|
-
if (features.hasImageAttachment && (routeName === DEFAULT_ROUTE || routeName === 'thinking')) {
|
|
375
|
-
const prioritized = [];
|
|
376
|
-
const fallthrough = [];
|
|
377
|
-
for (const key of targets) {
|
|
378
|
-
try {
|
|
379
|
-
const profile = deps.providerRegistry.get(key);
|
|
380
|
-
if (profile.providerType === 'responses') {
|
|
381
|
-
prioritized.push(key);
|
|
382
|
-
}
|
|
383
|
-
else if (profile.providerType === 'gemini') {
|
|
384
|
-
prioritized.push(key);
|
|
385
|
-
}
|
|
386
|
-
else {
|
|
387
|
-
fallthrough.push(key);
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
catch {
|
|
391
|
-
fallthrough.push(key);
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
if (prioritized.length) {
|
|
395
|
-
targets = prioritized;
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
if (!targets.length) {
|
|
399
|
-
return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
|
|
400
|
-
}
|
|
401
|
-
const contextResult = deps.contextAdvisor.classify(targets, estimatedTokens, (key) => deps.providerRegistry.get(key));
|
|
402
|
-
const prioritizedPools = buildContextCandidatePools(contextResult);
|
|
403
|
-
const quotaView = deps.quotaView;
|
|
404
|
-
const now = quotaView ? Date.now() : 0;
|
|
405
|
-
const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
|
|
406
|
-
const contextWeightedCfg = resolveContextWeightedConfig(deps.loadBalancer.getPolicy().contextWeighted);
|
|
407
|
-
const warnRatio = deps.contextAdvisor.getConfig().warnRatio;
|
|
408
|
-
const nowForWeights = Date.now();
|
|
409
|
-
const selectFirstAvailable = (candidates) => {
|
|
410
|
-
for (const key of candidates) {
|
|
411
|
-
if (deps.healthManager.isAvailable(key)) {
|
|
412
|
-
return key;
|
|
413
|
-
}
|
|
414
|
-
}
|
|
415
|
-
return null;
|
|
416
|
-
};
|
|
417
|
-
const resolvePriorityMeta = (orderedTargets) => {
|
|
418
|
-
// Priority mode semantics (strict group priority + alias-level balancing):
|
|
419
|
-
// - Targets are interpreted as ordered (providerId, modelId) groups.
|
|
420
|
-
// - Group base priorities: 100, 90, 80, ... (step=10) by appearance order.
|
|
421
|
-
// - Within a group (different auth aliases), base scores: 100, 99, 98, ... (step=1).
|
|
422
|
-
//
|
|
423
|
-
// Group selection is strict: always use the best group until it is unavailable.
|
|
424
|
-
// Alias selection is balanced within the chosen group (RR / health-weighted / context-weighted).
|
|
425
|
-
const meta = new Map();
|
|
426
|
-
if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
|
|
427
|
-
return meta;
|
|
428
|
-
}
|
|
429
|
-
let groupIndex = -1;
|
|
430
|
-
let aliasOffset = 0;
|
|
431
|
-
let lastGroupKey = '';
|
|
432
|
-
for (const key of orderedTargets) {
|
|
433
|
-
const providerId = extractProviderId(key) ?? '';
|
|
434
|
-
const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
|
|
435
|
-
const groupKey = `${providerId}::${modelId}`;
|
|
436
|
-
if (groupKey !== lastGroupKey) {
|
|
437
|
-
groupIndex += 1;
|
|
438
|
-
aliasOffset = 0;
|
|
439
|
-
lastGroupKey = groupKey;
|
|
440
|
-
}
|
|
441
|
-
const groupBase = 100 - groupIndex * 10;
|
|
442
|
-
const base = groupBase - aliasOffset;
|
|
443
|
-
meta.set(key, { groupId: `${providerId}.${modelId}`, groupBase, base });
|
|
444
|
-
aliasOffset += 1;
|
|
445
|
-
}
|
|
446
|
-
return meta;
|
|
447
|
-
};
|
|
448
|
-
const pickPriorityGroup = (candidates, orderedTargets, penalties) => {
|
|
449
|
-
const meta = resolvePriorityMeta(orderedTargets);
|
|
450
|
-
let bestGroupId = null;
|
|
451
|
-
let bestScore = Number.NEGATIVE_INFINITY;
|
|
452
|
-
for (const key of candidates) {
|
|
453
|
-
if (!deps.healthManager.isAvailable(key))
|
|
454
|
-
continue;
|
|
455
|
-
const m = meta.get(key);
|
|
456
|
-
if (!m)
|
|
457
|
-
continue;
|
|
458
|
-
const penalty = penalties ? Math.max(0, Math.floor(penalties[key] ?? 0)) : 0;
|
|
459
|
-
const score = m.base - penalty;
|
|
460
|
-
if (score > bestScore) {
|
|
461
|
-
bestScore = score;
|
|
462
|
-
bestGroupId = m.groupId;
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
if (!bestGroupId)
|
|
466
|
-
return null;
|
|
467
|
-
const groupCandidates = candidates.filter((key) => meta.get(key)?.groupId === bestGroupId);
|
|
468
|
-
return groupCandidates.length ? { groupId: bestGroupId, groupCandidates } : null;
|
|
469
|
-
};
|
|
470
|
-
const computeContextWeightMultipliers = (candidates) => {
|
|
471
|
-
if (!contextWeightedCfg.enabled) {
|
|
472
|
-
return null;
|
|
473
|
-
}
|
|
474
|
-
const eff = {};
|
|
475
|
-
let ref = 1;
|
|
476
|
-
for (const key of candidates) {
|
|
477
|
-
const usage = contextResult.usage?.[key];
|
|
478
|
-
const limit = usage && typeof usage.limit === 'number' && Number.isFinite(usage.limit) ? Math.floor(usage.limit) : 0;
|
|
479
|
-
const safeEff = computeEffectiveSafeWindowTokens({
|
|
480
|
-
modelMaxTokens: Math.max(1, limit),
|
|
481
|
-
warnRatio,
|
|
482
|
-
clientCapTokens: contextWeightedCfg.clientCapTokens
|
|
483
|
-
});
|
|
484
|
-
eff[key] = safeEff;
|
|
485
|
-
if (safeEff > ref) {
|
|
486
|
-
ref = safeEff;
|
|
487
|
-
}
|
|
488
|
-
}
|
|
489
|
-
return { ref, eff };
|
|
490
|
-
};
|
|
491
|
-
const selectWithQuota = (candidates, isSafePool) => {
|
|
492
|
-
if (!quotaView) {
|
|
493
|
-
if (tier.mode === 'priority') {
|
|
494
|
-
if (isRecoveryAttempt) {
|
|
495
|
-
return selectFirstAvailable(candidates);
|
|
496
|
-
}
|
|
497
|
-
const group = pickPriorityGroup(candidates, tier.targets);
|
|
498
|
-
if (!group) {
|
|
499
|
-
return null;
|
|
500
|
-
}
|
|
501
|
-
const weights = (() => {
|
|
502
|
-
if (!isSafePool)
|
|
503
|
-
return undefined;
|
|
504
|
-
const ctx = computeContextWeightMultipliers(group.groupCandidates);
|
|
505
|
-
if (!ctx)
|
|
506
|
-
return undefined;
|
|
507
|
-
const out = {};
|
|
508
|
-
for (const key of group.groupCandidates) {
|
|
509
|
-
const m = computeContextMultiplier({
|
|
510
|
-
effectiveSafeRefTokens: ctx.ref,
|
|
511
|
-
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
512
|
-
cfg: contextWeightedCfg
|
|
513
|
-
});
|
|
514
|
-
out[key] = Math.max(1, Math.round(100 * m));
|
|
515
|
-
}
|
|
516
|
-
return out;
|
|
517
|
-
})();
|
|
518
|
-
return deps.loadBalancer.select({
|
|
519
|
-
routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
|
|
520
|
-
candidates: group.groupCandidates,
|
|
521
|
-
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
522
|
-
weights,
|
|
523
|
-
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
524
|
-
}, 'round-robin');
|
|
525
|
-
}
|
|
526
|
-
const weights = (() => {
|
|
527
|
-
if (!isSafePool || !contextWeightedCfg.enabled)
|
|
528
|
-
return undefined;
|
|
529
|
-
const ctx = computeContextWeightMultipliers(candidates);
|
|
530
|
-
if (!ctx)
|
|
531
|
-
return undefined;
|
|
532
|
-
const out = {};
|
|
533
|
-
for (const key of candidates) {
|
|
534
|
-
const m = computeContextMultiplier({
|
|
535
|
-
effectiveSafeRefTokens: ctx.ref,
|
|
536
|
-
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
537
|
-
cfg: contextWeightedCfg
|
|
538
|
-
});
|
|
539
|
-
out[key] = Math.max(1, Math.round(100 * m));
|
|
540
|
-
}
|
|
541
|
-
return out;
|
|
542
|
-
})();
|
|
543
|
-
const selected = deps.loadBalancer.select({
|
|
544
|
-
routeName: `${routeName}:${tier.id}`,
|
|
545
|
-
candidates,
|
|
546
|
-
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
547
|
-
weights,
|
|
548
|
-
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
549
|
-
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
550
|
-
return selected;
|
|
551
|
-
}
|
|
552
|
-
const buckets = new Map();
|
|
553
|
-
let order = 0;
|
|
554
|
-
for (const key of candidates) {
|
|
555
|
-
const entry = quotaView(key);
|
|
556
|
-
if (!entry) {
|
|
557
|
-
const list = buckets.get(100) ?? [];
|
|
558
|
-
list.push({ key, penalty: 0, order: order++ });
|
|
559
|
-
buckets.set(100, list);
|
|
560
|
-
continue;
|
|
561
|
-
}
|
|
562
|
-
if (!entry.inPool) {
|
|
563
|
-
continue;
|
|
564
|
-
}
|
|
565
|
-
if (entry.cooldownUntil && entry.cooldownUntil > now) {
|
|
566
|
-
continue;
|
|
567
|
-
}
|
|
568
|
-
if (entry.blacklistUntil && entry.blacklistUntil > now) {
|
|
569
|
-
continue;
|
|
570
|
-
}
|
|
571
|
-
const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier)
|
|
572
|
-
? entry.priorityTier
|
|
573
|
-
: 100;
|
|
574
|
-
const penaltyRaw = entry.selectionPenalty;
|
|
575
|
-
const penalty = typeof penaltyRaw === 'number' && Number.isFinite(penaltyRaw) && penaltyRaw > 0 ? Math.floor(penaltyRaw) : 0;
|
|
576
|
-
const list = buckets.get(tierPriority) ?? [];
|
|
577
|
-
list.push({ key, penalty, order: order++ });
|
|
578
|
-
buckets.set(tierPriority, list);
|
|
579
|
-
}
|
|
580
|
-
const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
|
|
581
|
-
for (const priority of sortedPriorities) {
|
|
582
|
-
const bucket = buckets.get(priority) ?? [];
|
|
583
|
-
if (!bucket.length) {
|
|
584
|
-
continue;
|
|
585
|
-
}
|
|
586
|
-
bucket.sort((a, b) => (a.penalty - b.penalty) || (a.order - b.order));
|
|
587
|
-
const bucketCandidates = bucket.map((item) => item.key);
|
|
588
|
-
// antigravity special: avoid rotating across keys while the current key is healthy.
|
|
589
|
-
// Rationale: some upstream gateways reject rapid cross-key switching even when quota exists,
|
|
590
|
-
// causing repeated 429s. We therefore pin a single key per (providerId, modelId) until it is
|
|
591
|
-
// excluded by quota/cooldown, then fail over to the next available key.
|
|
592
|
-
//
|
|
593
|
-
// This is only applied when the request has no session-level sticky key, to avoid breaking
|
|
594
|
-
// explicit session stickiness.
|
|
595
|
-
const shouldPinAntigravityModel = (() => {
|
|
596
|
-
// Only respect explicit session/conversation stickiness. requestId-scoped sticky keys
|
|
597
|
-
// (used for request-chain pinning) should not prevent global antigravity key pinning.
|
|
598
|
-
if (typeof stickyKey === 'string' && (stickyKey.startsWith('session:') || stickyKey.startsWith('conversation:'))) {
|
|
599
|
-
return false;
|
|
600
|
-
}
|
|
601
|
-
if (bucketCandidates.length < 2) {
|
|
602
|
-
return false;
|
|
603
|
-
}
|
|
604
|
-
let modelId = null;
|
|
605
|
-
for (const key of bucketCandidates) {
|
|
606
|
-
const providerId = extractProviderId(key);
|
|
607
|
-
if (providerId !== 'antigravity') {
|
|
608
|
-
return false;
|
|
609
|
-
}
|
|
610
|
-
const candidateModel = getProviderModelId(key, deps.providerRegistry);
|
|
611
|
-
if (!candidateModel) {
|
|
612
|
-
return false;
|
|
613
|
-
}
|
|
614
|
-
if (modelId === null) {
|
|
615
|
-
modelId = candidateModel;
|
|
616
|
-
}
|
|
617
|
-
else if (modelId !== candidateModel) {
|
|
618
|
-
return false;
|
|
619
|
-
}
|
|
620
|
-
}
|
|
621
|
-
return Boolean(modelId);
|
|
622
|
-
})();
|
|
623
|
-
if (shouldPinAntigravityModel && !isRecoveryAttempt) {
|
|
624
|
-
const pinned = selectFirstAvailable(bucketCandidates);
|
|
625
|
-
if (pinned) {
|
|
626
|
-
return pinned;
|
|
627
|
-
}
|
|
628
|
-
}
|
|
629
|
-
const bucketPenaltyMap = {};
|
|
630
|
-
for (const item of bucket) {
|
|
631
|
-
bucketPenaltyMap[item.key] = item.penalty;
|
|
632
|
-
}
|
|
633
|
-
const bucketWeights = {};
|
|
634
|
-
const bucketMultipliers = {};
|
|
635
|
-
for (const item of bucket) {
|
|
636
|
-
if (healthWeightedCfg.enabled) {
|
|
637
|
-
const entry = quotaView(item.key);
|
|
638
|
-
const { weight, multiplier } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
|
|
639
|
-
bucketWeights[item.key] = weight;
|
|
640
|
-
bucketMultipliers[item.key] = multiplier;
|
|
641
|
-
}
|
|
642
|
-
else {
|
|
643
|
-
// Legacy: penalty => lower weight, but never zero (unhealthy should still get a chance).
|
|
644
|
-
bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
|
|
645
|
-
bucketMultipliers[item.key] = 1;
|
|
646
|
-
}
|
|
647
|
-
}
|
|
648
|
-
if (isSafePool && contextWeightedCfg.enabled) {
|
|
649
|
-
const ctx = computeContextWeightMultipliers(bucketCandidates);
|
|
650
|
-
if (ctx) {
|
|
651
|
-
for (const key of bucketCandidates) {
|
|
652
|
-
const m = computeContextMultiplier({
|
|
653
|
-
effectiveSafeRefTokens: ctx.ref,
|
|
654
|
-
effectiveSafeTokens: ctx.eff[key] ?? 1,
|
|
655
|
-
cfg: contextWeightedCfg
|
|
656
|
-
});
|
|
657
|
-
bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
|
|
658
|
-
}
|
|
659
|
-
}
|
|
660
|
-
}
|
|
661
|
-
if (tier.mode === 'priority') {
|
|
662
|
-
if (!isRecoveryAttempt) {
|
|
663
|
-
const group = pickPriorityGroup(bucketCandidates, tier.targets, bucketPenaltyMap);
|
|
664
|
-
if (!group) {
|
|
665
|
-
continue;
|
|
666
|
-
}
|
|
667
|
-
const groupWeights = {};
|
|
668
|
-
for (const key of group.groupCandidates) {
|
|
669
|
-
groupWeights[key] = bucketWeights[key] ?? 1;
|
|
670
|
-
}
|
|
671
|
-
const selected = deps.loadBalancer.select({
|
|
672
|
-
routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
|
|
673
|
-
candidates: group.groupCandidates,
|
|
674
|
-
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
675
|
-
weights: groupWeights,
|
|
676
|
-
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
677
|
-
}, 'round-robin');
|
|
678
|
-
if (selected) {
|
|
679
|
-
return selected;
|
|
680
|
-
}
|
|
681
|
-
continue;
|
|
682
|
-
}
|
|
683
|
-
if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
|
|
684
|
-
let best = null;
|
|
685
|
-
let bestM = Number.NEGATIVE_INFINITY;
|
|
686
|
-
for (const key of bucketCandidates) {
|
|
687
|
-
if (!deps.healthManager.isAvailable(key))
|
|
688
|
-
continue;
|
|
689
|
-
const m = bucketMultipliers[key] ?? 1;
|
|
690
|
-
if (m > bestM) {
|
|
691
|
-
bestM = m;
|
|
692
|
-
best = key;
|
|
693
|
-
}
|
|
694
|
-
}
|
|
695
|
-
if (best) {
|
|
696
|
-
return best;
|
|
697
|
-
}
|
|
698
|
-
continue;
|
|
699
|
-
}
|
|
700
|
-
else if (isRecoveryAttempt) {
|
|
701
|
-
const recovered = selectFirstAvailable(bucketCandidates);
|
|
702
|
-
if (recovered)
|
|
703
|
-
return recovered;
|
|
704
|
-
continue;
|
|
705
|
-
}
|
|
706
|
-
// (unreachable) recovery handled above
|
|
707
|
-
}
|
|
708
|
-
else {
|
|
709
|
-
if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
|
|
710
|
-
let best = null;
|
|
711
|
-
let bestM = Number.NEGATIVE_INFINITY;
|
|
712
|
-
for (const key of bucketCandidates) {
|
|
713
|
-
if (!deps.healthManager.isAvailable(key))
|
|
714
|
-
continue;
|
|
715
|
-
const m = bucketMultipliers[key] ?? 1;
|
|
716
|
-
if (m > bestM) {
|
|
717
|
-
bestM = m;
|
|
718
|
-
best = key;
|
|
719
|
-
}
|
|
720
|
-
}
|
|
721
|
-
if (best) {
|
|
722
|
-
return best;
|
|
723
|
-
}
|
|
724
|
-
continue;
|
|
725
|
-
}
|
|
726
|
-
else if (isRecoveryAttempt) {
|
|
727
|
-
const recovered = selectFirstAvailable(bucketCandidates);
|
|
728
|
-
if (recovered)
|
|
729
|
-
return recovered;
|
|
730
|
-
continue;
|
|
731
|
-
}
|
|
732
|
-
const selected = deps.loadBalancer.select({
|
|
733
|
-
routeName: `${routeName}:${tier.id}`,
|
|
734
|
-
candidates: bucketCandidates,
|
|
735
|
-
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
736
|
-
weights: bucketWeights,
|
|
737
|
-
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
738
|
-
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
739
|
-
if (selected) {
|
|
740
|
-
return selected;
|
|
741
|
-
}
|
|
742
|
-
}
|
|
743
|
-
}
|
|
744
|
-
return null;
|
|
745
|
-
};
|
|
746
|
-
for (const candidatePool of prioritizedPools) {
|
|
747
|
-
const providerKey = selectWithQuota(candidatePool, candidatePool === contextResult.safe);
|
|
748
|
-
if (providerKey) {
|
|
749
|
-
return { providerKey, poolTargets: tier.targets, tierId: tier.id };
|
|
750
|
-
}
|
|
751
|
-
}
|
|
752
|
-
return {
|
|
753
|
-
providerKey: null,
|
|
754
|
-
poolTargets: tier.targets,
|
|
755
|
-
tierId: tier.id,
|
|
756
|
-
failureHint: describeAttempt(routeName, tier.id, contextResult)
|
|
757
|
-
};
|
|
758
|
-
}
|
|
759
|
-
/**
 * Selects a provider key from an explicit set of sticky provider keys.
 *
 * The key set is narrowed in this order before selection:
 *   1. keys reported as cooling down by `deps.isProviderCoolingDown`;
 *   2. keys rejected by the optional `deps.quotaView` (not in pool, or an
 *      unexpired `cooldownUntil` / `blacklistUntil`);
 *   3. providers outside `state.allowedProviders` (when non-empty);
 *   4. providers listed in `state.disabledProviders`;
 *   5. keys / models listed in `state.disabledKeys` / `state.disabledModels`.
 * Survivors are passed to `trySelectFromTier` as a synthetic
 * `sticky-primary` tier on the `sticky` route.
 *
 * @param {Set<string>} stickyKeySet - Provider keys the request is pinned to.
 * @param {object} metadata - Forwarded to `deps.resolveStickyKey`.
 * @param {object} features - `estimatedTokens` is read; the object is forwarded.
 * @param {object} state - Routing state (allowed/disabled providers, keys, models).
 * @param {object} deps - Collaborators used here: `isProviderCoolingDown`,
 *   `quotaView` (optional), `resolveStickyKey`, `providerRegistry`.
 * @param {object} options - `allowAliasRotation` disables sticky-key resolution.
 * @returns {{providerKey: string, routeUsed: string, pool: string[], poolId: string} | null}
 *   The selection, or `null` when nothing is selectable.
 */
export function selectFromStickyPool(stickyKeySet, metadata, features, state, deps, options) {
    if (!stickyKeySet || stickyKeySet.size === 0) {
        return null;
    }

    // Work on private copies of the routing state.
    const allowedProviders = new Set(state.allowedProviders);
    const disabledProviders = new Set(state.disabledProviders);
    // Numeric entries are stored shifted by one; the lookup below shifts the
    // key's own index the same way.
    // NOTE(review): the index base used by state.disabledKeys is not visible here — confirm.
    const disabledKeysMap = new Map(Array.from(state.disabledKeys.entries(), ([provider, keys]) => [
        provider,
        new Set(Array.from(keys, (k) => (typeof k === 'string' ? k : k + 1)))
    ]));
    const disabledModels = new Map(Array.from(state.disabledModels.entries(), ([provider, models]) => [
        provider,
        new Set(models)
    ]));

    // 1. Provider-level cooldown. A lone sticky key is kept even while cooling down.
    let candidates = Array.from(stickyKeySet).filter((key) => !deps.isProviderCoolingDown(key));
    if (candidates.length === 0 && stickyKeySet.size === 1) {
        candidates = Array.from(stickyKeySet);
    }

    // 2. Quota view. Keys without a quota entry pass through untouched.
    const quotaView = deps.quotaView;
    if (quotaView) {
        const now = Date.now();
        const stillActive = (until) => Boolean(until) && until > now;
        const withinQuota = candidates.filter((key) => {
            const entry = quotaView(key);
            if (!entry) {
                return true;
            }
            return Boolean(entry.inPool)
                && !stillActive(entry.cooldownUntil)
                && !stillActive(entry.blacklistUntil);
        });
        // A single remaining candidate is kept even when it fails the quota checks.
        if (withinQuota.length > 0 || candidates.length !== 1) {
            candidates = withinQuota;
        }
    }

    // 3. Allow-list of providers.
    if (allowedProviders.size > 0) {
        candidates = candidates.filter((key) => {
            const providerId = extractProviderId(key);
            return Boolean(providerId) && allowedProviders.has(providerId);
        });
    }

    // 4. Deny-list of providers.
    if (disabledProviders.size > 0) {
        candidates = candidates.filter((key) => {
            const providerId = extractProviderId(key);
            return Boolean(providerId) && !disabledProviders.has(providerId);
        });
    }

    // 5. Deny-lists of individual keys and models.
    if (disabledKeysMap.size > 0 || disabledModels.size > 0) {
        candidates = candidates.filter((key) => {
            const providerId = extractProviderId(key);
            if (!providerId) {
                return true;
            }
            const blockedKeys = disabledKeysMap.get(providerId);
            if (blockedKeys && blockedKeys.size > 0) {
                const alias = extractKeyAlias(key);
                const index = extractKeyIndex(key);
                const aliasBlocked = Boolean(alias) && blockedKeys.has(alias);
                const indexBlocked = index !== undefined && blockedKeys.has(index + 1);
                if (aliasBlocked || indexBlocked) {
                    return false;
                }
            }
            const blockedModels = disabledModels.get(providerId);
            if (blockedModels && blockedModels.size > 0) {
                const modelId = getProviderModelId(key, deps.providerRegistry);
                if (modelId && blockedModels.has(modelId)) {
                    return false;
                }
            }
            return true;
        });
    }

    if (candidates.length === 0) {
        return null;
    }

    const stickyKey = options.allowAliasRotation ? undefined : deps.resolveStickyKey(metadata);
    const rawEstimate = features.estimatedTokens;
    const estimatedTokens = typeof rawEstimate === 'number' && Number.isFinite(rawEstimate)
        ? Math.max(0, rawEstimate)
        : 0;
    const tier = { id: 'sticky-primary', targets: candidates, priority: 0 };
    const selection = trySelectFromTier('sticky', tier, stickyKey, estimatedTokens, features, deps, {
        disabledProviders,
        disabledKeysMap,
        allowedProviders,
        disabledModels,
        requiredProviderKeys: stickyKeySet,
        allowAliasRotation: options.allowAliasRotation
    });
    if (!selection.providerKey) {
        return null;
    }
    return {
        providerKey: selection.providerKey,
        routeUsed: 'sticky',
        pool: selection.poolTargets,
        poolId: selection.tierId
    };
}
|
|
866
|
-
/**
 * Keeps only the routes that still have at least one usable target under the
 * current routing state.
 *
 * When the state carries no restriction at all, `routes` is returned as-is
 * (same array instance). Otherwise a route is kept when any target of any of
 * its pools passes the allow-list, provider deny-list, key deny-list and
 * model deny-list.
 *
 * @param {string[]} routes - Route names to check.
 * @param {object} state - Holds `allowedProviders`, `disabledProviders`,
 *   `disabledKeys` and `disabledModels`.
 * @param {object} routing - Route name to pool list.
 * @param {object} providerRegistry - Forwarded to `getProviderModelId`.
 * @returns {string[]} The routes with at least one usable target.
 */
function filterCandidatesByRoutingState(routes, state, routing, providerRegistry) {
    const unrestricted = state.allowedProviders.size === 0
        && state.disabledProviders.size === 0
        && state.disabledKeys.size === 0
        && state.disabledModels.size === 0;
    if (unrestricted) {
        return routes;
    }

    // True when the provider key survives every restriction in `state`.
    const isTargetUsable = (providerKey) => {
        const providerId = extractProviderId(providerKey);
        if (!providerId) {
            return false;
        }
        if (state.allowedProviders.size > 0 && !state.allowedProviders.has(providerId)) {
            return false;
        }
        if (state.disabledProviders.has(providerId)) {
            return false;
        }
        const blockedKeys = state.disabledKeys.get(providerId);
        if (blockedKeys && blockedKeys.size > 0) {
            const alias = extractKeyAlias(providerKey);
            const index = extractKeyIndex(providerKey);
            if (alias && blockedKeys.has(alias)) {
                return false;
            }
            // NOTE(review): the key index is shifted by one before the lookup;
            // the index base of state.disabledKeys is not visible here — confirm.
            if (index !== undefined && blockedKeys.has(index + 1)) {
                return false;
            }
        }
        const blockedModels = state.disabledModels.get(providerId);
        if (blockedModels && blockedModels.size > 0) {
            const modelId = getProviderModelId(providerKey, providerRegistry);
            if (modelId && blockedModels.has(modelId)) {
                return false;
            }
        }
        return true;
    };

    // True when the pool has a non-empty target list with a usable entry.
    const poolHasUsableTarget = (pool) => {
        if (!Array.isArray(pool.targets) || pool.targets.length === 0) {
            return false;
        }
        for (const providerKey of pool.targets) {
            if (isTargetUsable(providerKey)) {
                return true;
            }
        }
        return false;
    };

    return routes.filter((routeName) => {
        const pools = routing[routeName];
        if (!pools) {
            return false;
        }
        for (const pool of pools) {
            if (poolHasUsableTarget(pool)) {
                return true;
            }
        }
        return false;
    });
}
|
|
915
|
-
/**
 * Builds the ordered list of route names to try for a request.
 *
 * Classification candidates (when present) take precedence over the
 * requested route. For requests with an image attachment — unless the
 * `vision` route carries a force flag — the default and `thinking` routes are
 * added when they have targets, and the list is reordered via
 * `reorderForInlineVision`. The result is de-duplicated, always considers the
 * default route, and drops routes without targets.
 *
 * @param {string} requestedRoute - Route requested by the caller; may be empty.
 * @param {string[]} classificationCandidates - Routes proposed by classification.
 * @param {object} features - `hasImageAttachment` is read.
 * @param {object} routing - Route name to pool list.
 * @param {object} providerRegistry - Forwarded to `reorderForInlineVision`.
 * @returns {string[]} Route names in trial order; `[DEFAULT_ROUTE]` when nothing has targets.
 */
function buildRouteCandidates(requestedRoute, classificationCandidates, features, routing, providerRegistry) {
    const forceVision = routeHasForceFlag('vision', routing);
    const normalized = normalizeRouteAlias(requestedRoute || DEFAULT_ROUTE);
    const preferInlineVision = features.hasImageAttachment && !forceVision;

    const baseList = [];
    if (classificationCandidates && classificationCandidates.length) {
        for (const candidate of classificationCandidates) {
            baseList.push(normalizeRouteAlias(candidate));
        }
    }
    else if (normalized) {
        baseList.push(normalized);
    }

    if (preferInlineVision) {
        for (const routeName of [DEFAULT_ROUTE, 'thinking']) {
            if (routeHasTargets(routing[routeName]) && !baseList.includes(routeName)) {
                baseList.push(routeName);
            }
        }
    }

    const prioritized = sortByPriority(baseList);
    const ordered = preferInlineVision
        ? reorderForInlineVision(prioritized, routing, providerRegistry)
        : prioritized;

    // Drop empty names and duplicates, keeping first occurrences.
    const deduped = Array.from(new Set(ordered.filter(Boolean)));
    if (!deduped.includes(DEFAULT_ROUTE)) {
        deduped.push(DEFAULT_ROUTE);
    }

    const filtered = deduped.filter((routeName) => routeHasTargets(routing[routeName]));
    if (!filtered.includes(DEFAULT_ROUTE) && routeHasTargets(routing[DEFAULT_ROUTE])) {
        filtered.push(DEFAULT_ROUTE);
    }
    return filtered.length > 0 ? filtered : [DEFAULT_ROUTE];
}
|
|
956
|
-
/**
 * Moves routes that can take inline images to the front of the list.
 *
 * Only the default route and `thinking` are probed (via
 * `routeSupportsInlineVision`). Probed routes that qualify are placed first
 * even when they were not part of `routeNames`; the remaining names follow in
 * their original order. Empty names and duplicates are removed.
 *
 * @param {string[]} routeNames - Route names in their current order.
 * @param {object} routing - Route name to pool list.
 * @param {object} providerRegistry - Forwarded to `routeSupportsInlineVision`.
 * @returns {string[]} The reordered, de-duplicated route names.
 */
function reorderForInlineVision(routeNames, routing, providerRegistry) {
    const distinct = [...new Set(routeNames.filter(Boolean))];
    if (distinct.length === 0) {
        return distinct;
    }

    const preferred = [];
    for (const candidate of [DEFAULT_ROUTE, 'thinking']) {
        const supported = routeSupportsInlineVision(candidate, routing, providerRegistry);
        if (supported && !preferred.includes(candidate)) {
            preferred.push(candidate);
        }
    }
    if (preferred.length === 0) {
        return distinct;
    }

    const rest = distinct.filter((routeName) => !preferred.includes(routeName));
    return [...preferred, ...rest];
}
|
|
979
|
-
/**
 * Reports whether any target of the given route belongs to a provider whose
 * `providerType` is `responses` or `gemini`.
 *
 * Registry lookups that throw are ignored, so unknown provider keys simply do
 * not count.
 *
 * @param {string} routeName - Route to probe.
 * @param {object} routing - Route name to pool list.
 * @param {object} providerRegistry - Must expose `get(providerKey)`.
 * @returns {boolean} `true` when at least one such target exists.
 */
function routeSupportsInlineVision(routeName, routing, providerRegistry) {
    const pools = routing[routeName];
    if (!Array.isArray(pools)) {
        return false;
    }

    // Best-effort capability probe: lookup failures count as "not capable".
    const isInlineCapable = (providerKey) => {
        try {
            const { providerType } = providerRegistry.get(providerKey);
            return providerType === 'responses' || providerType === 'gemini';
        }
        catch {
            return false;
        }
    };

    for (const pool of pools) {
        if (!Array.isArray(pool.targets)) {
            continue;
        }
        for (const providerKey of pool.targets) {
            if (isInlineCapable(providerKey)) {
                return true;
            }
        }
    }
    return false;
}
|
|
1002
|
-
/**
 * Trims a route name, falling back to the default route when the name is
 * missing or blank.
 *
 * @param {string} routeName - Route name as supplied by the caller.
 * @returns {string} The trimmed name, or `DEFAULT_ROUTE`.
 */
function normalizeRouteAlias(routeName) {
    const trimmed = routeName ? routeName.trim() : '';
    return trimmed ? trimmed : DEFAULT_ROUTE;
}
|
|
1006
|
-
/**
 * Reports whether any pool of the named route has a truthy `force` property.
 *
 * @param {string} routeName - Route to inspect.
 * @param {object} routing - Route name to pool list.
 * @returns {boolean} `false` when the route has no pool array.
 */
function routeHasForceFlag(routeName, routing) {
    const pools = routing[routeName];
    return Array.isArray(pools) && pools.some(({ force }) => Boolean(force));
}
|
|
1013
|
-
/**
 * Reports whether at least one pool in the list has a non-empty `targets` array.
 *
 * @param {Array<object>} pools - Pool list of a route; anything else yields `false`.
 * @returns {boolean}
 */
function routeHasTargets(pools) {
    return Array.isArray(pools)
        && pools.some(({ targets }) => Array.isArray(targets) && targets.length > 0);
}
|
|
1019
|
-
/**
 * Returns the pools that have targets, in trial order.
 *
 * Ordering: non-backup pools before backup pools, then higher `priority`
 * first, then `id` ascending by `localeCompare`. The input array is not
 * modified.
 *
 * @param {Array<object>} pools - Pool list of a route; anything else yields `[]`.
 * @returns {Array<object>} A new, sorted array of the non-empty pools.
 */
function sortRoutePools(pools) {
    if (!Array.isArray(pools)) {
        return [];
    }

    const comparePools = (left, right) => {
        // Backup pools sort after primary pools.
        const backupDelta = Number(Boolean(left.backup)) - Number(Boolean(right.backup));
        if (backupDelta !== 0) {
            return backupDelta;
        }
        // Larger priority values come first.
        if (left.priority !== right.priority) {
            return right.priority - left.priority;
        }
        return left.id.localeCompare(right.id);
    };

    const populated = pools.filter((pool) => Array.isArray(pool.targets) && pool.targets.length > 0);
    populated.sort(comparePools);
    return populated;
}
|
|
1036
|
-
/**
 * Builds the route queue: the candidates with duplicates removed, keeping
 * the first occurrence of each.
 *
 * @param {Iterable<string>} candidates - Route names in trial order.
 * @returns {string[]} A new array of unique route names.
 */
function initializeRouteQueue(candidates) {
    const seen = new Set(candidates);
    return [...seen];
}
|
|
1039
|
-
/**
 * Lists the candidate pools of a context result in trial order: the `safe`
 * pool first, then the `risky` pool. Empty pools are left out.
 *
 * @param {{safe: string[], risky: string[]}} result - Context classification result.
 * @returns {string[][]} The non-empty pools (same array instances as in `result`).
 */
function buildContextCandidatePools(result) {
    return [result.safe, result.risky].filter((pool) => pool.length);
}
|
|
1049
|
-
/**
 * Builds the failure hint for a selection attempt.
 *
 * The hint is `route[:pool]` followed by a reason picked from the first
 * non-empty list of the context result: `safe` gives `health`, `risky` gives
 * `context_risky`, `overflow` gives `max_context_window`. With all three
 * empty, the bare prefix is returned.
 *
 * @param {string} routeName - Route that was attempted.
 * @param {string} poolId - Pool/tier id; omitted from the hint when falsy.
 * @param {{safe: string[], risky: string[], overflow: string[]}} result - Context result.
 * @returns {string} The failure hint.
 */
function describeAttempt(routeName, poolId, result) {
    const prefix = poolId ? `${routeName}:${poolId}` : routeName;
    const reasonChecks = [
        ['safe', 'health'],
        ['risky', 'context_risky'],
        ['overflow', 'max_context_window']
    ];
    for (const [listName, reason] of reasonChecks) {
        if (result[listName].length > 0) {
            return `${prefix}:${reason}`;
        }
    }
    return prefix;
}
|
|
1062
|
-
/**
 * Returns the provider id of a provider key: the text before its first dot.
 *
 * @param {string} providerKey - Dotted provider key.
 * @returns {string | null} The provider id, or `null` when the key has no dot
 *   or starts with one.
 */
function extractProviderId(providerKey) {
    const dotAt = providerKey.indexOf('.');
    return dotAt > 0 ? providerKey.slice(0, dotAt) : null;
}
|
|
1068
|
-
/**
 * Returns the key alias of a three-part provider key: the middle segment,
 * passed through `normalizeAliasDescriptor`.
 *
 * @param {string} providerKey - Dotted provider key.
 * @returns {string | null} The normalized alias, or `null` when the key does
 *   not have exactly three dot-separated segments.
 */
function extractKeyAlias(providerKey) {
    const segments = providerKey.split('.');
    return segments.length === 3 ? normalizeAliasDescriptor(segments[1]) : null;
}
|
|
1075
|
-
/**
 * Strips a leading `<digits>-` prefix from an alias, if present.
 *
 * @param {string} alias - Alias segment of a provider key.
 * @returns {string} The alias without the numeric prefix.
 */
function normalizeAliasDescriptor(alias) {
    // replace() returns the input unchanged when the prefix is absent.
    return alias.replace(/^\d+-/, '');
}
|
|
1081
|
-
/**
 * Returns the numeric key index of a two-part provider key
 * (`<provider>.<index>`).
 *
 * The second segment must consist of decimal digits only. A plain `parseInt`
 * would also accept segments that merely start with digits, so a two-part
 * key such as `provider.4o-mini` would be misread as key index 4.
 *
 * @param {string} providerKey - Dotted provider key.
 * @returns {number | undefined} The index when it is a positive integer,
 *   otherwise `undefined` (wrong segment count, non-numeric segment, or 0).
 */
function extractKeyIndex(providerKey) {
    const parts = providerKey.split('.');
    if (parts.length !== 2 || !/^\d+$/.test(parts[1])) {
        return undefined;
    }
    const index = Number.parseInt(parts[1], 10);
    return index > 0 ? index : undefined;
}
|
|
1091
|
-
/**
 * Resolves the model id for a provider key.
 *
 * The registry profile's `modelId` wins when it is truthy. Otherwise the id
 * is taken from the key itself: the last segment of a two- or three-part key.
 *
 * @param {string} providerKey - Dotted provider key.
 * @param {object} providerRegistry - Must expose `get(providerKey)` returning a profile.
 * @returns {string | null} The model id, or `null` when it cannot be derived.
 */
function getProviderModelId(providerKey, providerRegistry) {
    const { modelId } = providerRegistry.get(providerKey);
    if (modelId) {
        return modelId;
    }
    const segments = providerKey.split('.');
    const count = segments.length;
    if (count === 2 || count === 3) {
        // An empty trailing segment counts as "no model".
        return segments[count - 1] || null;
    }
    return null;
}
|
|
1105
|
-
/**
 * Collects the provider keys excluded by request metadata.
 *
 * Reads `metadata.excludedProviderKeys`; only string entries are kept, each
 * trimmed, and blank entries are dropped.
 *
 * @param {object} metadata - Request metadata; may be missing.
 * @returns {Set<string>} The excluded keys (empty when none are given).
 */
function extractExcludedProviderKeySet(metadata) {
    const excluded = new Set();
    const raw = metadata ? metadata.excludedProviderKeys : undefined;
    if (!Array.isArray(raw)) {
        return excluded;
    }
    for (const value of raw) {
        if (typeof value !== 'string') {
            continue;
        }
        const trimmed = value.trim();
        if (trimmed) {
            excluded.add(trimmed);
        }
    }
    return excluded;
}
|
|
1118
|
-
/**
 * Returns a copy of the route names ordered by ascending `routeWeight`.
 * The input is not modified.
 *
 * @param {string[]} routeNames - Route names in any order.
 * @returns {string[]} A new, sorted array.
 */
function sortByPriority(routeNames) {
    const sorted = [...routeNames];
    sorted.sort((left, right) => routeWeight(left) - routeWeight(right));
    return sorted;
}
|
|
1121
|
-
/**
 * Returns the sort weight of a route: its position in `ROUTE_PRIORITY`, or
 * the list's length for routes that are not listed (so they sort last).
 *
 * @param {string} routeName - Route name to weigh.
 * @returns {number} The weight; smaller sorts earlier.
 */
function routeWeight(routeName) {
    const position = ROUTE_PRIORITY.indexOf(routeName);
    if (position < 0) {
        return ROUTE_PRIORITY.length;
    }
    return position;
}
|
|
1125
|
-
/**
 * Resolves an instruction target to concrete provider keys.
 *
 * Resolution order (first match wins):
 *   1. explicit alias (`keyAlias` with `pathLength === 3`): keys under
 *      `<provider>.<alias>.`, narrowed by `model` when that matches;
 *   2. `keyIndex`: the runtime key at that index;
 *   3. `model`: every provider key whose model id equals it;
 *   4. legacy alias (`keyAlias` without `pathLength === 3`): the runtime key
 *      for that alias;
 *   5. all keys of the provider.
 *
 * @param {object} target - Reads `provider`, `keyAlias`, `pathLength`,
 *   `keyIndex` and `model`.
 * @param {object} providerRegistry - Must expose `listProviderKeys`,
 *   `resolveRuntimeKeyByIndex` and `resolveRuntimeKeyByAlias`.
 * @returns {{mode: string, keys: string[]} | null} `mode` is `exact` for a
 *   single pinned key and `filter` for a key set; `null` when the target has
 *   no provider or the provider has no keys.
 */
function resolveInstructionTarget(target, providerRegistry) {
    if (!target || !target.provider) {
        return null;
    }
    const providerId = target.provider;
    const providerKeys = providerRegistry.listProviderKeys(providerId);
    if (providerKeys.length === 0) {
        return null;
    }

    const pinTo = (key) => ({ mode: 'exact', keys: [key] });
    const restrictTo = (keys) => ({ mode: 'filter', keys });
    const keysServingModel = (keys, model) => keys.filter((key) => getProviderModelId(key, providerRegistry) === model);

    const alias = typeof target.keyAlias === 'string' ? target.keyAlias.trim() : '';
    const aliasExplicit = alias.length > 0 && target.pathLength === 3;

    if (aliasExplicit) {
        const prefix = `${providerId}.${alias}.`;
        const aliasKeys = providerKeys.filter((key) => key.startsWith(prefix));
        if (aliasKeys.length > 0) {
            if (target.model && target.model.trim()) {
                const matching = keysServingModel(aliasKeys, target.model.trim());
                // A single match is pinned exactly so the sticky pool stays deterministic.
                if (matching.length === 1) {
                    return pinTo(matching[0]);
                }
                if (matching.length > 1) {
                    return restrictTo(matching);
                }
            }
            return restrictTo(aliasKeys);
        }
    }

    if (typeof target.keyIndex === 'number' && target.keyIndex > 0) {
        const runtimeKey = providerRegistry.resolveRuntimeKeyByIndex(providerId, target.keyIndex);
        if (runtimeKey) {
            return pinTo(runtimeKey);
        }
    }

    if (target.model && target.model.trim()) {
        const matchingKeys = keysServingModel(providerKeys, target.model.trim());
        if (matchingKeys.length > 0) {
            return restrictTo(matchingKeys);
        }
    }

    if (alias && !aliasExplicit) {
        const legacyKey = providerRegistry.resolveRuntimeKeyByAlias(providerId, alias);
        if (legacyKey) {
            return pinTo(legacyKey);
        }
    }

    return restrictTo(providerKeys);
}
|