@jsonstudio/llms 0.6.1164 → 0.6.1354
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.d.ts +3 -1
- package/dist/conversion/codecs/gemini-openai-codec.js +10 -4
- package/dist/conversion/compat/actions/gemini-web-search.d.ts +1 -1
- package/dist/conversion/compat/actions/gemini-web-search.js +5 -2
- package/dist/conversion/compat/actions/iflow-tool-text-fallback.d.ts +12 -0
- package/dist/conversion/compat/actions/iflow-tool-text-fallback.js +199 -0
- package/dist/conversion/compat/actions/iflow-web-search.d.ts +1 -1
- package/dist/conversion/compat/actions/iflow-web-search.js +5 -2
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +47 -56
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +1 -13
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +523 -50
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +18 -38
- package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.d.ts +10 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.js +134 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.d.ts +6 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.js +79 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.js +46 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.d.ts +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.js +366 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.d.ts +9 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.js +384 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/node-results.d.ts +3 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/node-results.js +14 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.js +144 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/policy.d.ts +4 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/policy.js +32 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/protocol.d.ts +8 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/protocol.js +63 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.js +43 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.d.ts +1 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.js +29 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.js +16 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/types.d.ts +116 -0
- package/dist/conversion/hub/pipeline/hub-pipeline/types.js +1 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +3 -95
- package/dist/conversion/hub/pipeline/hub-pipeline.js +19 -1281
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage1_format_parse/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.d.ts +7 -0
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +65 -1
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +25 -22
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.d.ts +1 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage1_tool_governance/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage2_route_select/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +11 -11
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage2_format_parse/index.js +1 -1
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.d.ts +1 -0
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.js +4 -2
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.d.ts +1 -0
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.js +17 -9
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage2_sse_stream/index.js +2 -2
- package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +40 -2
- package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage2_finalize/index.js +1 -1
- package/dist/conversion/hub/pipeline/target-utils.js +9 -5
- package/dist/conversion/hub/process/chat-process.js +256 -16
- package/dist/conversion/hub/response/provider-response.d.ts +8 -0
- package/dist/conversion/hub/response/provider-response.js +85 -27
- package/dist/conversion/hub/response/response-mappers.d.ts +10 -3
- package/dist/conversion/hub/response/response-mappers.js +30 -6
- package/dist/conversion/hub/response/response-runtime.js +4 -38
- package/dist/conversion/hub/snapshot-recorder.js +5 -1
- package/dist/conversion/hub/standardized-bridge.js +23 -15
- package/dist/conversion/pipeline/codecs/v2/anthropic-openai-pipeline.js +36 -5
- package/dist/conversion/responses/responses-openai-bridge.js +20 -4
- package/dist/conversion/shared/gemini-tool-utils.d.ts +8 -1
- package/dist/conversion/shared/gemini-tool-utils.js +580 -108
- package/dist/conversion/shared/jsonish.js +1 -1
- package/dist/conversion/shared/mcp-injection.js +67 -33
- package/dist/conversion/shared/openai-finalizer.js +2 -1
- package/dist/conversion/shared/openai-message-normalize.js +76 -21
- package/dist/conversion/shared/responses-output-builder.js +6 -0
- package/dist/conversion/shared/runtime-metadata.d.ts +7 -0
- package/dist/conversion/shared/runtime-metadata.js +23 -0
- package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
- package/dist/conversion/shared/text-markup-normalizer.js +284 -4
- package/dist/conversion/shared/tool-canonicalizer.js +2 -1
- package/dist/conversion/shared/tool-governor.js +3 -3
- package/dist/filters/engine.js +5 -5
- package/dist/filters/special/request-tool-list-filter.js +194 -60
- package/dist/filters/special/request-tools-normalize.js +1 -1
- package/dist/filters/special/response-tool-text-canonicalize.d.ts +4 -7
- package/dist/filters/special/response-tool-text-canonicalize.js +7 -35
- package/dist/filters/special/tool-filter-hooks.js +58 -62
- package/dist/guidance/index.js +5 -1
- package/dist/http/sse-response.js +6 -6
- package/dist/router/virtual-router/bootstrap.js +65 -5
- package/dist/router/virtual-router/context-advisor.d.ts +4 -0
- package/dist/router/virtual-router/context-advisor.js +3 -0
- package/dist/router/virtual-router/context-weighted.d.ts +31 -0
- package/dist/router/virtual-router/context-weighted.js +54 -0
- package/dist/router/virtual-router/engine-health.d.ts +1 -1
- package/dist/router/virtual-router/engine-health.js +11 -110
- package/dist/router/virtual-router/engine-selection/alias-selection.d.ts +15 -0
- package/dist/router/virtual-router/engine-selection/alias-selection.js +156 -0
- package/dist/router/virtual-router/engine-selection/context-weight-multipliers.d.ts +11 -0
- package/dist/router/virtual-router/engine-selection/context-weight-multipliers.js +23 -0
- package/dist/router/virtual-router/engine-selection/direct-provider-model.d.ts +9 -0
- package/dist/router/virtual-router/engine-selection/direct-provider-model.js +49 -0
- package/dist/router/virtual-router/engine-selection/instruction-target.d.ts +6 -0
- package/dist/router/virtual-router/engine-selection/instruction-target.js +54 -0
- package/dist/router/virtual-router/engine-selection/key-parsing.d.ts +8 -0
- package/dist/router/virtual-router/engine-selection/key-parsing.js +64 -0
- package/dist/router/virtual-router/engine-selection/route-utils.d.ts +12 -0
- package/dist/router/virtual-router/engine-selection/route-utils.js +150 -0
- package/dist/router/virtual-router/engine-selection/routing-state-filter.d.ts +4 -0
- package/dist/router/virtual-router/engine-selection/routing-state-filter.js +50 -0
- package/dist/router/virtual-router/engine-selection/selection-deps.d.ts +39 -0
- package/dist/router/virtual-router/engine-selection/selection-deps.js +1 -0
- package/dist/router/virtual-router/engine-selection/sticky-pool.d.ts +11 -0
- package/dist/router/virtual-router/engine-selection/sticky-pool.js +109 -0
- package/dist/router/virtual-router/engine-selection/tier-priority.d.ts +12 -0
- package/dist/router/virtual-router/engine-selection/tier-priority.js +55 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-select.d.ts +22 -0
- package/dist/router/virtual-router/engine-selection/tier-selection-select.js +400 -0
- package/dist/router/virtual-router/engine-selection/tier-selection.d.ts +3 -0
- package/dist/router/virtual-router/engine-selection/tier-selection.js +225 -0
- package/dist/router/virtual-router/engine-selection.d.ts +4 -30
- package/dist/router/virtual-router/engine-selection.js +10 -815
- package/dist/router/virtual-router/engine.d.ts +1 -0
- package/dist/router/virtual-router/engine.js +55 -10
- package/dist/router/virtual-router/routing-instructions.js +6 -1
- package/dist/router/virtual-router/stop-message-state-sync.d.ts +5 -0
- package/dist/router/virtual-router/stop-message-state-sync.js +6 -14
- package/dist/router/virtual-router/types.d.ts +53 -1
- package/dist/servertool/clock/config.d.ts +8 -0
- package/dist/servertool/clock/config.js +22 -0
- package/dist/servertool/clock/log.d.ts +3 -0
- package/dist/servertool/clock/log.js +13 -0
- package/dist/servertool/clock/task-store.d.ts +1 -1
- package/dist/servertool/clock/task-store.js +1 -1
- package/dist/servertool/clock/tasks.js +1 -1
- package/dist/servertool/engine.js +146 -21
- package/dist/servertool/handlers/clock-auto.js +11 -6
- package/dist/servertool/handlers/clock.js +36 -10
- package/dist/servertool/handlers/followup-request-builder.js +8 -2
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +15 -9
- package/dist/servertool/handlers/iflow-model-error-retry.js +6 -4
- package/dist/servertool/handlers/recursive-detection-guard.js +4 -2
- package/dist/servertool/handlers/stop-message-auto.js +100 -10
- package/dist/servertool/handlers/vision.js +4 -1
- package/dist/servertool/handlers/web-search.js +3 -1
- package/dist/servertool/pending-session.d.ts +19 -0
- package/dist/servertool/pending-session.js +97 -0
- package/dist/servertool/reenter-backend.js +5 -3
- package/dist/servertool/server-side-tools.js +235 -6
- package/dist/servertool/types.d.ts +13 -0
- package/dist/sse/json-to-sse/event-generators/responses.js +1 -1
- package/dist/sse/shared/chat-serializer.js +2 -2
- package/dist/sse/shared/constants.js +1 -1
- package/dist/sse/sse-to-json/anthropic-sse-to-json-converter.d.ts +7 -1
- package/dist/sse/sse-to-json/builders/response-builder.js +16 -0
- package/dist/sse/sse-to-json/responses-sse-to-json-converter.d.ts +1 -1
- package/dist/tools/apply-patch/execution-capturer.js +1 -1
- package/dist/tools/exec-command/normalize.js +4 -0
- package/dist/tools/exec-command/regression-capturer.js +1 -1
- package/package.json +10 -5
|
@@ -1094,7 +1094,7 @@ function normalizeScopeList(value) {
|
|
|
1094
1094
|
}
|
|
1095
1095
|
if (typeof value === 'string' && value.trim()) {
|
|
1096
1096
|
const normalized = value
|
|
1097
|
-
.split(/[
|
|
1097
|
+
.split(/[,\s]+/)
|
|
1098
1098
|
.map((item) => item.trim())
|
|
1099
1099
|
.filter(Boolean);
|
|
1100
1100
|
return normalized.length ? normalized : undefined;
|
|
@@ -1146,9 +1146,6 @@ function normalizeLoadBalancing(input) {
|
|
|
1146
1146
|
return undefined;
|
|
1147
1147
|
const record = input;
|
|
1148
1148
|
const strategyRaw = typeof record.strategy === 'string' ? record.strategy.trim().toLowerCase() : '';
|
|
1149
|
-
if (!strategyRaw)
|
|
1150
|
-
return undefined;
|
|
1151
|
-
const strategy = strategyRaw === 'weighted' || strategyRaw === 'sticky' ? strategyRaw : 'round-robin';
|
|
1152
1149
|
const weightsRaw = asRecord(record.weights);
|
|
1153
1150
|
const weightsEntries = {};
|
|
1154
1151
|
for (const [key, value] of Object.entries(weightsRaw)) {
|
|
@@ -1177,12 +1174,75 @@ function normalizeLoadBalancing(input) {
|
|
|
1177
1174
|
: {})
|
|
1178
1175
|
}
|
|
1179
1176
|
: undefined;
|
|
1177
|
+
const contextWeightedRaw = asRecord(record.contextWeighted);
|
|
1178
|
+
const contextWeighted = Object.keys(contextWeightedRaw).length > 0
|
|
1179
|
+
? {
|
|
1180
|
+
...(typeof contextWeightedRaw.enabled === 'boolean' ? { enabled: contextWeightedRaw.enabled } : {}),
|
|
1181
|
+
...(typeof contextWeightedRaw.clientCapTokens === 'number' && Number.isFinite(contextWeightedRaw.clientCapTokens)
|
|
1182
|
+
? { clientCapTokens: contextWeightedRaw.clientCapTokens }
|
|
1183
|
+
: {}),
|
|
1184
|
+
...(typeof contextWeightedRaw.gamma === 'number' && Number.isFinite(contextWeightedRaw.gamma)
|
|
1185
|
+
? { gamma: contextWeightedRaw.gamma }
|
|
1186
|
+
: {}),
|
|
1187
|
+
...(typeof contextWeightedRaw.maxMultiplier === 'number' && Number.isFinite(contextWeightedRaw.maxMultiplier)
|
|
1188
|
+
? { maxMultiplier: contextWeightedRaw.maxMultiplier }
|
|
1189
|
+
: {})
|
|
1190
|
+
}
|
|
1191
|
+
: undefined;
|
|
1192
|
+
const aliasSelection = normalizeAliasSelection(record.aliasSelection);
|
|
1193
|
+
const hasNonStrategyConfig = Object.keys(weightsEntries).length > 0 ||
|
|
1194
|
+
Boolean(healthWeighted) ||
|
|
1195
|
+
Boolean(contextWeighted) ||
|
|
1196
|
+
Boolean(aliasSelection);
|
|
1197
|
+
if (!strategyRaw && !hasNonStrategyConfig) {
|
|
1198
|
+
return undefined;
|
|
1199
|
+
}
|
|
1200
|
+
const strategy = strategyRaw === 'weighted' || strategyRaw === 'sticky' ? strategyRaw : 'round-robin';
|
|
1180
1201
|
return {
|
|
1181
1202
|
strategy,
|
|
1182
1203
|
...(Object.keys(weightsEntries).length ? { weights: weightsEntries } : {}),
|
|
1183
|
-
...(
|
|
1204
|
+
...(aliasSelection ? { aliasSelection } : {}),
|
|
1205
|
+
...(healthWeighted ? { healthWeighted } : {}),
|
|
1206
|
+
...(contextWeighted ? { contextWeighted } : {})
|
|
1184
1207
|
};
|
|
1185
1208
|
}
|
|
1209
|
+
function normalizeAliasSelection(raw) {
|
|
1210
|
+
if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
|
|
1211
|
+
return undefined;
|
|
1212
|
+
}
|
|
1213
|
+
const record = raw;
|
|
1214
|
+
const enabled = typeof record.enabled === 'boolean' ? record.enabled : undefined;
|
|
1215
|
+
const defaultStrategy = coerceAliasSelectionStrategy(record.defaultStrategy);
|
|
1216
|
+
const providersRaw = asRecord(record.providers);
|
|
1217
|
+
const providers = {};
|
|
1218
|
+
for (const [providerId, value] of Object.entries(providersRaw)) {
|
|
1219
|
+
const strategy = coerceAliasSelectionStrategy(value);
|
|
1220
|
+
if (strategy) {
|
|
1221
|
+
providers[providerId] = strategy;
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
const out = {
|
|
1225
|
+
...(enabled !== undefined ? { enabled } : {}),
|
|
1226
|
+
...(defaultStrategy ? { defaultStrategy } : {}),
|
|
1227
|
+
...(Object.keys(providers).length ? { providers } : {})
|
|
1228
|
+
};
|
|
1229
|
+
return Object.keys(out).length ? out : undefined;
|
|
1230
|
+
}
|
|
1231
|
+
function coerceAliasSelectionStrategy(value) {
|
|
1232
|
+
if (typeof value !== 'string') {
|
|
1233
|
+
return undefined;
|
|
1234
|
+
}
|
|
1235
|
+
const normalized = value.trim().toLowerCase();
|
|
1236
|
+
if (!normalized) {
|
|
1237
|
+
return undefined;
|
|
1238
|
+
}
|
|
1239
|
+
if (normalized === 'none')
|
|
1240
|
+
return 'none';
|
|
1241
|
+
if (normalized === 'sticky-queue' || normalized === 'sticky_queue' || normalized === 'stickyqueue') {
|
|
1242
|
+
return 'sticky-queue';
|
|
1243
|
+
}
|
|
1244
|
+
return undefined;
|
|
1245
|
+
}
|
|
1186
1246
|
function coerceRatio(value) {
|
|
1187
1247
|
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
1188
1248
|
return value;
|
|
@@ -16,4 +16,8 @@ export declare class ContextAdvisor {
|
|
|
16
16
|
private hardLimit;
|
|
17
17
|
configure(config?: VirtualRouterContextRoutingConfig | null): void;
|
|
18
18
|
classify(pool: string[], estimatedTokens: number, resolveProfile: (key: string) => ProviderProfile): ContextAdvisorResult;
|
|
19
|
+
getConfig(): {
|
|
20
|
+
warnRatio: number;
|
|
21
|
+
hardLimit: boolean;
|
|
22
|
+
};
|
|
19
23
|
}
|
|
@@ -55,6 +55,9 @@ export class ContextAdvisor {
|
|
|
55
55
|
allOverflow: safe.length === 0 && risky.length === 0 && overflow.length > 0
|
|
56
56
|
};
|
|
57
57
|
}
|
|
58
|
+
getConfig() {
|
|
59
|
+
return { warnRatio: this.warnRatio, hardLimit: this.hardLimit };
|
|
60
|
+
}
|
|
58
61
|
}
|
|
59
62
|
function clampWarnRatio(value) {
|
|
60
63
|
if (!Number.isFinite(value)) {
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { ContextWeightedLoadBalancingConfig } from './types.js';
|
|
2
|
+
export type ResolvedContextWeightedConfig = Required<{
|
|
3
|
+
enabled: boolean;
|
|
4
|
+
clientCapTokens: number;
|
|
5
|
+
gamma: number;
|
|
6
|
+
maxMultiplier: number;
|
|
7
|
+
}>;
|
|
8
|
+
/**
|
|
9
|
+
* Context-weighted constant table (defaults).
|
|
10
|
+
*
|
|
11
|
+
* Intended behavior:
|
|
12
|
+
* - Prefer smaller effective safe context windows early, so that larger windows remain available later.
|
|
13
|
+
* - Compensation is proportional by default (`gamma=1`), but capped by `maxMultiplier`.
|
|
14
|
+
*
|
|
15
|
+
* Notes:
|
|
16
|
+
* - `clientCapTokens` is the maximum effective context the client can consume, even if the model supports more.
|
|
17
|
+
* - The effective safe window is computed using ContextAdvisor's `warnRatio` and model "slack" above the client cap.
|
|
18
|
+
* - If a model has slack >= the reserved margin, it effectively gets the full client cap as safe window.
|
|
19
|
+
*/
|
|
20
|
+
export declare const DEFAULT_CONTEXT_WEIGHTED_CONFIG: ResolvedContextWeightedConfig;
|
|
21
|
+
export declare function resolveContextWeightedConfig(raw?: ContextWeightedLoadBalancingConfig | null): ResolvedContextWeightedConfig;
|
|
22
|
+
export declare function computeEffectiveSafeWindowTokens(options: {
|
|
23
|
+
modelMaxTokens: number;
|
|
24
|
+
warnRatio: number;
|
|
25
|
+
clientCapTokens: number;
|
|
26
|
+
}): number;
|
|
27
|
+
export declare function computeContextMultiplier(options: {
|
|
28
|
+
effectiveSafeRefTokens: number;
|
|
29
|
+
effectiveSafeTokens: number;
|
|
30
|
+
cfg: ResolvedContextWeightedConfig;
|
|
31
|
+
}): number;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-weighted constant table (defaults).
|
|
3
|
+
*
|
|
4
|
+
* Intended behavior:
|
|
5
|
+
* - Prefer smaller effective safe context windows early, so that larger windows remain available later.
|
|
6
|
+
* - Compensation is proportional by default (`gamma=1`), but capped by `maxMultiplier`.
|
|
7
|
+
*
|
|
8
|
+
* Notes:
|
|
9
|
+
* - `clientCapTokens` is the maximum effective context the client can consume, even if the model supports more.
|
|
10
|
+
* - The effective safe window is computed using ContextAdvisor's `warnRatio` and model "slack" above the client cap.
|
|
11
|
+
* - If a model has slack >= the reserved margin, it effectively gets the full client cap as safe window.
|
|
12
|
+
*/
|
|
13
|
+
export const DEFAULT_CONTEXT_WEIGHTED_CONFIG = {
|
|
14
|
+
enabled: false,
|
|
15
|
+
clientCapTokens: 200_000,
|
|
16
|
+
gamma: 1,
|
|
17
|
+
maxMultiplier: 2
|
|
18
|
+
};
|
|
19
|
+
export function resolveContextWeightedConfig(raw) {
|
|
20
|
+
const enabled = raw?.enabled ?? DEFAULT_CONTEXT_WEIGHTED_CONFIG.enabled;
|
|
21
|
+
const clientCapTokens = typeof raw?.clientCapTokens === 'number' && Number.isFinite(raw.clientCapTokens) && raw.clientCapTokens > 0
|
|
22
|
+
? Math.floor(raw.clientCapTokens)
|
|
23
|
+
: DEFAULT_CONTEXT_WEIGHTED_CONFIG.clientCapTokens;
|
|
24
|
+
const gamma = typeof raw?.gamma === 'number' && Number.isFinite(raw.gamma) && raw.gamma > 0
|
|
25
|
+
? raw.gamma
|
|
26
|
+
: DEFAULT_CONTEXT_WEIGHTED_CONFIG.gamma;
|
|
27
|
+
const maxMultiplier = typeof raw?.maxMultiplier === 'number' && Number.isFinite(raw.maxMultiplier) && raw.maxMultiplier >= 1
|
|
28
|
+
? raw.maxMultiplier
|
|
29
|
+
: DEFAULT_CONTEXT_WEIGHTED_CONFIG.maxMultiplier;
|
|
30
|
+
return { enabled, clientCapTokens, gamma, maxMultiplier };
|
|
31
|
+
}
|
|
32
|
+
export function computeEffectiveSafeWindowTokens(options) {
|
|
33
|
+
const modelMaxTokens = typeof options.modelMaxTokens === 'number' && Number.isFinite(options.modelMaxTokens) && options.modelMaxTokens > 0
|
|
34
|
+
? Math.floor(options.modelMaxTokens)
|
|
35
|
+
: 1;
|
|
36
|
+
const clientCapTokens = typeof options.clientCapTokens === 'number' && Number.isFinite(options.clientCapTokens) && options.clientCapTokens > 0
|
|
37
|
+
? Math.floor(options.clientCapTokens)
|
|
38
|
+
: DEFAULT_CONTEXT_WEIGHTED_CONFIG.clientCapTokens;
|
|
39
|
+
const warnRatio = typeof options.warnRatio === 'number' && Number.isFinite(options.warnRatio) && options.warnRatio > 0 && options.warnRatio < 1
|
|
40
|
+
? options.warnRatio
|
|
41
|
+
: 0.9;
|
|
42
|
+
const effectiveMax = Math.min(modelMaxTokens, clientCapTokens);
|
|
43
|
+
const reserve = Math.ceil(effectiveMax * (1 - warnRatio));
|
|
44
|
+
const slack = Math.max(0, modelMaxTokens - clientCapTokens);
|
|
45
|
+
const reserveEff = Math.max(0, reserve - slack);
|
|
46
|
+
return Math.max(1, effectiveMax - reserveEff);
|
|
47
|
+
}
|
|
48
|
+
export function computeContextMultiplier(options) {
|
|
49
|
+
const ref = Math.max(1, Math.floor(options.effectiveSafeRefTokens));
|
|
50
|
+
const cur = Math.max(1, Math.floor(options.effectiveSafeTokens));
|
|
51
|
+
const ratio = ref / cur;
|
|
52
|
+
const raw = Math.pow(Math.max(1, ratio), options.cfg.gamma);
|
|
53
|
+
return Math.min(options.cfg.maxMultiplier, raw);
|
|
54
|
+
}
|
|
@@ -7,7 +7,6 @@ type DebugLike = {
|
|
|
7
7
|
export declare function resetRateLimitBackoffForProvider(providerKey: string): void;
|
|
8
8
|
export declare function handleProviderFailureImpl(event: ProviderFailureEvent, healthManager: ProviderHealthManager, healthConfig: Required<ProviderHealthConfig>, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void): void;
|
|
9
9
|
export declare function mapProviderErrorImpl(event: ProviderErrorEvent, healthConfig: Required<ProviderHealthConfig>): ProviderFailureEvent | null;
|
|
10
|
-
export declare function applySeriesCooldownImpl(event: ProviderErrorEvent, providerRegistry: ProviderRegistry, healthManager: ProviderHealthManager, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void, debug?: DebugLike): void;
|
|
11
10
|
/**
|
|
12
11
|
* 处理来自 Host 侧的配额恢复事件:
|
|
13
12
|
* - 清除指定 providerKey 在健康管理器中的熔断/冷却状态;
|
|
@@ -18,5 +17,6 @@ export declare function applySeriesCooldownImpl(event: ProviderErrorEvent, provi
|
|
|
18
17
|
*/
|
|
19
18
|
export declare function applyQuotaRecoveryImpl(event: ProviderErrorEvent, healthManager: ProviderHealthManager, clearProviderCooldown: (providerKey: string) => void, debug?: DebugLike): boolean;
|
|
20
19
|
export declare function applyQuotaDepletedImpl(event: ProviderErrorEvent, healthManager: ProviderHealthManager, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void, debug?: DebugLike): boolean;
|
|
20
|
+
export declare function applySeriesCooldownImpl(event: ProviderErrorEvent, _providerRegistry: ProviderRegistry, _healthManager: ProviderHealthManager, _markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void, debug?: DebugLike): boolean;
|
|
21
21
|
export declare function deriveReason(code: string, stage: string, statusCode?: number): string;
|
|
22
22
|
export {};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
const SERIES_COOLDOWN_DETAIL_KEY = 'virtualRouterSeriesCooldown';
|
|
2
1
|
const QUOTA_RECOVERY_DETAIL_KEY = 'virtualRouterQuotaRecovery';
|
|
3
2
|
const QUOTA_DEPLETED_DETAIL_KEY = 'virtualRouterQuotaDepleted';
|
|
4
3
|
function parseDurationToMs(value) {
|
|
@@ -196,46 +195,6 @@ export function mapProviderErrorImpl(event, healthConfig) {
|
|
|
196
195
|
}
|
|
197
196
|
};
|
|
198
197
|
}
|
|
199
|
-
export function applySeriesCooldownImpl(event, providerRegistry, healthManager, markProviderCooldown, debug) {
|
|
200
|
-
const seriesDetail = extractSeriesCooldownDetail(event);
|
|
201
|
-
if (!seriesDetail) {
|
|
202
|
-
return;
|
|
203
|
-
}
|
|
204
|
-
const targetKeys = resolveSeriesCooldownTargets(seriesDetail, event, providerRegistry);
|
|
205
|
-
if (targetKeys.length === 0) {
|
|
206
|
-
debug?.log?.('[virtual-router] series cooldown skipped: no targets', {
|
|
207
|
-
providerId: seriesDetail.providerId,
|
|
208
|
-
providerKey: seriesDetail.providerKey,
|
|
209
|
-
series: seriesDetail.series
|
|
210
|
-
});
|
|
211
|
-
return;
|
|
212
|
-
}
|
|
213
|
-
const affected = [];
|
|
214
|
-
for (const providerKey of targetKeys) {
|
|
215
|
-
try {
|
|
216
|
-
const profile = providerRegistry.get(providerKey);
|
|
217
|
-
const modelSeries = resolveModelSeries(profile.modelId);
|
|
218
|
-
if (modelSeries !== seriesDetail.series) {
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
221
|
-
healthManager.tripProvider(providerKey, 'rate_limit', seriesDetail.cooldownMs);
|
|
222
|
-
markProviderCooldown(providerKey, seriesDetail.cooldownMs);
|
|
223
|
-
affected.push(providerKey);
|
|
224
|
-
}
|
|
225
|
-
catch {
|
|
226
|
-
// ignore lookup failures; invalid keys may show up if config drifted
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
if (affected.length) {
|
|
230
|
-
debug?.log?.('[virtual-router] series cooldown', {
|
|
231
|
-
providerId: seriesDetail.providerId,
|
|
232
|
-
providerKey: seriesDetail.providerKey,
|
|
233
|
-
series: seriesDetail.series,
|
|
234
|
-
cooldownMs: seriesDetail.cooldownMs,
|
|
235
|
-
affected
|
|
236
|
-
});
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
198
|
function extractQuotaRecoveryDetail(event) {
|
|
240
199
|
if (!event || !event.details || typeof event.details !== 'object') {
|
|
241
200
|
return null;
|
|
@@ -326,63 +285,21 @@ export function applyQuotaDepletedImpl(event, healthManager, markProviderCooldow
|
|
|
326
285
|
}
|
|
327
286
|
return true;
|
|
328
287
|
}
|
|
329
|
-
function
|
|
330
|
-
const candidates = new Set();
|
|
331
|
-
const push = (key) => {
|
|
332
|
-
if (typeof key !== 'string') {
|
|
333
|
-
return;
|
|
334
|
-
}
|
|
335
|
-
const trimmed = key.trim();
|
|
336
|
-
if (!trimmed) {
|
|
337
|
-
return;
|
|
338
|
-
}
|
|
339
|
-
if (providerRegistry.has(trimmed)) {
|
|
340
|
-
candidates.add(trimmed);
|
|
341
|
-
}
|
|
342
|
-
};
|
|
343
|
-
push(detail.providerKey);
|
|
344
|
-
const runtimeKey = (event.runtime?.target && typeof event.runtime.target === 'object'
|
|
345
|
-
? event.runtime.target.providerKey
|
|
346
|
-
: undefined) || event.runtime?.providerKey;
|
|
347
|
-
push(runtimeKey);
|
|
348
|
-
return Array.from(candidates);
|
|
349
|
-
}
|
|
350
|
-
function extractSeriesCooldownDetail(event) {
|
|
288
|
+
export function applySeriesCooldownImpl(event, _providerRegistry, _healthManager, _markProviderCooldown, debug) {
|
|
351
289
|
if (!event || !event.details || typeof event.details !== 'object') {
|
|
352
|
-
return
|
|
290
|
+
return false;
|
|
353
291
|
}
|
|
354
|
-
const raw = event.details
|
|
292
|
+
const raw = event.details.virtualRouterSeriesCooldown;
|
|
355
293
|
if (!raw || typeof raw !== 'object') {
|
|
356
|
-
return
|
|
357
|
-
}
|
|
358
|
-
const record = raw;
|
|
359
|
-
const providerIdRaw = record.providerId;
|
|
360
|
-
const seriesRaw = record.series;
|
|
361
|
-
const providerKeyRaw = record.providerKey;
|
|
362
|
-
const cooldownRaw = record.cooldownMs;
|
|
363
|
-
if (typeof providerIdRaw !== 'string' || !providerIdRaw.trim()) {
|
|
364
|
-
return null;
|
|
365
|
-
}
|
|
366
|
-
const normalizedSeries = typeof seriesRaw === 'string' ? seriesRaw.trim().toLowerCase() : '';
|
|
367
|
-
if (normalizedSeries !== 'gemini-pro' && normalizedSeries !== 'gemini-flash' && normalizedSeries !== 'claude') {
|
|
368
|
-
return null;
|
|
369
|
-
}
|
|
370
|
-
const cooldownMs = typeof cooldownRaw === 'number'
|
|
371
|
-
? cooldownRaw
|
|
372
|
-
: typeof cooldownRaw === 'string'
|
|
373
|
-
? Number.parseFloat(cooldownRaw)
|
|
374
|
-
: Number.NaN;
|
|
375
|
-
if (!Number.isFinite(cooldownMs) || cooldownMs <= 0) {
|
|
376
|
-
return null;
|
|
294
|
+
return false;
|
|
377
295
|
}
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
};
|
|
296
|
+
const detail = raw;
|
|
297
|
+
debug?.log?.('[virtual-router] series cooldown ignored', {
|
|
298
|
+
providerId: detail.providerId,
|
|
299
|
+
series: detail.series,
|
|
300
|
+
cooldownMs: detail.cooldownMs
|
|
301
|
+
});
|
|
302
|
+
return true;
|
|
386
303
|
}
|
|
387
304
|
export function deriveReason(code, stage, statusCode) {
|
|
388
305
|
if (code.includes('RATE') || code.includes('429'))
|
|
@@ -401,19 +318,3 @@ export function deriveReason(code, stage, statusCode) {
|
|
|
401
318
|
return 'client_error';
|
|
402
319
|
return 'unknown';
|
|
403
320
|
}
|
|
404
|
-
function resolveModelSeries(modelId) {
|
|
405
|
-
if (!modelId) {
|
|
406
|
-
return 'default';
|
|
407
|
-
}
|
|
408
|
-
const lower = modelId.toLowerCase();
|
|
409
|
-
if (lower.includes('claude') || lower.includes('opus')) {
|
|
410
|
-
return 'claude';
|
|
411
|
-
}
|
|
412
|
-
if (lower.includes('flash')) {
|
|
413
|
-
return 'gemini-flash';
|
|
414
|
-
}
|
|
415
|
-
if (lower.includes('gemini') || lower.includes('pro')) {
|
|
416
|
-
return 'gemini-pro';
|
|
417
|
-
}
|
|
418
|
-
return 'default';
|
|
419
|
-
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { AliasSelectionConfig, AliasSelectionStrategy } from '../types.js';
|
|
2
|
+
export type AliasQueueStore = Map<string, string[]>;
|
|
3
|
+
export declare const DEFAULT_PROVIDER_ALIAS_SELECTION: Record<string, AliasSelectionStrategy>;
|
|
4
|
+
export declare function resolveAliasSelectionStrategy(providerId: string, cfg: AliasSelectionConfig | undefined): AliasSelectionStrategy;
|
|
5
|
+
export declare function pinCandidatesByAliasQueue(opts: {
|
|
6
|
+
queueStore: AliasQueueStore | undefined;
|
|
7
|
+
providerId: string;
|
|
8
|
+
modelId: string;
|
|
9
|
+
candidates: string[];
|
|
10
|
+
orderedTargets: string[];
|
|
11
|
+
excludedProviderKeys: Set<string>;
|
|
12
|
+
aliasOfKey: (providerKey: string) => string | null;
|
|
13
|
+
modelIdOfKey: (providerKey: string) => string | null;
|
|
14
|
+
availabilityCheck: (providerKey: string) => boolean;
|
|
15
|
+
}): string[] | null;
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
// Default provider-level strategy table.
// This is a data-only default; callers can override via `loadBalancing.aliasSelection.providers`.
export const DEFAULT_PROVIDER_ALIAS_SELECTION = {
    // Antigravity: upstream gateway may reject rapid cross-key switching; stick to one alias until error.
    antigravity: 'sticky-queue'
};
/**
 * Resolve the alias selection strategy for a provider.
 *
 * Precedence (first match wins):
 *   1. empty `providerId` or `cfg.enabled === false`  -> 'none'
 *   2. a valid per-provider override in `cfg.providers`
 *   3. a valid `cfg.defaultStrategy`
 *   4. the built-in `DEFAULT_PROVIDER_ALIAS_SELECTION` table
 *   5. 'none'
 *
 * @param providerId Provider id to resolve the strategy for.
 * @param cfg Optional alias selection config; values other than 'none' / 'sticky-queue' are ignored.
 * @returns The resolved strategy.
 */
export function resolveAliasSelectionStrategy(providerId, cfg) {
    if (!providerId)
        return 'none';
    if (cfg?.enabled === false)
        return 'none';
    const overrides = cfg?.providers ?? {};
    const override = overrides[providerId];
    if (override === 'none' || override === 'sticky-queue') {
        return override;
    }
    const def = cfg?.defaultStrategy;
    if (def === 'none' || def === 'sticky-queue') {
        return def;
    }
    // Only own entries of the table count. A plain object also exposes inherited
    // members (`constructor`, `toString`, ...), so a provider id that collides with
    // one of those names must not leak a non-strategy value to the caller.
    const hasBuiltIn = Object.prototype.hasOwnProperty.call(DEFAULT_PROVIDER_ALIAS_SELECTION, providerId);
    const table = hasBuiltIn ? DEFAULT_PROVIDER_ALIAS_SELECTION[providerId] : undefined;
    return table ?? 'none';
}
|
|
24
|
+
export function pinCandidatesByAliasQueue(opts) {
|
|
25
|
+
const { queueStore, providerId, modelId, candidates, orderedTargets, excludedProviderKeys, aliasOfKey, modelIdOfKey, availabilityCheck } = opts;
|
|
26
|
+
if (!queueStore)
|
|
27
|
+
return null;
|
|
28
|
+
if (!providerId || !modelId)
|
|
29
|
+
return null;
|
|
30
|
+
if (!Array.isArray(candidates) || candidates.length < 2)
|
|
31
|
+
return null;
|
|
32
|
+
const aliasBuckets = new Map();
|
|
33
|
+
for (const key of candidates) {
|
|
34
|
+
if (!key || typeof key !== 'string')
|
|
35
|
+
continue;
|
|
36
|
+
if (!key.startsWith(`${providerId}.`))
|
|
37
|
+
return null;
|
|
38
|
+
const m = modelIdOfKey(key);
|
|
39
|
+
if (!m || m !== modelId)
|
|
40
|
+
return null;
|
|
41
|
+
const alias = aliasOfKey(key);
|
|
42
|
+
if (!alias)
|
|
43
|
+
return null;
|
|
44
|
+
const list = aliasBuckets.get(alias) ?? [];
|
|
45
|
+
list.push(key);
|
|
46
|
+
aliasBuckets.set(alias, list);
|
|
47
|
+
}
|
|
48
|
+
if (aliasBuckets.size <= 1)
|
|
49
|
+
return null;
|
|
50
|
+
const queueKey = `${providerId}::${modelId}`;
|
|
51
|
+
const desiredOrder = resolveAliasOrderFromTargets({
|
|
52
|
+
orderedTargets,
|
|
53
|
+
providerId,
|
|
54
|
+
modelId,
|
|
55
|
+
aliasOfKey,
|
|
56
|
+
modelIdOfKey,
|
|
57
|
+
allowedAliases: new Set(aliasBuckets.keys())
|
|
58
|
+
});
|
|
59
|
+
let queue = mergeAliasQueue(queueStore.get(queueKey) ?? [], desiredOrder);
|
|
60
|
+
// If this is a retry attempt and the previous alias was excluded, rotate it to the tail.
|
|
61
|
+
if (excludedProviderKeys && excludedProviderKeys.size) {
|
|
62
|
+
const excludedAliases = [];
|
|
63
|
+
for (const ex of excludedProviderKeys) {
|
|
64
|
+
if (!ex || typeof ex !== 'string')
|
|
65
|
+
continue;
|
|
66
|
+
if (!ex.startsWith(`${providerId}.`))
|
|
67
|
+
continue;
|
|
68
|
+
const exModel = modelIdOfKey(ex);
|
|
69
|
+
if (!exModel || exModel !== modelId)
|
|
70
|
+
continue;
|
|
71
|
+
const exAlias = aliasOfKey(ex);
|
|
72
|
+
if (exAlias)
|
|
73
|
+
excludedAliases.push(exAlias);
|
|
74
|
+
}
|
|
75
|
+
if (excludedAliases.length) {
|
|
76
|
+
queue = rotateQueueToTail(queue, excludedAliases);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
// Ensure the head alias points to an available candidate; otherwise rotate until we find one.
|
|
80
|
+
if (queue.length) {
|
|
81
|
+
for (let i = 0; i < queue.length; i += 1) {
|
|
82
|
+
const head = queue[0];
|
|
83
|
+
const keys = aliasBuckets.get(head) ?? [];
|
|
84
|
+
const hasAvailable = keys.some((key) => availabilityCheck(key));
|
|
85
|
+
if (hasAvailable) {
|
|
86
|
+
break;
|
|
87
|
+
}
|
|
88
|
+
queue = rotateQueueToTail(queue, [head]);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
// Persist queue updates (even if unchanged, ensure first-time init is stored).
|
|
92
|
+
queueStore.set(queueKey, queue);
|
|
93
|
+
const selectedAlias = queue[0];
|
|
94
|
+
if (!selectedAlias)
|
|
95
|
+
return null;
|
|
96
|
+
const selectedKeys = aliasBuckets.get(selectedAlias) ?? [];
|
|
97
|
+
if (!selectedKeys.length)
|
|
98
|
+
return null;
|
|
99
|
+
// Preserve original candidate order.
|
|
100
|
+
const selectedSet = new Set(selectedKeys);
|
|
101
|
+
return candidates.filter((key) => selectedSet.has(key));
|
|
102
|
+
}
|
|
103
|
+
function resolveAliasOrderFromTargets(opts) {
|
|
104
|
+
const { orderedTargets, providerId, modelId, aliasOfKey, modelIdOfKey, allowedAliases } = opts;
|
|
105
|
+
if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
|
|
106
|
+
return Array.from(allowedAliases);
|
|
107
|
+
}
|
|
108
|
+
const out = [];
|
|
109
|
+
const seen = new Set();
|
|
110
|
+
for (const key of orderedTargets) {
|
|
111
|
+
if (!key || typeof key !== 'string')
|
|
112
|
+
continue;
|
|
113
|
+
if (!key.startsWith(`${providerId}.`))
|
|
114
|
+
continue;
|
|
115
|
+
const m = modelIdOfKey(key);
|
|
116
|
+
if (!m || m !== modelId)
|
|
117
|
+
continue;
|
|
118
|
+
const alias = aliasOfKey(key);
|
|
119
|
+
if (!alias || !allowedAliases.has(alias) || seen.has(alias))
|
|
120
|
+
continue;
|
|
121
|
+
seen.add(alias);
|
|
122
|
+
out.push(alias);
|
|
123
|
+
}
|
|
124
|
+
for (const alias of Array.from(allowedAliases)) {
|
|
125
|
+
if (!seen.has(alias))
|
|
126
|
+
out.push(alias);
|
|
127
|
+
}
|
|
128
|
+
return out;
|
|
129
|
+
}
|
|
130
|
+
/**
 * Reconcile a stored alias queue with the currently desired aliases.
 *
 * Stored aliases that are no longer desired are dropped, the survivors keep
 * their stored order, and desired aliases that were not stored are appended in
 * desired order. With no stored queue, a copy of `desired` is returned.
 */
function mergeAliasQueue(existing, desired) {
    const hasStoredQueue = Array.isArray(existing) && existing.length > 0;
    if (!hasStoredQueue) {
        return [...desired];
    }
    const wanted = new Set(desired);
    const retained = existing.filter((alias) => wanted.has(alias));
    const alreadyQueued = new Set(retained);
    const newcomers = [...desired].filter((alias) => !alreadyQueued.has(alias));
    return retained.concat(newcomers);
}
|
|
143
|
+
/**
 * Move the given aliases to the end of the queue.
 *
 * Aliases not listed keep their relative order at the front; listed aliases
 * follow in the order they first appear in the queue, each at most once.
 * The input queue is returned untouched when it is not an array, has fewer
 * than two entries, or when `aliases` is not a non-empty array.
 */
function rotateQueueToTail(queue, aliases) {
    const rotatable = Array.isArray(queue) && queue.length >= 2 && Array.isArray(aliases) && aliases.length > 0;
    if (!rotatable) {
        return queue;
    }
    const demoted = new Set(aliases);
    const front = [];
    // A Set de-duplicates the moved aliases while remembering first-seen order.
    const back = new Set();
    for (const alias of queue) {
        if (demoted.has(alias)) {
            back.add(alias);
        }
        else {
            front.push(alias);
        }
    }
    return front.concat(Array.from(back));
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { type ResolvedContextWeightedConfig } from '../context-weighted.js';
|
|
2
|
+
import type { ContextAdvisorResult } from '../context-advisor.js';
|
|
3
|
+
/**
 * Compute an effective safe context window per candidate for context-weighted selection.
 *
 * Per the implementation: returns `null` when `cfg.enabled` is falsy. Otherwise each
 * candidate's `usage[key].limit` (floored; treated as 0 when missing or not a finite
 * number, then clamped to at least 1) is passed to `computeEffectiveSafeWindowTokens`
 * together with `warnRatio` and `cfg.clientCapTokens`.
 *
 * @returns `eff` maps each candidate key to its computed value; `ref` is the largest
 *   such value, never less than 1.
 */
export declare function computeContextWeightMultipliers(opts: {
|
|
4
|
+
candidates: string[];
|
|
5
|
+
usage: ContextAdvisorResult['usage'] | undefined;
|
|
6
|
+
warnRatio: number;
|
|
7
|
+
cfg: ResolvedContextWeightedConfig;
|
|
8
|
+
}): {
|
|
9
|
+
ref: number;
|
|
10
|
+
eff: Record<string, number>;
|
|
11
|
+
} | null;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { computeEffectiveSafeWindowTokens } from '../context-weighted.js';
|
|
2
|
+
/**
 * Compute the effective safe context window of every candidate.
 *
 * Returns `null` when context weighting is disabled. Otherwise `eff` holds the
 * value produced by `computeEffectiveSafeWindowTokens` for each candidate and
 * `ref` is the largest of those values (with a floor of 1).
 */
export function computeContextWeightMultipliers(opts) {
    const { candidates, usage, warnRatio, cfg } = opts;
    if (!cfg.enabled) {
        return null;
    }
    const effectiveByKey = {};
    let largest = 1;
    for (const providerKey of candidates) {
        const rawLimit = usage?.[providerKey]?.limit;
        // Unknown or non-finite limits count as 0 and are clamped to 1 below.
        const knownLimit = typeof rawLimit === 'number' && Number.isFinite(rawLimit) ? Math.floor(rawLimit) : 0;
        const windowTokens = computeEffectiveSafeWindowTokens({
            modelMaxTokens: Math.max(1, knownLimit),
            warnRatio,
            clientCapTokens: cfg.clientCapTokens
        });
        effectiveByKey[providerKey] = windowTokens;
        largest = windowTokens > largest ? windowTokens : largest;
    }
    return { ref: largest, eff: effectiveByKey };
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { RouterMetadataInput, RoutingFeatures } from '../types.js';
|
|
2
|
+
import type { RoutingInstructionState } from '../routing-instructions.js';
|
|
3
|
+
import type { SelectionDeps } from './selection-deps.js';
|
|
4
|
+
/**
 * Select a provider key for an explicitly requested provider/model pair.
 *
 * Per the implementation: `providerId` and `modelId` are trimmed; the provider's keys
 * are filtered to those whose registry profile has the requested `modelId`, and the
 * survivors are handed to `trySelectFromTier` as a single `'direct'` round-robin tier.
 * The `metadata` argument is accepted but not read by the implementation.
 *
 * @returns The chosen key with `routeUsed` set to `'direct'`, or `null` when either id
 *   is blank, the provider has no keys, no key serves the model, or tier selection
 *   yields no key.
 */
export declare function selectDirectProviderModel(providerId: string, modelId: string, metadata: RouterMetadataInput, features: RoutingFeatures, activeState: RoutingInstructionState, deps: SelectionDeps): {
|
|
5
|
+
providerKey: string;
|
|
6
|
+
routeUsed: string;
|
|
7
|
+
pool: string[];
|
|
8
|
+
poolId?: string;
|
|
9
|
+
} | null;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { trySelectFromTier } from './tier-selection.js';
|
|
2
|
+
export function selectDirectProviderModel(providerId, modelId, metadata, features, activeState, deps) {
|
|
3
|
+
const normalizedProvider = typeof providerId === 'string' ? providerId.trim() : '';
|
|
4
|
+
const normalizedModel = typeof modelId === 'string' ? modelId.trim() : '';
|
|
5
|
+
if (!normalizedProvider || !normalizedModel) {
|
|
6
|
+
return null;
|
|
7
|
+
}
|
|
8
|
+
const providerKeys = deps.providerRegistry.listProviderKeys(normalizedProvider);
|
|
9
|
+
if (providerKeys.length === 0) {
|
|
10
|
+
return null;
|
|
11
|
+
}
|
|
12
|
+
const matchingKeys = providerKeys.filter((key) => {
|
|
13
|
+
try {
|
|
14
|
+
const profile = deps.providerRegistry.get(key);
|
|
15
|
+
return profile?.modelId === normalizedModel;
|
|
16
|
+
}
|
|
17
|
+
catch {
|
|
18
|
+
return false;
|
|
19
|
+
}
|
|
20
|
+
});
|
|
21
|
+
if (matchingKeys.length === 0) {
|
|
22
|
+
return null;
|
|
23
|
+
}
|
|
24
|
+
const attempted = [];
|
|
25
|
+
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
26
|
+
? Math.max(0, features.estimatedTokens)
|
|
27
|
+
: 0;
|
|
28
|
+
const tier = {
|
|
29
|
+
id: `direct:${normalizedProvider}.${normalizedModel}`,
|
|
30
|
+
targets: matchingKeys,
|
|
31
|
+
priority: 100,
|
|
32
|
+
mode: 'round-robin',
|
|
33
|
+
backup: false
|
|
34
|
+
};
|
|
35
|
+
const { providerKey, poolTargets, tierId, failureHint } = trySelectFromTier('direct', tier, undefined, estimatedTokens, features, deps, {
|
|
36
|
+
disabledProviders: new Set(activeState.disabledProviders),
|
|
37
|
+
disabledKeysMap: new Map(activeState.disabledKeys),
|
|
38
|
+
allowedProviders: new Set(activeState.allowedProviders),
|
|
39
|
+
disabledModels: new Map(activeState.disabledModels),
|
|
40
|
+
allowAliasRotation: true
|
|
41
|
+
});
|
|
42
|
+
if (providerKey) {
|
|
43
|
+
return { providerKey, routeUsed: 'direct', pool: poolTargets, poolId: tierId };
|
|
44
|
+
}
|
|
45
|
+
if (failureHint) {
|
|
46
|
+
attempted.push(failureHint);
|
|
47
|
+
}
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { RoutingInstructionState } from '../routing-instructions.js';
|
|
2
|
+
import type { ProviderRegistry } from '../provider-registry.js';
|
|
3
|
+
/**
 * Resolve a forced routing-instruction target against the provider registry.
 *
 * @param target The non-null `forcedTarget` of a routing instruction state.
 * @param providerRegistry Registry used for the lookup.
 * @returns A list of provider keys tagged with a `mode` of `'exact'` or `'filter'`,
 *   or `null`.
 *   NOTE(review): the implementation is not visible in this declaration file; the
 *   precise meaning of each mode and the conditions for `null` should be confirmed there.
 */
export declare function resolveInstructionTarget(target: NonNullable<RoutingInstructionState['forcedTarget']>, providerRegistry: ProviderRegistry): {
|
|
4
|
+
mode: 'exact' | 'filter';
|
|
5
|
+
keys: string[];
|
|
6
|
+
} | null;
|