@jsonstudio/llms 0.6.147 → 0.6.198
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +15 -1
- package/dist/conversion/compat/actions/auto-thinking.d.ts +6 -0
- package/dist/conversion/compat/actions/auto-thinking.js +25 -0
- package/dist/conversion/compat/actions/field-mapping.d.ts +14 -0
- package/dist/conversion/compat/actions/field-mapping.js +155 -0
- package/dist/conversion/compat/actions/qwen-transform.d.ts +3 -0
- package/dist/conversion/compat/actions/qwen-transform.js +209 -0
- package/dist/conversion/compat/actions/request-rules.d.ts +24 -0
- package/dist/conversion/compat/actions/request-rules.js +63 -0
- package/dist/conversion/compat/actions/response-blacklist.d.ts +14 -0
- package/dist/conversion/compat/actions/response-blacklist.js +85 -0
- package/dist/conversion/compat/actions/response-normalize.d.ts +5 -0
- package/dist/conversion/compat/actions/response-normalize.js +121 -0
- package/dist/conversion/compat/actions/response-validate.d.ts +5 -0
- package/dist/conversion/compat/actions/response-validate.js +76 -0
- package/dist/conversion/compat/actions/snapshot.d.ts +8 -0
- package/dist/conversion/compat/actions/snapshot.js +21 -0
- package/dist/conversion/compat/actions/tool-schema.d.ts +6 -0
- package/dist/conversion/compat/actions/tool-schema.js +91 -0
- package/dist/conversion/compat/actions/universal-shape-filter.d.ts +74 -0
- package/dist/conversion/compat/actions/universal-shape-filter.js +382 -0
- package/dist/conversion/compat/profiles/chat-glm.json +187 -13
- package/dist/conversion/compat/profiles/chat-iflow.json +194 -26
- package/dist/conversion/compat/profiles/chat-lmstudio.json +43 -35
- package/dist/conversion/compat/profiles/chat-qwen.json +20 -16
- package/dist/conversion/compat/profiles/responses-c4m.json +42 -42
- package/dist/conversion/hub/pipeline/compat/compat-engine.d.ts +7 -2
- package/dist/conversion/hub/pipeline/compat/compat-engine.js +429 -5
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +47 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +35 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/target-utils.js +3 -0
- package/dist/conversion/hub/response/response-runtime.js +23 -15
- package/dist/conversion/responses/responses-host-policy.d.ts +6 -0
- package/dist/conversion/responses/responses-host-policy.js +14 -0
- package/dist/conversion/responses/responses-openai-bridge.js +51 -2
- package/dist/conversion/shared/anthropic-message-utils.js +6 -0
- package/dist/conversion/shared/bridge-actions.js +1 -1
- package/dist/conversion/shared/bridge-policies.js +0 -1
- package/dist/conversion/shared/responses-conversation-store.js +3 -26
- package/dist/conversion/shared/responses-reasoning-registry.d.ts +4 -0
- package/dist/conversion/shared/responses-reasoning-registry.js +62 -1
- package/dist/conversion/shared/responses-response-utils.js +23 -1
- package/dist/conversion/shared/tool-canonicalizer.d.ts +2 -0
- package/dist/conversion/shared/tool-filter-pipeline.js +11 -0
- package/dist/router/virtual-router/bootstrap.js +239 -39
- package/dist/router/virtual-router/classifier.js +19 -51
- package/dist/router/virtual-router/context-advisor.d.ts +21 -0
- package/dist/router/virtual-router/context-advisor.js +76 -0
- package/dist/router/virtual-router/engine.d.ts +11 -27
- package/dist/router/virtual-router/engine.js +191 -396
- package/dist/router/virtual-router/features.js +24 -607
- package/dist/router/virtual-router/health-manager.js +2 -7
- package/dist/router/virtual-router/message-utils.d.ts +7 -0
- package/dist/router/virtual-router/message-utils.js +66 -0
- package/dist/router/virtual-router/provider-registry.js +6 -2
- package/dist/router/virtual-router/token-estimator.d.ts +2 -0
- package/dist/router/virtual-router/token-estimator.js +16 -0
- package/dist/router/virtual-router/token-file-scanner.d.ts +15 -0
- package/dist/router/virtual-router/token-file-scanner.js +56 -0
- package/dist/router/virtual-router/tool-signals.d.ts +13 -0
- package/dist/router/virtual-router/tool-signals.js +403 -0
- package/dist/router/virtual-router/types.d.ts +21 -7
- package/dist/router/virtual-router/types.js +1 -0
- package/package.json +2 -2
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
1
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
2
|
+
import { scanOAuthTokenFiles } from './token-file-scanner.js';
|
|
2
3
|
const DEFAULT_CLASSIFIER = {
|
|
3
4
|
longContextThresholdTokens: 180000,
|
|
4
5
|
thinkingKeywords: ['think step', 'analysis', 'reasoning', '仔细分析', '深度思考'],
|
|
@@ -8,6 +9,11 @@ const DEFAULT_CLASSIFIER = {
|
|
|
8
9
|
};
|
|
9
10
|
// Load-balancing settings used when the virtual-router section does not provide `loadBalancing`.
const DEFAULT_LOAD_BALANCING = { strategy: 'round-robin' };
|
|
10
11
|
// Health settings used when the virtual-router section does not provide `health`.
// NOTE(review): the `*Ms` fields are presumably milliseconds — confirm against the health manager.
const DEFAULT_HEALTH = { failureThreshold: 3, cooldownMs: 30_000, fatalCooldownMs: 300_000 };
|
|
12
|
+
/**
 * Context-routing settings applied when the configuration omits `contextRouting`
 * or leaves individual fields unset.
 *
 * - `warnRatio`: default usage ratio; also the value the clamp helper falls back to
 *   for non-finite input.
 * - `hardLimit`: default for the hard-limit flag.
 * - `fallbackRoute`: default name of the fallback route.
 */
const DEFAULT_CONTEXT_ROUTING = {
    warnRatio: 0.9,
    hardLimit: false,
    fallbackRoute: 'longcontext'
};
|
|
11
17
|
/**
|
|
12
18
|
* 将用户提供的 Virtual Router 配置(或包含 virtualrouter 字段的整体配置)
|
|
13
19
|
* 规范化为 VirtualRouterConfig,供 HubPipeline / VirtualRouterEngine 直接使用。
|
|
@@ -31,12 +37,14 @@ export function bootstrapVirtualRouterConfig(input) {
|
|
|
31
37
|
const classifier = normalizeClassifier(section.classifier);
|
|
32
38
|
const loadBalancing = section.loadBalancing ?? DEFAULT_LOAD_BALANCING;
|
|
33
39
|
const health = section.health ?? DEFAULT_HEALTH;
|
|
40
|
+
const contextRouting = section.contextRouting ?? DEFAULT_CONTEXT_ROUTING;
|
|
34
41
|
const config = {
|
|
35
42
|
routing,
|
|
36
43
|
providers: providerProfiles,
|
|
37
44
|
classifier,
|
|
38
45
|
loadBalancing,
|
|
39
|
-
health
|
|
46
|
+
health,
|
|
47
|
+
contextRouting
|
|
40
48
|
};
|
|
41
49
|
return {
|
|
42
50
|
config,
|
|
@@ -56,7 +64,8 @@ function extractVirtualRouterSection(input) {
|
|
|
56
64
|
const classifier = (section.classifier ?? root.classifier);
|
|
57
65
|
const loadBalancing = normalizeLoadBalancing(section.loadBalancing ?? root.loadBalancing);
|
|
58
66
|
const health = normalizeHealth(section.health ?? root.health);
|
|
59
|
-
|
|
67
|
+
const contextRouting = normalizeContextRouting(section.contextRouting ?? root.contextRouting);
|
|
68
|
+
return { providers, routing, classifier, loadBalancing, health, contextRouting };
|
|
60
69
|
}
|
|
61
70
|
function buildProviderRuntimeEntries(providers) {
|
|
62
71
|
const runtimeEntries = {};
|
|
@@ -100,7 +109,11 @@ function buildProviderRuntimeEntries(providers) {
|
|
|
100
109
|
outboundProfile: normalizedProvider.outboundProfile,
|
|
101
110
|
compatibilityProfile: normalizedProvider.compatibilityProfile,
|
|
102
111
|
processMode: normalizedProvider.processMode,
|
|
103
|
-
responsesConfig: normalizedProvider.responsesConfig
|
|
112
|
+
responsesConfig: normalizedProvider.responsesConfig,
|
|
113
|
+
streaming: normalizedProvider.streaming,
|
|
114
|
+
modelStreaming: normalizedProvider.modelStreaming,
|
|
115
|
+
modelContextTokens: normalizedProvider.modelContextTokens,
|
|
116
|
+
defaultContextTokens: normalizedProvider.defaultContextTokens
|
|
104
117
|
};
|
|
105
118
|
}
|
|
106
119
|
}
|
|
@@ -146,6 +159,10 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
|
|
|
146
159
|
if (!runtime) {
|
|
147
160
|
throw new VirtualRouterError(`Routing target ${targetKey} references unknown runtime key ${runtimeKey}`, VirtualRouterErrorCode.CONFIG_ERROR);
|
|
148
161
|
}
|
|
162
|
+
const streamingPref = runtime.modelStreaming?.[parsed.modelId] !== undefined
|
|
163
|
+
? runtime.modelStreaming?.[parsed.modelId]
|
|
164
|
+
: runtime.streaming;
|
|
165
|
+
const contextTokens = resolveContextTokens(runtime, parsed.modelId);
|
|
149
166
|
profiles[targetKey] = {
|
|
150
167
|
providerKey: targetKey,
|
|
151
168
|
providerType: runtime.providerType,
|
|
@@ -156,15 +173,30 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
|
|
|
156
173
|
runtimeKey,
|
|
157
174
|
modelId: parsed.modelId,
|
|
158
175
|
processMode: runtime.processMode || 'chat',
|
|
159
|
-
responsesConfig: runtime.responsesConfig
|
|
176
|
+
responsesConfig: runtime.responsesConfig,
|
|
177
|
+
streaming: streamingPref,
|
|
178
|
+
maxContextTokens: contextTokens
|
|
160
179
|
};
|
|
161
180
|
targetRuntime[targetKey] = {
|
|
162
181
|
...runtime,
|
|
163
|
-
modelId: parsed.modelId
|
|
182
|
+
modelId: parsed.modelId,
|
|
183
|
+
streaming: streamingPref,
|
|
184
|
+
maxContextTokens: contextTokens
|
|
164
185
|
};
|
|
165
186
|
}
|
|
166
187
|
return { profiles, targetRuntime };
|
|
167
188
|
}
|
|
189
|
+
/**
 * Resolves the context-window size (in tokens) for one model of a provider runtime.
 *
 * Lookup order:
 *   1. `runtime.modelContextTokens[modelId]`
 *   2. `runtime.defaultContextTokens`, or `runtime.maxContextTokens` when the former is null/undefined
 *   3. `DEFAULT_MODEL_CONTEXT_TOKENS`
 *
 * A candidate is accepted only if it is a finite number whose floor is at least 1.
 * Values in (0, 1) would floor to 0 and yield a zero-token limit, so they are
 * skipped and resolution continues with the next candidate.
 *
 * @param runtime Provider runtime entry carrying the optional token settings.
 * @param modelId Model identifier used to index `modelContextTokens`.
 * @returns A positive integer token limit.
 */
function resolveContextTokens(runtime, modelId) {
    const toLimit = (candidate) => {
        if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
            return undefined;
        }
        const floored = Math.floor(candidate);
        return floored >= 1 ? floored : undefined;
    };
    return (toLimit(runtime.modelContextTokens?.[modelId]) ??
        toLimit(runtime.defaultContextTokens ?? runtime.maxContextTokens) ??
        DEFAULT_MODEL_CONTEXT_TOKENS);
}
|
|
168
200
|
function normalizeRouting(source) {
|
|
169
201
|
const routing = {};
|
|
170
202
|
for (const [routeName, entries] of Object.entries(source)) {
|
|
@@ -209,8 +241,12 @@ function normalizeProvider(providerId, raw) {
|
|
|
209
241
|
: '';
|
|
210
242
|
const headers = normalizeHeaders(provider.headers);
|
|
211
243
|
const compatibilityProfile = resolveCompatibilityProfile(providerId, provider);
|
|
212
|
-
const
|
|
244
|
+
const responsesNode = asRecord(provider.responses);
|
|
245
|
+
const responsesConfig = normalizeResponsesConfig(provider, responsesNode);
|
|
213
246
|
const processMode = normalizeProcessMode(provider.process);
|
|
247
|
+
const streaming = resolveProviderStreamingPreference(provider, responsesNode);
|
|
248
|
+
const modelStreaming = normalizeModelStreaming(provider);
|
|
249
|
+
const { modelContextTokens, defaultContextTokens } = normalizeModelContextTokens(provider);
|
|
214
250
|
return {
|
|
215
251
|
providerId,
|
|
216
252
|
providerType,
|
|
@@ -219,59 +255,105 @@ function normalizeProvider(providerId, raw) {
|
|
|
219
255
|
outboundProfile: mapOutboundProfile(providerType),
|
|
220
256
|
compatibilityProfile,
|
|
221
257
|
processMode,
|
|
222
|
-
responsesConfig
|
|
258
|
+
responsesConfig,
|
|
259
|
+
streaming,
|
|
260
|
+
modelStreaming,
|
|
261
|
+
modelContextTokens,
|
|
262
|
+
defaultContextTokens
|
|
223
263
|
};
|
|
224
264
|
}
|
|
225
|
-
function
|
|
226
|
-
const
|
|
227
|
-
if (!
|
|
265
|
+
/**
 * Builds a per-model map of streaming preferences from `provider.models`.
 *
 * Entries whose value is not an object, or for which no preference can be
 * resolved, are skipped.
 *
 * @param provider Raw provider configuration record.
 * @returns A `{ [modelId]: preference }` map, or `undefined` when `provider.models`
 *          is missing or no model yields a preference.
 */
function normalizeModelStreaming(provider) {
    const models = asRecord(provider.models);
    if (!models) {
        return undefined;
    }
    const perModel = {};
    Object.entries(models).forEach(([id, rawModel]) => {
        if (!rawModel || typeof rawModel !== 'object') {
            return;
        }
        const resolved = resolveStreamingPreference(rawModel);
        if (resolved) {
            perModel[id] = resolved;
        }
    });
    return Object.keys(perModel).length > 0 ? perModel : undefined;
}
|
|
245
|
-
function
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
282
|
+
/**
 * Collects context-token limits declared on a provider.
 *
 * Per-model limits are read from each object entry of `provider.models`.
 * The provider-wide default is the first hit among `provider` itself,
 * `provider.config`, and `provider.config.userConfigDefaults`.
 *
 * @param provider Raw provider configuration record.
 * @returns `{ modelContextTokens, defaultContextTokens }`; `modelContextTokens` is
 *          `undefined` when no model declares a limit, and `defaultContextTokens`
 *          is `undefined` when no provider-level limit is found.
 */
function normalizeModelContextTokens(provider) {
    // Guard against a missing models node: the sibling model-streaming normalizer and the
    // `configNode?.` access below both treat asRecord() results as possibly absent, and
    // Object.entries(undefined) would throw.
    const modelsNode = asRecord(provider.models) ?? {};
    const normalized = {};
    for (const [modelId, modelRaw] of Object.entries(modelsNode)) {
        if (!modelRaw || typeof modelRaw !== 'object') {
            continue;
        }
        const candidate = readContextTokens(modelRaw);
        if (candidate) {
            normalized[modelId] = candidate;
        }
    }
    const configNode = asRecord(provider.config);
    const defaultsNode = asRecord(configNode?.userConfigDefaults);
    const defaultCandidate = readContextTokens(provider) ??
        readContextTokens(configNode) ??
        readContextTokens(defaultsNode);
    return {
        modelContextTokens: Object.keys(normalized).length ? normalized : undefined,
        defaultContextTokens: defaultCandidate
    };
}
|
|
304
|
+
/**
 * Resolves a model's streaming preference by trying, in order, its
 * `streaming`, `stream`, and `supportsStreaming` fields.
 *
 * @param model Raw model configuration object.
 * @returns The first preference that coerces successfully, otherwise `undefined`.
 */
function resolveStreamingPreference(model) {
    for (const field of ['streaming', 'stream', 'supportsStreaming']) {
        const preference = coerceStreamingPreference(model[field]);
        if (preference != null) {
            return preference;
        }
    }
    return undefined;
}
|
|
309
|
+
/**
 * Converts a loosely-typed streaming setting into `'always'`, `'auto'`, or `'never'`.
 *
 * - Strings are trimmed and lower-cased; `'always'`/`'auto'`/`'never'` pass through,
 *   `'true'` maps to `'always'`, and `'false'` maps to `'never'`.
 * - Booleans map to `'always'` (true) or `'never'` (false).
 * - Objects delegate to the first of `mode`, `value`, `enabled` that is not
 *   `undefined`; later keys are not consulted even if that one fails to coerce.
 *
 * @param value Raw setting of any type.
 * @returns The normalized preference, or `undefined` when it cannot be interpreted.
 */
function coerceStreamingPreference(value) {
    switch (typeof value) {
        case 'string': {
            const token = value.trim().toLowerCase();
            if (token === 'always' || token === 'auto' || token === 'never') {
                return token;
            }
            if (token === 'true') {
                return 'always';
            }
            if (token === 'false') {
                return 'never';
            }
            return undefined;
        }
        case 'boolean':
            return value ? 'always' : 'never';
        case 'object': {
            if (!value) {
                return undefined;
            }
            for (const key of ['mode', 'value', 'enabled']) {
                if (value[key] !== undefined) {
                    return coerceStreamingPreference(value[key]);
                }
            }
            return undefined;
        }
        default:
            return undefined;
    }
}
|
|
266
|
-
function
|
|
267
|
-
|
|
268
|
-
|
|
339
|
+
/**
 * Extracts the responses-related configuration of a provider.
 *
 * Only `toolCallIdStyle` is read; it is accepted when, after trimming and
 * lower-casing, it equals `'fc'` or `'preserve'`.
 *
 * @param provider Raw provider configuration record.
 * @param node Optional pre-resolved `provider.responses` record; when null/undefined
 *             it is derived from `provider.responses`.
 * @returns `{ toolCallIdStyle }` for a recognized style, otherwise `undefined`.
 */
function normalizeResponsesConfig(provider, node) {
    const responses = node ?? asRecord(provider.responses);
    if (!responses) {
        return undefined;
    }
    const declared = responses.toolCallIdStyle;
    if (typeof declared !== 'string') {
        return undefined;
    }
    const style = declared.trim().toLowerCase();
    return style === 'fc' || style === 'preserve' ? { toolCallIdStyle: style } : undefined;
}
|
|
350
|
+
/**
 * Resolves the provider-wide streaming preference.
 *
 * Three sources are tried in order, and the first that coerces to a preference wins:
 *   1. the provider itself (`streaming`, `stream`, `supportsStreaming`, `streamingPreference`)
 *   2. `responsesNode` (`streaming`, `stream`, `supportsStreaming`)
 *   3. `provider.config.responses` (`streaming`, `stream`)
 * Within a source, only the first field that is not null/undefined is coerced.
 *
 * @param provider Raw provider configuration record.
 * @param responsesNode The provider's `responses` record, if any.
 * @returns The normalized preference, or `undefined` when none is declared.
 */
function resolveProviderStreamingPreference(provider, responsesNode) {
    const providerConfig = asRecord(provider.config);
    const nestedResponses = providerConfig ? asRecord(providerConfig.responses) : undefined;
    const topLevel = coerceStreamingPreference(provider.streaming ?? provider.stream ?? provider.supportsStreaming ?? provider.streamingPreference);
    if (topLevel != null) {
        return topLevel;
    }
    const fromResponses = coerceStreamingPreference(responsesNode?.streaming ?? responsesNode?.stream ?? responsesNode?.supportsStreaming);
    if (fromResponses != null) {
        return fromResponses;
    }
    return coerceStreamingPreference(nestedResponses?.streaming ?? nestedResponses?.stream);
}
|
|
275
357
|
function resolveCompatibilityProfile(providerId, provider) {
|
|
276
358
|
if (typeof provider.compatibilityProfile === 'string' && provider.compatibilityProfile.trim()) {
|
|
277
359
|
return provider.compatibilityProfile.trim();
|
|
@@ -298,6 +380,26 @@ function normalizeProcessMode(value) {
|
|
|
298
380
|
}
|
|
299
381
|
return 'chat';
|
|
300
382
|
}
|
|
383
|
+
/**
 * Normalizes the `contextRouting` configuration section.
 *
 * Each field accepts a camelCase key and a snake_case alias (`warnRatio`/`warn_ratio`,
 * `hardLimit`/`hard_limit`, `fallbackRoute`/`fallback_route`); the camelCase key is
 * tried first. Missing or uninterpretable fields take their value from
 * `DEFAULT_CONTEXT_ROUTING`, and the warn ratio is clamped.
 *
 * @param input Raw section value of any type.
 * @returns A fresh `{ warnRatio, hardLimit, fallbackRoute }` object; a copy of the
 *          defaults when `input` is not an object.
 */
function normalizeContextRouting(input) {
    if (!input || typeof input !== 'object') {
        return { ...DEFAULT_CONTEXT_ROUTING };
    }
    // Reads the camelCase key first and only then the snake_case alias.
    const pick = (coerce, camelKey, snakeKey) => coerce(input[camelKey]) ?? coerce(input[snakeKey]);
    const warn = pick(coerceRatio, 'warnRatio', 'warn_ratio');
    const hard = pick(coerceBoolean, 'hardLimit', 'hard_limit');
    const fallback = pick(readOptionalString, 'fallbackRoute', 'fallback_route');
    return {
        warnRatio: clampWarnRatio(warn ?? DEFAULT_CONTEXT_ROUTING.warnRatio),
        hardLimit: typeof hard === 'boolean' ? hard : DEFAULT_CONTEXT_ROUTING.hardLimit,
        fallbackRoute: fallback ?? DEFAULT_CONTEXT_ROUTING.fallbackRoute
    };
}
|
|
301
403
|
function extractProviderAuthEntries(providerId, raw) {
|
|
302
404
|
const provider = asRecord(raw);
|
|
303
405
|
const auth = asRecord(provider.auth);
|
|
@@ -428,6 +530,25 @@ function extractProviderAuthEntries(providerId, raw) {
|
|
|
428
530
|
else if (typeof apiKeyField === 'string' && apiKeyField.trim()) {
|
|
429
531
|
pushEntry(undefined, buildAuthCandidate(baseTypeSource, { value: apiKeyField.trim() }));
|
|
430
532
|
}
|
|
533
|
+
// 自动多 token 扫描:仅在未显式声明多 key、且为受支持的 OAuth 提供方时触发
|
|
534
|
+
if (!entries.length && baseType === 'oauth') {
|
|
535
|
+
const oauthProviderId = baseTypeInfo.oauthProviderId;
|
|
536
|
+
if (oauthProviderId && MULTI_TOKEN_OAUTH_PROVIDERS.has(oauthProviderId)) {
|
|
537
|
+
const tokenFiles = scanOAuthTokenFiles(oauthProviderId);
|
|
538
|
+
for (const match of tokenFiles) {
|
|
539
|
+
const alias = match.alias && match.alias !== 'default'
|
|
540
|
+
? `${match.sequence}-${match.alias}`
|
|
541
|
+
: String(match.sequence);
|
|
542
|
+
const authConfig = {
|
|
543
|
+
...defaults,
|
|
544
|
+
type: baseTypeSource ?? `${oauthProviderId}-oauth`,
|
|
545
|
+
tokenFile: match.filePath,
|
|
546
|
+
oauthProviderId
|
|
547
|
+
};
|
|
548
|
+
pushEntry(alias, authConfig);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
431
552
|
if (!entries.length) {
|
|
432
553
|
const fallbackExtras = {
|
|
433
554
|
value: readOptionalString(auth.value),
|
|
@@ -591,6 +712,7 @@ function mergeScopes(primary, fallback) {
|
|
|
591
712
|
}
|
|
592
713
|
return merged.size ? Array.from(merged) : undefined;
|
|
593
714
|
}
|
|
715
|
+
// OAuth provider ids for which token files are scanned automatically when no auth entries were declared explicitly.
const MULTI_TOKEN_OAUTH_PROVIDERS = new Set(['iflow']);
|
|
594
716
|
function interpretAuthType(value) {
|
|
595
717
|
if (typeof value !== 'string') {
|
|
596
718
|
return { type: 'apiKey' };
|
|
@@ -634,6 +756,47 @@ function normalizeLoadBalancing(input) {
|
|
|
634
756
|
? { strategy, weights: weightsEntries }
|
|
635
757
|
: { strategy };
|
|
636
758
|
}
|
|
759
|
+
/**
 * Coerces a number or numeric string into a finite number.
 *
 * Strings are trimmed and converted with `Number()`; no range check is applied.
 *
 * @param value Raw value of any type.
 * @returns The finite number, or `undefined` for blank strings, non-finite results,
 *          and any other type.
 */
function coerceRatio(value) {
    let numeric;
    if (typeof value === 'number') {
        numeric = value;
    }
    else if (typeof value === 'string') {
        const text = value.trim();
        if (text === '') {
            return undefined;
        }
        numeric = Number(text);
    }
    else {
        return undefined;
    }
    return Number.isFinite(numeric) ? numeric : undefined;
}
|
|
775
|
+
/**
 * Restricts a warn ratio to the closed range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or `DEFAULT_CONTEXT_ROUTING.warnRatio` when `value`
 *          is not a finite number.
 */
function clampWarnRatio(value) {
    if (Number.isFinite(value)) {
        if (value < 0.1) {
            return 0.1;
        }
        if (value > 0.99) {
            return 0.99;
        }
        return value;
    }
    return DEFAULT_CONTEXT_ROUTING.warnRatio;
}
|
|
782
|
+
/**
 * Coerces a boolean or boolean-like string into a boolean.
 *
 * Strings are trimmed and lower-cased; `true`/`1`/`yes`/`y` yield `true` and
 * `false`/`0`/`no`/`n` yield `false`.
 *
 * @param value Raw value of any type.
 * @returns The boolean, or `undefined` for unrecognized strings and other types.
 */
function coerceBoolean(value) {
    if (typeof value === 'boolean') {
        return value;
    }
    if (typeof value !== 'string') {
        return undefined;
    }
    switch (value.trim().toLowerCase()) {
        case 'true':
        case '1':
        case 'yes':
        case 'y':
            return true;
        case 'false':
        case '0':
        case 'no':
        case 'n':
            return false;
        default:
            return undefined;
    }
}
|
|
637
800
|
function normalizeHealth(input) {
|
|
638
801
|
if (!input || typeof input !== 'object')
|
|
639
802
|
return undefined;
|
|
@@ -648,6 +811,43 @@ function normalizeHealth(input) {
|
|
|
648
811
|
? { failureThreshold, cooldownMs, fatalCooldownMs }
|
|
649
812
|
: { failureThreshold, cooldownMs };
|
|
650
813
|
}
|
|
814
|
+
/**
 * Reads a context-token limit from a configuration record.
 *
 * The following keys are tried in order and the first one that normalizes to a
 * truthy integer wins: `maxContextTokens`, `max_context_tokens`, `maxContext`,
 * `max_context`, `contextTokens`, `context_tokens`.
 *
 * @param record Configuration record; may be null/undefined.
 * @returns The token limit, or `undefined` when `record` is falsy or no key matches.
 */
function readContextTokens(record) {
    if (!record) {
        return undefined;
    }
    const aliases = [
        'maxContextTokens',
        'max_context_tokens',
        'maxContext',
        'max_context',
        'contextTokens',
        'context_tokens'
    ];
    for (let i = 0; i < aliases.length; i += 1) {
        const tokens = normalizePositiveInteger(record[aliases[i]]);
        if (tokens) {
            return tokens;
        }
    }
    return undefined;
}
|
|
835
|
+
/**
 * Normalizes a number or numeric string into a positive integer.
 *
 * Strings are trimmed and converted with `Number()`. The value is floored, and
 * the result must be at least 1: inputs in (0, 1) floor to 0, which is not a
 * positive integer, so they yield `undefined` rather than 0.
 *
 * @param value Raw value of any type.
 * @returns The floored integer (>= 1), or `undefined` for blank strings,
 *          non-finite or non-positive values, and any other type.
 */
function normalizePositiveInteger(value) {
    let numeric;
    if (typeof value === 'number') {
        numeric = value;
    }
    else if (typeof value === 'string') {
        const trimmed = value.trim();
        if (!trimmed) {
            return undefined;
        }
        numeric = Number(trimmed);
    }
    else {
        return undefined;
    }
    if (!Number.isFinite(numeric) || numeric <= 0) {
        return undefined;
    }
    const floored = Math.floor(numeric);
    return floored >= 1 ? floored : undefined;
}
|
|
651
851
|
function normalizeHeaders(input) {
|
|
652
852
|
if (!input || typeof input !== 'object') {
|
|
653
853
|
return undefined;
|
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import { DEFAULT_ROUTE, ROUTE_PRIORITY } from './types.js';
|
|
2
2
|
const DEFAULT_LONG_CONTEXT_THRESHOLD = 180000;
|
|
3
|
-
const WEBSEARCH_HINT_KEYWORDS = [
|
|
4
|
-
'web search',
|
|
5
|
-
'search the web',
|
|
6
|
-
'search online',
|
|
7
|
-
'internet search',
|
|
8
|
-
'search internet',
|
|
9
|
-
'google it',
|
|
10
|
-
'bing it',
|
|
11
|
-
'网络搜索',
|
|
12
|
-
'上网搜索',
|
|
13
|
-
'查一下网络',
|
|
14
|
-
'搜一下网络'
|
|
15
|
-
];
|
|
16
3
|
export class RoutingClassifier {
|
|
17
4
|
config;
|
|
18
5
|
constructor(config) {
|
|
@@ -24,33 +11,20 @@ export class RoutingClassifier {
|
|
|
24
11
|
}
|
|
25
12
|
classify(features) {
|
|
26
13
|
const lastToolCategory = features.lastAssistantToolCategory;
|
|
27
|
-
const toolCategories = features.assistantToolCategories ?? [];
|
|
28
|
-
const hasSearchToolCall = toolCategories.includes('search');
|
|
29
|
-
const hasWriteToolCall = toolCategories.includes('write');
|
|
30
|
-
const hasReadToolCall = toolCategories.includes('read');
|
|
31
|
-
const hasOtherToolCall = toolCategories.includes('other');
|
|
32
|
-
const hasToolCall = toolCategories.length > 0;
|
|
33
14
|
const reachedLongContext = features.estimatedTokens >= (this.config.longContextThresholdTokens ?? DEFAULT_LONG_CONTEXT_THRESHOLD);
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
const
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
const toolContinuationReason = hasOtherToolCall
|
|
48
|
-
? formatToolContinuationReason(features.lastAssistantToolName, features.lastAssistantToolDetail)
|
|
49
|
-
: searchContinuation
|
|
50
|
-
? 'tools:last-tool-search'
|
|
51
|
-
: 'tools:tool-call-detected';
|
|
52
|
-
const thinkingContinuation = readingContinuation || (Boolean(features.hasThinkingKeyword) && !toolsContinuation && !codingContinuation);
|
|
53
|
-
const thinkingReason = readingContinuation ? 'thinking:last-tool-read' : 'thinking:keywords';
|
|
15
|
+
const latestMessageFromUser = features.latestMessageFromUser === true;
|
|
16
|
+
const codingContinuation = lastToolCategory === 'write';
|
|
17
|
+
const thinkingContinuation = lastToolCategory === 'read';
|
|
18
|
+
const searchContinuation = lastToolCategory === 'search';
|
|
19
|
+
const toolsContinuation = lastToolCategory === 'other';
|
|
20
|
+
if (latestMessageFromUser) {
|
|
21
|
+
const reasoning = 'thinking:user-input';
|
|
22
|
+
const evaluations = {
|
|
23
|
+
thinking: { triggered: true, reason: reasoning }
|
|
24
|
+
};
|
|
25
|
+
const candidates = this.ensureDefaultCandidate(['thinking']);
|
|
26
|
+
return this.buildResult('thinking', reasoning, evaluations, candidates);
|
|
27
|
+
}
|
|
54
28
|
const evaluationMap = {
|
|
55
29
|
vision: {
|
|
56
30
|
triggered: features.hasVisionTool && features.hasImageAttachment,
|
|
@@ -61,20 +35,20 @@ export class RoutingClassifier {
|
|
|
61
35
|
reason: 'longcontext:token-threshold'
|
|
62
36
|
},
|
|
63
37
|
websearch: {
|
|
64
|
-
triggered:
|
|
65
|
-
reason:
|
|
38
|
+
triggered: features.hasWebTool || searchContinuation,
|
|
39
|
+
reason: searchContinuation ? 'websearch:last-tool-search' : 'websearch:web-tools-detected'
|
|
66
40
|
},
|
|
67
41
|
coding: {
|
|
68
42
|
triggered: codingContinuation,
|
|
69
43
|
reason: 'coding:last-tool-write'
|
|
70
44
|
},
|
|
71
45
|
thinking: {
|
|
72
|
-
triggered: thinkingContinuation,
|
|
73
|
-
reason:
|
|
46
|
+
triggered: thinkingContinuation || latestMessageFromUser,
|
|
47
|
+
reason: thinkingContinuation ? 'thinking:last-tool-read' : 'thinking:user-input'
|
|
74
48
|
},
|
|
75
49
|
tools: {
|
|
76
|
-
triggered: toolsContinuation,
|
|
77
|
-
reason:
|
|
50
|
+
triggered: toolsContinuation || features.hasTools || features.hasToolCallResponses,
|
|
51
|
+
reason: toolsContinuation ? 'tools:last-tool-other' : 'tools:tool-request-detected'
|
|
78
52
|
},
|
|
79
53
|
background: {
|
|
80
54
|
triggered: containsKeywords(features.userTextSample, this.config.backgroundKeywords ?? []),
|
|
@@ -133,9 +107,3 @@ function containsKeywords(text, keywords) {
|
|
|
133
107
|
const normalized = text.toLowerCase();
|
|
134
108
|
return keywords.some((keyword) => normalized.includes(keyword));
|
|
135
109
|
}
|
|
136
|
-
function formatToolContinuationReason(toolName, toolDetail) {
|
|
137
|
-
const trimmedName = toolName?.trim() || 'tool';
|
|
138
|
-
const trimmedDetail = toolDetail?.trim();
|
|
139
|
-
const detailText = trimmedDetail ? `${trimmedName}: ${trimmedDetail}` : trimmedName;
|
|
140
|
-
return `tools:last-tool-other(${detailText})`;
|
|
141
|
-
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { type ProviderProfile, type VirtualRouterContextRoutingConfig } from './types.js';
|
|
2
|
+
/** Context usage computed for a single provider key. */
export interface ContextUsageSnapshot {
    /** Estimated request tokens divided by `limit`. */
    ratio: number;
    /** Context-token limit the ratio was computed against. */
    limit: number;
}
|
|
6
|
+
/** Partition of a provider pool by how much of each provider's context window a request would use. */
export interface ContextAdvisorResult {
    /** Provider keys whose usage ratio is below the warn ratio (or all keys when the estimate is zero). */
    safe: string[];
    /** Provider keys at or above the warn ratio but below a ratio of 1. */
    risky: string[];
    /** Provider keys whose usage ratio is 1 or more. */
    overflow: string[];
    /** Usage snapshot per provider key. */
    usage: Record<string, ContextUsageSnapshot>;
    /** Token estimate used for the classification (0 when the input estimate was not a positive finite number). */
    estimatedTokens: number;
    /** True when `safe` and `risky` are empty and `overflow` is not. */
    allOverflow: boolean;
}
|
|
14
|
+
/** Classifies candidate providers by estimated context-window usage. */
export declare class ContextAdvisor {
    private warnRatio;
    private hardLimit;
    /** Applies the warn ratio and hard-limit flag; a missing or invalid config resets them to defaults. */
    configure(config?: VirtualRouterContextRoutingConfig | null): void;
    /**
     * Splits `pool` into safe / risky / overflow buckets for the given token estimate.
     * `resolveProfile` supplies each provider's profile, from which the context limit is read.
     */
    classify(pool: string[], estimatedTokens: number, resolveProfile: (key: string) => ProviderProfile): ContextAdvisorResult;
    /** True when no provider is safe and at least one is risky, or when every provider overflows. */
    prefersFallback(result: ContextAdvisorResult): boolean;
    /** True unless the hard limit is enabled. */
    allowsOverflow(): boolean;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS } from './types.js';
|
|
2
|
+
// Warn ratio used until configure() supplies a finite numeric value, and as the clamp fallback for non-finite input.
const DEFAULT_WARN_RATIO = 0.9;
|
|
3
|
+
/**
 * Classifies candidate providers by how much of their context window an
 * estimated request would consume.
 */
export class ContextAdvisor {
    warnRatio = DEFAULT_WARN_RATIO;
    hardLimit = false;
    /**
     * Applies context-routing settings.
     *
     * `warnRatio` is clamped when it is a finite number and reset to the default
     * otherwise; `hardLimit` is taken as the truthiness of `config.hardLimit`.
     *
     * @param config Context-routing configuration, or null/undefined to reset.
     */
    configure(config) {
        const hasUsableRatio = Boolean(config) && typeof config.warnRatio === 'number' && Number.isFinite(config.warnRatio);
        this.warnRatio = hasUsableRatio ? clampWarnRatio(config.warnRatio) : DEFAULT_WARN_RATIO;
        this.hardLimit = Boolean(config?.hardLimit);
    }
    /**
     * Buckets every provider key in `pool` as safe, risky, or overflow.
     *
     * A provider's limit is `profile.maxContextTokens` when that is a positive finite
     * number; otherwise (including when `resolveProfile` throws) the default model
     * limit is used. With a zero token estimate every provider is safe.
     *
     * @param pool Provider keys to classify.
     * @param estimatedTokens Estimated request size; non-positive or non-finite values count as 0.
     * @param resolveProfile Lookup returning the profile for a provider key.
     * @returns Buckets, per-provider usage, the normalized estimate, and the all-overflow flag.
     */
    classify(pool, estimatedTokens, resolveProfile) {
        const tokens = typeof estimatedTokens === 'number' && Number.isFinite(estimatedTokens) && estimatedTokens > 0
            ? estimatedTokens
            : 0;
        // A lookup failure is deliberately treated as "limit unknown" and falls back to the default.
        const limitFor = (key) => {
            try {
                const declared = resolveProfile(key)?.maxContextTokens;
                if (declared && Number.isFinite(declared) && declared > 0) {
                    return declared;
                }
            }
            catch {
                // fall through to the default limit
            }
            return DEFAULT_MODEL_CONTEXT_TOKENS;
        };
        const safe = [];
        const risky = [];
        const overflow = [];
        const usage = {};
        for (const providerKey of pool) {
            const limit = limitFor(providerKey);
            const ratio = limit > 0 ? tokens / limit : 0;
            usage[providerKey] = { ratio, limit };
            let bucket;
            if (tokens === 0 || ratio < this.warnRatio) {
                bucket = safe;
            }
            else if (ratio < 1) {
                bucket = risky;
            }
            else {
                bucket = overflow;
            }
            bucket.push(providerKey);
        }
        const allOverflow = safe.length === 0 && risky.length === 0 && overflow.length > 0;
        return {
            safe,
            risky,
            overflow,
            usage,
            estimatedTokens: tokens,
            allOverflow
        };
    }
    /**
     * Tells whether a classification result favors the fallback route.
     *
     * @param result Result produced by `classify`.
     * @returns `false` when any provider is safe; `true` when any is risky;
     *          otherwise the result's `allOverflow` flag.
     */
    prefersFallback(result) {
        if (result.safe.length > 0) {
            return false;
        }
        return result.risky.length > 0 ? true : result.allOverflow;
    }
    /**
     * @returns `true` unless the hard limit is enabled.
     */
    allowsOverflow() {
        return !this.hardLimit;
    }
}
|
|
71
|
+
/**
 * Restricts a warn ratio to the closed range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or `DEFAULT_WARN_RATIO` when `value` is not a finite number.
 */
function clampWarnRatio(value) {
    if (Number.isFinite(value)) {
        const cappedAbove = Math.min(0.99, value);
        return Math.max(0.1, cappedAbove);
    }
    return DEFAULT_WARN_RATIO;
}
|
|
@@ -6,12 +6,11 @@ export declare class VirtualRouterEngine {
|
|
|
6
6
|
private readonly healthManager;
|
|
7
7
|
private loadBalancer;
|
|
8
8
|
private classifier;
|
|
9
|
+
private readonly contextAdvisor;
|
|
10
|
+
private contextRouting;
|
|
9
11
|
private routeStats;
|
|
10
12
|
private readonly debug;
|
|
11
13
|
private healthConfig;
|
|
12
|
-
private stickyPlans;
|
|
13
|
-
private selectionHistory;
|
|
14
|
-
private providerErrorStreaks;
|
|
15
14
|
initialize(config: VirtualRouterConfig): void;
|
|
16
15
|
route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
|
|
17
16
|
target: TargetMetadata;
|
|
@@ -28,39 +27,24 @@ export declare class VirtualRouterEngine {
|
|
|
28
27
|
}>;
|
|
29
28
|
health: import("./types.js").ProviderHealthState[];
|
|
30
29
|
};
|
|
31
|
-
private consumeSticky;
|
|
32
|
-
private selectStickyTarget;
|
|
33
|
-
private buildStickyClassification;
|
|
34
|
-
private recordSelectionSnapshot;
|
|
35
|
-
private buildStickyPlan;
|
|
36
|
-
private storeStickyPlan;
|
|
37
|
-
private dropStickyForRequest;
|
|
38
|
-
private resolveStickyDescriptor;
|
|
39
|
-
private maybeForceStickyFromHistory;
|
|
40
|
-
private shouldForceApplyPatchSticky;
|
|
41
|
-
private extractPreviousRequestId;
|
|
42
|
-
private pruneStickyPlans;
|
|
43
|
-
private buildErrorSignature;
|
|
44
|
-
private bumpProviderErrorStreak;
|
|
45
|
-
private resetProviderErrorStreak;
|
|
46
30
|
private validateConfig;
|
|
47
31
|
private selectProvider;
|
|
48
32
|
private incrementRouteStat;
|
|
49
33
|
private providerHealthConfig;
|
|
34
|
+
private initializeRouteQueue;
|
|
35
|
+
private resolveFallbackRoute;
|
|
36
|
+
private maybeDeferToFallback;
|
|
37
|
+
private buildContextCandidatePools;
|
|
38
|
+
private describeAttempt;
|
|
50
39
|
private resolveStickyKey;
|
|
51
40
|
private mapProviderError;
|
|
52
41
|
private deriveReason;
|
|
53
42
|
private buildRouteCandidates;
|
|
54
|
-
private ensureConfiguredClassification;
|
|
55
|
-
private normalizeCandidateList;
|
|
56
|
-
private normalizeRouteName;
|
|
57
|
-
private isRouteConfigured;
|
|
58
43
|
private sortByPriority;
|
|
59
44
|
private routeWeight;
|
|
60
45
|
private buildHitReason;
|
|
61
|
-
private
|
|
62
|
-
private
|
|
63
|
-
private
|
|
64
|
-
private
|
|
65
|
-
private shouldColorVirtualRouterLogs;
|
|
46
|
+
private decorateWithDetail;
|
|
47
|
+
private formatVirtualRouterHit;
|
|
48
|
+
private resolveRouteColor;
|
|
49
|
+
private describeContextUsage;
|
|
66
50
|
}
|