@jsonstudio/llms 0.6.147 → 0.6.187
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +15 -1
- package/dist/conversion/compat/actions/auto-thinking.d.ts +6 -0
- package/dist/conversion/compat/actions/auto-thinking.js +25 -0
- package/dist/conversion/compat/actions/field-mapping.d.ts +14 -0
- package/dist/conversion/compat/actions/field-mapping.js +155 -0
- package/dist/conversion/compat/actions/qwen-transform.d.ts +3 -0
- package/dist/conversion/compat/actions/qwen-transform.js +209 -0
- package/dist/conversion/compat/actions/request-rules.d.ts +24 -0
- package/dist/conversion/compat/actions/request-rules.js +63 -0
- package/dist/conversion/compat/actions/response-blacklist.d.ts +14 -0
- package/dist/conversion/compat/actions/response-blacklist.js +85 -0
- package/dist/conversion/compat/actions/response-normalize.d.ts +5 -0
- package/dist/conversion/compat/actions/response-normalize.js +121 -0
- package/dist/conversion/compat/actions/response-validate.d.ts +5 -0
- package/dist/conversion/compat/actions/response-validate.js +76 -0
- package/dist/conversion/compat/actions/snapshot.d.ts +8 -0
- package/dist/conversion/compat/actions/snapshot.js +21 -0
- package/dist/conversion/compat/actions/tool-schema.d.ts +6 -0
- package/dist/conversion/compat/actions/tool-schema.js +91 -0
- package/dist/conversion/compat/actions/universal-shape-filter.d.ts +74 -0
- package/dist/conversion/compat/actions/universal-shape-filter.js +382 -0
- package/dist/conversion/compat/profiles/chat-glm.json +187 -13
- package/dist/conversion/compat/profiles/chat-iflow.json +177 -9
- package/dist/conversion/compat/profiles/chat-lmstudio.json +10 -2
- package/dist/conversion/compat/profiles/chat-qwen.json +14 -10
- package/dist/conversion/hub/pipeline/compat/compat-engine.d.ts +7 -2
- package/dist/conversion/hub/pipeline/compat/compat-engine.js +409 -5
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +47 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +35 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/target-utils.js +3 -0
- package/dist/conversion/hub/response/response-runtime.js +23 -15
- package/dist/conversion/responses/responses-host-policy.d.ts +6 -0
- package/dist/conversion/responses/responses-host-policy.js +14 -0
- package/dist/conversion/responses/responses-openai-bridge.js +51 -2
- package/dist/conversion/shared/anthropic-message-utils.js +6 -0
- package/dist/conversion/shared/bridge-actions.js +1 -1
- package/dist/conversion/shared/bridge-policies.js +0 -1
- package/dist/conversion/shared/responses-conversation-store.js +3 -26
- package/dist/conversion/shared/responses-reasoning-registry.d.ts +4 -0
- package/dist/conversion/shared/responses-reasoning-registry.js +62 -1
- package/dist/conversion/shared/responses-response-utils.js +23 -1
- package/dist/conversion/shared/tool-canonicalizer.d.ts +2 -0
- package/dist/conversion/shared/tool-filter-pipeline.js +11 -0
- package/dist/router/virtual-router/bootstrap.js +218 -39
- package/dist/router/virtual-router/classifier.js +19 -51
- package/dist/router/virtual-router/context-advisor.d.ts +21 -0
- package/dist/router/virtual-router/context-advisor.js +76 -0
- package/dist/router/virtual-router/engine.d.ts +11 -27
- package/dist/router/virtual-router/engine.js +191 -396
- package/dist/router/virtual-router/features.js +24 -607
- package/dist/router/virtual-router/health-manager.js +2 -7
- package/dist/router/virtual-router/message-utils.d.ts +7 -0
- package/dist/router/virtual-router/message-utils.js +66 -0
- package/dist/router/virtual-router/provider-registry.js +6 -2
- package/dist/router/virtual-router/token-estimator.d.ts +2 -0
- package/dist/router/virtual-router/token-estimator.js +16 -0
- package/dist/router/virtual-router/tool-signals.d.ts +13 -0
- package/dist/router/virtual-router/tool-signals.js +403 -0
- package/dist/router/virtual-router/types.d.ts +21 -7
- package/dist/router/virtual-router/types.js +1 -0
- package/package.json +2 -2
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
1
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
2
2
|
const DEFAULT_CLASSIFIER = {
|
|
3
3
|
longContextThresholdTokens: 180000,
|
|
4
4
|
thinkingKeywords: ['think step', 'analysis', 'reasoning', '仔细分析', '深度思考'],
|
|
@@ -8,6 +8,11 @@ const DEFAULT_CLASSIFIER = {
|
|
|
8
8
|
};
|
|
9
9
|
const DEFAULT_LOAD_BALANCING = { strategy: 'round-robin' };
|
|
10
10
|
const DEFAULT_HEALTH = { failureThreshold: 3, cooldownMs: 30_000, fatalCooldownMs: 300_000 };
|
|
11
|
+
const DEFAULT_CONTEXT_ROUTING = {
|
|
12
|
+
warnRatio: 0.9,
|
|
13
|
+
hardLimit: false,
|
|
14
|
+
fallbackRoute: 'longcontext'
|
|
15
|
+
};
|
|
11
16
|
/**
|
|
12
17
|
* 将用户提供的 Virtual Router 配置(或包含 virtualrouter 字段的整体配置)
|
|
13
18
|
* 规范化为 VirtualRouterConfig,供 HubPipeline / VirtualRouterEngine 直接使用。
|
|
@@ -31,12 +36,14 @@ export function bootstrapVirtualRouterConfig(input) {
|
|
|
31
36
|
const classifier = normalizeClassifier(section.classifier);
|
|
32
37
|
const loadBalancing = section.loadBalancing ?? DEFAULT_LOAD_BALANCING;
|
|
33
38
|
const health = section.health ?? DEFAULT_HEALTH;
|
|
39
|
+
const contextRouting = section.contextRouting ?? DEFAULT_CONTEXT_ROUTING;
|
|
34
40
|
const config = {
|
|
35
41
|
routing,
|
|
36
42
|
providers: providerProfiles,
|
|
37
43
|
classifier,
|
|
38
44
|
loadBalancing,
|
|
39
|
-
health
|
|
45
|
+
health,
|
|
46
|
+
contextRouting
|
|
40
47
|
};
|
|
41
48
|
return {
|
|
42
49
|
config,
|
|
@@ -56,7 +63,8 @@ function extractVirtualRouterSection(input) {
|
|
|
56
63
|
const classifier = (section.classifier ?? root.classifier);
|
|
57
64
|
const loadBalancing = normalizeLoadBalancing(section.loadBalancing ?? root.loadBalancing);
|
|
58
65
|
const health = normalizeHealth(section.health ?? root.health);
|
|
59
|
-
|
|
66
|
+
const contextRouting = normalizeContextRouting(section.contextRouting ?? root.contextRouting);
|
|
67
|
+
return { providers, routing, classifier, loadBalancing, health, contextRouting };
|
|
60
68
|
}
|
|
61
69
|
function buildProviderRuntimeEntries(providers) {
|
|
62
70
|
const runtimeEntries = {};
|
|
@@ -100,7 +108,11 @@ function buildProviderRuntimeEntries(providers) {
|
|
|
100
108
|
outboundProfile: normalizedProvider.outboundProfile,
|
|
101
109
|
compatibilityProfile: normalizedProvider.compatibilityProfile,
|
|
102
110
|
processMode: normalizedProvider.processMode,
|
|
103
|
-
responsesConfig: normalizedProvider.responsesConfig
|
|
111
|
+
responsesConfig: normalizedProvider.responsesConfig,
|
|
112
|
+
streaming: normalizedProvider.streaming,
|
|
113
|
+
modelStreaming: normalizedProvider.modelStreaming,
|
|
114
|
+
modelContextTokens: normalizedProvider.modelContextTokens,
|
|
115
|
+
defaultContextTokens: normalizedProvider.defaultContextTokens
|
|
104
116
|
};
|
|
105
117
|
}
|
|
106
118
|
}
|
|
@@ -146,6 +158,10 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
|
|
|
146
158
|
if (!runtime) {
|
|
147
159
|
throw new VirtualRouterError(`Routing target ${targetKey} references unknown runtime key ${runtimeKey}`, VirtualRouterErrorCode.CONFIG_ERROR);
|
|
148
160
|
}
|
|
161
|
+
const streamingPref = runtime.modelStreaming?.[parsed.modelId] !== undefined
|
|
162
|
+
? runtime.modelStreaming?.[parsed.modelId]
|
|
163
|
+
: runtime.streaming;
|
|
164
|
+
const contextTokens = resolveContextTokens(runtime, parsed.modelId);
|
|
149
165
|
profiles[targetKey] = {
|
|
150
166
|
providerKey: targetKey,
|
|
151
167
|
providerType: runtime.providerType,
|
|
@@ -156,15 +172,30 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
|
|
|
156
172
|
runtimeKey,
|
|
157
173
|
modelId: parsed.modelId,
|
|
158
174
|
processMode: runtime.processMode || 'chat',
|
|
159
|
-
responsesConfig: runtime.responsesConfig
|
|
175
|
+
responsesConfig: runtime.responsesConfig,
|
|
176
|
+
streaming: streamingPref,
|
|
177
|
+
maxContextTokens: contextTokens
|
|
160
178
|
};
|
|
161
179
|
targetRuntime[targetKey] = {
|
|
162
180
|
...runtime,
|
|
163
|
-
modelId: parsed.modelId
|
|
181
|
+
modelId: parsed.modelId,
|
|
182
|
+
streaming: streamingPref,
|
|
183
|
+
maxContextTokens: contextTokens
|
|
164
184
|
};
|
|
165
185
|
}
|
|
166
186
|
return { profiles, targetRuntime };
|
|
167
187
|
}
|
|
188
|
+
/**
 * Resolve the context-window size (in tokens) for one model of a runtime entry.
 *
 * Candidates are tried in order and the first usable one wins:
 *   1. the per-model override in `runtime.modelContextTokens[modelId]`
 *   2. `runtime.defaultContextTokens`
 *   3. `runtime.maxContextTokens`
 * A candidate is usable when it is a finite number whose floor is at least 1.
 * When none is usable, `DEFAULT_MODEL_CONTEXT_TOKENS` is returned.
 *
 * @param runtime Provider runtime entry carrying the optional token settings.
 * @param modelId Model identifier used to look up the per-model override.
 * @returns A whole number of tokens, always >= 1 for configured values.
 */
function resolveContextTokens(runtime, modelId) {
    const candidates = [
        runtime.modelContextTokens?.[modelId],
        runtime.defaultContextTokens,
        runtime.maxContextTokens
    ];
    for (const candidate of candidates) {
        if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
            continue;
        }
        // Validate AFTER flooring: 0.5 is "> 0" but floors to 0, which is not a usable limit.
        const whole = Math.floor(candidate);
        if (whole >= 1) {
            return whole;
        }
    }
    return DEFAULT_MODEL_CONTEXT_TOKENS;
}
|
|
168
199
|
function normalizeRouting(source) {
|
|
169
200
|
const routing = {};
|
|
170
201
|
for (const [routeName, entries] of Object.entries(source)) {
|
|
@@ -209,8 +240,12 @@ function normalizeProvider(providerId, raw) {
|
|
|
209
240
|
: '';
|
|
210
241
|
const headers = normalizeHeaders(provider.headers);
|
|
211
242
|
const compatibilityProfile = resolveCompatibilityProfile(providerId, provider);
|
|
212
|
-
const
|
|
243
|
+
const responsesNode = asRecord(provider.responses);
|
|
244
|
+
const responsesConfig = normalizeResponsesConfig(provider, responsesNode);
|
|
213
245
|
const processMode = normalizeProcessMode(provider.process);
|
|
246
|
+
const streaming = resolveProviderStreamingPreference(provider, responsesNode);
|
|
247
|
+
const modelStreaming = normalizeModelStreaming(provider);
|
|
248
|
+
const { modelContextTokens, defaultContextTokens } = normalizeModelContextTokens(provider);
|
|
214
249
|
return {
|
|
215
250
|
providerId,
|
|
216
251
|
providerType,
|
|
@@ -219,59 +254,105 @@ function normalizeProvider(providerId, raw) {
|
|
|
219
254
|
outboundProfile: mapOutboundProfile(providerType),
|
|
220
255
|
compatibilityProfile,
|
|
221
256
|
processMode,
|
|
222
|
-
responsesConfig
|
|
257
|
+
responsesConfig,
|
|
258
|
+
streaming,
|
|
259
|
+
modelStreaming,
|
|
260
|
+
modelContextTokens,
|
|
261
|
+
defaultContextTokens
|
|
223
262
|
};
|
|
224
263
|
}
|
|
225
|
-
function
|
|
226
|
-
const
|
|
227
|
-
if (!
|
|
264
|
+
/**
 * Collect per-model streaming preferences from `provider.models`.
 *
 * Entries whose value is not an object, or for which
 * `resolveStreamingPreference` yields nothing, are left out.
 *
 * @param provider Raw provider configuration record.
 * @returns A map of model id to preference, or `undefined` when the provider
 *          has no models node or no model declares a preference.
 */
function normalizeModelStreaming(provider) {
    const models = asRecord(provider.models);
    if (!models) {
        return undefined;
    }
    const byModel = {};
    Object.entries(models).forEach(([modelId, modelRaw]) => {
        const isObjectEntry = Boolean(modelRaw) && typeof modelRaw === 'object';
        if (!isObjectEntry) {
            return;
        }
        const preference = resolveStreamingPreference(modelRaw);
        if (preference) {
            byModel[modelId] = preference;
        }
    });
    if (Object.keys(byModel).length === 0) {
        return undefined;
    }
    return byModel;
}
|
|
245
|
-
function
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
281
|
+
/**
 * Extract context-window settings from a provider configuration.
 *
 * Per-model limits are read from each object entry of `provider.models`.
 * The provider-wide default is the first value found on the provider itself,
 * then `provider.config`, then `provider.config.userConfigDefaults`.
 *
 * @param provider Raw provider configuration record.
 * @returns `modelContextTokens` (map of model id to token limit, or `undefined`
 *          when no model declares one) and `defaultContextTokens` (or `undefined`).
 */
function normalizeModelContextTokens(provider) {
    const modelsNode = asRecord(provider.models);
    const normalized = {};
    // normalizeModelStreaming treats the result of asRecord(provider.models) as possibly
    // falsy; Object.entries(undefined) would throw, so fall back to an empty record here.
    for (const [modelId, modelRaw] of Object.entries(modelsNode ?? {})) {
        if (!modelRaw || typeof modelRaw !== 'object') {
            continue;
        }
        const candidate = readContextTokens(modelRaw);
        if (candidate) {
            normalized[modelId] = candidate;
        }
    }
    const configNode = asRecord(provider.config);
    const defaultsNode = asRecord(configNode?.userConfigDefaults);
    const defaultCandidate = readContextTokens(provider) ??
        readContextTokens(configNode) ??
        readContextTokens(defaultsNode);
    return {
        modelContextTokens: Object.keys(normalized).length ? normalized : undefined,
        defaultContextTokens: defaultCandidate
    };
}
|
|
303
|
+
/**
 * Read a model's streaming preference from the first field that yields one.
 *
 * Fields are consulted in order: `streaming`, `stream`, `supportsStreaming`.
 *
 * @param model Raw model configuration object.
 * @returns The coerced preference, or `undefined` when no field provides one.
 */
function resolveStreamingPreference(model) {
    const fields = ['streaming', 'stream', 'supportsStreaming'];
    let preference;
    for (const field of fields) {
        preference = coerceStreamingPreference(model[field]);
        if (preference !== undefined && preference !== null) {
            break;
        }
    }
    return preference;
}
|
|
308
|
+
/**
 * Convert a loosely-typed streaming setting into 'always' | 'auto' | 'never'.
 *
 * - booleans: `true` -> 'always', `false` -> 'never'
 * - strings (trimmed, case-insensitive): 'always' / 'auto' / 'never' pass through,
 *   'true' -> 'always', 'false' -> 'never'
 * - objects: the first of `mode`, `value`, `enabled` that is not `undefined`
 *   is coerced recursively (later keys are not consulted even if that fails)
 *
 * @param value Raw setting of any type.
 * @returns The normalized preference, or `undefined` when it cannot be coerced.
 */
function coerceStreamingPreference(value) {
    switch (typeof value) {
        case 'boolean':
            return value ? 'always' : 'never';
        case 'string': {
            const token = value.trim().toLowerCase();
            switch (token) {
                case 'always':
                case 'auto':
                case 'never':
                    return token;
                case 'true':
                    return 'always';
                case 'false':
                    return 'never';
                default:
                    return undefined;
            }
        }
        case 'object': {
            if (value === null) {
                return undefined;
            }
            for (const key of ['mode', 'value', 'enabled']) {
                const nested = value[key];
                if (nested !== undefined) {
                    return coerceStreamingPreference(nested);
                }
            }
            return undefined;
        }
        default:
            return undefined;
    }
}
|
|
266
|
-
function
|
|
267
|
-
|
|
268
|
-
|
|
338
|
+
/**
 * Build the normalized responses configuration for a provider.
 *
 * Only `toolCallIdStyle` is recognised: after trimming and lower-casing it must
 * be 'fc' or 'preserve'.
 *
 * @param provider Raw provider configuration; `provider.responses` is read
 *                 only when `node` is null/undefined.
 * @param node     Optional, already-extracted responses node.
 * @returns `{ toolCallIdStyle }` for a recognised style, otherwise `undefined`.
 */
function normalizeResponsesConfig(provider, node) {
    const responses = node ?? asRecord(provider.responses);
    const declared = responses ? responses.toolCallIdStyle : undefined;
    if (typeof declared !== 'string') {
        return undefined;
    }
    const style = declared.trim().toLowerCase();
    const recognised = style === 'fc' || style === 'preserve';
    return recognised ? { toolCallIdStyle: style } : undefined;
}
|
|
349
|
+
/**
 * Determine the provider-wide streaming preference.
 *
 * Three sources are coerced in turn and the first that yields a preference wins:
 *   1. the provider itself (`streaming`, `stream`, `supportsStreaming`, `streamingPreference`)
 *   2. the responses node (`streaming`, `stream`, `supportsStreaming`)
 *   3. `provider.config.responses` (`streaming`, `stream`)
 * Within a source, only the first non-nullish field is coerced.
 *
 * @param provider      Raw provider configuration record.
 * @param responsesNode Optional responses node of the provider.
 * @returns The coerced preference, or `undefined` when no source provides one.
 */
function resolveProviderStreamingPreference(provider, responsesNode) {
    const config = asRecord(provider.config);
    const nestedResponses = config ? asRecord(config.responses) : undefined;
    const fromProvider = coerceStreamingPreference(provider.streaming ?? provider.stream ?? provider.supportsStreaming ?? provider.streamingPreference);
    if (fromProvider !== undefined && fromProvider !== null) {
        return fromProvider;
    }
    const fromResponses = coerceStreamingPreference(responsesNode?.streaming ?? responsesNode?.stream ?? responsesNode?.supportsStreaming);
    if (fromResponses !== undefined && fromResponses !== null) {
        return fromResponses;
    }
    return coerceStreamingPreference(nestedResponses?.streaming ?? nestedResponses?.stream);
}
|
|
275
356
|
function resolveCompatibilityProfile(providerId, provider) {
|
|
276
357
|
if (typeof provider.compatibilityProfile === 'string' && provider.compatibilityProfile.trim()) {
|
|
277
358
|
return provider.compatibilityProfile.trim();
|
|
@@ -298,6 +379,26 @@ function normalizeProcessMode(value) {
|
|
|
298
379
|
}
|
|
299
380
|
return 'chat';
|
|
300
381
|
}
|
|
382
|
+
/**
 * Normalize the user-supplied context-routing section.
 *
 * Each setting accepts a camelCase key with a snake_case fallback
 * (`warnRatio`/`warn_ratio`, `hardLimit`/`hard_limit`,
 * `fallbackRoute`/`fallback_route`). Missing or unparseable settings take the
 * value from `DEFAULT_CONTEXT_ROUTING`; the warn ratio is passed through
 * `clampWarnRatio`.
 *
 * @param input Raw context-routing value of any type.
 * @returns A `{ warnRatio, hardLimit, fallbackRoute }` object; a copy of the
 *          defaults when `input` is not an object.
 */
function normalizeContextRouting(input) {
    const isObject = Boolean(input) && typeof input === 'object';
    if (!isObject) {
        return { ...DEFAULT_CONTEXT_ROUTING };
    }
    const ratioSetting = coerceRatio(input.warnRatio) ?? coerceRatio(input.warn_ratio);
    const limitSetting = coerceBoolean(input.hardLimit) ?? coerceBoolean(input.hard_limit);
    const routeSetting = readOptionalString(input.fallbackRoute) ?? readOptionalString(input.fallback_route);
    return {
        warnRatio: clampWarnRatio(ratioSetting ?? DEFAULT_CONTEXT_ROUTING.warnRatio),
        hardLimit: typeof limitSetting === 'boolean' ? limitSetting : DEFAULT_CONTEXT_ROUTING.hardLimit,
        fallbackRoute: routeSetting ?? DEFAULT_CONTEXT_ROUTING.fallbackRoute
    };
}
|
|
301
402
|
function extractProviderAuthEntries(providerId, raw) {
|
|
302
403
|
const provider = asRecord(raw);
|
|
303
404
|
const auth = asRecord(provider.auth);
|
|
@@ -634,6 +735,47 @@ function normalizeLoadBalancing(input) {
|
|
|
634
735
|
? { strategy, weights: weightsEntries }
|
|
635
736
|
: { strategy };
|
|
636
737
|
}
|
|
738
|
+
/**
 * Parse a ratio from a number or a numeric string.
 *
 * @param value Raw value of any type.
 * @returns The finite number, or `undefined` for non-finite numbers, blank or
 *          non-numeric strings, and every other type. No range check is applied.
 */
function coerceRatio(value) {
    let numeric;
    if (typeof value === 'number') {
        numeric = value;
    }
    else if (typeof value === 'string') {
        const text = value.trim();
        if (text === '') {
            return undefined;
        }
        numeric = Number(text);
    }
    else {
        return undefined;
    }
    return Number.isFinite(numeric) ? numeric : undefined;
}
|
|
754
|
+
/**
 * Clamp a warn ratio into the inclusive range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or `DEFAULT_CONTEXT_ROUTING.warnRatio` when
 *          `value` is not a finite number.
 */
function clampWarnRatio(value) {
    if (Number.isFinite(value)) {
        // A finite input always clamps to a finite result, so no second check is needed.
        const raised = Math.max(value, 0.1);
        return Math.min(raised, 0.99);
    }
    return DEFAULT_CONTEXT_ROUTING.warnRatio;
}
|
|
761
|
+
/**
 * Parse a boolean from a boolean or a common textual spelling.
 *
 * Strings are trimmed and compared case-insensitively:
 * 'true' / '1' / 'yes' / 'y' -> true, 'false' / '0' / 'no' / 'n' -> false.
 *
 * @param value Raw value of any type.
 * @returns The boolean, or `undefined` when the value is not recognised.
 */
function coerceBoolean(value) {
    if (typeof value === 'boolean') {
        return value;
    }
    if (typeof value !== 'string') {
        return undefined;
    }
    switch (value.trim().toLowerCase()) {
        case 'true':
        case '1':
        case 'yes':
        case 'y':
            return true;
        case 'false':
        case '0':
        case 'no':
        case 'n':
            return false;
        default:
            return undefined;
    }
}
|
|
637
779
|
function normalizeHealth(input) {
|
|
638
780
|
if (!input || typeof input !== 'object')
|
|
639
781
|
return undefined;
|
|
@@ -648,6 +790,43 @@ function normalizeHealth(input) {
|
|
|
648
790
|
? { failureThreshold, cooldownMs, fatalCooldownMs }
|
|
649
791
|
: { failureThreshold, cooldownMs };
|
|
650
792
|
}
|
|
793
|
+
/**
 * Read a context-token limit from a record, accepting several key spellings.
 *
 * Keys are tried in order: `maxContextTokens`, `max_context_tokens`,
 * `maxContext`, `max_context`, `contextTokens`, `context_tokens`. The first
 * whose value `normalizePositiveInteger` turns into a truthy number wins.
 *
 * @param record Object to inspect; a falsy value yields `undefined`.
 * @returns The parsed token limit, or `undefined` when no key holds one.
 */
function readContextTokens(record) {
    if (!record) {
        return undefined;
    }
    const aliases = [
        'maxContextTokens',
        'max_context_tokens',
        'maxContext',
        'max_context',
        'contextTokens',
        'context_tokens'
    ];
    for (let index = 0; index < aliases.length; index += 1) {
        const tokens = normalizePositiveInteger(record[aliases[index]]);
        if (tokens) {
            return tokens;
        }
    }
    return undefined;
}
|
|
814
|
+
/**
 * Parse a positive whole number from a number or a numeric string.
 *
 * The value is floored, and the floored result must be at least 1. Inputs in
 * (0, 1) such as 0.5 or "0.9" are rejected rather than returned as 0.
 *
 * @param value Raw value of any type.
 * @returns The floored integer (>= 1), or `undefined` for non-finite numbers,
 *          blank or non-numeric strings, values below 1, and every other type.
 */
function normalizePositiveInteger(value) {
    let numeric;
    if (typeof value === 'number') {
        numeric = value;
    }
    else if (typeof value === 'string') {
        const trimmed = value.trim();
        if (!trimmed) {
            return undefined;
        }
        numeric = Number(trimmed);
    }
    else {
        return undefined;
    }
    if (!Number.isFinite(numeric)) {
        return undefined;
    }
    // Check positivity after flooring so the result really is a positive integer.
    const whole = Math.floor(numeric);
    return whole >= 1 ? whole : undefined;
}
|
|
651
830
|
function normalizeHeaders(input) {
|
|
652
831
|
if (!input || typeof input !== 'object') {
|
|
653
832
|
return undefined;
|
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import { DEFAULT_ROUTE, ROUTE_PRIORITY } from './types.js';
|
|
2
2
|
const DEFAULT_LONG_CONTEXT_THRESHOLD = 180000;
|
|
3
|
-
const WEBSEARCH_HINT_KEYWORDS = [
|
|
4
|
-
'web search',
|
|
5
|
-
'search the web',
|
|
6
|
-
'search online',
|
|
7
|
-
'internet search',
|
|
8
|
-
'search internet',
|
|
9
|
-
'google it',
|
|
10
|
-
'bing it',
|
|
11
|
-
'网络搜索',
|
|
12
|
-
'上网搜索',
|
|
13
|
-
'查一下网络',
|
|
14
|
-
'搜一下网络'
|
|
15
|
-
];
|
|
16
3
|
export class RoutingClassifier {
|
|
17
4
|
config;
|
|
18
5
|
constructor(config) {
|
|
@@ -24,33 +11,20 @@ export class RoutingClassifier {
|
|
|
24
11
|
}
|
|
25
12
|
classify(features) {
|
|
26
13
|
const lastToolCategory = features.lastAssistantToolCategory;
|
|
27
|
-
const toolCategories = features.assistantToolCategories ?? [];
|
|
28
|
-
const hasSearchToolCall = toolCategories.includes('search');
|
|
29
|
-
const hasWriteToolCall = toolCategories.includes('write');
|
|
30
|
-
const hasReadToolCall = toolCategories.includes('read');
|
|
31
|
-
const hasOtherToolCall = toolCategories.includes('other');
|
|
32
|
-
const hasToolCall = toolCategories.length > 0;
|
|
33
14
|
const reachedLongContext = features.estimatedTokens >= (this.config.longContextThresholdTokens ?? DEFAULT_LONG_CONTEXT_THRESHOLD);
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
const
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
const toolContinuationReason = hasOtherToolCall
|
|
48
|
-
? formatToolContinuationReason(features.lastAssistantToolName, features.lastAssistantToolDetail)
|
|
49
|
-
: searchContinuation
|
|
50
|
-
? 'tools:last-tool-search'
|
|
51
|
-
: 'tools:tool-call-detected';
|
|
52
|
-
const thinkingContinuation = readingContinuation || (Boolean(features.hasThinkingKeyword) && !toolsContinuation && !codingContinuation);
|
|
53
|
-
const thinkingReason = readingContinuation ? 'thinking:last-tool-read' : 'thinking:keywords';
|
|
15
|
+
const latestMessageFromUser = features.latestMessageFromUser === true;
|
|
16
|
+
const codingContinuation = lastToolCategory === 'write';
|
|
17
|
+
const thinkingContinuation = lastToolCategory === 'read';
|
|
18
|
+
const searchContinuation = lastToolCategory === 'search';
|
|
19
|
+
const toolsContinuation = lastToolCategory === 'other';
|
|
20
|
+
if (latestMessageFromUser) {
|
|
21
|
+
const reasoning = 'thinking:user-input';
|
|
22
|
+
const evaluations = {
|
|
23
|
+
thinking: { triggered: true, reason: reasoning }
|
|
24
|
+
};
|
|
25
|
+
const candidates = this.ensureDefaultCandidate(['thinking']);
|
|
26
|
+
return this.buildResult('thinking', reasoning, evaluations, candidates);
|
|
27
|
+
}
|
|
54
28
|
const evaluationMap = {
|
|
55
29
|
vision: {
|
|
56
30
|
triggered: features.hasVisionTool && features.hasImageAttachment,
|
|
@@ -61,20 +35,20 @@ export class RoutingClassifier {
|
|
|
61
35
|
reason: 'longcontext:token-threshold'
|
|
62
36
|
},
|
|
63
37
|
websearch: {
|
|
64
|
-
triggered:
|
|
65
|
-
reason:
|
|
38
|
+
triggered: features.hasWebTool || searchContinuation,
|
|
39
|
+
reason: searchContinuation ? 'websearch:last-tool-search' : 'websearch:web-tools-detected'
|
|
66
40
|
},
|
|
67
41
|
coding: {
|
|
68
42
|
triggered: codingContinuation,
|
|
69
43
|
reason: 'coding:last-tool-write'
|
|
70
44
|
},
|
|
71
45
|
thinking: {
|
|
72
|
-
triggered: thinkingContinuation,
|
|
73
|
-
reason:
|
|
46
|
+
triggered: thinkingContinuation || latestMessageFromUser,
|
|
47
|
+
reason: thinkingContinuation ? 'thinking:last-tool-read' : 'thinking:user-input'
|
|
74
48
|
},
|
|
75
49
|
tools: {
|
|
76
|
-
triggered: toolsContinuation,
|
|
77
|
-
reason:
|
|
50
|
+
triggered: toolsContinuation || features.hasTools || features.hasToolCallResponses,
|
|
51
|
+
reason: toolsContinuation ? 'tools:last-tool-other' : 'tools:tool-request-detected'
|
|
78
52
|
},
|
|
79
53
|
background: {
|
|
80
54
|
triggered: containsKeywords(features.userTextSample, this.config.backgroundKeywords ?? []),
|
|
@@ -133,9 +107,3 @@ function containsKeywords(text, keywords) {
|
|
|
133
107
|
const normalized = text.toLowerCase();
|
|
134
108
|
return keywords.some((keyword) => normalized.includes(keyword));
|
|
135
109
|
}
|
|
136
|
-
function formatToolContinuationReason(toolName, toolDetail) {
|
|
137
|
-
const trimmedName = toolName?.trim() || 'tool';
|
|
138
|
-
const trimmedDetail = toolDetail?.trim();
|
|
139
|
-
const detailText = trimmedDetail ? `${trimmedName}: ${trimmedDetail}` : trimmedName;
|
|
140
|
-
return `tools:last-tool-other(${detailText})`;
|
|
141
|
-
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { type ProviderProfile, type VirtualRouterContextRoutingConfig } from './types.js';
|
|
2
|
+
/** Context-window usage computed for a single provider key. */
export interface ContextUsageSnapshot {
    /** Estimated request tokens divided by `limit`. */
    ratio: number;
    /** Context-token limit the ratio was computed against. */
    limit: number;
}
|
|
6
|
+
/** Outcome of classifying a provider pool against an estimated token count. */
export interface ContextAdvisorResult {
    /** Provider keys whose usage ratio is below the warn ratio. */
    safe: string[];
    /** Provider keys at or above the warn ratio but below a ratio of 1. */
    risky: string[];
    /** Provider keys whose usage ratio is 1 or more. */
    overflow: string[];
    /** Usage snapshot per provider key. */
    usage: Record<string, ContextUsageSnapshot>;
    /** Token estimate used for the classification. */
    estimatedTokens: number;
    /** True when `overflow` is non-empty and both `safe` and `risky` are empty. */
    allOverflow: boolean;
}
|
|
14
|
+
/**
 * Sorts candidate providers into safe / risky / overflow buckets according to
 * how much of each provider's context window an estimated request would use.
 */
export declare class ContextAdvisor {
    private warnRatio;
    private hardLimit;
    /** Apply context-routing settings; omitted settings fall back to defaults. */
    configure(config?: VirtualRouterContextRoutingConfig | null): void;
    /**
     * Classify every provider key in `pool`.
     *
     * @param pool Provider keys to evaluate.
     * @param estimatedTokens Estimated token count of the request.
     * @param resolveProfile Lookup returning the profile for a provider key.
     */
    classify(pool: string[], estimatedTokens: number, resolveProfile: (key: string) => ProviderProfile): ContextAdvisorResult;
    /** Whether the result has no safe provider but has risky or all-overflow ones. */
    prefersFallback(result: ContextAdvisorResult): boolean;
    /** Whether the hard limit is disabled. */
    allowsOverflow(): boolean;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS } from './types.js';
|
|
2
|
+
const DEFAULT_WARN_RATIO = 0.9;
|
|
3
|
+
/**
 * Sorts candidate providers into safe / risky / overflow buckets according to
 * how much of each provider's context window an estimated request would use.
 */
export class ContextAdvisor {
    warnRatio = DEFAULT_WARN_RATIO;
    hardLimit = false;
    /**
     * Apply context-routing settings.
     *
     * A finite numeric `warnRatio` is clamped and stored; anything else resets
     * it to the default. `hardLimit` is coerced to a boolean.
     */
    configure(config) {
        const requested = config ? config.warnRatio : undefined;
        const usable = typeof requested === 'number' && Number.isFinite(requested);
        this.warnRatio = usable ? clampWarnRatio(requested) : DEFAULT_WARN_RATIO;
        this.hardLimit = Boolean(config?.hardLimit);
    }
    /**
     * Classify each provider key by usage ratio (estimated tokens / limit).
     *
     * The limit is the profile's `maxContextTokens` when truthy, finite and
     * positive; otherwise, or when `resolveProfile` throws,
     * `DEFAULT_MODEL_CONTEXT_TOKENS` is used. A non-positive or non-finite
     * estimate counts as 0, which makes every provider safe.
     */
    classify(pool, estimatedTokens, resolveProfile) {
        const hasEstimate = typeof estimatedTokens === 'number' && Number.isFinite(estimatedTokens) && estimatedTokens > 0;
        const tokens = hasEstimate ? estimatedTokens : 0;
        const safe = [];
        const risky = [];
        const overflow = [];
        const usage = {};
        for (const providerKey of pool) {
            let limit = DEFAULT_MODEL_CONTEXT_TOKENS;
            try {
                const declared = resolveProfile(providerKey)?.maxContextTokens;
                if (declared && Number.isFinite(declared)) {
                    limit = declared;
                }
            }
            catch {
                // Profile lookup is best-effort: keep the default limit.
                limit = DEFAULT_MODEL_CONTEXT_TOKENS;
            }
            if (!limit || limit <= 0) {
                limit = DEFAULT_MODEL_CONTEXT_TOKENS;
            }
            const ratio = limit > 0 ? tokens / limit : 0;
            usage[providerKey] = { ratio, limit };
            if (tokens === 0 || ratio < this.warnRatio) {
                safe.push(providerKey);
            }
            else if (ratio < 1) {
                risky.push(providerKey);
            }
            else {
                overflow.push(providerKey);
            }
        }
        const allOverflow = safe.length === 0 && risky.length === 0 && overflow.length > 0;
        return {
            safe,
            risky,
            overflow,
            usage,
            estimatedTokens: tokens,
            allOverflow
        };
    }
    /** False when any provider is safe; otherwise true if any is risky, else `allOverflow`. */
    prefersFallback(result) {
        if (result.safe.length > 0) {
            return false;
        }
        return result.risky.length > 0 || result.allOverflow;
    }
    /** Whether the hard limit is disabled. */
    allowsOverflow() {
        return this.hardLimit ? false : true;
    }
}
|
|
71
|
+
/**
 * Clamp a warn ratio into the inclusive range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or `DEFAULT_WARN_RATIO` when `value` is not a
 *          finite number.
 */
function clampWarnRatio(value) {
    if (Number.isFinite(value)) {
        if (value < 0.1) {
            return 0.1;
        }
        return value > 0.99 ? 0.99 : value;
    }
    return DEFAULT_WARN_RATIO;
}
|
|
@@ -6,12 +6,11 @@ export declare class VirtualRouterEngine {
|
|
|
6
6
|
private readonly healthManager;
|
|
7
7
|
private loadBalancer;
|
|
8
8
|
private classifier;
|
|
9
|
+
private readonly contextAdvisor;
|
|
10
|
+
private contextRouting;
|
|
9
11
|
private routeStats;
|
|
10
12
|
private readonly debug;
|
|
11
13
|
private healthConfig;
|
|
12
|
-
private stickyPlans;
|
|
13
|
-
private selectionHistory;
|
|
14
|
-
private providerErrorStreaks;
|
|
15
14
|
initialize(config: VirtualRouterConfig): void;
|
|
16
15
|
route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
|
|
17
16
|
target: TargetMetadata;
|
|
@@ -28,39 +27,24 @@ export declare class VirtualRouterEngine {
|
|
|
28
27
|
}>;
|
|
29
28
|
health: import("./types.js").ProviderHealthState[];
|
|
30
29
|
};
|
|
31
|
-
private consumeSticky;
|
|
32
|
-
private selectStickyTarget;
|
|
33
|
-
private buildStickyClassification;
|
|
34
|
-
private recordSelectionSnapshot;
|
|
35
|
-
private buildStickyPlan;
|
|
36
|
-
private storeStickyPlan;
|
|
37
|
-
private dropStickyForRequest;
|
|
38
|
-
private resolveStickyDescriptor;
|
|
39
|
-
private maybeForceStickyFromHistory;
|
|
40
|
-
private shouldForceApplyPatchSticky;
|
|
41
|
-
private extractPreviousRequestId;
|
|
42
|
-
private pruneStickyPlans;
|
|
43
|
-
private buildErrorSignature;
|
|
44
|
-
private bumpProviderErrorStreak;
|
|
45
|
-
private resetProviderErrorStreak;
|
|
46
30
|
private validateConfig;
|
|
47
31
|
private selectProvider;
|
|
48
32
|
private incrementRouteStat;
|
|
49
33
|
private providerHealthConfig;
|
|
34
|
+
private initializeRouteQueue;
|
|
35
|
+
private resolveFallbackRoute;
|
|
36
|
+
private maybeDeferToFallback;
|
|
37
|
+
private buildContextCandidatePools;
|
|
38
|
+
private describeAttempt;
|
|
50
39
|
private resolveStickyKey;
|
|
51
40
|
private mapProviderError;
|
|
52
41
|
private deriveReason;
|
|
53
42
|
private buildRouteCandidates;
|
|
54
|
-
private ensureConfiguredClassification;
|
|
55
|
-
private normalizeCandidateList;
|
|
56
|
-
private normalizeRouteName;
|
|
57
|
-
private isRouteConfigured;
|
|
58
43
|
private sortByPriority;
|
|
59
44
|
private routeWeight;
|
|
60
45
|
private buildHitReason;
|
|
61
|
-
private
|
|
62
|
-
private
|
|
63
|
-
private
|
|
64
|
-
private
|
|
65
|
-
private shouldColorVirtualRouterLogs;
|
|
46
|
+
private decorateWithDetail;
|
|
47
|
+
private formatVirtualRouterHit;
|
|
48
|
+
private resolveRouteColor;
|
|
49
|
+
private describeContextUsage;
|
|
66
50
|
}
|