@jsonstudio/llms 0.6.141 → 0.6.187

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/conversion/codecs/gemini-openai-codec.js +15 -1
  2. package/dist/conversion/compat/actions/auto-thinking.d.ts +6 -0
  3. package/dist/conversion/compat/actions/auto-thinking.js +25 -0
  4. package/dist/conversion/compat/actions/field-mapping.d.ts +14 -0
  5. package/dist/conversion/compat/actions/field-mapping.js +155 -0
  6. package/dist/conversion/compat/actions/qwen-transform.d.ts +3 -0
  7. package/dist/conversion/compat/actions/qwen-transform.js +209 -0
  8. package/dist/conversion/compat/actions/request-rules.d.ts +24 -0
  9. package/dist/conversion/compat/actions/request-rules.js +63 -0
  10. package/dist/conversion/compat/actions/response-blacklist.d.ts +14 -0
  11. package/dist/conversion/compat/actions/response-blacklist.js +85 -0
  12. package/dist/conversion/compat/actions/response-normalize.d.ts +5 -0
  13. package/dist/conversion/compat/actions/response-normalize.js +121 -0
  14. package/dist/conversion/compat/actions/response-validate.d.ts +5 -0
  15. package/dist/conversion/compat/actions/response-validate.js +76 -0
  16. package/dist/conversion/compat/actions/snapshot.d.ts +8 -0
  17. package/dist/conversion/compat/actions/snapshot.js +21 -0
  18. package/dist/conversion/compat/actions/tool-schema.d.ts +6 -0
  19. package/dist/conversion/compat/actions/tool-schema.js +91 -0
  20. package/dist/conversion/compat/actions/universal-shape-filter.d.ts +74 -0
  21. package/dist/conversion/compat/actions/universal-shape-filter.js +382 -0
  22. package/dist/conversion/compat/profiles/chat-glm.json +187 -13
  23. package/dist/conversion/compat/profiles/chat-iflow.json +177 -9
  24. package/dist/conversion/compat/profiles/chat-lmstudio.json +10 -2
  25. package/dist/conversion/compat/profiles/chat-qwen.json +14 -10
  26. package/dist/conversion/hub/pipeline/compat/compat-engine.d.ts +7 -2
  27. package/dist/conversion/hub/pipeline/compat/compat-engine.js +409 -5
  28. package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +47 -0
  29. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +2 -0
  30. package/dist/conversion/hub/pipeline/hub-pipeline.js +35 -1
  31. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
  32. package/dist/conversion/hub/pipeline/target-utils.js +3 -0
  33. package/dist/conversion/hub/response/response-runtime.js +19 -2
  34. package/dist/conversion/responses/responses-host-policy.d.ts +6 -0
  35. package/dist/conversion/responses/responses-host-policy.js +14 -0
  36. package/dist/conversion/responses/responses-openai-bridge.js +51 -2
  37. package/dist/conversion/shared/anthropic-message-utils.js +6 -0
  38. package/dist/conversion/shared/responses-conversation-store.js +3 -26
  39. package/dist/conversion/shared/responses-reasoning-registry.d.ts +4 -0
  40. package/dist/conversion/shared/responses-reasoning-registry.js +62 -1
  41. package/dist/conversion/shared/responses-response-utils.js +23 -1
  42. package/dist/conversion/shared/tool-canonicalizer.d.ts +2 -0
  43. package/dist/conversion/shared/tool-filter-pipeline.js +11 -0
  44. package/dist/router/virtual-router/bootstrap.js +218 -39
  45. package/dist/router/virtual-router/classifier.js +19 -52
  46. package/dist/router/virtual-router/context-advisor.d.ts +21 -0
  47. package/dist/router/virtual-router/context-advisor.js +76 -0
  48. package/dist/router/virtual-router/engine.d.ts +11 -26
  49. package/dist/router/virtual-router/engine.js +191 -386
  50. package/dist/router/virtual-router/features.js +24 -621
  51. package/dist/router/virtual-router/health-manager.js +2 -7
  52. package/dist/router/virtual-router/message-utils.d.ts +7 -0
  53. package/dist/router/virtual-router/message-utils.js +66 -0
  54. package/dist/router/virtual-router/provider-registry.js +6 -2
  55. package/dist/router/virtual-router/token-estimator.d.ts +2 -0
  56. package/dist/router/virtual-router/token-estimator.js +16 -0
  57. package/dist/router/virtual-router/tool-signals.d.ts +13 -0
  58. package/dist/router/virtual-router/tool-signals.js +403 -0
  59. package/dist/router/virtual-router/types.d.ts +21 -7
  60. package/dist/router/virtual-router/types.js +1 -0
  61. package/package.json +2 -2
@@ -1,4 +1,4 @@
1
- import { VirtualRouterError, VirtualRouterErrorCode } from './types.js';
1
+ import { DEFAULT_MODEL_CONTEXT_TOKENS, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
2
2
  const DEFAULT_CLASSIFIER = {
3
3
  longContextThresholdTokens: 180000,
4
4
  thinkingKeywords: ['think step', 'analysis', 'reasoning', '仔细分析', '深度思考'],
@@ -8,6 +8,11 @@ const DEFAULT_CLASSIFIER = {
8
8
  };
9
9
  const DEFAULT_LOAD_BALANCING = { strategy: 'round-robin' };
10
10
  const DEFAULT_HEALTH = { failureThreshold: 3, cooldownMs: 30_000, fatalCooldownMs: 300_000 };
11
+ const DEFAULT_CONTEXT_ROUTING = {
12
+ warnRatio: 0.9,
13
+ hardLimit: false,
14
+ fallbackRoute: 'longcontext'
15
+ };
11
16
  /**
12
17
  * 将用户提供的 Virtual Router 配置(或包含 virtualrouter 字段的整体配置)
13
18
  * 规范化为 VirtualRouterConfig,供 HubPipeline / VirtualRouterEngine 直接使用。
@@ -31,12 +36,14 @@ export function bootstrapVirtualRouterConfig(input) {
31
36
  const classifier = normalizeClassifier(section.classifier);
32
37
  const loadBalancing = section.loadBalancing ?? DEFAULT_LOAD_BALANCING;
33
38
  const health = section.health ?? DEFAULT_HEALTH;
39
+ const contextRouting = section.contextRouting ?? DEFAULT_CONTEXT_ROUTING;
34
40
  const config = {
35
41
  routing,
36
42
  providers: providerProfiles,
37
43
  classifier,
38
44
  loadBalancing,
39
- health
45
+ health,
46
+ contextRouting
40
47
  };
41
48
  return {
42
49
  config,
@@ -56,7 +63,8 @@ function extractVirtualRouterSection(input) {
56
63
  const classifier = (section.classifier ?? root.classifier);
57
64
  const loadBalancing = normalizeLoadBalancing(section.loadBalancing ?? root.loadBalancing);
58
65
  const health = normalizeHealth(section.health ?? root.health);
59
- return { providers, routing, classifier, loadBalancing, health };
66
+ const contextRouting = normalizeContextRouting(section.contextRouting ?? root.contextRouting);
67
+ return { providers, routing, classifier, loadBalancing, health, contextRouting };
60
68
  }
61
69
  function buildProviderRuntimeEntries(providers) {
62
70
  const runtimeEntries = {};
@@ -100,7 +108,11 @@ function buildProviderRuntimeEntries(providers) {
100
108
  outboundProfile: normalizedProvider.outboundProfile,
101
109
  compatibilityProfile: normalizedProvider.compatibilityProfile,
102
110
  processMode: normalizedProvider.processMode,
103
- responsesConfig: normalizedProvider.responsesConfig
111
+ responsesConfig: normalizedProvider.responsesConfig,
112
+ streaming: normalizedProvider.streaming,
113
+ modelStreaming: normalizedProvider.modelStreaming,
114
+ modelContextTokens: normalizedProvider.modelContextTokens,
115
+ defaultContextTokens: normalizedProvider.defaultContextTokens
104
116
  };
105
117
  }
106
118
  }
@@ -146,6 +158,10 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
146
158
  if (!runtime) {
147
159
  throw new VirtualRouterError(`Routing target ${targetKey} references unknown runtime key ${runtimeKey}`, VirtualRouterErrorCode.CONFIG_ERROR);
148
160
  }
161
+ const streamingPref = runtime.modelStreaming?.[parsed.modelId] !== undefined
162
+ ? runtime.modelStreaming?.[parsed.modelId]
163
+ : runtime.streaming;
164
+ const contextTokens = resolveContextTokens(runtime, parsed.modelId);
149
165
  profiles[targetKey] = {
150
166
  providerKey: targetKey,
151
167
  providerType: runtime.providerType,
@@ -156,15 +172,30 @@ function buildProviderProfiles(targetKeys, runtimeEntries) {
156
172
  runtimeKey,
157
173
  modelId: parsed.modelId,
158
174
  processMode: runtime.processMode || 'chat',
159
- responsesConfig: runtime.responsesConfig
175
+ responsesConfig: runtime.responsesConfig,
176
+ streaming: streamingPref,
177
+ maxContextTokens: contextTokens
160
178
  };
161
179
  targetRuntime[targetKey] = {
162
180
  ...runtime,
163
- modelId: parsed.modelId
181
+ modelId: parsed.modelId,
182
+ streaming: streamingPref,
183
+ maxContextTokens: contextTokens
164
184
  };
165
185
  }
166
186
  return { profiles, targetRuntime };
167
187
  }
/**
 * Resolve the context-window size (in tokens) for one model of a provider runtime.
 *
 * Candidates are tried in order: the per-model entry in `runtime.modelContextTokens`,
 * then `runtime.defaultContextTokens`, then `runtime.maxContextTokens`. The first
 * candidate that is a finite number and still at least 1 after flooring wins.
 *
 * @param runtime Provider runtime entry carrying the optional token settings.
 * @param modelId Model identifier used to look up the per-model override.
 * @returns A whole number of tokens >= 1, or DEFAULT_MODEL_CONTEXT_TOKENS when no
 *          candidate is usable.
 */
function resolveContextTokens(runtime, modelId) {
    const candidates = [
        runtime.modelContextTokens?.[modelId],
        runtime.defaultContextTokens,
        runtime.maxContextTokens
    ];
    for (const candidate of candidates) {
        if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
            continue;
        }
        // Validate AFTER flooring: a value such as 0.5 is "> 0" but would floor to a
        // useless 0-token window, so it must fall through to the next candidate.
        const whole = Math.floor(candidate);
        if (whole >= 1) {
            return whole;
        }
    }
    return DEFAULT_MODEL_CONTEXT_TOKENS;
}
168
199
  function normalizeRouting(source) {
169
200
  const routing = {};
170
201
  for (const [routeName, entries] of Object.entries(source)) {
@@ -209,8 +240,12 @@ function normalizeProvider(providerId, raw) {
209
240
  : '';
210
241
  const headers = normalizeHeaders(provider.headers);
211
242
  const compatibilityProfile = resolveCompatibilityProfile(providerId, provider);
212
- const responsesConfig = normalizeResponsesConfig(provider);
243
+ const responsesNode = asRecord(provider.responses);
244
+ const responsesConfig = normalizeResponsesConfig(provider, responsesNode);
213
245
  const processMode = normalizeProcessMode(provider.process);
246
+ const streaming = resolveProviderStreamingPreference(provider, responsesNode);
247
+ const modelStreaming = normalizeModelStreaming(provider);
248
+ const { modelContextTokens, defaultContextTokens } = normalizeModelContextTokens(provider);
214
249
  return {
215
250
  providerId,
216
251
  providerType,
@@ -219,59 +254,105 @@ function normalizeProvider(providerId, raw) {
219
254
  outboundProfile: mapOutboundProfile(providerType),
220
255
  compatibilityProfile,
221
256
  processMode,
222
- responsesConfig
257
+ responsesConfig,
258
+ streaming,
259
+ modelStreaming,
260
+ modelContextTokens,
261
+ defaultContextTokens
223
262
  };
224
263
  }
225
- function normalizeResponsesConfig(provider) {
226
- const node = asRecord(provider.responses);
227
- if (!node) {
264
+ function normalizeModelStreaming(provider) {
265
+ const modelsNode = asRecord(provider.models);
266
+ if (!modelsNode) {
228
267
  return undefined;
229
268
  }
230
- const config = {};
231
- const rawStyle = typeof node.toolCallIdStyle === 'string' ? node.toolCallIdStyle.trim().toLowerCase() : undefined;
232
- if (rawStyle === 'fc' || rawStyle === 'preserve') {
233
- config.toolCallIdStyle = rawStyle;
234
- }
235
- const streaming = normalizeResponsesStreaming(node.streaming);
236
- if (streaming) {
237
- config.streaming = streaming;
238
- }
239
- const instructionsMode = normalizeResponsesInstructionsMode(node.instructionsMode);
240
- if (instructionsMode) {
241
- config.instructionsMode = instructionsMode;
269
+ const normalized = {};
270
+ for (const [modelId, modelRaw] of Object.entries(modelsNode)) {
271
+ if (!modelRaw || typeof modelRaw !== 'object') {
272
+ continue;
273
+ }
274
+ const preference = resolveStreamingPreference(modelRaw);
275
+ if (preference) {
276
+ normalized[modelId] = preference;
277
+ }
242
278
  }
243
- return Object.keys(config).length ? config : undefined;
279
+ return Object.keys(normalized).length ? normalized : undefined;
244
280
  }
245
- function normalizeResponsesStreaming(value) {
246
- if (value === true) {
247
- return 'always';
248
- }
249
- if (value === false) {
250
- return 'never';
/**
 * Collect the context-window sizes declared on a provider configuration.
 *
 * Per-model values are read from each object entry under `provider.models`.
 * The provider-wide default is the first value found on the provider itself,
 * then on `provider.config`, then on `provider.config.userConfigDefaults`.
 *
 * @param provider Provider configuration record.
 * @returns `modelContextTokens` (map of model id to tokens, or undefined when no
 *          model declares one) and `defaultContextTokens` (possibly undefined).
 */
function normalizeModelContextTokens(provider) {
    // The sibling normalizeModelStreaming checks `!modelsNode`, so asRecord may yield
    // nothing for a provider without `models`; iterate an empty object rather than
    // letting Object.entries throw on a missing node.
    const modelsNode = asRecord(provider.models) ?? {};
    const normalized = {};
    for (const [modelId, modelRaw] of Object.entries(modelsNode)) {
        if (!modelRaw || typeof modelRaw !== 'object') {
            continue;
        }
        const candidate = readContextTokens(modelRaw);
        if (candidate) {
            normalized[modelId] = candidate;
        }
    }
    const configNode = asRecord(provider.config);
    const defaultsNode = asRecord(configNode?.userConfigDefaults);
    const defaultCandidate = readContextTokens(provider) ??
        readContextTokens(configNode) ??
        readContextTokens(defaultsNode);
    return {
        modelContextTokens: Object.keys(normalized).length ? normalized : undefined,
        defaultContextTokens: defaultCandidate
    };
}
/**
 * Determine the streaming preference declared on a single model entry.
 *
 * The keys `streaming`, `stream` and `supportsStreaming` are coerced one at a
 * time, in that order; the first key that coerces to a preference wins.
 *
 * @param model Model configuration object.
 * @returns The coerced preference, or undefined when no key yields one.
 */
function resolveStreamingPreference(model) {
    for (const key of ['streaming', 'stream', 'supportsStreaming']) {
        const preference = coerceStreamingPreference(model[key]);
        if (preference != null) {
            return preference;
        }
    }
    return undefined;
}
308
+ function coerceStreamingPreference(value) {
252
309
  if (typeof value === 'string') {
253
310
  const normalized = value.trim().toLowerCase();
254
- if (normalized === 'always' || normalized === 'true' || normalized === '1' || normalized === 'yes') {
311
+ if (normalized === 'always' || normalized === 'auto' || normalized === 'never') {
312
+ return normalized;
313
+ }
314
+ if (normalized === 'true') {
255
315
  return 'always';
256
316
  }
257
- if (normalized === 'never' || normalized === 'false' || normalized === '0' || normalized === 'no') {
317
+ if (normalized === 'false') {
258
318
  return 'never';
259
319
  }
260
- if (normalized === 'auto') {
261
- return 'auto';
320
+ }
321
+ if (typeof value === 'boolean') {
322
+ return value ? 'always' : 'never';
323
+ }
324
+ if (value && typeof value === 'object') {
325
+ const record = value;
326
+ if (record.mode !== undefined) {
327
+ return coerceStreamingPreference(record.mode);
328
+ }
329
+ if (record.value !== undefined) {
330
+ return coerceStreamingPreference(record.value);
331
+ }
332
+ if (record.enabled !== undefined) {
333
+ return coerceStreamingPreference(record.enabled);
262
334
  }
263
335
  }
264
336
  return undefined;
265
337
  }
266
- function normalizeResponsesInstructionsMode(value) {
267
- if (value === 'inline') {
268
- return 'inline';
338
+ function normalizeResponsesConfig(provider, node) {
339
+ const source = node ?? asRecord(provider.responses);
340
+ if (!source) {
341
+ return undefined;
269
342
  }
270
- if (typeof value === 'string' && value.trim().toLowerCase() === 'inline') {
271
- return 'inline';
343
+ const rawStyle = typeof source.toolCallIdStyle === 'string' ? source.toolCallIdStyle.trim().toLowerCase() : undefined;
344
+ if (rawStyle === 'fc' || rawStyle === 'preserve') {
345
+ return { toolCallIdStyle: rawStyle };
272
346
  }
273
347
  return undefined;
274
348
  }
/**
 * Determine the provider-wide streaming preference.
 *
 * Sources are consulted in order: the provider's own `streaming`, `stream`,
 * `supportsStreaming` and `streamingPreference` keys, then the same style of
 * keys on the provider's `responses` node, then `provider.config.responses`.
 *
 * Every candidate is coerced on its own, matching resolveStreamingPreference.
 * Merging aliases with `??` before coercion would let an unrecognised value under
 * an earlier alias hide a valid value under a later one.
 *
 * @param provider Provider configuration record.
 * @param responsesNode Already-resolved `provider.responses` record, if any.
 * @returns The first preference that coerces successfully, or undefined.
 */
function resolveProviderStreamingPreference(provider, responsesNode) {
    const configNode = asRecord(provider.config);
    const configResponses = configNode ? asRecord(configNode.responses) : undefined;
    const candidates = [
        provider.streaming,
        provider.stream,
        provider.supportsStreaming,
        provider.streamingPreference,
        responsesNode?.streaming,
        responsesNode?.stream,
        responsesNode?.supportsStreaming,
        configResponses?.streaming,
        configResponses?.stream
    ];
    for (const candidate of candidates) {
        const preference = coerceStreamingPreference(candidate);
        if (preference !== undefined) {
            return preference;
        }
    }
    return undefined;
}
275
356
  function resolveCompatibilityProfile(providerId, provider) {
276
357
  if (typeof provider.compatibilityProfile === 'string' && provider.compatibilityProfile.trim()) {
277
358
  return provider.compatibilityProfile.trim();
@@ -298,6 +379,26 @@ function normalizeProcessMode(value) {
298
379
  }
299
380
  return 'chat';
300
381
  }
/**
 * Normalize the user-supplied context-routing section.
 *
 * Each setting may be given in camelCase or snake_case (`warnRatio`/`warn_ratio`,
 * `hardLimit`/`hard_limit`, `fallbackRoute`/`fallback_route`); the camelCase key is
 * tried first. Settings that cannot be coerced fall back to DEFAULT_CONTEXT_ROUTING,
 * and the warn ratio is passed through clampWarnRatio.
 *
 * @param input Raw configuration value; anything that is not an object yields a
 *              copy of the defaults.
 * @returns An object with `warnRatio`, `hardLimit` and `fallbackRoute`.
 */
function normalizeContextRouting(input) {
    if (typeof input !== 'object' || !input) {
        return { ...DEFAULT_CONTEXT_ROUTING };
    }
    // The alias is only consulted when the primary key coerces to nothing.
    const pick = (coerce, camelKey, snakeKey) => coerce(input[camelKey]) ?? coerce(input[snakeKey]);
    const ratioSetting = pick(coerceRatio, 'warnRatio', 'warn_ratio');
    const limitSetting = pick(coerceBoolean, 'hardLimit', 'hard_limit');
    const routeSetting = pick(readOptionalString, 'fallbackRoute', 'fallback_route');
    return {
        warnRatio: clampWarnRatio(ratioSetting ?? DEFAULT_CONTEXT_ROUTING.warnRatio),
        hardLimit: typeof limitSetting === 'boolean' ? limitSetting : DEFAULT_CONTEXT_ROUTING.hardLimit,
        fallbackRoute: routeSetting ?? DEFAULT_CONTEXT_ROUTING.fallbackRoute
    };
}
301
402
  function extractProviderAuthEntries(providerId, raw) {
302
403
  const provider = asRecord(raw);
303
404
  const auth = asRecord(provider.auth);
@@ -634,6 +735,47 @@ function normalizeLoadBalancing(input) {
634
735
  ? { strategy, weights: weightsEntries }
635
736
  : { strategy };
636
737
  }
/**
 * Coerce a configuration value into a finite number.
 *
 * Numbers are accepted as-is when finite. Strings are trimmed and parsed with
 * `Number()`; blank strings and strings that do not parse to a finite number are
 * rejected. No range check is applied here.
 *
 * @param value Raw configuration value.
 * @returns The finite number, or undefined when the value cannot be used.
 */
function coerceRatio(value) {
    let numeric;
    switch (typeof value) {
        case 'number':
            numeric = value;
            break;
        case 'string': {
            const text = value.trim();
            if (text === '') {
                return undefined;
            }
            numeric = Number(text);
            break;
        }
        default:
            return undefined;
    }
    return Number.isFinite(numeric) ? numeric : undefined;
}
/**
 * Clamp a warn ratio into the range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or DEFAULT_CONTEXT_ROUTING.warnRatio when `value`
 *          is not a finite number.
 */
function clampWarnRatio(value) {
    if (!Number.isFinite(value)) {
        return DEFAULT_CONTEXT_ROUTING.warnRatio;
    }
    // Clamping a finite number between two finite bounds is always finite, so no
    // second finiteness check is needed on the result.
    return Math.max(0.1, Math.min(value, 0.99));
}
/**
 * Coerce a configuration value into a boolean.
 *
 * Accepted forms:
 *  - booleans, returned unchanged;
 *  - the numbers 1 and 0, treated like their string spellings '1' and '0';
 *  - strings (trimmed, case-insensitive): 'true', '1', 'yes', 'y' map to true and
 *    'false', '0', 'no', 'n' map to false.
 *
 * @param value Raw configuration value.
 * @returns true or false, or undefined when the value is not a recognised form.
 */
function coerceBoolean(value) {
    if (typeof value === 'boolean') {
        return value;
    }
    if (typeof value === 'number') {
        // Keep numeric flags consistent with the accepted '1' / '0' strings.
        if (value === 1) {
            return true;
        }
        if (value === 0) {
            return false;
        }
        return undefined;
    }
    if (typeof value === 'string') {
        const normalized = value.trim().toLowerCase();
        if (!normalized) {
            return undefined;
        }
        if (['true', '1', 'yes', 'y'].includes(normalized)) {
            return true;
        }
        if (['false', '0', 'no', 'n'].includes(normalized)) {
            return false;
        }
    }
    return undefined;
}
637
779
  function normalizeHealth(input) {
638
780
  if (!input || typeof input !== 'object')
639
781
  return undefined;
@@ -648,6 +790,43 @@ function normalizeHealth(input) {
648
790
  ? { failureThreshold, cooldownMs, fatalCooldownMs }
649
791
  : { failureThreshold, cooldownMs };
650
792
  }
/**
 * Read a context-token setting from a configuration record.
 *
 * The record is probed under several spellings, in this order: `maxContextTokens`,
 * `max_context_tokens`, `maxContext`, `max_context`, `contextTokens`,
 * `context_tokens`. The first spelling whose value normalizes to a truthy
 * integer is returned.
 *
 * @param record Configuration record; a falsy record yields undefined.
 * @returns The token count, or undefined when no spelling holds a usable value.
 */
function readContextTokens(record) {
    if (!record) {
        return undefined;
    }
    const spellings = [
        'maxContextTokens',
        'max_context_tokens',
        'maxContext',
        'max_context',
        'contextTokens',
        'context_tokens'
    ];
    for (let index = 0; index < spellings.length; index += 1) {
        const tokens = normalizePositiveInteger(record[spellings[index]]);
        if (tokens) {
            return tokens;
        }
    }
    return undefined;
}
/**
 * Coerce a value into a positive integer (>= 1).
 *
 * Numbers and numeric strings are accepted; strings are trimmed and parsed with
 * `Number()`. The value is floored, and the positivity check is applied to the
 * floored result so that fractions below 1 (for example 0.5 or '0.9') are
 * rejected instead of being returned as 0.
 *
 * @param value Raw configuration value.
 * @returns The floored integer, or undefined when the value is not a finite
 *          number of at least 1.
 */
function normalizePositiveInteger(value) {
    let numeric;
    if (typeof value === 'number') {
        numeric = value;
    }
    else if (typeof value === 'string') {
        const trimmed = value.trim();
        if (!trimmed) {
            return undefined;
        }
        numeric = Number(trimmed);
    }
    else {
        return undefined;
    }
    if (!Number.isFinite(numeric)) {
        return undefined;
    }
    const whole = Math.floor(numeric);
    return whole >= 1 ? whole : undefined;
}
651
830
  function normalizeHeaders(input) {
652
831
  if (!input || typeof input !== 'object') {
653
832
  return undefined;
@@ -1,18 +1,5 @@
1
1
  import { DEFAULT_ROUTE, ROUTE_PRIORITY } from './types.js';
2
2
  const DEFAULT_LONG_CONTEXT_THRESHOLD = 180000;
3
- const WEBSEARCH_HINT_KEYWORDS = [
4
- 'web search',
5
- 'search the web',
6
- 'search online',
7
- 'internet search',
8
- 'search internet',
9
- 'google it',
10
- 'bing it',
11
- '网络搜索',
12
- '上网搜索',
13
- '查一下网络',
14
- '搜一下网络'
15
- ];
16
3
  export class RoutingClassifier {
17
4
  config;
18
5
  constructor(config) {
@@ -24,34 +11,20 @@ export class RoutingClassifier {
24
11
  }
25
12
  classify(features) {
26
13
  const lastToolCategory = features.lastAssistantToolCategory;
27
- const toolCategories = features.assistantToolCategories ?? [];
28
- const hasSearchToolCall = toolCategories.includes('search');
29
- const hasWriteToolCall = toolCategories.includes('write');
30
- const hasReadToolCall = toolCategories.includes('read');
31
- const hasOtherToolCall = toolCategories.includes('other');
32
- const hasToolCall = toolCategories.length > 0;
33
14
  const reachedLongContext = features.estimatedTokens >= (this.config.longContextThresholdTokens ?? DEFAULT_LONG_CONTEXT_THRESHOLD);
34
- const routeHint = typeof features.metadata?.routeHint === 'string'
35
- ? features.metadata.routeHint.trim().toLowerCase()
36
- : undefined;
37
- const websearchKeywordHit = containsKeywords(features.userTextSample, WEBSEARCH_HINT_KEYWORDS);
38
- const codingContinuation = hasWriteToolCall || lastToolCategory === 'write';
39
- const thinkingContinuation = hasReadToolCall || lastToolCategory === 'read';
40
- const userInputDetected = typeof features.userTextSample === 'string'
41
- ? features.userTextSample.trim().length > 0
42
- : false;
43
- const searchContinuation = features.assistantCalledWebSearchTool === true;
44
- const toolsContinuation = hasOtherToolCall ||
45
- searchContinuation ||
46
- (hasToolCall && !hasSearchToolCall && !hasWriteToolCall && !hasReadToolCall);
47
- const toolContinuationReason = hasOtherToolCall
48
- ? formatToolContinuationReason(features.lastAssistantToolName, features.lastAssistantToolDetail)
49
- : 'tools:tool-call-detected';
50
- const thinkingReason = thinkingContinuation
51
- ? 'thinking:last-tool-read'
52
- : userInputDetected
53
- ? 'thinking:user-input'
54
- : 'thinking';
15
+ const latestMessageFromUser = features.latestMessageFromUser === true;
16
+ const codingContinuation = lastToolCategory === 'write';
17
+ const thinkingContinuation = lastToolCategory === 'read';
18
+ const searchContinuation = lastToolCategory === 'search';
19
+ const toolsContinuation = lastToolCategory === 'other';
20
+ if (latestMessageFromUser) {
21
+ const reasoning = 'thinking:user-input';
22
+ const evaluations = {
23
+ thinking: { triggered: true, reason: reasoning }
24
+ };
25
+ const candidates = this.ensureDefaultCandidate(['thinking']);
26
+ return this.buildResult('thinking', reasoning, evaluations, candidates);
27
+ }
55
28
  const evaluationMap = {
56
29
  vision: {
57
30
  triggered: features.hasVisionTool && features.hasImageAttachment,
@@ -62,20 +35,20 @@ export class RoutingClassifier {
62
35
  reason: 'longcontext:token-threshold'
63
36
  },
64
37
  websearch: {
65
- triggered: routeHint === 'websearch' || websearchKeywordHit,
66
- reason: routeHint === 'websearch' ? 'websearch:route-hint' : 'websearch:keywords'
38
+ triggered: features.hasWebTool || searchContinuation,
39
+ reason: searchContinuation ? 'websearch:last-tool-search' : 'websearch:web-tools-detected'
67
40
  },
68
41
  coding: {
69
42
  triggered: codingContinuation,
70
43
  reason: 'coding:last-tool-write'
71
44
  },
72
45
  thinking: {
73
- triggered: thinkingContinuation || userInputDetected,
74
- reason: thinkingReason
46
+ triggered: thinkingContinuation || latestMessageFromUser,
47
+ reason: thinkingContinuation ? 'thinking:last-tool-read' : 'thinking:user-input'
75
48
  },
76
49
  tools: {
77
- triggered: toolsContinuation,
78
- reason: toolContinuationReason
50
+ triggered: toolsContinuation || features.hasTools || features.hasToolCallResponses,
51
+ reason: toolsContinuation ? 'tools:last-tool-other' : 'tools:tool-request-detected'
79
52
  },
80
53
  background: {
81
54
  triggered: containsKeywords(features.userTextSample, this.config.backgroundKeywords ?? []),
@@ -134,9 +107,3 @@ function containsKeywords(text, keywords) {
134
107
  const normalized = text.toLowerCase();
135
108
  return keywords.some((keyword) => normalized.includes(keyword));
136
109
  }
137
- function formatToolContinuationReason(toolName, toolDetail) {
138
- const trimmedName = toolName?.trim() || 'tool';
139
- const trimmedDetail = toolDetail?.trim();
140
- const detailText = trimmedDetail ? `${trimmedName}: ${trimmedDetail}` : trimmedName;
141
- return `tools:last-tool-other(${detailText})`;
142
- }
@@ -0,0 +1,21 @@
1
+ import { type ProviderProfile, type VirtualRouterContextRoutingConfig } from './types.js';
/** Context-window usage computed for a single provider key. */
export interface ContextUsageSnapshot {
    /** Estimated request tokens divided by `limit`. */
    ratio: number;
    /** Context-window size, in tokens, that `ratio` was measured against. */
    limit: number;
}
/** Outcome of sorting a provider pool by how much of each context window a request would use. */
export interface ContextAdvisorResult {
    /** Provider keys whose usage ratio is below the warn ratio (every key when the estimate is 0). */
    safe: string[];
    /** Provider keys at or above the warn ratio but still below a ratio of 1. */
    risky: string[];
    /** Provider keys whose usage ratio is 1 or more. */
    overflow: string[];
    /** Usage snapshot for each provider key in the pool. */
    usage: Record<string, ContextUsageSnapshot>;
    /** Token estimate used for the classification; non-finite or non-positive input is recorded as 0. */
    estimatedTokens: number;
    /** True when the pool is non-empty and every provider landed in `overflow`. */
    allOverflow: boolean;
}
/** Sorts provider keys into safe / risky / overflow groups based on estimated context usage. */
export declare class ContextAdvisor {
    private warnRatio;
    private hardLimit;
    /**
     * Apply context-routing settings. A finite numeric `warnRatio` is clamped and
     * stored; otherwise the default warn ratio is restored. `hardLimit` is stored
     * as a boolean.
     */
    configure(config?: VirtualRouterContextRoutingConfig | null): void;
    /**
     * Classify every provider key in `pool` against its context limit.
     *
     * @param pool Provider keys to evaluate.
     * @param estimatedTokens Estimated token count of the request.
     * @param resolveProfile Lookup returning the profile for a provider key.
     */
    classify(pool: string[], estimatedTokens: number, resolveProfile: (key: string) => ProviderProfile): ContextAdvisorResult;
    /** False when any provider is safe; otherwise true if any is risky, else `result.allOverflow`. */
    prefersFallback(result: ContextAdvisorResult): boolean;
    /** True unless the hard limit is enabled. */
    allowsOverflow(): boolean;
}
@@ -0,0 +1,76 @@
1
+ import { DEFAULT_MODEL_CONTEXT_TOKENS } from './types.js';
2
+ const DEFAULT_WARN_RATIO = 0.9;
/**
 * Sorts provider keys into safe / risky / overflow groups according to how much
 * of each provider's context window an estimated request size would occupy.
 */
export class ContextAdvisor {
    warnRatio = DEFAULT_WARN_RATIO;
    hardLimit = false;
    /**
     * Apply context-routing settings.
     *
     * A finite numeric `config.warnRatio` is clamped and stored; anything else
     * restores the default warn ratio. `config.hardLimit` is stored as a boolean.
     */
    configure(config) {
        const requested = config ? config.warnRatio : undefined;
        this.warnRatio = typeof requested === 'number' && Number.isFinite(requested)
            ? clampWarnRatio(requested)
            : DEFAULT_WARN_RATIO;
        this.hardLimit = Boolean(config?.hardLimit);
    }
    /**
     * Classify each provider key in `pool`.
     *
     * A provider's limit is its profile's `maxContextTokens` when that is a finite
     * positive value; otherwise, or when `resolveProfile` throws, the default model
     * context size is used. A provider is "safe" when the estimate is 0 or its
     * usage ratio is below the warn ratio, "risky" when the ratio is below 1, and
     * "overflow" otherwise.
     */
    classify(pool, estimatedTokens, resolveProfile) {
        const tokens = typeof estimatedTokens === 'number' && Number.isFinite(estimatedTokens) && estimatedTokens > 0
            ? estimatedTokens
            : 0;
        const limitFor = (providerKey) => {
            try {
                const declared = resolveProfile(providerKey)?.maxContextTokens;
                if (declared && Number.isFinite(declared) && declared > 0) {
                    return declared;
                }
            }
            catch {
                // Best effort: an unresolvable profile is measured against the default limit.
            }
            return DEFAULT_MODEL_CONTEXT_TOKENS;
        };
        const safe = [];
        const risky = [];
        const overflow = [];
        const usage = {};
        for (const providerKey of pool) {
            const limit = limitFor(providerKey);
            const ratio = limit > 0 ? tokens / limit : 0;
            usage[providerKey] = { ratio, limit };
            if (tokens === 0 || ratio < this.warnRatio) {
                safe.push(providerKey);
            }
            else if (ratio < 1) {
                risky.push(providerKey);
            }
            else {
                overflow.push(providerKey);
            }
        }
        const nothingFits = safe.length === 0 && risky.length === 0 && overflow.length > 0;
        return {
            safe,
            risky,
            overflow,
            usage,
            estimatedTokens: tokens,
            allOverflow: nothingFits
        };
    }
    /**
     * Report whether a fallback is preferred for a classification result:
     * false when any provider is safe, true when none is safe but some are risky,
     * and `result.allOverflow` otherwise.
     */
    prefersFallback(result) {
        if (result.safe.length > 0) {
            return false;
        }
        return result.risky.length > 0 ? true : result.allOverflow;
    }
    /** Overflow is allowed unless the hard limit is enabled. */
    allowsOverflow() {
        return !this.hardLimit;
    }
}
/**
 * Clamp a warn ratio into the range [0.1, 0.99].
 *
 * @param value Candidate ratio.
 * @returns The clamped ratio, or DEFAULT_WARN_RATIO when `value` is not a finite number.
 */
function clampWarnRatio(value) {
    if (Number.isFinite(value)) {
        return Math.min(0.99, Math.max(0.1, value));
    }
    return DEFAULT_WARN_RATIO;
}
@@ -6,12 +6,11 @@ export declare class VirtualRouterEngine {
6
6
  private readonly healthManager;
7
7
  private loadBalancer;
8
8
  private classifier;
9
+ private readonly contextAdvisor;
10
+ private contextRouting;
9
11
  private routeStats;
10
12
  private readonly debug;
11
13
  private healthConfig;
12
- private stickyPlans;
13
- private selectionHistory;
14
- private providerErrorStreaks;
15
14
  initialize(config: VirtualRouterConfig): void;
16
15
  route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
17
16
  target: TargetMetadata;
@@ -28,38 +27,24 @@ export declare class VirtualRouterEngine {
28
27
  }>;
29
28
  health: import("./types.js").ProviderHealthState[];
30
29
  };
31
- private consumeSticky;
32
- private selectStickyTarget;
33
- private buildStickyClassification;
34
- private recordSelectionSnapshot;
35
- private buildStickyPlan;
36
- private storeStickyPlan;
37
- private resolveStickyDescriptor;
38
- private maybeForceStickyFromHistory;
39
- private shouldForceApplyPatchSticky;
40
- private extractPreviousRequestId;
41
- private pruneStickyPlans;
42
- private buildErrorSignature;
43
- private bumpProviderErrorStreak;
44
- private resetProviderErrorStreak;
45
30
  private validateConfig;
46
31
  private selectProvider;
47
32
  private incrementRouteStat;
48
33
  private providerHealthConfig;
34
+ private initializeRouteQueue;
35
+ private resolveFallbackRoute;
36
+ private maybeDeferToFallback;
37
+ private buildContextCandidatePools;
38
+ private describeAttempt;
49
39
  private resolveStickyKey;
50
40
  private mapProviderError;
51
41
  private deriveReason;
52
42
  private buildRouteCandidates;
53
- private ensureConfiguredClassification;
54
- private normalizeCandidateList;
55
- private normalizeRouteName;
56
- private isRouteConfigured;
57
43
  private sortByPriority;
58
44
  private routeWeight;
59
45
  private buildHitReason;
60
- private formatToolIdentifier;
61
- private decorateReason;
62
- private buildVirtualRouterHitLog;
63
- private colorizeVirtualRouterLog;
64
- private shouldColorVirtualRouterLogs;
46
+ private decorateWithDetail;
47
+ private formatVirtualRouterHit;
48
+ private resolveRouteColor;
49
+ private describeContextUsage;
65
50
  }