@jsonstudio/llms 0.6.141 → 0.6.187
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +15 -1
- package/dist/conversion/compat/actions/auto-thinking.d.ts +6 -0
- package/dist/conversion/compat/actions/auto-thinking.js +25 -0
- package/dist/conversion/compat/actions/field-mapping.d.ts +14 -0
- package/dist/conversion/compat/actions/field-mapping.js +155 -0
- package/dist/conversion/compat/actions/qwen-transform.d.ts +3 -0
- package/dist/conversion/compat/actions/qwen-transform.js +209 -0
- package/dist/conversion/compat/actions/request-rules.d.ts +24 -0
- package/dist/conversion/compat/actions/request-rules.js +63 -0
- package/dist/conversion/compat/actions/response-blacklist.d.ts +14 -0
- package/dist/conversion/compat/actions/response-blacklist.js +85 -0
- package/dist/conversion/compat/actions/response-normalize.d.ts +5 -0
- package/dist/conversion/compat/actions/response-normalize.js +121 -0
- package/dist/conversion/compat/actions/response-validate.d.ts +5 -0
- package/dist/conversion/compat/actions/response-validate.js +76 -0
- package/dist/conversion/compat/actions/snapshot.d.ts +8 -0
- package/dist/conversion/compat/actions/snapshot.js +21 -0
- package/dist/conversion/compat/actions/tool-schema.d.ts +6 -0
- package/dist/conversion/compat/actions/tool-schema.js +91 -0
- package/dist/conversion/compat/actions/universal-shape-filter.d.ts +74 -0
- package/dist/conversion/compat/actions/universal-shape-filter.js +382 -0
- package/dist/conversion/compat/profiles/chat-glm.json +187 -13
- package/dist/conversion/compat/profiles/chat-iflow.json +177 -9
- package/dist/conversion/compat/profiles/chat-lmstudio.json +10 -2
- package/dist/conversion/compat/profiles/chat-qwen.json +14 -10
- package/dist/conversion/hub/pipeline/compat/compat-engine.d.ts +7 -2
- package/dist/conversion/hub/pipeline/compat/compat-engine.js +409 -5
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +47 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +35 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/target-utils.js +3 -0
- package/dist/conversion/hub/response/response-runtime.js +19 -2
- package/dist/conversion/responses/responses-host-policy.d.ts +6 -0
- package/dist/conversion/responses/responses-host-policy.js +14 -0
- package/dist/conversion/responses/responses-openai-bridge.js +51 -2
- package/dist/conversion/shared/anthropic-message-utils.js +6 -0
- package/dist/conversion/shared/responses-conversation-store.js +3 -26
- package/dist/conversion/shared/responses-reasoning-registry.d.ts +4 -0
- package/dist/conversion/shared/responses-reasoning-registry.js +62 -1
- package/dist/conversion/shared/responses-response-utils.js +23 -1
- package/dist/conversion/shared/tool-canonicalizer.d.ts +2 -0
- package/dist/conversion/shared/tool-filter-pipeline.js +11 -0
- package/dist/router/virtual-router/bootstrap.js +218 -39
- package/dist/router/virtual-router/classifier.js +19 -52
- package/dist/router/virtual-router/context-advisor.d.ts +21 -0
- package/dist/router/virtual-router/context-advisor.js +76 -0
- package/dist/router/virtual-router/engine.d.ts +11 -26
- package/dist/router/virtual-router/engine.js +191 -386
- package/dist/router/virtual-router/features.js +24 -621
- package/dist/router/virtual-router/health-manager.js +2 -7
- package/dist/router/virtual-router/message-utils.d.ts +7 -0
- package/dist/router/virtual-router/message-utils.js +66 -0
- package/dist/router/virtual-router/provider-registry.js +6 -2
- package/dist/router/virtual-router/token-estimator.d.ts +2 -0
- package/dist/router/virtual-router/token-estimator.js +16 -0
- package/dist/router/virtual-router/tool-signals.d.ts +13 -0
- package/dist/router/virtual-router/tool-signals.js +403 -0
- package/dist/router/virtual-router/types.d.ts +21 -7
- package/dist/router/virtual-router/types.js +1 -0
- package/package.json +2 -2
|
@@ -3,24 +3,19 @@ import { ProviderRegistry } from './provider-registry.js';
|
|
|
3
3
|
import { RouteLoadBalancer } from './load-balancer.js';
|
|
4
4
|
import { RoutingClassifier } from './classifier.js';
|
|
5
5
|
import { buildRoutingFeatures } from './features.js';
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
const ANSI_RESET = '\x1b[0m';
|
|
9
|
-
const STICKY_PLAN_TTL_MS = 30 * 60 * 1000;
|
|
10
|
-
const ERROR_STREAK_TTL_MS = 10 * 60 * 1000;
|
|
11
|
-
const ERROR_STREAK_THRESHOLD = 4;
|
|
6
|
+
import { ContextAdvisor } from './context-advisor.js';
|
|
7
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS, DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
12
8
|
export class VirtualRouterEngine {
|
|
13
9
|
routing = {};
|
|
14
10
|
providerRegistry = new ProviderRegistry();
|
|
15
11
|
healthManager = new ProviderHealthManager();
|
|
16
12
|
loadBalancer = new RouteLoadBalancer();
|
|
17
13
|
classifier = new RoutingClassifier({});
|
|
14
|
+
contextAdvisor = new ContextAdvisor();
|
|
15
|
+
contextRouting;
|
|
18
16
|
routeStats = new Map();
|
|
19
17
|
debug = console; // thin hook; host may monkey-patch for colored logging
|
|
20
18
|
healthConfig = null;
|
|
21
|
-
stickyPlans = new Map();
|
|
22
|
-
selectionHistory = new Map();
|
|
23
|
-
providerErrorStreaks = new Map();
|
|
24
19
|
initialize(config) {
|
|
25
20
|
this.validateConfig(config);
|
|
26
21
|
this.routing = config.routing;
|
|
@@ -30,6 +25,8 @@ export class VirtualRouterEngine {
|
|
|
30
25
|
this.healthManager.registerProviders(Object.keys(config.providers));
|
|
31
26
|
this.loadBalancer = new RouteLoadBalancer(config.loadBalancing);
|
|
32
27
|
this.classifier = new RoutingClassifier(config.classifier);
|
|
28
|
+
this.contextRouting = config.contextRouting ?? { warnRatio: 0.9, hardLimit: false };
|
|
29
|
+
this.contextAdvisor.configure(this.contextRouting);
|
|
33
30
|
this.routeStats = new Map();
|
|
34
31
|
for (const routeName of Object.keys(this.routing)) {
|
|
35
32
|
this.routeStats.set(routeName, { hits: 0 });
|
|
@@ -37,42 +34,21 @@ export class VirtualRouterEngine {
|
|
|
37
34
|
}
|
|
38
35
|
route(request, metadata) {
|
|
39
36
|
const features = buildRoutingFeatures(request, metadata);
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if (stickyActivation) {
|
|
44
|
-
selection = this.selectStickyTarget(stickyActivation, metadata);
|
|
45
|
-
if (selection) {
|
|
46
|
-
classification = this.buildStickyClassification(stickyActivation);
|
|
47
|
-
}
|
|
48
|
-
else {
|
|
49
|
-
stickyActivation = null;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
if (!selection || !classification) {
|
|
53
|
-
classification = this.classifier.classify(features);
|
|
54
|
-
classification = this.ensureConfiguredClassification(classification);
|
|
55
|
-
const routeName = classification.routeName || DEFAULT_ROUTE;
|
|
56
|
-
selection = this.selectProvider(routeName, metadata, classification);
|
|
57
|
-
}
|
|
58
|
-
if (!selection || !classification) {
|
|
59
|
-
throw new VirtualRouterError('Virtual router failed to select provider', VirtualRouterErrorCode.ROUTE_NOT_FOUND);
|
|
60
|
-
}
|
|
37
|
+
const classification = this.classifier.classify(features);
|
|
38
|
+
const routeName = classification.routeName || DEFAULT_ROUTE;
|
|
39
|
+
const selection = this.selectProvider(routeName, metadata, classification, features);
|
|
61
40
|
const target = this.providerRegistry.buildTarget(selection.providerKey);
|
|
62
41
|
this.healthManager.recordSuccess(selection.providerKey);
|
|
63
|
-
this.resetProviderErrorStreak(selection.providerKey);
|
|
64
42
|
this.incrementRouteStat(selection.routeUsed, selection.providerKey);
|
|
65
|
-
const
|
|
66
|
-
this.
|
|
67
|
-
if (
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
const
|
|
74
|
-
this.debug?.log?.(hitLog);
|
|
75
|
-
const didFallback = selection.routeUsed !== (classification.routeName || DEFAULT_ROUTE) || classification.fallback;
|
|
43
|
+
const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features);
|
|
44
|
+
const formatted = this.formatVirtualRouterHit(selection.routeUsed, selection.providerKey, target.modelId || '', hitReason);
|
|
45
|
+
if (formatted) {
|
|
46
|
+
this.debug?.log?.(formatted);
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
this.debug?.log?.('[virtual-router-hit]', selection.routeUsed, selection.providerKey, target.modelId || '', hitReason ? `reason=${hitReason}` : '');
|
|
50
|
+
}
|
|
51
|
+
const didFallback = selection.routeUsed !== routeName || classification.fallback;
|
|
76
52
|
return {
|
|
77
53
|
target,
|
|
78
54
|
decision: {
|
|
@@ -129,199 +105,6 @@ export class VirtualRouterEngine {
|
|
|
129
105
|
health: this.healthManager.getSnapshot()
|
|
130
106
|
};
|
|
131
107
|
}
|
|
132
|
-
consumeSticky(metadata, features) {
|
|
133
|
-
const prevId = this.extractPreviousRequestId(metadata);
|
|
134
|
-
if (!prevId) {
|
|
135
|
-
return null;
|
|
136
|
-
}
|
|
137
|
-
this.pruneStickyPlans();
|
|
138
|
-
const planned = this.stickyPlans.get(prevId);
|
|
139
|
-
if (planned) {
|
|
140
|
-
this.stickyPlans.delete(prevId);
|
|
141
|
-
const activation = {
|
|
142
|
-
...planned,
|
|
143
|
-
sourceRequestId: prevId,
|
|
144
|
-
mode: 'planned'
|
|
145
|
-
};
|
|
146
|
-
if (planned.remainingRounds > 1 && metadata.requestId) {
|
|
147
|
-
this.stickyPlans.set(metadata.requestId, {
|
|
148
|
-
...planned,
|
|
149
|
-
remainingRounds: planned.remainingRounds - 1,
|
|
150
|
-
createdAt: Date.now()
|
|
151
|
-
});
|
|
152
|
-
}
|
|
153
|
-
return activation;
|
|
154
|
-
}
|
|
155
|
-
return this.maybeForceStickyFromHistory(prevId, features);
|
|
156
|
-
}
|
|
157
|
-
selectStickyTarget(sticky, metadata) {
|
|
158
|
-
if (sticky.strategy === 'target' && sticky.providerKey) {
|
|
159
|
-
if (!this.healthManager.isAvailable(sticky.providerKey)) {
|
|
160
|
-
return null;
|
|
161
|
-
}
|
|
162
|
-
const pool = this.routing[sticky.routeName] ?? [];
|
|
163
|
-
return { providerKey: sticky.providerKey, routeUsed: sticky.routeName, pool };
|
|
164
|
-
}
|
|
165
|
-
const pool = this.routing[sticky.routeName];
|
|
166
|
-
if (!Array.isArray(pool) || pool.length === 0) {
|
|
167
|
-
return null;
|
|
168
|
-
}
|
|
169
|
-
const stub = {
|
|
170
|
-
routeName: sticky.routeName,
|
|
171
|
-
confidence: 1,
|
|
172
|
-
reasoning: `sticky:${sticky.reason}`,
|
|
173
|
-
fallback: false,
|
|
174
|
-
candidates: [sticky.routeName]
|
|
175
|
-
};
|
|
176
|
-
return this.selectProvider(sticky.routeName, metadata, stub);
|
|
177
|
-
}
|
|
178
|
-
buildStickyClassification(sticky) {
|
|
179
|
-
return {
|
|
180
|
-
routeName: sticky.routeName,
|
|
181
|
-
confidence: 1,
|
|
182
|
-
reasoning: `sticky:${sticky.reason}`,
|
|
183
|
-
fallback: false,
|
|
184
|
-
candidates: [sticky.routeName]
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
recordSelectionSnapshot(requestId, routeName, providerKey, modelId) {
|
|
188
|
-
if (!requestId || !providerKey) {
|
|
189
|
-
return;
|
|
190
|
-
}
|
|
191
|
-
this.selectionHistory.set(requestId, {
|
|
192
|
-
routeName,
|
|
193
|
-
providerKey,
|
|
194
|
-
modelId,
|
|
195
|
-
createdAt: Date.now()
|
|
196
|
-
});
|
|
197
|
-
this.pruneStickyPlans();
|
|
198
|
-
}
|
|
199
|
-
buildStickyPlan(features, selection, target) {
|
|
200
|
-
const descriptor = this.resolveStickyDescriptor(selection.routeUsed, features);
|
|
201
|
-
if (!descriptor || descriptor.rounds <= 0) {
|
|
202
|
-
return null;
|
|
203
|
-
}
|
|
204
|
-
const plan = {
|
|
205
|
-
routeName: selection.routeUsed,
|
|
206
|
-
strategy: descriptor.strategy,
|
|
207
|
-
providerKey: descriptor.strategy === 'target' ? selection.providerKey : undefined,
|
|
208
|
-
modelId: descriptor.strategy === 'target' ? target.modelId : undefined,
|
|
209
|
-
remainingRounds: descriptor.rounds,
|
|
210
|
-
totalRounds: descriptor.rounds,
|
|
211
|
-
reason: descriptor.reason,
|
|
212
|
-
createdAt: Date.now()
|
|
213
|
-
};
|
|
214
|
-
return plan;
|
|
215
|
-
}
|
|
216
|
-
storeStickyPlan(requestId, plan) {
|
|
217
|
-
if (!requestId) {
|
|
218
|
-
return;
|
|
219
|
-
}
|
|
220
|
-
this.pruneStickyPlans();
|
|
221
|
-
if (plan && plan.remainingRounds > 0) {
|
|
222
|
-
this.stickyPlans.set(requestId, plan);
|
|
223
|
-
}
|
|
224
|
-
else {
|
|
225
|
-
this.stickyPlans.delete(requestId);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
resolveStickyDescriptor(routeName, features) {
|
|
229
|
-
if (this.shouldForceApplyPatchSticky(features)) {
|
|
230
|
-
return { strategy: 'target', rounds: 1, reason: 'apply_patch' };
|
|
231
|
-
}
|
|
232
|
-
if (routeName === 'coding' || routeName === 'thinking') {
|
|
233
|
-
return { strategy: 'pool', rounds: 3, reason: routeName };
|
|
234
|
-
}
|
|
235
|
-
if (routeName === 'tools') {
|
|
236
|
-
return { strategy: 'pool', rounds: 0, reason: routeName };
|
|
237
|
-
}
|
|
238
|
-
if (routeName === DEFAULT_ROUTE || !routeName) {
|
|
239
|
-
return null;
|
|
240
|
-
}
|
|
241
|
-
return { strategy: 'pool', rounds: 1, reason: routeName };
|
|
242
|
-
}
|
|
243
|
-
maybeForceStickyFromHistory(prevId, features) {
|
|
244
|
-
if (!this.shouldForceApplyPatchSticky(features)) {
|
|
245
|
-
return null;
|
|
246
|
-
}
|
|
247
|
-
const snapshot = this.selectionHistory.get(prevId);
|
|
248
|
-
if (!snapshot) {
|
|
249
|
-
return null;
|
|
250
|
-
}
|
|
251
|
-
if (!this.healthManager.isAvailable(snapshot.providerKey)) {
|
|
252
|
-
return null;
|
|
253
|
-
}
|
|
254
|
-
return {
|
|
255
|
-
routeName: snapshot.routeName,
|
|
256
|
-
providerKey: snapshot.providerKey,
|
|
257
|
-
modelId: snapshot.modelId,
|
|
258
|
-
strategy: 'target',
|
|
259
|
-
remainingRounds: 0,
|
|
260
|
-
totalRounds: 1,
|
|
261
|
-
reason: 'apply_patch',
|
|
262
|
-
createdAt: Date.now(),
|
|
263
|
-
sourceRequestId: prevId,
|
|
264
|
-
mode: 'forced'
|
|
265
|
-
};
|
|
266
|
-
}
|
|
267
|
-
shouldForceApplyPatchSticky(features) {
|
|
268
|
-
const name = (features.lastAssistantToolName || '').toLowerCase();
|
|
269
|
-
if (name === 'apply_patch') {
|
|
270
|
-
return true;
|
|
271
|
-
}
|
|
272
|
-
const detail = (features.lastAssistantToolDetail || '').toLowerCase();
|
|
273
|
-
if (detail.includes('apply_patch')) {
|
|
274
|
-
return true;
|
|
275
|
-
}
|
|
276
|
-
return false;
|
|
277
|
-
}
|
|
278
|
-
extractPreviousRequestId(metadata) {
|
|
279
|
-
const resume = metadata.responsesResume;
|
|
280
|
-
if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
|
|
281
|
-
return resume.previousRequestId.trim();
|
|
282
|
-
}
|
|
283
|
-
return undefined;
|
|
284
|
-
}
|
|
285
|
-
pruneStickyPlans() {
|
|
286
|
-
const cutoff = Date.now() - STICKY_PLAN_TTL_MS;
|
|
287
|
-
for (const [key, plan] of this.stickyPlans.entries()) {
|
|
288
|
-
if (!plan || plan.createdAt < cutoff) {
|
|
289
|
-
this.stickyPlans.delete(key);
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
for (const [key, snapshot] of this.selectionHistory.entries()) {
|
|
293
|
-
if (!snapshot || snapshot.createdAt < cutoff) {
|
|
294
|
-
this.selectionHistory.delete(key);
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
}
|
|
298
|
-
buildErrorSignature(code, statusCode, message) {
|
|
299
|
-
const normalizedMessage = typeof message === 'string'
|
|
300
|
-
? message.trim().toLowerCase().replace(/\s+/g, ' ').slice(0, 120)
|
|
301
|
-
: '';
|
|
302
|
-
const codeToken = code?.toUpperCase() || 'ERR_UNKNOWN';
|
|
303
|
-
const statusToken = typeof statusCode === 'number' ? String(statusCode) : 'NA';
|
|
304
|
-
return `${statusToken}|${codeToken}|${normalizedMessage}`;
|
|
305
|
-
}
|
|
306
|
-
bumpProviderErrorStreak(providerKey, signature) {
|
|
307
|
-
if (!providerKey || !signature) {
|
|
308
|
-
return 0;
|
|
309
|
-
}
|
|
310
|
-
const now = Date.now();
|
|
311
|
-
const entry = this.providerErrorStreaks.get(providerKey);
|
|
312
|
-
if (!entry || entry.signature !== signature || now - entry.lastAt > ERROR_STREAK_TTL_MS) {
|
|
313
|
-
this.providerErrorStreaks.set(providerKey, { signature, count: 1, lastAt: now });
|
|
314
|
-
return 1;
|
|
315
|
-
}
|
|
316
|
-
const next = { signature, count: entry.count + 1, lastAt: now };
|
|
317
|
-
this.providerErrorStreaks.set(providerKey, next);
|
|
318
|
-
return next.count;
|
|
319
|
-
}
|
|
320
|
-
resetProviderErrorStreak(providerKey) {
|
|
321
|
-
if (providerKey) {
|
|
322
|
-
this.providerErrorStreaks.delete(providerKey);
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
108
|
validateConfig(config) {
|
|
326
109
|
if (!config.routing || typeof config.routing !== 'object') {
|
|
327
110
|
throw new VirtualRouterError('routing configuration is required', VirtualRouterErrorCode.CONFIG_ERROR);
|
|
@@ -348,30 +131,47 @@ export class VirtualRouterEngine {
|
|
|
348
131
|
}
|
|
349
132
|
}
|
|
350
133
|
}
|
|
351
|
-
selectProvider(requestedRoute, metadata, classification) {
|
|
352
|
-
const
|
|
353
|
-
const candidates = this.buildRouteCandidates(normalizedRoute, classification.candidates);
|
|
134
|
+
selectProvider(requestedRoute, metadata, classification, features) {
|
|
135
|
+
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates);
|
|
354
136
|
const stickyKey = this.resolveStickyKey(metadata);
|
|
355
137
|
const attempted = [];
|
|
356
|
-
|
|
138
|
+
const visitedRoutes = new Set();
|
|
139
|
+
const fallbackRoute = this.resolveFallbackRoute();
|
|
140
|
+
const routeQueue = this.initializeRouteQueue(candidates, fallbackRoute);
|
|
141
|
+
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
142
|
+
? Math.max(0, features.estimatedTokens)
|
|
143
|
+
: 0;
|
|
144
|
+
while (routeQueue.length) {
|
|
145
|
+
const routeName = routeQueue.shift();
|
|
146
|
+
if (visitedRoutes.has(routeName)) {
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
357
149
|
const pool = this.routing[routeName];
|
|
358
150
|
if (!Array.isArray(pool) || pool.length === 0) {
|
|
151
|
+
visitedRoutes.add(routeName);
|
|
359
152
|
attempted.push(routeName);
|
|
360
153
|
continue;
|
|
361
154
|
}
|
|
362
|
-
const
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
155
|
+
const contextResult = this.contextAdvisor.classify(pool, estimatedTokens, (key) => this.providerRegistry.get(key));
|
|
156
|
+
if (this.maybeDeferToFallback(routeName, contextResult, routeQueue, visitedRoutes, fallbackRoute)) {
|
|
157
|
+
continue;
|
|
158
|
+
}
|
|
159
|
+
visitedRoutes.add(routeName);
|
|
160
|
+
const prioritizedPools = this.buildContextCandidatePools(contextResult);
|
|
161
|
+
for (const candidatePool of prioritizedPools) {
|
|
162
|
+
const providerKey = this.loadBalancer.select({
|
|
163
|
+
routeName,
|
|
164
|
+
candidates: candidatePool,
|
|
165
|
+
stickyKey,
|
|
166
|
+
availabilityCheck: (key) => this.healthManager.isAvailable(key)
|
|
167
|
+
});
|
|
168
|
+
if (providerKey) {
|
|
169
|
+
return { providerKey, routeUsed: routeName, pool };
|
|
170
|
+
}
|
|
370
171
|
}
|
|
371
|
-
attempted.push(routeName);
|
|
172
|
+
attempted.push(this.describeAttempt(routeName, contextResult));
|
|
372
173
|
}
|
|
373
|
-
|
|
374
|
-
throw new VirtualRouterError(`All providers unavailable for route ${failureRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: failureRoute, attempted });
|
|
174
|
+
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
375
175
|
}
|
|
376
176
|
incrementRouteStat(routeName, providerKey) {
|
|
377
177
|
if (!this.routeStats.has(routeName)) {
|
|
@@ -385,6 +185,64 @@ export class VirtualRouterEngine {
|
|
|
385
185
|
providerHealthConfig() {
|
|
386
186
|
return this.healthManager.getConfig();
|
|
387
187
|
}
|
|
188
|
+
initializeRouteQueue(candidates, fallbackRoute) {
|
|
189
|
+
const queue = Array.from(new Set(candidates));
|
|
190
|
+
if (fallbackRoute && !queue.includes(fallbackRoute)) {
|
|
191
|
+
queue.push(fallbackRoute);
|
|
192
|
+
}
|
|
193
|
+
return queue;
|
|
194
|
+
}
|
|
195
|
+
resolveFallbackRoute() {
|
|
196
|
+
const candidate = this.contextRouting?.fallbackRoute;
|
|
197
|
+
if (!candidate) {
|
|
198
|
+
return undefined;
|
|
199
|
+
}
|
|
200
|
+
const pool = this.routing[candidate];
|
|
201
|
+
if (!Array.isArray(pool) || pool.length === 0) {
|
|
202
|
+
return undefined;
|
|
203
|
+
}
|
|
204
|
+
return candidate;
|
|
205
|
+
}
|
|
206
|
+
maybeDeferToFallback(routeName, contextResult, queue, visited, fallbackRoute) {
|
|
207
|
+
if (!fallbackRoute || fallbackRoute === routeName || visited.has(fallbackRoute)) {
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
if (!this.contextAdvisor.prefersFallback(contextResult)) {
|
|
211
|
+
return false;
|
|
212
|
+
}
|
|
213
|
+
const fallbackPool = this.routing[fallbackRoute];
|
|
214
|
+
if (!Array.isArray(fallbackPool) || fallbackPool.length === 0) {
|
|
215
|
+
return false;
|
|
216
|
+
}
|
|
217
|
+
queue.unshift(routeName);
|
|
218
|
+
queue.unshift(fallbackRoute);
|
|
219
|
+
return true;
|
|
220
|
+
}
|
|
221
|
+
buildContextCandidatePools(result) {
|
|
222
|
+
const ordered = [];
|
|
223
|
+
if (result.safe.length) {
|
|
224
|
+
ordered.push(result.safe);
|
|
225
|
+
}
|
|
226
|
+
if (result.risky.length) {
|
|
227
|
+
ordered.push(result.risky);
|
|
228
|
+
}
|
|
229
|
+
if (result.overflow.length && this.contextAdvisor.allowsOverflow()) {
|
|
230
|
+
ordered.push(result.overflow);
|
|
231
|
+
}
|
|
232
|
+
return ordered;
|
|
233
|
+
}
|
|
234
|
+
describeAttempt(routeName, result) {
|
|
235
|
+
if (result.safe.length > 0) {
|
|
236
|
+
return `${routeName}:health`;
|
|
237
|
+
}
|
|
238
|
+
if (result.risky.length > 0) {
|
|
239
|
+
return `${routeName}:context_risky`;
|
|
240
|
+
}
|
|
241
|
+
if (result.overflow.length > 0) {
|
|
242
|
+
return `${routeName}:context_overflow`;
|
|
243
|
+
}
|
|
244
|
+
return routeName;
|
|
245
|
+
}
|
|
388
246
|
resolveStickyKey(metadata) {
|
|
389
247
|
const resume = metadata.responsesResume;
|
|
390
248
|
if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
|
|
@@ -437,13 +295,6 @@ export class VirtualRouterEngine {
|
|
|
437
295
|
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
438
296
|
reason = 'compatibility';
|
|
439
297
|
}
|
|
440
|
-
const signature = this.buildErrorSignature(code, statusCode, event.message);
|
|
441
|
-
const streak = this.bumpProviderErrorStreak(providerKey, signature);
|
|
442
|
-
if (streak >= ERROR_STREAK_THRESHOLD) {
|
|
443
|
-
fatal = true;
|
|
444
|
-
reason = reason === 'unknown' ? 'repeated_error' : `${reason}|repeated`;
|
|
445
|
-
cooldownOverrideMs = Math.max(this.providerHealthConfig().fatalCooldownMs ?? 5 * 60_000, 5 * 60_000);
|
|
446
|
-
}
|
|
447
298
|
return {
|
|
448
299
|
providerKey,
|
|
449
300
|
routeName,
|
|
@@ -507,59 +358,6 @@ export class VirtualRouterEngine {
|
|
|
507
358
|
}
|
|
508
359
|
return filtered.length ? filtered : [DEFAULT_ROUTE];
|
|
509
360
|
}
|
|
510
|
-
ensureConfiguredClassification(classification) {
|
|
511
|
-
const normalizedRoute = this.normalizeRouteName(classification.routeName);
|
|
512
|
-
const normalizedCandidates = this.normalizeCandidateList(normalizedRoute, classification.candidates);
|
|
513
|
-
const fallback = normalizedRoute === DEFAULT_ROUTE ? true : classification.fallback;
|
|
514
|
-
return {
|
|
515
|
-
...classification,
|
|
516
|
-
routeName: normalizedRoute,
|
|
517
|
-
fallback,
|
|
518
|
-
candidates: normalizedCandidates
|
|
519
|
-
};
|
|
520
|
-
}
|
|
521
|
-
normalizeCandidateList(primaryRoute, rawCandidates) {
|
|
522
|
-
const base = rawCandidates && rawCandidates.length ? rawCandidates : [primaryRoute];
|
|
523
|
-
const deduped = [];
|
|
524
|
-
for (const routeName of base) {
|
|
525
|
-
if (!routeName) {
|
|
526
|
-
continue;
|
|
527
|
-
}
|
|
528
|
-
if (!this.isRouteConfigured(routeName)) {
|
|
529
|
-
continue;
|
|
530
|
-
}
|
|
531
|
-
if (!deduped.includes(routeName)) {
|
|
532
|
-
deduped.push(routeName);
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
if (!deduped.includes(primaryRoute) && this.isRouteConfigured(primaryRoute)) {
|
|
536
|
-
deduped.push(primaryRoute);
|
|
537
|
-
}
|
|
538
|
-
if (!deduped.includes(DEFAULT_ROUTE) && this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
539
|
-
deduped.push(DEFAULT_ROUTE);
|
|
540
|
-
}
|
|
541
|
-
if (!deduped.length && this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
542
|
-
deduped.push(DEFAULT_ROUTE);
|
|
543
|
-
}
|
|
544
|
-
return this.sortByPriority(deduped);
|
|
545
|
-
}
|
|
546
|
-
normalizeRouteName(routeName) {
|
|
547
|
-
if (routeName && this.isRouteConfigured(routeName)) {
|
|
548
|
-
return routeName;
|
|
549
|
-
}
|
|
550
|
-
if (this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
551
|
-
return DEFAULT_ROUTE;
|
|
552
|
-
}
|
|
553
|
-
const firstConfigured = Object.keys(this.routing).find((key) => this.isRouteConfigured(key));
|
|
554
|
-
return firstConfigured || DEFAULT_ROUTE;
|
|
555
|
-
}
|
|
556
|
-
isRouteConfigured(routeName) {
|
|
557
|
-
if (!routeName) {
|
|
558
|
-
return false;
|
|
559
|
-
}
|
|
560
|
-
const pool = this.routing[routeName];
|
|
561
|
-
return Array.isArray(pool) && pool.length > 0;
|
|
562
|
-
}
|
|
563
361
|
sortByPriority(routeNames) {
|
|
564
362
|
return [...routeNames].sort((a, b) => this.routeWeight(a) - this.routeWeight(b));
|
|
565
363
|
}
|
|
@@ -567,89 +365,96 @@ export class VirtualRouterEngine {
|
|
|
567
365
|
const idx = ROUTE_PRIORITY.indexOf(routeName);
|
|
568
366
|
return idx >= 0 ? idx : ROUTE_PRIORITY.length;
|
|
569
367
|
}
|
|
570
|
-
buildHitReason(routeUsed, classification, features
|
|
368
|
+
buildHitReason(routeUsed, providerKey, classification, features) {
|
|
571
369
|
const reasoning = classification.reasoning || '';
|
|
572
370
|
const primary = reasoning.split('|')[0] || '';
|
|
573
|
-
const
|
|
574
|
-
const
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
return
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
}
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
371
|
+
const commandDetail = features.lastAssistantToolLabel;
|
|
372
|
+
const base = (() => {
|
|
373
|
+
if (routeUsed === 'tools') {
|
|
374
|
+
return this.decorateWithDetail(primary || 'tools', primary, commandDetail);
|
|
375
|
+
}
|
|
376
|
+
if (routeUsed === 'thinking') {
|
|
377
|
+
return this.decorateWithDetail(primary || 'thinking', primary, commandDetail);
|
|
378
|
+
}
|
|
379
|
+
if (routeUsed === 'coding') {
|
|
380
|
+
return this.decorateWithDetail(primary || 'coding', primary, commandDetail);
|
|
381
|
+
}
|
|
382
|
+
if (routeUsed === 'websearch') {
|
|
383
|
+
return this.decorateWithDetail(primary || 'websearch', primary, commandDetail);
|
|
384
|
+
}
|
|
385
|
+
if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
|
|
386
|
+
return primary || 'fallback:default';
|
|
387
|
+
}
|
|
388
|
+
if (primary) {
|
|
389
|
+
return primary;
|
|
390
|
+
}
|
|
391
|
+
return routeUsed ? `route:${routeUsed}` : 'route:unknown';
|
|
392
|
+
})();
|
|
393
|
+
const contextDetail = this.describeContextUsage(providerKey, features.estimatedTokens);
|
|
394
|
+
if (contextDetail) {
|
|
395
|
+
return `${base}|context:${contextDetail}`;
|
|
396
|
+
}
|
|
397
|
+
return base;
|
|
398
|
+
}
|
|
399
|
+
decorateWithDetail(baseLabel, primaryReason, detail) {
|
|
400
|
+
const normalizedDetail = detail && detail.trim();
|
|
401
|
+
if (!normalizedDetail) {
|
|
402
|
+
return primaryReason || baseLabel;
|
|
403
|
+
}
|
|
404
|
+
if (primaryReason) {
|
|
405
|
+
return `${primaryReason}(${normalizedDetail})`;
|
|
406
|
+
}
|
|
407
|
+
return `${baseLabel}(${normalizedDetail})`;
|
|
408
|
+
}
|
|
409
|
+
formatVirtualRouterHit(routeName, providerKey, modelId, hitReason) {
|
|
410
|
+
try {
|
|
411
|
+
const prefixColor = '\x1b[38;5;208m';
|
|
412
|
+
const reset = '\x1b[0m';
|
|
413
|
+
const routeColor = this.resolveRouteColor(routeName);
|
|
414
|
+
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
415
|
+
const targetLabel = `${routeName} -> ${providerKey}${modelId ? '.' + modelId : ''}`;
|
|
416
|
+
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
417
|
+
return `${prefix} ${routeColor}${targetLabel}${reasonLabel}${reset}`;
|
|
418
|
+
}
|
|
419
|
+
catch {
|
|
420
|
+
return `[virtual-router-hit] ${routeName} -> ${providerKey}${modelId ? '.' + modelId : ''}${hitReason ? ` reason=${hitReason}` : ''}`;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
resolveRouteColor(routeName) {
|
|
424
|
+
const reset = '\x1b[0m';
|
|
425
|
+
const map = {
|
|
426
|
+
tools: '\x1b[38;5;214m',
|
|
427
|
+
thinking: '\x1b[34m',
|
|
428
|
+
coding: '\x1b[35m',
|
|
429
|
+
longcontext: '\x1b[38;5;141m',
|
|
430
|
+
websearch: '\x1b[32m',
|
|
431
|
+
vision: '\x1b[38;5;207m',
|
|
432
|
+
background: '\x1b[90m'
|
|
433
|
+
};
|
|
434
|
+
return map[routeName] ?? '\x1b[36m';
|
|
630
435
|
}
|
|
631
|
-
|
|
632
|
-
if (!
|
|
633
|
-
return
|
|
436
|
+
describeContextUsage(providerKey, estimatedTokens) {
|
|
437
|
+
if (typeof estimatedTokens !== 'number' || !Number.isFinite(estimatedTokens) || estimatedTokens <= 0) {
|
|
438
|
+
return undefined;
|
|
634
439
|
}
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
440
|
+
let limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
441
|
+
try {
|
|
442
|
+
const profile = this.providerRegistry.get(providerKey);
|
|
443
|
+
if (profile?.maxContextTokens && Number.isFinite(profile.maxContextTokens)) {
|
|
444
|
+
limit = profile.maxContextTokens;
|
|
445
|
+
}
|
|
640
446
|
}
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
return false;
|
|
447
|
+
catch {
|
|
448
|
+
limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
644
449
|
}
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
return false;
|
|
450
|
+
if (!limit || limit <= 0) {
|
|
451
|
+
return undefined;
|
|
648
452
|
}
|
|
649
|
-
const
|
|
650
|
-
|
|
651
|
-
|
|
453
|
+
const ratio = estimatedTokens / limit;
|
|
454
|
+
const threshold = this.contextRouting?.warnRatio ?? 0.9;
|
|
455
|
+
if (ratio < threshold) {
|
|
456
|
+
return undefined;
|
|
652
457
|
}
|
|
653
|
-
return
|
|
458
|
+
return `${ratio.toFixed(2)}/${Math.round(limit)}`;
|
|
654
459
|
}
|
|
655
460
|
}
|